package it.unimi.di.law.bubing;

import com.google.common.base.Charsets;
import com.google.common.collect.Iterators;
import com.google.common.primitives.Ints;
import it.unimi.di.law.bubing.parser.Parser;
import it.unimi.di.law.bubing.spam.SpamDetector;
import it.unimi.di.law.bubing.store.Store;
import it.unimi.di.law.bubing.util.BURL;
import it.unimi.di.law.warc.filters.Filter;
import it.unimi.di.law.warc.filters.Filters;
import it.unimi.di.law.warc.filters.URIResponse;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.io.LineIterator;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.lang.ObjectParser;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.lang.reflect.Field;
import java.lang.reflect.InvocationTargetException;
import java.net.InetAddress;
import java.net.URI;
import java.net.URL;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.TreeMap;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.regex.Pattern;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.http.conn.DnsResolver;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/bubing/RuntimeConfiguration.class */
public class RuntimeConfiguration {
    // Constant flag; it is not read anywhere inside this class.
    public static final boolean FETCH_ROBOTS = true;

    // --- Values copied one-to-one from the StartupConfiguration passed to the constructor. ---
    // Fields declared volatile are non-final; nothing in this class reassigns them after
    // construction (presumably they are tuned externally at runtime -- confirm against callers).
    public final String name;
    public final String group;
    public final int weight;
    public final int maxUrlsPerSchemeAuthority;
    public volatile int fetchingThreads;
    public volatile int parsingThreads;
    public volatile int dnsThreads;
    public volatile Filter<URI> fetchFilter;
    public volatile Filter<URI> scheduleFilter;
    public volatile Filter<URIResponse> parseFilter;
    /** The constructor rejects a configuration where {@link #sieveSize} is 0 and this is not {@code Filters.FALSE}. */
    public volatile Filter<URIResponse> followFilter;
    public volatile Filter<URIResponse> storeFilter;
    public volatile long keepAliveTime;
    public volatile long schemeAuthorityDelay;
    public volatile long ipDelay;
    public volatile long maxUrls;
    public final double bloomFilterPrecision;
    /**
     * Concatenation of one iterator per non-empty seed spec. A spec starting with {@code file:}
     * is read lazily, one URL per line; any other spec is a single URL. Elements are produced by
     * {@link #handleSeedURL(MutableString)} and are therefore {@code null} for rejected URLs.
     */
    public final Iterator<URI> seed;
    /** Blacklisted IPv4 addresses, each packed into an int with {@code Ints.fromByteArray}. */
    public final IntOpenHashSet blackListedIPv4Addresses;
    /**
     * Lock associated with {@link #blackListedIPv4Addresses}. It is created by the constructor and
     * never acquired inside this class (presumably callers take it -- confirm).
     */
    public final ReadWriteLock blackListedIPv4Lock;
    /** {@code String.hashCode()} values of the trimmed blacklisted host names. */
    public final IntOpenHashSet blackListedHostHashes;
    /**
     * Lock associated with {@link #blackListedHostHashes}. It is created by the constructor and
     * never acquired inside this class (presumably callers take it -- confirm).
     */
    public final ReadWriteLock blackListedHostHashesLock;
    public volatile int socketTimeout;
    public volatile int connectionTimeout;
    public final int fetchDataBufferByteSize;
    public final String proxyHost;
    public final int proxyPort;
    public final String cookiePolicy;
    public final int cookieMaxByteSize;
    public final String userAgent;
    public final String userAgentFrom;
    public volatile long robotsExpiration;
    /** {@code new File(startupConfiguration.rootDir)}. */
    public final File rootDir;
    // The four directories below are obtained from StartupConfiguration.subDir(rootDir, <name>).
    public final File storeDir;
    public final File responseCacheDir;
    public final File sieveDir;
    public final File frontierDir;
    public volatile int responseBodyMaxByteSize;
    public final String digestAlgorithm;
    /** Initial value of {@link #paused}. */
    public final boolean startPaused;
    public final Class<? extends Store> storeClass;
    public volatile long workbenchMaxByteSize;
    public final long virtualizerMaxByteSize;
    public volatile long urlCacheMaxByteSize;
    // The constructor clears the three low-order bits of the next three values (mask -8).
    public final int sieveSize;
    public final int sieveStoreIOBufferByteSize;
    public final int sieveAuxFileIOBufferByteSize;
    public final int dnsCacheMaxSize;
    public final long dnsPositiveTtl;
    public final long dnsNegativeTtl;
    public final boolean crawlIsNew;
    /** Deserialised with {@code BinIO.loadObject} from the configured spam-detector URI; {@code null} when that URI is empty. */
    public final SpamDetector<?> spamDetector;
    public final int spamDetectionThreshold;
    public final int spamDetectionPeriodicity;
    /** Built by {@link #parsersFromSpecs(String[])} from the configured parser specs. */
    public final ArrayList<Parser<?>> parsers;
    /** While {@code true}, {@link #ensureNotPaused()} blocks its caller; initialised from {@link #startPaused}. */
    public volatile boolean paused;
    /** Never assigned inside this class, so it starts out {@code false}. */
    public volatile boolean stopping;
    /** Instantiated reflectively, through the no-argument constructor of the configured resolver class. */
    public final DnsResolver dnsResolver;
    private static final Logger LOGGER = LoggerFactory.getLogger((Class<?>) RuntimeConfiguration.class);
    /**
     * Accepts either colon-separated groups of hexadecimal digits, or dot-separated groups that
     * are each a decimal number or a {@code 0x}-prefixed hexadecimal number. Used to pre-validate
     * a string before it is handed to {@code InetAddress.getByName}.
     */
    public static final Pattern DOTTED_ADDRESS = Pattern.compile("(([0-9A-Fa-f]+[:])*[0-9A-Fa-f]+)|((((0x[0-9A-Fa-f]+)|([0-9]+))\\.)*((0x[0-9A-Fa-f]+)|([0-9]+)))");

    /* JADX INFO: Access modifiers changed from: private */
    /**
     * Turns the textual form of a seed into a URI, rejecting unusable seeds.
     *
     * <p>A seed is rejected, with an error logged, when {@code BURL.parse} returns {@code null}
     * or when the parsed URI is not absolute.
     *
     * @param mutableString the textual seed URL.
     * @return the parsed absolute URI, or {@code null} if the seed was rejected.
     */
    public static URI handleSeedURL(MutableString mutableString) {
        final URI uri = BURL.parse(mutableString);
        if (uri != null && uri.isAbsolute()) {
            return uri;
        }
        // Only the two rejection cases reach this point; pick the matching diagnostic.
        LOGGER.error(uri == null
                ? "The seed URL " + ((Object) mutableString) + " is malformed"
                : "The seed URL " + ((Object) mutableString) + " is relative");
        return null;
    }

    /**
     * Converts the textual form of an IPv4 address into its packed 32-bit representation.
     *
     * <p>The string must match {@link #DOTTED_ADDRESS} before it is passed to
     * {@code InetAddress.getByName}; a resolved address longer than four bytes is rejected.
     *
     * @param str the textual address.
     * @return the four address bytes packed into an int by {@code Ints.fromByteArray}.
     * @throws ConfigurationException if the string fails the syntax check, cannot be resolved,
     *         or resolves to an address longer than four bytes.
     */
    private int handleIPv4(String str) throws ConfigurationException {
        if (!DOTTED_ADDRESS.matcher(str).matches()) {
            throw new ConfigurationException("Malformed IPv4 " + str + " for blacklisting");
        }
        final byte[] octets;
        try {
            octets = InetAddress.getByName(str).getAddress();
        } catch (UnknownHostException unresolved) {
            throw new ConfigurationException("Malformed IPv4 " + str + " for blacklisting", unresolved);
        }
        if (octets.length > 4) {
            // Reported exactly like a resolution failure, with a synthetic cause.
            throw new ConfigurationException("Malformed IPv4 " + str + " for blacklisting", new UnknownHostException("Not IPv4"));
        }
        return Ints.fromByteArray(octets);
    }

    /**
     * Adds one IPv4 address, or every address listed in a file, to
     * {@link #blackListedIPv4Addresses}.
     *
     * <p>An empty spec is ignored. A spec starting with {@code file:} names a file (the text after
     * the prefix) that is read as ISO-8859-1, one address per line, skipping empty lines. Any
     * other spec is treated as a single address. This method does not acquire
     * {@link #blackListedIPv4Lock}.
     *
     * @param str an address, or {@code file:} followed by a file name.
     * @throws ConfigurationException if an address is malformed; addresses read before the
     *         offending one remain in the set.
     * @throws FileNotFoundException if the named file cannot be opened.
     */
    public void addBlackListedIPv4(String str) throws ConfigurationException, FileNotFoundException {
        if (str.length() == 0) {
            return;
        }
        if (!str.startsWith("file:")) {
            this.blackListedIPv4Addresses.add(handleIPv4(str));
            return;
        }
        final String fileName = str.substring(5);
        final FileInputStream fileStream = new FileInputStream(fileName);
        try {
            final LineIterator lines = new LineIterator(new FastBufferedReader(new InputStreamReader(fileStream, Charsets.ISO_8859_1)));
            while (lines.hasNext()) {
                final MutableString line = lines.next();
                if (line.length() > 0) {
                    this.blackListedIPv4Addresses.add(handleIPv4(line.toString()));
                }
            }
        } finally {
            // Release the file descriptor on every path, including a malformed line. A failure
            // to close is only logged, because the signature does not allow a generic IOException.
            try {
                fileStream.close();
            } catch (IOException e) {
                LOGGER.warn("Could not close IPv4 blacklist file " + fileName, e);
            }
        }
    }

    /**
     * Adds the hash of one host name, or of every host name listed in a file, to
     * {@link #blackListedHostHashes}.
     *
     * <p>An empty spec is ignored. A spec starting with {@code file:} names a file (the text after
     * the prefix) that is read as ISO-8859-1, one host per line. Any other spec is treated as a
     * single host. Hosts are trimmed and stored as their {@code String.hashCode()}; entries that
     * are blank after trimming are skipped. This method does not acquire
     * {@link #blackListedHostHashesLock}.
     *
     * @param str a host name, or {@code file:} followed by a file name.
     * @throws ConfigurationException declared for symmetry with
     *         {@link #addBlackListedIPv4(String)}; this implementation does not throw it.
     * @throws FileNotFoundException if the named file cannot be opened.
     */
    public void addBlackListedHost(String str) throws ConfigurationException, FileNotFoundException {
        if (str.length() == 0) {
            return;
        }
        if (!str.startsWith("file:")) {
            addHostHash(str);
            return;
        }
        final String fileName = str.substring(5);
        final FileInputStream fileStream = new FileInputStream(fileName);
        try {
            final LineIterator lines = new LineIterator(new FastBufferedReader(new InputStreamReader(fileStream, Charsets.ISO_8859_1)));
            while (lines.hasNext()) {
                addHostHash(lines.next().toString());
            }
        } finally {
            // Release the file descriptor on every path. A failure to close is only logged,
            // because the signature does not allow a generic IOException.
            try {
                fileStream.close();
            } catch (IOException e) {
                LOGGER.warn("Could not close host blacklist file " + fileName, e);
            }
        }
    }

    /** Records the hash of the trimmed host, ignoring entries that are blank after trimming. */
    private void addHostHash(String rawHost) {
        final String host = rawHost.trim();
        // A blank entry would insert hash 0 (the hash of ""), which matches no real host name
        // on purpose and could only produce accidental collisions.
        if (host.length() > 0) {
            this.blackListedHostHashes.add(host.hashCode());
        }
    }

    /**
     * Builds a runtime configuration from a startup configuration.
     *
     * <p>Most values are copied verbatim. In addition the constructor:
     * <ul>
     * <li>clears the three low-order bits of the sieve size and of the two sieve buffer sizes;
     * <li>instantiates the DNS resolver through the no-argument constructor of the configured class;
     * <li>deserialises the spam detector from the configured URI, if that URI is not empty;
     * <li>builds the seed iterator (see {@link #seed});
     * <li>loads the IPv4 and host blacklists;
     * <li>resolves the working directories relative to the root directory;
     * <li>instantiates the parsers from their specs.
     * </ul>
     *
     * @param startupConfiguration the configuration to read from.
     * @throws ConfigurationException if the DNS resolver cannot be instantiated, a blacklist entry
     *         is malformed, a reflective operation fails while loading the spam detector or the
     *         parsers, or the sieve size is 0 while the follow filter is not {@code Filters.FALSE}.
     * @throws IOException if a seed file, a blacklist file or the spam detector cannot be read.
     */
    public RuntimeConfiguration(StartupConfiguration startupConfiguration) throws ConfigurationException, IOException {
        try {
            this.crawlIsNew = startupConfiguration.crawlIsNew;
            this.name = startupConfiguration.name;
            this.group = startupConfiguration.group;
            this.weight = startupConfiguration.weight;
            this.maxUrlsPerSchemeAuthority = startupConfiguration.maxUrlsPerSchemeAuthority;
            this.fetchingThreads = startupConfiguration.fetchingThreads;
            this.parsingThreads = startupConfiguration.parsingThreads;
            this.dnsThreads = startupConfiguration.dnsThreads;
            this.fetchFilter = startupConfiguration.fetchFilter;
            this.scheduleFilter = startupConfiguration.scheduleFilter;
            this.parseFilter = startupConfiguration.parseFilter;
            this.followFilter = startupConfiguration.followFilter;
            this.storeFilter = startupConfiguration.storeFilter;
            this.keepAliveTime = startupConfiguration.keepAliveTime;
            this.schemeAuthorityDelay = startupConfiguration.schemeAuthorityDelay;
            this.ipDelay = startupConfiguration.ipDelay;
            this.maxUrls = startupConfiguration.maxUrls;
            this.bloomFilterPrecision = startupConfiguration.bloomFilterPrecision;
            this.startPaused = startupConfiguration.startPaused;
            this.storeClass = startupConfiguration.storeClass;
            this.workbenchMaxByteSize = startupConfiguration.workbenchMaxByteSize;
            this.virtualizerMaxByteSize = startupConfiguration.virtualizerMaxByteSize;
            this.urlCacheMaxByteSize = startupConfiguration.urlCacheMaxByteSize;
            // ~7 clears the three low-order bits of each size.
            this.sieveSize = startupConfiguration.sieveSize & ~7;
            this.sieveStoreIOBufferByteSize = startupConfiguration.sieveStoreIOBufferByteSize & ~7;
            // NOTE(review): this used to be derived from sieveStoreIOBufferByteSize, which looks
            // like a copy-paste slip; it assumes StartupConfiguration declares a field with the
            // same name as this one -- confirm.
            this.sieveAuxFileIOBufferByteSize = startupConfiguration.sieveAuxFileIOBufferByteSize & ~7;
            this.dnsCacheMaxSize = startupConfiguration.dnsCacheMaxSize;
            this.dnsPositiveTtl = startupConfiguration.dnsPositiveTtl;
            this.dnsNegativeTtl = startupConfiguration.dnsNegativeTtl;

            // Only the resolver instantiation gets the catch-all handler. Keeping it this narrow
            // lets IOException and ConfigurationException raised further down reach the caller
            // unwrapped, and keeps the reflective handlers at the bottom reachable.
            try {
                this.dnsResolver = startupConfiguration.dnsResolverClass.getConstructor().newInstance();
            } catch (Exception e) {
                throw new ConfigurationException(e.getMessage(), e);
            }

            if (startupConfiguration.spamDetectorUri.length() > 0) {
                // try-with-resources closes the stream even if deserialisation fails.
                try (InputStream detectorStream = new URL(startupConfiguration.spamDetectorUri).openStream()) {
                    this.spamDetector = (SpamDetector<?>) BinIO.loadObject(detectorStream);
                }
            } else {
                this.spamDetector = null;
            }
            this.spamDetectionThreshold = startupConfiguration.spamDetectionThreshold;
            this.spamDetectionPeriodicity = startupConfiguration.spamDetectionPeriodicity;

            final ArrayList<Iterator<URI>> seedIterators = new ArrayList<>();
            for (String spec : startupConfiguration.seed) {
                if (spec.length() == 0) {
                    continue;
                }
                if (spec.startsWith("file:")) {
                    seedIterators.add(seedsFromFile(spec.substring(5)));
                } else {
                    seedIterators.add(Iterators.singletonIterator(handleSeedURL(new MutableString(spec))));
                }
            }

            this.blackListedIPv4Addresses = new IntOpenHashSet();
            for (String spec : startupConfiguration.blackListedIPv4Addresses) {
                addBlackListedIPv4(spec);
            }
            this.blackListedIPv4Lock = new ReentrantReadWriteLock();
            this.blackListedHostHashes = new IntOpenHashSet();
            for (String spec : startupConfiguration.blackListedHosts) {
                addBlackListedHost(spec);
            }
            this.blackListedHostHashesLock = new ReentrantReadWriteLock();

            this.seed = Iterators.concat(seedIterators.iterator());
            this.socketTimeout = startupConfiguration.socketTimeout;
            this.connectionTimeout = startupConfiguration.connectionTimeout;
            this.rootDir = new File(startupConfiguration.rootDir);
            this.storeDir = StartupConfiguration.subDir(startupConfiguration.rootDir, startupConfiguration.storeDir);
            this.responseCacheDir = StartupConfiguration.subDir(startupConfiguration.rootDir, startupConfiguration.responseCacheDir);
            this.sieveDir = StartupConfiguration.subDir(startupConfiguration.rootDir, startupConfiguration.sieveDir);
            this.frontierDir = StartupConfiguration.subDir(startupConfiguration.rootDir, startupConfiguration.frontierDir);
            this.fetchDataBufferByteSize = startupConfiguration.fetchDataBufferByteSize;
            this.proxyHost = startupConfiguration.proxyHost;
            this.proxyPort = startupConfiguration.proxyPort;
            this.cookiePolicy = startupConfiguration.cookiePolicy;
            this.cookieMaxByteSize = startupConfiguration.cookieMaxByteSize;
            this.userAgent = startupConfiguration.userAgent;
            this.userAgentFrom = startupConfiguration.userAgentFrom;
            this.robotsExpiration = startupConfiguration.robotsExpiration;
            this.responseBodyMaxByteSize = startupConfiguration.responseBodyMaxByteSize;
            this.digestAlgorithm = startupConfiguration.digestAlgorithm;
            this.parsers = parsersFromSpecs(startupConfiguration.parserSpec);
            this.paused = this.startPaused;

            if (this.sieveSize == 0 && this.followFilter != Filters.FALSE) {
                throw new ConfigurationException("Without a sieve you must specify a FALSE follow filter");
            }
        } catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException
                | InstantiationException | NoSuchMethodException | InvocationTargetException e) {
            // Raised while deserialising the spam detector or instantiating the parsers.
            throw new ConfigurationException(e);
        }
    }

    /**
     * Returns an iterator over the seed URLs listed, one per line, in an ISO-8859-1 file.
     *
     * <p>Lines are read lazily as the iterator is consumed, so the underlying stream is left open
     * on purpose and is not closed by this class. Each line goes through
     * {@link #handleSeedURL(MutableString)}, so rejected lines yield {@code null} elements.
     *
     * @param fileName the name of the seed file.
     * @return a read-only iterator over the parsed seeds.
     * @throws FileNotFoundException if the file cannot be opened.
     */
    private static Iterator<URI> seedsFromFile(String fileName) throws FileNotFoundException {
        final LineIterator lines = new LineIterator(new FastBufferedReader(new InputStreamReader(new FileInputStream(fileName), Charsets.ISO_8859_1)));
        return new Iterator<URI>() {
            @Override
            public boolean hasNext() {
                return lines.hasNext();
            }

            @Override
            public URI next() {
                return RuntimeConfiguration.handleSeedURL(lines.next());
            }

            @Override
            public void remove() {
                throw new UnsupportedOperationException();
            }
        };
    }

    /**
     * Blocks the calling thread for as long as {@link #paused} is {@code true}.
     *
     * <p>The flag is first read without locking, so the common non-paused case takes no monitor.
     * Otherwise the caller waits on this object's monitor and rechecks the flag after every
     * wake-up. Nothing in this class calls {@code notify}; whoever clears the flag has to notify
     * this object for waiters to resume.
     *
     * @throws InterruptedException if the thread is interrupted while waiting.
     */
    public void ensureNotPaused() throws InterruptedException {
        if (!this.paused) {
            return;
        }
        synchronized (this) {
            // The flag may have been cleared between the unlocked check and taking the monitor.
            if (!this.paused) {
                return;
            }
            do {
                LOGGER.info("Detected pause--going to wait...");
                this.wait();
            } while (this.paused);
            LOGGER.info("Pause terminated.");
        }
    }

    /**
     * Returns a name-sorted dump of the instance fields declared by the runtime class of this
     * object, in the {@code {name=value, ...}} format of {@code TreeMap.toString()}.
     *
     * <p>Static fields and fields whose declared type is a {@link ReadWriteLock} are left out.
     *
     * @return the textual dump.
     */
    @Override
    public String toString() {
        final TreeMap<String, Object> values = new TreeMap<>();
        for (Field field : getClass().getDeclaredFields()) {
            if (java.lang.reflect.Modifier.isStatic(field.getModifiers())) {
                continue;
            }
            // The declared type of the field is what matters: field.getClass() is always
            // java.lang.reflect.Field and would never identify a lock.
            if (ReadWriteLock.class.isAssignableFrom(field.getType())) {
                continue;
            }
            try {
                values.put(field.getName(), field.get(this));
            } catch (IllegalAccessException e) {
                values.put(field.getName(), "<THIS SHOULD NOT HAPPEN>");
            }
        }
        return values.toString();
    }

    /* JADX WARN: Multi-variable type inference failed */
    /**
     * Instantiates one parser per spec, preserving the order of the specs.
     *
     * <p>Each spec is handed to {@code ObjectParser.fromSpec} together with the package
     * {@code it.unimi.di.law.bubing.parser}.
     *
     * @param strArr the parser specs.
     * @return a new list holding the instantiated parsers, one per spec.
     * @throws IllegalArgumentException propagated from {@code ObjectParser.fromSpec}.
     * @throws ClassNotFoundException propagated from {@code ObjectParser.fromSpec}.
     * @throws IllegalAccessException propagated from {@code ObjectParser.fromSpec}.
     * @throws InvocationTargetException propagated from {@code ObjectParser.fromSpec}.
     * @throws InstantiationException propagated from {@code ObjectParser.fromSpec}.
     * @throws NoSuchMethodException propagated from {@code ObjectParser.fromSpec}.
     * @throws IOException propagated from {@code ObjectParser.fromSpec}.
     */
    public static ArrayList<Parser<?>> parsersFromSpecs(String[] strArr) throws IllegalArgumentException, ClassNotFoundException, IllegalAccessException, InvocationTargetException, InstantiationException, NoSuchMethodException, IOException {
        final ArrayList<Parser<?>> parsers = new ArrayList<>();
        for (int i = 0; i < strArr.length; i++) {
            final Parser<?> parser = ObjectParser.fromSpec(strArr[i], Parser.class, new String[]{"it.unimi.di.law.bubing.parser"});
            parsers.add(parser);
        }
        return parsers;
    }
}
