package it.unimi.dsi.jai4j.jgroups.example;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.di.law.bubing.Agent;
import it.unimi.dsi.fastutil.objects.Object2ObjectMap;
import it.unimi.dsi.fastutil.objects.Object2ObjectMaps;
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectCollection;
import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
import it.unimi.dsi.fastutil.objects.ObjectSet;
import it.unimi.dsi.fastutil.objects.ObjectSets;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.jai4j.ConsistentHashAssignmentStrategy;
import it.unimi.dsi.jai4j.JobListener;
import it.unimi.dsi.jai4j.dropping.DiscardMessagesStrategy;
import it.unimi.dsi.jai4j.dropping.DroppingThreadFactory;
import it.unimi.dsi.jai4j.dropping.ImmediateDroppingThreadFactory;
import it.unimi.dsi.jai4j.dropping.PendingMessagesStrategy;
import it.unimi.dsi.jai4j.jgroups.JGroupsJobManager;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.parser.BulletParser;
import it.unimi.dsi.parser.callback.LinkExtractor;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.charset.StandardCharsets;
import java.util.Iterator;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import javax.management.MalformedObjectNameException;
import org.jgroups.JChannel;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.softee.management.annotation.Description;
import org.softee.management.annotation.MBean;
import org.softee.management.annotation.ManagedAttribute;
import org.softee.management.annotation.ManagedOperation;
import org.softee.management.exception.ManagementException;

/**
 * An example crawler implemented as a {@link JGroupsJobManager} whose jobs are URIs.
 *
 * <p>Each worker thread repeatedly takes a URI from a local queue, downloads the page,
 * prints it to standard output and extracts its links. Every accepted link is either
 * enqueued locally or handed to {@code submit(Job)}, depending on what {@code local(Job)}
 * returns. The crawl ends when {@link #stop()} is invoked, which also happens
 * automatically once {@code maxPages} pages have been crawled.
 */
@MBean
@Description("JAI4J Crawler")
public class Crawler extends JGroupsJobManager<Crawler.Job> implements JobListener<Crawler.Job>, Runnable {
    private static final Logger LOGGER = LoggerFactory.getLogger(Crawler.class);

    /** Reading of a page stops once this many characters have been accumulated (checked before each line). */
    public static final int MAX_SIZE = 1000000;

    /** For each worker thread, the URI it is currently fetching ({@code null} while it waits for one). */
    protected final Object2ObjectMap<Thread, URI> currentCrawledURIs;

    /** URIs that have already been taken from the queue for fetching. */
    protected final ObjectSet<URI> visitedURIs;

    /** URIs currently waiting in {@link #visitQueue}; used to avoid enqueueing duplicates. */
    protected final ObjectSet<URI> enqueued;

    /** The queue of URIs waiting to be fetched by a worker thread. */
    protected final LinkedBlockingQueue<URI> visitQueue;

    /** Number of pages downloaded and parsed so far; written only through {@link #countPage()}. */
    protected volatile int count;

    /** Set by {@link #stop()}; worker threads exit as soon as they observe it. */
    protected volatile boolean stopped;

    /** Number of crawled pages after which the crawl stops. */
    protected final int maxPages;

    /** Milliseconds each worker sleeps before taking the next URI. */
    protected final int urlDelay;

    /** Only URIs whose host ends with this string are accepted. */
    protected final String domain;

    /** The worker threads, all running {@link #run()} on this instance. */
    private final Thread[] threads;

    /** Guards the read-modify-write on {@link #count}, which {@code volatile} alone does not make atomic. */
    private final Object countLock = new Object();

    /** A job wrapping the URI to be crawled. */
    public static final class Job implements it.unimi.dsi.jai4j.Job {
        /** The URI to crawl. */
        public final URI uri;

        /**
         * Creates a job by parsing a string.
         *
         * @param str the string form of the URI.
         * @throws URISyntaxException if {@code str} is not a valid URI.
         */
        public Job(String str) throws URISyntaxException {
            this.uri = new URI(str);
        }

        /**
         * Creates a job for an existing URI.
         *
         * @param uri the URI to crawl.
         */
        public Job(URI uri) {
            this.uri = uri;
        }

        /**
         * Returns a 64-bit hash carrying the URI's 32-bit hash code in both halves.
         *
         * <p>The hash code must be widened to {@code long} <em>before</em> shifting: on an
         * {@code int} the shift distance is reduced modulo 32, so {@code hashCode() << 32}
         * leaves the value unchanged and the result would be the sign-extended 32-bit hash.
         * NOTE(review): this changes the value returned by earlier builds, so agents that
         * use it to assign jobs should all run the same version.
         *
         * @return the 64-bit hash of this job.
         */
        @Override
        public long hash64() {
            final int hash = uri.hashCode();
            return (hash & 0xFFFFFFFFL) | ((long) hash << 32);
        }

        @Override
        public String toString() {
            return uri.toString();
        }
    }

    /**
     * Creates a crawler, registers itself as job listener, creates (but does not start)
     * the worker threads and finally calls {@code register()}.
     *
     * @param address the socket address forwarded to the superclass ({@code main} builds it from the JMX host and port options).
     * @param droppingThreadFactory the dropping thread factory forwarded to the superclass.
     * @param pendingMessagesStrategy the pending-messages strategy forwarded to the superclass.
     * @param name the name of this job manager.
     * @param channel the JGroups channel forwarded to the superclass.
     * @param groupName the string forwarded to the superclass after the channel ({@code main} passes the simple class name as JGroups group name).
     * @param queue the job queue forwarded to the superclass.
     * @param numThreads the number of worker threads to create.
     * @param maxPages the number of crawled pages after which the crawl stops.
     * @param urlDelay the delay in milliseconds before each worker takes a new URI.
     * @param domain the suffix that the host of every accepted URI must have.
     */
    public Crawler(InetSocketAddress address, DroppingThreadFactory<Job> droppingThreadFactory, PendingMessagesStrategy<Job> pendingMessagesStrategy, String name, JChannel channel, String groupName, BlockingQueue<Job> queue, int numThreads, int maxPages, int urlDelay, String domain) throws NullPointerException, ManagementException, MalformedObjectNameException, MalformedURLException {
        // The literal 1 is presumably the weight of this agent -- TODO confirm against JGroupsJobManager.
        super(name, 1, address, channel, groupName, new ConsistentHashAssignmentStrategy(), queue, droppingThreadFactory, pendingMessagesStrategy);
        this.maxPages = maxPages;
        this.urlDelay = urlDelay;
        this.domain = domain;
        this.currentCrawledURIs = Object2ObjectMaps.synchronize(new Object2ObjectOpenHashMap<Thread, URI>());
        this.visitedURIs = ObjectSets.synchronize(new ObjectOpenHashSet<URI>());
        this.enqueued = ObjectSets.synchronize(new ObjectOpenHashSet<URI>());
        this.visitQueue = new LinkedBlockingQueue<>();
        setListener(this);
        this.threads = new Thread[numThreads];
        for (int i = 0; i < numThreads; i++) {
            this.threads[i] = new Thread(this, Crawler.class.getSimpleName() + " thread " + i);
        }
        register();
    }

    /**
     * Enqueues the URI of a received job, provided that it is acceptable and not already enqueued.
     *
     * @param job the received job.
     */
    @Override
    public void receive(Job job) {
        if (accept(job.uri) && this.enqueued.add(job.uri)) {
            this.visitQueue.add(job.uri);
        }
    }

    /**
     * Serializes a job as the bytes of its URI in ASCII form.
     *
     * @param job the job to serialize.
     * @return the encoded URI.
     */
    @Override
    public byte[] toByteArray(Job job) throws IllegalArgumentException {
        // An explicit charset keeps the wire format independent of the platform default.
        return job.uri.toASCIIString().getBytes(StandardCharsets.UTF_8);
    }

    /**
     * Deserializes a job from a whole byte array.
     *
     * @param bArr the encoded URI.
     * @return the corresponding job.
     * @throws IllegalArgumentException if the bytes do not form a valid URI.
     */
    @Override
    public Job fromByteArray(byte[] bArr) throws IllegalArgumentException {
        try {
            return new Job(new String(bArr, StandardCharsets.UTF_8));
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Builds a job from the string form of a URI.
     *
     * @param str the string form of the URI.
     * @return the corresponding job.
     * @throws IllegalArgumentException if {@code str} is not a valid URI.
     */
    @Override
    public Job fromString(String str) {
        try {
            return new Job(str);
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Deserializes a job from the tail of a byte array.
     *
     * @param bArr an array whose bytes from {@code i} to the end encode a URI.
     * @param i the index of the first byte to decode.
     * @return the corresponding job.
     * @throws IllegalArgumentException if the bytes do not form a valid URI.
     */
    @Override
    public Job fromByteArray(byte[] bArr, int i) throws IllegalArgumentException {
        try {
            return new Job(new String(bArr, i, bArr.length - i, StandardCharsets.UTF_8));
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
    }

    /**
     * Returns a snapshot of the URIs the worker threads are currently fetching.
     *
     * @return a fresh set; it contains {@code null} if some thread is waiting for a URI.
     */
    @ManagedAttribute
    @Description("Pages currently crawled by all threads")
    public Set<URI> getCurrentPages() {
        // The wrapper returned by Object2ObjectMaps.synchronize(map) locks on itself, so holding
        // its monitor keeps worker threads from modifying the map while the values are copied.
        synchronized (this.currentCrawledURIs) {
            return new ObjectOpenHashSet<URI>(this.currentCrawledURIs.values());
        }
    }

    /**
     * Returns the number of pages crawled so far.
     *
     * @return the current value of the page counter.
     */
    @ManagedAttribute
    @Description("Number of crawled pages")
    public int getNumberOfCrawledPages() {
        return this.count;
    }

    /** Marks the crawl as stopped and interrupts every worker thread. */
    @ManagedOperation
    @Description("Stop crawling")
    public synchronized void stop() {
        this.stopped = true;
        for (Thread thread : this.threads) {
            thread.interrupt();
        }
    }

    /**
     * Decides whether a URI should be crawled: it must be absolute, its path must be missing
     * or end with {@code .html}, {@code .htm}, {@code .php} or {@code /}, its host must end
     * with the configured domain, and it must not have been visited yet.
     *
     * @param uri the URI to check.
     * @return whether the URI passes all the checks.
     */
    private boolean accept(URI uri) {
        if (!uri.isAbsolute()) {
            return false;
        }
        final String path = uri.getPath();
        final boolean crawlablePath = path == null
                || path.endsWith(".html")
                || path.endsWith(".htm")
                || path.endsWith(".php")
                || path.endsWith("/");
        if (!crawlablePath) {
            return false;
        }
        final String host = uri.getHost();
        // NOTE(review): a plain suffix match also accepts hosts such as "notexample.com" for domain "example.com".
        return host != null && host.endsWith(this.domain) && !this.visitedURIs.contains(uri);
    }

    /** Connects the job manager, starts all worker threads and waits for them to terminate. */
    private void connectAndGo() throws IllegalStateException, IOException, IllegalArgumentException, InterruptedException {
        connect();
        for (Thread thread : this.threads) {
            thread.start();
        }
        for (Thread thread : this.threads) {
            thread.join();
        }
        LOGGER.info("{} crawled pages.", this.count);
    }

    /** Atomically increments the page counter and returns its new value. */
    private int countPage() {
        synchronized (this.countLock) {
            return ++this.count;
        }
    }

    /**
     * Opens and connects an HTTP connection to a URI.
     *
     * @param uri the URI to fetch.
     * @return the connected connection, or {@code null} if connecting failed (the failure is logged as a warning).
     */
    private HttpURLConnection connectTo(URI uri) {
        HttpURLConnection connection = null;
        try {
            connection = (HttpURLConnection) uri.toURL().openConnection();
            connection.setInstanceFollowRedirects(true);
            connection.connect();
            return connection;
        } catch (Exception e) {
            // Deliberately broad, as in the original code: besides I/O errors this covers, for
            // instance, the ClassCastException raised when the URI scheme is not HTTP.
            LOGGER.warn(e.toString(), e);
            if (connection != null) {
                connection.disconnect();
            }
            return null;
        }
    }

    /**
     * Reads the body of a connected HTTP response, line by line, stopping once {@link #MAX_SIZE}
     * characters have been accumulated. The connection is always disconnected before returning.
     *
     * @param connection a connected HTTP connection.
     * @return the page content, or {@code null} if the response code is not 200.
     * @throws IOException if reading the response fails.
     */
    private MutableString readPage(HttpURLConnection connection) throws IOException {
        try {
            if (connection.getResponseCode() != HttpURLConnection.HTTP_OK) {
                return null;
            }
            final MutableString page = new MutableString();
            final MutableString line = new MutableString();
            // NOTE(review): the body is decoded with the platform default charset, as before.
            try (FastBufferedReader reader = new FastBufferedReader(new InputStreamReader(connection.getInputStream()))) {
                while (page.length() < MAX_SIZE && reader.readLine(line) != null) {
                    page.append(line).append('\n');
                }
            }
            return page;
        } finally {
            connection.disconnect();
        }
    }

    /**
     * The body of each worker thread: sleeps for the configured delay, takes a URI from the
     * queue, downloads, prints and parses the page, then dispatches the extracted links.
     *
     * <p>The thread terminates when it is interrupted, when it observes the stop flag, when
     * it crawls the page that reaches {@code maxPages} (in which case it also stops the other
     * threads), or when an unexpected exception is thrown (logged as an error). A failure to
     * connect to a page is only logged as a warning and the page is skipped.
     */
    @Override
    public void run() {
        final BulletParser parser = new BulletParser();
        final LinkExtractor linkExtractor = new LinkExtractor();
        parser.setCallback(linkExtractor);
        try {
            while (true) {
                Thread.sleep(this.urlDelay);
                this.currentCrawledURIs.put(Thread.currentThread(), null);
                final URI uri = this.visitQueue.take();
                this.enqueued.remove(uri);
                if (this.stopped) {
                    return;
                }
                LOGGER.debug("Received URI {}", uri);
                if (!this.visitedURIs.add(uri)) {
                    continue;
                }
                this.currentCrawledURIs.put(Thread.currentThread(), uri);
                final HttpURLConnection connection = connectTo(uri);
                if (connection == null) {
                    continue;
                }
                final MutableString page = readPage(connection);
                if (page == null) {
                    continue;
                }
                System.out.println(">>>>>>>>>>>>>>>> " + uri + " <<<<<<<<<<<<<<<<");
                page.print(System.out);
                System.out.println();
                parser.parse(page.array(), 0, page.length());
                if (countPage() >= this.maxPages) {
                    LOGGER.info("Thread shutting down: enough pages crawled.");
                    stop();
                    return;
                }
                for (final String url : linkExtractor.urls) {
                    try {
                        final Job job = new Job(uri.resolve(url));
                        if (!accept(job.uri)) {
                            continue;
                        }
                        if (!local(job)) {
                            submit(job);
                        } else if (this.enqueued.add(job.uri)) {
                            this.visitQueue.add(job.uri);
                        }
                    } catch (IllegalArgumentException ignored) {
                        // Links that cannot be turned into a job (e.g., malformed URLs) are skipped.
                    }
                }
            }
        } catch (InterruptedException e) {
            // Interruption is the regular way of stopping a worker: exit quietly, keeping the flag set.
            Thread.currentThread().interrupt();
        } catch (Exception e) {
            LOGGER.error(e.toString(), e);
        }
    }

    /**
     * Parses the command line, creates a crawler, runs it until the crawl ends, closes it
     * and finally reports any other thread that is still alive.
     *
     * @param strArr the command-line arguments.
     */
    public static void main(String[] strArr) throws Exception {
        final SimpleJSAP jsap = new SimpleJSAP(
                Crawler.class.getName(),
                "Runs a crawler with JGroups groups name " + Crawler.class.getSimpleName() + ".",
                new Parameter[] {
                    new FlaggedOption("numThreads", JSAP.INTEGER_PARSER, "2", false, 't', "numThreads", "Number of crawling threads."),
                    new FlaggedOption("delay", JSAP.INTSIZE_PARSER, "5000", false, 'd', "delay", "Delay in milliseconds between pages."),
                    new FlaggedOption("domain", JSAP.STRING_PARSER, "", false, 'D', "domain", "Limit crawl to this domain."),
                    new FlaggedOption("maxPages", JSAP.INTEGER_PARSER, "1000", false, 'p', "maxPages", "Maximum number of pages the crawler should crawl."),
                    new FlaggedOption("droppingFactory", JSAP.CLASS_PARSER, ImmediateDroppingThreadFactory.class.getName(), false, 'F', "droppingFactory", "The dropping thread factory."),
                    new FlaggedOption("messageStrategy", JSAP.CLASS_PARSER, DiscardMessagesStrategy.class.getName(), false, 'M', "messageStrategy", "The message strategy."),
                    new FlaggedOption("jmxHost", JSAP.STRING_PARSER, InetAddress.getLocalHost().getCanonicalHostName(), false, 'h', "jmxHost", "The host exporting the JMX interface."),
                    new FlaggedOption("jmxPort", JSAP.INTEGER_PARSER, System.getProperty(Agent.JMX_REMOTE_PORT_SYSTEM_PROPERTY), false, 'P', "jmxPort", "The port exporting the JMX interface."),
                    new UnflaggedOption("name", JSAP.STRING_PARSER, true, "The name of the job manager.")
                });
        final JSAPResult options = jsap.parse(strArr);
        if (jsap.messagePrinted()) {
            return;
        }

        final BlockingQueue<Job> queue = new LinkedBlockingQueue<Job>();
        // The classes are chosen on the command line, so their type arguments cannot be checked.
        @SuppressWarnings("unchecked")
        final DroppingThreadFactory<Job> droppingThreadFactory = (DroppingThreadFactory<Job>) options.getClass("droppingFactory").newInstance();
        @SuppressWarnings("unchecked")
        final PendingMessagesStrategy<Job> pendingMessagesStrategy = (PendingMessagesStrategy<Job>) options.getClass("messageStrategy").newInstance();
        final String jmxHost = options.getString("jmxHost");
        final JChannel channel = new JChannel("UDP(ip_mcast=true;mcast_addr=228.8.8.8;bind_addr=" + jmxHost + ";ip_ttl=32):PING(timeout=7000):MERGE2:FD(timeout=10000):pbcast.NAKACK:UNICAST:pbcast.GMS");
        final Crawler crawler = new Crawler(
                new InetSocketAddress(jmxHost, options.getInt("jmxPort")),
                droppingThreadFactory,
                pendingMessagesStrategy,
                options.getString("name"),
                channel,
                Crawler.class.getSimpleName(),
                queue,
                options.getInt("numThreads"),
                options.getInt("maxPages"),
                options.getInt("delay"),
                options.getString("domain"));

        LOGGER.info("Starting crawl...");
        crawler.connectAndGo();
        crawler.close();
        LOGGER.info("Ending crawl.");

        // Report leftover threads; the calling thread is necessarily alive and is not a leak.
        final Thread[] alive = new Thread[Thread.activeCount()];
        Thread.enumerate(alive);
        for (final Thread thread : alive) {
            if (thread != null && thread != Thread.currentThread()) {
                LOGGER.error("Thread {} is still alive.", thread.getName());
            }
        }
    }
}
