package it.unimi.di.law.warc.processors;

import it.unimi.di.law.bubing.parser.HTMLParser;
import it.unimi.di.law.warc.processors.ParallelFilteredProcessorRunner;
import it.unimi.di.law.warc.records.HttpResponseWarcRecord;
import it.unimi.di.law.warc.records.WarcRecord;
import java.io.IOException;
import java.net.URI;
import java.util.Iterator;
import java.util.Set;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/processors/PositionLinkExtractorProcessor.class */
/**
 * Processor that parses a WARC HTTP response record with an {@link HTMLParser}, collects the
 * links reported to an {@link HTMLParser.SetLinkReceiver}, and renders them as one tab-separated
 * line together with the record's target URI and the {@code long} value handed to
 * {@link #process(WarcRecord, long)}.
 *
 * <p>Every instance owns its own parser; {@link #copy()} returns a brand-new instance.
 */
public class PositionLinkExtractorProcessor implements ParallelFilteredProcessorRunner.Processor<String> {
    /** Logger registered under this class, so messages are attributed to the right processor. */
    private static final Logger LOGGER = LoggerFactory.getLogger(PositionLinkExtractorProcessor.class);

    /** Parser used by {@link #process(WarcRecord, long)}; created once per processor instance. */
    HTMLParser<Void> htmlParser = new HTMLParser<>();

    /* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/processors/PositionLinkExtractorProcessor$PairURLLinks.class */
    /** Pairs a target URI with the set of links found for it. */
    public final class PairURLLinks {
        /** URI the links belong to. */
        final URI targetURI;
        /** Links associated with {@link #targetURI}. */
        Set<URI> outlinks;

        /**
         * Creates a new pair.
         *
         * @param uri the target URI.
         * @param set the links associated with {@code uri}.
         */
        public PairURLLinks(URI uri, Set<URI> set) {
            this.targetURI = uri;
            this.outlinks = set;
        }

        /**
         * Renders {@link #outlinks} using the same format as the enclosing processor.
         *
         * @return every link followed by a tab character, in the set's iteration order.
         */
        private String outlinksToString() {
            return joinLinks(this.outlinks);
        }

        /**
         * Returns the tab-separated representation of this pair.
         *
         * @return {@code "URI:\t<target>\tLINKS:\t"} followed by every link, each terminated by a tab.
         */
        @Override
        public String toString() {
            return "URI:\t" + this.targetURI.toString() + "\tLINKS:\t" + outlinksToString();
        }
    }

    /** Does nothing: this processor does not acquire any resource in the code of this class. */
    @Override // java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
    }

    /**
     * Returns a fresh processor.
     *
     * @return a new {@link PositionLinkExtractorProcessor} with its own parser.
     */
    @Override // it.unimi.dsi.lang.FlyweightPrototype
    public ParallelFilteredProcessorRunner.Processor<String> copy() {
        return new PositionLinkExtractorProcessor();
    }

    /**
     * Concatenates the string form of every URI in {@code links}, each followed by a tab.
     *
     * <p>A {@link StringBuilder} is used so the cost stays linear in the total output length
     * instead of re-copying the partial result on every iteration.
     *
     * @param links the URIs to render; must not be {@code null}.
     * @return the concatenation, or the empty string when {@code links} is empty.
     */
    private static String joinLinks(Set<URI> links) {
        StringBuilder joined = new StringBuilder();
        for (URI link : links) {
            // Each link keeps its trailing tab, including the last one.
            joined.append(link.toString()).append('\t');
        }
        return joined.toString();
    }

    /**
     * Renders the given links using the processor's output format.
     *
     * @param set the URIs to render.
     * @return every link followed by a tab character.
     */
    private String outlinksToString(Set<URI> set) {
        return joinLinks(set);
    }

    /**
     * Parses the given record and returns a line describing its target URI, the supplied
     * position value and the links collected while parsing.
     *
     * @param warcRecord the record to parse; it is cast to {@link HttpResponseWarcRecord}.
     * @param j the value emitted after the {@code POSITION:} label.
     * @return {@code "URI:\t<target>\tPOSITION:\t<j>\tLINKS:\t"} followed by every link, each
     *         terminated by a tab; or {@code null} if any exception is thrown while parsing or
     *         formatting (the exception is logged at error level).
     */
    /* JADX WARN: Can't rename method to resolve collision */
    @Override // it.unimi.di.law.warc.processors.ParallelFilteredProcessorRunner.Processor
    public String process(WarcRecord warcRecord, long j) {
        URI warcTargetURI = warcRecord.getWarcTargetURI();
        HTMLParser.SetLinkReceiver setLinkReceiver = new HTMLParser.SetLinkReceiver();
        try {
            // Reuse the URI fetched above instead of asking the record for it a second time.
            this.htmlParser.parse(warcTargetURI, (HttpResponseWarcRecord) warcRecord, setLinkReceiver);
            return "URI:\t" + warcTargetURI.toString() + "\tPOSITION:\t" + j + "\tLINKS:\t" + outlinksToString(setLinkReceiver.urls);
        } catch (Exception e) {
            // Best effort: a record that cannot be processed is reported and yields no output.
            LOGGER.error("Unexpected exception during parsing", e);
            return null;
        }
    }
}
