package it.unimi.di.law.warc.processors;

import it.unimi.di.law.bubing.parser.HTMLParser;
import it.unimi.di.law.warc.processors.ParallelFilteredProcessorRunner;
import it.unimi.di.law.warc.records.HttpResponseWarcRecord;
import it.unimi.di.law.warc.records.WarcHeader;
import it.unimi.di.law.warc.records.WarcRecord;
import java.io.IOException;
import java.net.URI;
import java.util.Iterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/processors/StatisticalProcessor.class */
/**
 * Processor that extracts a handful of per-record statistics from a WARC record.
 *
 * <p>For each record it collects the target URI, the duplicate flag, the guessed charset,
 * the entity content length and content type, the HTTP status class and two link counts
 * obtained by parsing the response with an {@link HTMLParser}.
 *
 * <p>Each instance owns its own {@link HTMLParser}; {@link #copy()} returns a fresh,
 * independent instance.
 */
public class StatisticalProcessor implements ParallelFilteredProcessorRunner.Processor<StatisticalProperties> {
    private static final Logger LOGGER = LoggerFactory.getLogger(StatisticalProcessor.class);
    HTMLParser<Void> htmlParser = new HTMLParser<>();

    /* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/processors/StatisticalProcessor$StatisticalProperties.class */
    /** Immutable bundle of the statistics computed by {@link StatisticalProcessor#process(WarcRecord, long)}. */
    public final class StatisticalProperties {
        /** Target URI of the record. */
        final URI targetURI;
        /** Content length reported by the response entity, or {@code -1} when it could not be read. */
        final long length;
        /** Value of the entity content-type header, or the empty string when it could not be read. */
        final String contentType;
        /** Whether the record carries the {@code BUBING_IS_DUPLICATE} WARC header. */
        final boolean isDuplicate;
        /** Value of the {@code BUBING_GUESSED_CHARSET} WARC header, or the string {@code "null"} when absent. */
        final String charset;
        /** Number of URLs collected while parsing the response. */
        final int degree;
        /**
         * Number of collected URLs whose host equals the host of the target URI.
         * NOTE(review): the name suggests links leaving the host, but the computation counts
         * same-host links; confirm the intended meaning with the consumers of this value.
         */
        final int outHostDegree;
        /** HTTP status code divided by 100 (e.g. 2 for 200), or {@code -1} when it could not be read. */
        final int statusCode;

        /**
         * Creates a new set of statistics.
         *
         * @param targetURI the target URI of the record.
         * @param isDuplicate whether the record is marked as a duplicate.
         * @param charset the guessed charset.
         * @param length the content length.
         * @param degree the number of links found in the record.
         * @param outHostDegree the number of links whose host equals the host of {@code targetURI}.
         * @param statusCode the HTTP status class (status code divided by 100).
         * @param contentType the content type.
         */
        public StatisticalProperties(URI targetURI, boolean isDuplicate, String charset, long length, int degree, int outHostDegree, int statusCode, String contentType) {
            this.targetURI = targetURI;
            this.length = length;
            this.contentType = contentType;
            this.isDuplicate = isDuplicate;
            this.charset = charset;
            this.degree = degree;
            this.outHostDegree = outHostDegree;
            this.statusCode = statusCode;
        }

        /** Returns a single-line, semicolon-separated rendering of all the statistics. */
        @Override
        public String toString() {
            return "URI " + this.targetURI
                    + "; length " + this.length
                    + "; contentType " + this.contentType
                    + "; isDuplicate " + this.isDuplicate
                    + "; charset " + this.charset
                    + "; degree " + this.degree
                    + "; outHostDegree " + this.outHostDegree
                    + "; status " + this.statusCode;
        }
    }

    /** Does nothing: this processor holds no resources that need releasing. */
    @Override // java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
    }

    /**
     * Returns a new, independent processor (with its own parser).
     *
     * @return a fresh {@link StatisticalProcessor}.
     */
    @Override // it.unimi.dsi.lang.FlyweightPrototype
    public ParallelFilteredProcessorRunner.Processor<StatisticalProperties> copy() {
        return new StatisticalProcessor();
    }

    /**
     * Computes the statistics of a record.
     *
     * <p>Missing pieces of information do not abort processing: each one is logged at
     * {@code WARN} level and replaced by a default ({@code "null"} for the charset,
     * {@code -1} for the content length and the status class, the empty string for the
     * content type). A parsing failure is logged at {@code ERROR} level and the link counts
     * are computed on whatever URLs were collected up to that point.
     *
     * @param warcRecord the record to analyse; it is cast to {@link HttpResponseWarcRecord},
     *     so a record of a different kind causes a {@link ClassCastException}.
     * @param j not used by this implementation.
     * @return the statistics of {@code warcRecord}.
     */
    /* JADX WARN: Can't rename method to resolve collision */
    @Override // it.unimi.di.law.warc.processors.ParallelFilteredProcessorRunner.Processor
    public StatisticalProperties process(WarcRecord warcRecord, long j) {
        final URI targetURI = warcRecord.getWarcTargetURI();
        final String targetHost = targetURI.getHost();
        final boolean isDuplicate = warcRecord.getWarcHeader(WarcHeader.Name.BUBING_IS_DUPLICATE) != null;

        String charset;
        try {
            charset = warcRecord.getWarcHeader(WarcHeader.Name.BUBING_GUESSED_CHARSET).getValue();
        } catch (NullPointerException e) {
            // The header is absent: fall back to the literal string "null".
            LOGGER.warn("Undefined charset for URI {}", targetURI);
            charset = "null";
        }

        final HttpResponseWarcRecord responseRecord = (HttpResponseWarcRecord) warcRecord;

        long contentLength;
        try {
            contentLength = responseRecord.response().getEntity().getContentLength();
        } catch (NullPointerException e) {
            LOGGER.warn("Undefined Content Length for URI {}", targetURI);
            contentLength = -1;
        }

        int statusClass;
        try {
            // Only the status class (first digit) is recorded.
            statusClass = responseRecord.getStatusLine().getStatusCode() / 100;
        } catch (NullPointerException e) {
            LOGGER.warn("Undefined statusCode for URI {}", targetURI);
            statusClass = -1;
        }

        String contentType = "";
        try {
            contentType = responseRecord.response().getEntity().getContentType().getValue();
        } catch (NullPointerException e) {
            // Keep the empty-string default; the charset computed above must not be touched here.
            LOGGER.warn("Undefined contentType for URI {}", targetURI);
        }

        final HTMLParser.SetLinkReceiver linkReceiver = new HTMLParser.SetLinkReceiver();
        try {
            this.htmlParser.parse(warcRecord.getWarcTargetURI(), responseRecord, linkReceiver);
        } catch (Exception e) {
            LOGGER.error("Unexpected exception during parsing", e);
        }

        final int degree = linkReceiver.urls.size();
        int sameHostLinks = 0;
        for (URI link : linkReceiver.urls) {
            // URI.getHost() is null for host-less URIs (e.g. mailto:); such links never match.
            final String linkHost = link.getHost();
            if (linkHost != null && linkHost.equals(targetHost)) {
                sameHostLinks++;
            }
        }
        return new StatisticalProperties(targetURI, isDuplicate, charset, contentLength, degree, sameHostLinks, statusClass, contentType);
    }
}
