package it.unimi.di.law.warc.processors;

import it.unimi.di.law.bubing.spam.sztaki.SztakiSpamDetector;
import it.unimi.di.law.spam.SpamConfig;
import it.unimi.di.law.warc.processors.ParallelFilteredProcessorRunner;
import it.unimi.di.law.warc.records.HttpResponseWarcRecord;
import it.unimi.di.law.warc.records.WarcRecord;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.shorts.Short2ShortMap;
import it.unimi.dsi.fastutil.shorts.Short2ShortOpenHashMap;
import it.unimi.dsi.io.FastBufferedReader;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.logging.ProgressLogger;
import java.io.IOException;
import java.io.InputStreamReader;
import org.hsqldb.Tokens;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/processors/SpamAggregationReaderProcessor.class */
/**
 * A processor that reads the term/frequency list stored in the entity of an HTTP response
 * record and asks the statically loaded {@link SztakiSpamDetector} for an estimate.
 *
 * <p>The entity is read as a sequence of words: the first is expected to be {@code SIZE},
 * the second is an integer size, and the remaining ones alternate between a term
 * identifier and its frequency (both parsed as {@code short}).
 *
 * <p>The processor keeps no per-instance state, so {@link #copy()} hands back the shared
 * {@link #INSTANCE}.
 */
public class SpamAggregationReaderProcessor implements ParallelFilteredProcessorRunner.Processor<String> {
    /** The word expected as first token of every entity. */
    private static final String SIZE_HEADER = "SIZE";

    /** The detector loaded by the static initializer; {@code null} if loading failed. */
    private static SztakiSpamDetector detector;
    public static final SpamAggregationReaderProcessor INSTANCE = new SpamAggregationReaderProcessor();
    private static final Logger LOGGER = LoggerFactory.getLogger(SpamAggregationReaderProcessor.class);
    static ProgressLogger pl = new ProgressLogger(LOGGER);

    static {
        pl.start("Aggregation by host starts...");
        final String detectorFile = SpamConfig.SZTAKI_SPAM_DETECTOR_FILE + ".detector";
        try {
            detector = (SztakiSpamDetector) BinIO.loadObject(detectorFile);
        } catch (Exception e) {
            // Best effort: class initialization must not fail, but the problem has to be
            // visible in the logs because without a detector no prediction can be made.
            LOGGER.error("Unable to load the spam detector from " + detectorFile, e);
        }
    }

    /** Does nothing: this processor holds no resources. */
    @Override // java.io.Closeable, java.lang.AutoCloseable
    public void close() throws IOException {
    }

    /**
     * Returns the shared instance.
     *
     * @return {@link #INSTANCE}.
     */
    @Override // it.unimi.dsi.lang.FlyweightPrototype
    public ParallelFilteredProcessorRunner.Processor<String> copy() {
        return INSTANCE;
    }

    /**
     * Computes the detector estimate for the host of the given record.
     *
     * <p>Any exception raised while reading or estimating is logged and results in an
     * empty prediction; it is never propagated to the caller.
     *
     * @param warcRecord the record to process; its entity is read only if it is an
     *        {@link HttpResponseWarcRecord} (otherwise the cast failure is logged).
     * @param j unused by this processor.
     * @return the host of the record target URI, a space, and the estimate (or the empty
     *         string if no estimate could be computed).
     */
    @Override // it.unimi.di.law.warc.processors.ParallelFilteredProcessorRunner.Processor
    public String process(WarcRecord warcRecord, long j) {
        pl.lightUpdate();
        final String host = warcRecord.getWarcTargetURI().getHost();
        LOGGER.info("Processing HOST " + host);
        String prediction = "";
        if (detector == null) {
            // Loading failed in the static initializer (already logged with its cause).
            LOGGER.warn("No spam detector available; skipping prediction for host: " + host);
        } else {
            try {
                final Short2ShortMap termFrequencies = readTermFrequencies((HttpResponseWarcRecord) warcRecord, host);
                prediction = "" + detector.estimate(termFrequencies);
            } catch (Exception e) {
                // Passing the exception keeps its type and stack trace in the log.
                LOGGER.warn("Unexpected Exception while reading terms for host: " + host, e);
            }
        }
        LOGGER.info("HOST " + host + " PREDICTION " + prediction);
        return host + " " + prediction;
    }

    /**
     * Parses the entity of a response record into a map from term identifiers to frequencies.
     *
     * @param responseRecord the record whose entity content is to be read.
     * @param host the host of the record, used in log messages only.
     * @return the map from term identifier to frequency.
     * @throws IOException if the entity content cannot be obtained or read.
     * @throws NumberFormatException if the size, a term or a frequency is not a valid number.
     */
    private static Short2ShortMap readTermFrequencies(HttpResponseWarcRecord responseRecord, String host) throws IOException {
        final Short2ShortOpenHashMap frequencies = new Short2ShortOpenHashMap();
        // NOTE(review): the reader decodes with the platform default charset; the expected
        // content looks plain ASCII, but confirm before pinning an explicit charset.
        try (FastBufferedReader reader = new FastBufferedReader(new InputStreamReader(responseRecord.response().getEntity().getContent()))) {
            final MutableString word = new MutableString();
            final MutableString nonWord = new MutableString();
            short term = -1;
            for (int index = 0; reader.next(word, nonWord); index++) {
                final String token = word.toString();
                if (index == 0) {
                    if (!SIZE_HEADER.equals(token)) {
                        LOGGER.warn("The first word should be SIZE");
                    }
                } else if (index == 1) {
                    // Always parsed, so that a malformed size is reported even when tracing is off.
                    final int size = Integer.parseInt(token);
                    if (LOGGER.isTraceEnabled()) {
                        LOGGER.trace(host + " with size " + size);
                    }
                } else if (index % 2 == 0) {
                    // Even positions carry the term identifier...
                    term = Short.parseShort(token);
                } else {
                    // ...and odd positions the frequency of the term just read.
                    short frequency = Short.parseShort(token);
                    if (frequency < 0) {
                        // A negative value is treated as an overflowed counter and saturated.
                        LOGGER.warn("Overflow for the frequency of term " + index + " for host " + host);
                        frequency = Short.MAX_VALUE;
                    }
                    frequencies.put(term, frequency);
                }
            }
        }
        return frequencies;
    }
}
