package it.unimi.di.law.warc.processors;

import it.unimi.di.law.warc.processors.ParallelFilteredProcessorRunner;
import it.unimi.di.law.warc.processors.StatisticalProcessor;
import it.unimi.dsi.fastutil.ints.Int2IntOpenHashMap;
import it.unimi.dsi.fastutil.ints.Int2LongOpenHashMap;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import it.unimi.dsi.fastutil.objects.Object2IntOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
import it.unimi.dsi.fastutil.objects.Object2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import it.unimi.dsi.stat.SummaryStats;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.util.Map;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.lang.StringUtils;

/* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/processors/StatisticalWriter.class */
/**
 * A {@link ParallelFilteredProcessorRunner.Writer} that accumulates statistics over the
 * {@link StatisticalProcessor.StatisticalProperties} passed to
 * {@link #write(StatisticalProcessor.StatisticalProperties, long, PrintStream)} and dumps them
 * to files when {@link #close()} is called.
 *
 * <p>Most statistics are kept three ways: over all records, over "archetypes" (records whose
 * {@code isDuplicate} flag is {@code false}) and over duplicates. A subset is additionally kept
 * per host in {@link HostData}.
 *
 * <p>All accumulators are {@code static}: they are shared by every instance of this class and
 * are never reset. No synchronization is performed here.
 * NOTE(review): presumably the runner invokes the writer from a single thread - confirm.
 */
public class StatisticalWriter implements ParallelFilteredProcessorRunner.Writer<StatisticalProcessor.StatisticalProperties> {
    /** Directory prefix prepended to every output file name. */
    public static final String RESULT_DIRECTORY = "./";
    /** Common prefix of every output file name. */
    public static final String BASE_NAME = "STATs";

    /** Per-host accumulators, keyed by the host of the record's target URI. */
    private static final Object2ObjectOpenHashMap<String, HostData> host2HostData = new Object2ObjectOpenHashMap<>();

    // Occurrence counts per charset / content type / status code: all records, archetypes, duplicates.
    private static final Object2LongOpenHashMap<String> charsetDistribution = new Object2LongOpenHashMap<>();
    private static final Object2LongOpenHashMap<String> charsetDistributionArchetypes = new Object2LongOpenHashMap<>();
    private static final Object2LongOpenHashMap<String> charsetDistributionDuplicates = new Object2LongOpenHashMap<>();
    private static final Object2LongOpenHashMap<String> contentTypeDistribution = new Object2LongOpenHashMap<>();
    private static final Object2LongOpenHashMap<String> contentTypeDistributionArchetypes = new Object2LongOpenHashMap<>();
    private static final Object2LongOpenHashMap<String> contentTypeDistributionDuplicates = new Object2LongOpenHashMap<>();
    private static final Int2LongOpenHashMap statusCodeDistribution = new Int2LongOpenHashMap();
    private static final Int2LongOpenHashMap statusCodeDistributionArchetypes = new Int2LongOpenHashMap();
    private static final Int2LongOpenHashMap statusCodeDistributionDuplicates = new Int2LongOpenHashMap();

    /** Number of records seen by {@code write}. */
    private static int numberOfPages = 0;
    /** Number of records seen by {@code write} whose {@code isDuplicate} flag was set. */
    private static int numberOfDuplicates = 0;

    // Summary statistics of out-degree, out-host-degree and length: all records, archetypes, duplicates.
    private static final SummaryStats outDegree = new SummaryStats();
    private static final SummaryStats outDegreeArchetypes = new SummaryStats();
    private static final SummaryStats outDegreeDuplicates = new SummaryStats();
    private static final SummaryStats outHostDegree = new SummaryStats();
    private static final SummaryStats outHostDegreeArchetypes = new SummaryStats();
    private static final SummaryStats outHostDegreeDuplicates = new SummaryStats();
    private static final SummaryStats length = new SummaryStats();
    private static final SummaryStats lengthArchetypes = new SummaryStats();
    private static final SummaryStats lengthDuplicates = new SummaryStats();

    /** Stream handed to the most recent {@code write} call; {@code null} until the first call completes. */
    private PrintStream out;

    /**
     * Accumulators for a single host. One instance is stored in the host map for every distinct
     * host seen by {@code write}; its fields form the columns of the per-host CSV export.
     */
    public class HostData {
        public int numberOfPages = 0;
        public int numberOfDuplicates = 0;
        public SummaryStats outDegree = new SummaryStats();
        public SummaryStats outDegreeArchetypes = new SummaryStats();
        public SummaryStats outHostDegree = new SummaryStats();
        public SummaryStats outHostDegreeArchetypes = new SummaryStats();
        public SummaryStats length = new SummaryStats();
        public SummaryStats lengthOfArchetypes = new SummaryStats();
        public Object2IntOpenHashMap<String> contentTypeDistributionArchetypes = new Object2IntOpenHashMap<>();
        public Int2IntOpenHashMap statusCodeDistribution = new Int2IntOpenHashMap();
        public Int2IntOpenHashMap statusCodeDistributionArchetypes = new Int2IntOpenHashMap();

        public HostData() {
        }
    }

    /**
     * Dumps every accumulated statistic to files named {@code RESULT_DIRECTORY + BASE_NAME + <suffix>}:
     * the summary as {@code .txt}, each distribution as {@code .tsv} and the per-host data as
     * {@code .csv}. The summary is also echoed to the stream last passed to {@code write}, if any.
     *
     * @throws IOException if one of the output files cannot be opened
     */
    @Override
    public void close() throws IOException {
        final String prefix = RESULT_DIRECTORY + BASE_NAME;
        outputAndExportSummaryStats(prefix + "Summary");
        exportDistribution(charsetDistribution, prefix + "CharsetDistribution");
        exportDistribution(charsetDistributionArchetypes, prefix + "CharsetDistributionArchetypes");
        exportDistribution(charsetDistributionDuplicates, prefix + "CharsetDistributionDuplicates");
        exportDistribution(contentTypeDistribution, prefix + "ContentTypeDistribution");
        exportDistribution(contentTypeDistributionArchetypes, prefix + "ContentTypeDistributionArchetypes");
        exportDistribution(contentTypeDistributionDuplicates, prefix + "ContentTypeDistributionDuplicates");
        exportDistribution(statusCodeDistribution, prefix + "StatusCodeDistribution");
        exportDistribution(statusCodeDistributionArchetypes, prefix + "StatusCodeDistributionArchetypes");
        exportDistribution(statusCodeDistributionDuplicates, prefix + "StatusCodeDistributionDuplicates");
        exportHostData(host2HostData, prefix + "HostData");
    }

    /** Opens {@code fileName} for writing behind a buffered {@link PrintStream}. */
    private static PrintStream openOutput(String fileName) throws FileNotFoundException {
        return new PrintStream(new FastBufferedOutputStream(new FileOutputStream(new File(fileName))));
    }

    /** Appends one {@code label:<TAB>value<LF>} line of the summary. */
    private static void appendSummaryLine(StringBuilder text, String label, Object value) {
        text.append(label).append(":\t").append(value).append('\n');
    }

    /** Renders the global counters and summary statistics, one per line. */
    private static String summaryText() {
        final StringBuilder text = new StringBuilder();
        appendSummaryLine(text, "numberOfPages", numberOfPages);
        appendSummaryLine(text, "numberOfDuplicates", numberOfDuplicates);
        appendSummaryLine(text, "outDegree", outDegree);
        appendSummaryLine(text, "outDegreeOfArchetypes", outDegreeArchetypes);
        appendSummaryLine(text, "outDegreeOfDuplicates", outDegreeDuplicates);
        appendSummaryLine(text, "outHostDegree", outHostDegree);
        appendSummaryLine(text, "outHostDegreeOfArchetypes", outHostDegreeArchetypes);
        appendSummaryLine(text, "outHostDegreeOfDuplicates", outHostDegreeDuplicates);
        appendSummaryLine(text, "contentLength", length);
        appendSummaryLine(text, "contentLengthOfArchetypes", lengthArchetypes);
        appendSummaryLine(text, "contentLengthOfDuplicates", lengthDuplicates);
        return text.toString();
    }

    /**
     * Writes the summary to {@code baseName + ".txt"} and echoes the same text to the stream
     * remembered from the last {@code write} call.
     *
     * @param baseName output path without extension
     * @throws FileNotFoundException if the summary file cannot be opened
     */
    private void outputAndExportSummaryStats(String baseName) throws FileNotFoundException {
        final String summary = summaryText();
        try (PrintStream file = openOutput(baseName + ".txt")) {
            file.print(summary);
        }
        // No record may ever have been written, in which case there is no stream to echo to.
        if (this.out != null) {
            this.out.print(summary);
        }
    }

    /**
     * Writes one {@code key<TAB>count} line per entry to {@code baseName + ".tsv"}.
     *
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static void exportDistribution(Object2LongOpenHashMap<String> distribution, String baseName) throws FileNotFoundException {
        try (PrintStream file = openOutput(baseName + ".tsv")) {
            for (Map.Entry<String, Long> entry : distribution.entrySet()) {
                file.println(entry.getKey() + "\t" + entry.getValue());
            }
        }
    }

    /**
     * Writes a header line followed by one comma-separated line per host to {@code baseName + ".csv"}.
     *
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static void exportHostData(Object2ObjectOpenHashMap<String, HostData> hosts, String baseName) throws FileNotFoundException {
        try (PrintStream file = openOutput(baseName + ".csv")) {
            file.println("#HostName," + hostData2StringFormat());
            for (Map.Entry<String, HostData> entry : hosts.entrySet()) {
                file.println(entry.getKey() + "," + hostData2String(entry.getValue()));
            }
        }
    }

    /** Renders a distribution as {@code [(key count)(key count)...]}, with spaces stripped from the keys. */
    private static String distribution2String(Object2IntOpenHashMap<String> distribution) {
        final StringBuilder text = new StringBuilder("[");
        for (Map.Entry<String, Integer> entry : distribution.entrySet()) {
            text.append("(").append(StringUtils.replace(entry.getKey(), " ", "")).append(" ").append(entry.getValue()).append(")");
        }
        return text.append("]").toString();
    }

    /** Renders a distribution as {@code [(key count)(key count)...]}. */
    private static String distribution2String(Int2IntOpenHashMap distribution) {
        final StringBuilder text = new StringBuilder("[");
        for (Map.Entry<Integer, Integer> entry : distribution.entrySet()) {
            text.append("(").append(entry.getKey()).append(" ").append(entry.getValue()).append(")");
        }
        return text.append("]").toString();
    }

    /** Replaces commas with spaces so that the text cannot break a CSV row. */
    private static String csvSafe(String text) {
        return StringUtils.replace(text, ",", " ");
    }

    /** Renders one host as a CSV row; the column order matches {@link #hostData2StringFormat()}. */
    private static String hostData2String(HostData hostData) {
        return hostData.numberOfPages
                + "," + hostData.numberOfDuplicates
                + "," + csvSafe(hostData.outDegree.toString())
                + "," + csvSafe(hostData.outDegreeArchetypes.toString())
                + "," + csvSafe(hostData.outHostDegree.toString())
                + "," + csvSafe(hostData.outHostDegreeArchetypes.toString())
                + "," + csvSafe(hostData.length.toString())
                + "," + csvSafe(hostData.lengthOfArchetypes.toString())
                + "," + csvSafe(distribution2String(hostData.contentTypeDistributionArchetypes))
                + "," + csvSafe(distribution2String(hostData.statusCodeDistribution))
                + "," + csvSafe(distribution2String(hostData.statusCodeDistributionArchetypes));
    }

    /** Returns the CSV column names of a host row (without the leading host-name column). */
    private static String hostData2StringFormat() {
        return "numberOfPages,numberOfDuplicates,outDegree,outDegreeArchetypes,outHostDegree,outHostDegreeArchetypes,length,lengthOfArchetypes,contentTypeDistributionArchetypes,statusCodeDistribution,statusCodeDistributionArchetypes";
    }

    /**
     * Writes one {@code ( key count )} line per entry to {@code baseName + ".tsv"}.
     *
     * @throws FileNotFoundException if the file cannot be opened
     */
    private static void exportDistribution(Int2LongOpenHashMap distribution, String baseName) throws FileNotFoundException {
        try (PrintStream file = openOutput(baseName + ".tsv")) {
            for (Map.Entry<Integer, Long> entry : distribution.entrySet()) {
                file.println("( " + entry.getKey() + " " + entry.getValue() + " )");
            }
        }
    }

    // The four helpers below add one occurrence of a key to a counting map. The maps are created
    // in this class with the default return value (0) untouched, so addTo() stores 1 for an unseen key.

    /** Counts one more occurrence of {@code key}. */
    private static void checkPutOrIncrease(Object2IntOpenHashMap<String> counts, String key) {
        counts.addTo(key, 1);
    }

    /** Counts one more occurrence of {@code key}; the count is kept as a full {@code long}. */
    private static void checkPutOrIncrease(Object2LongOpenHashMap<String> counts, String key) {
        counts.addTo(key, 1L);
    }

    /** Counts one more occurrence of {@code key}. */
    private static void checkPutOrIncrease(Int2IntOpenHashMap counts, int key) {
        counts.addTo(key, 1);
    }

    /** Counts one more occurrence of {@code key}. */
    private static void checkPutOrIncrease(Int2LongOpenHashMap counts, int key) {
        counts.addTo(key, 1L);
    }

    /**
     * Folds one record into the global and per-host accumulators.
     *
     * @param statisticalProperties the record's properties; its target URI host selects the
     *        {@link HostData} entry and its {@code isDuplicate} flag selects the duplicate or
     *        archetype accumulators
     * @param j not used by this writer
     * @param printStream remembered so that {@link #close()} can echo the summary to it;
     *        nothing is printed to it here
     * @throws IOException declared by the interface; not thrown by this implementation
     */
    @Override
    public void write(StatisticalProcessor.StatisticalProperties statisticalProperties, long j, PrintStream printStream) throws IOException {
        final String host = statisticalProperties.targetURI.getHost();
        HostData hostData = host2HostData.get(host);
        if (hostData == null) {
            // First record for this host.
            hostData = new HostData();
            host2HostData.put(host, hostData);
        }

        hostData.numberOfPages++;
        numberOfPages++;

        if (statisticalProperties.isDuplicate) {
            hostData.numberOfDuplicates++;
            numberOfDuplicates++;
            outDegreeDuplicates.add(statisticalProperties.degree);
            outHostDegreeDuplicates.add(statisticalProperties.outHostDegree);
            lengthDuplicates.add(statisticalProperties.length);
            checkPutOrIncrease(charsetDistributionDuplicates, statisticalProperties.charset);
            checkPutOrIncrease(contentTypeDistributionDuplicates, statisticalProperties.contentType);
            checkPutOrIncrease(statusCodeDistributionDuplicates, statisticalProperties.statusCode);
        } else {
            hostData.outDegreeArchetypes.add(statisticalProperties.degree);
            outDegreeArchetypes.add(statisticalProperties.degree);
            hostData.outHostDegreeArchetypes.add(statisticalProperties.outHostDegree);
            outHostDegreeArchetypes.add(statisticalProperties.outHostDegree);
            hostData.lengthOfArchetypes.add(statisticalProperties.length);
            lengthArchetypes.add(statisticalProperties.length);
            checkPutOrIncrease(hostData.contentTypeDistributionArchetypes, statisticalProperties.contentType);
            checkPutOrIncrease(hostData.statusCodeDistributionArchetypes, statisticalProperties.statusCode);
            checkPutOrIncrease(charsetDistributionArchetypes, statisticalProperties.charset);
            checkPutOrIncrease(contentTypeDistributionArchetypes, statisticalProperties.contentType);
            checkPutOrIncrease(statusCodeDistributionArchetypes, statisticalProperties.statusCode);
        }

        // Accumulators over all records, duplicate or not.
        hostData.outDegree.add(statisticalProperties.degree);
        outDegree.add(statisticalProperties.degree);
        hostData.outHostDegree.add(statisticalProperties.outHostDegree);
        outHostDegree.add(statisticalProperties.outHostDegree);
        hostData.length.add(statisticalProperties.length);
        length.add(statisticalProperties.length);
        checkPutOrIncrease(hostData.statusCodeDistribution, statisticalProperties.statusCode);
        checkPutOrIncrease(charsetDistribution, statisticalProperties.charset);
        checkPutOrIncrease(contentTypeDistribution, statisticalProperties.contentType);
        checkPutOrIncrease(statusCodeDistribution, statisticalProperties.statusCode);

        this.out = printStream;
    }
}
