package it.unimi.di.law.bubing.spam.sztaki;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.di.law.bubing.Agent;
import it.unimi.di.law.bubing.spam.SpamDetector;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.objects.Object2LongOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import it.unimi.dsi.fastutil.shorts.Short2ShortMap;
import it.unimi.dsi.io.FileLinesCollection;
import java.io.Serializable;
import net.htmlparser.jericho.HTMLElementName;

/* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/bubing/spam/sztaki/SztakiSpamDetector.class */
/**
 * A {@link SpamDetector} that scores a document, given as a map from term indices to
 * occurrence counts, by building a weighted, L2-normalised term vector and feeding it
 * to an {@link SVMModel}.
 *
 * <p>The instance holds no per-call mutable state of its own: {@link #estimate(Short2ShortMap)}
 * works on a freshly allocated array, and {@link #copy()} returns the same instance.
 */
public class SztakiSpamDetector implements SpamDetector<Short2ShortMap> {
    private static final long serialVersionUID = 1L;

    /** The model used to turn the normalised term vector into a score. */
    private final SVMModel model;
    /** The strategy used to weight each term occurring in a document. */
    private final TermWeighting termWeighting;
    /** The length of the term vector; term indices are used directly as array positions. */
    private final int numTerms;

    /**
     * A serialisable term-weighting function.
     */
    public interface TermWeighting extends Serializable {
        /**
         * Computes the weight of a term.
         *
         * <p>As invoked by {@link SztakiSpamDetector#estimate(Short2ShortMap)}, the arguments are
         * the count of the term in the document, the index of the term, and the sum of all
         * counts in the document.
         *
         * @param j the count of the term in the document.
         * @param i the index of the term.
         * @param d the sum of all term counts in the document.
         * @return the weight of the term.
         */
        double compute(long j, int i, double d);
    }

    /**
     * Creates a new detector.
     *
     * @param sVMModel the model used for prediction.
     * @param termWeighting the term-weighting function.
     * @param i the number of terms, that is, the length of the term vector.
     * @throws Exception never thrown by this implementation; kept in the signature for
     * compatibility with existing callers.
     */
    public SztakiSpamDetector(SVMModel sVMModel, TermWeighting termWeighting, int i) throws Exception {
        this.termWeighting = termWeighting;
        this.numTerms = i;
        this.model = sVMModel;
    }

    /**
     * Estimates the score of a document.
     *
     * <p>Every entry with a strictly positive count is weighted with the configured
     * {@link TermWeighting}; the resulting vector is scaled by the reciprocal of its
     * Euclidean norm and passed to the model.
     *
     * @param short2ShortMap a map from term indices to counts; each index with a positive count
     * is used as a position in an array of length {@code numTerms}, so an index outside
     * {@code [0, numTerms)} causes an {@link ArrayIndexOutOfBoundsException}.
     * @return the value returned by the model for the normalised term vector.
     */
    @Override // it.unimi.di.law.bubing.spam.SpamDetector
    public double estimate(Short2ShortMap short2ShortMap) {
        final double[] weights = new double[this.numTerms];

        // First pass: the document length is the sum of all counts (a single traversal of the
        // entry set; the iterator must not be re-created at each step).
        long documentLength = 0;
        for (final Short2ShortMap.Entry entry : short2ShortMap.short2ShortEntrySet()) {
            documentLength += entry.getShortValue();
        }

        // Second pass: weight every term that actually occurs and accumulate the squared norm.
        double sumOfSquares = 0.0d;
        for (final Short2ShortMap.Entry entry : short2ShortMap.short2ShortEntrySet()) {
            final short term = entry.getShortKey();
            final short count = entry.getShortValue();
            if (count > 0) {
                final double weight = this.termWeighting.compute(count, term, documentLength);
                weights[term] = weight;
                sumOfSquares += weight * weight;
            }
        }

        // Zero entries are skipped on purpose: when the norm is zero the scale is infinite,
        // and multiplying a zero by it would yield NaN.
        final double scale = 1.0d / Math.sqrt(sumOfSquares);
        for (int t = 0; t < this.numTerms; t++) {
            if (weights[t] != 0.0d) {
                weights[t] *= scale;
            }
        }

        return this.model.predict(0, 1, this.model.calculateKernel(weights, 0));
    }

    /**
     * Returns this very instance instead of creating a new one.
     *
     * @return this detector.
     */
    @Override // it.unimi.dsi.lang.FlyweightPrototype
    public SpamDetector<Short2ShortMap> copy() {
        return this;
    }

    /**
     * Command-line entry point: reads the term file, the IDF scores and the model, and stores
     * a term-to-index map in <code><var>output</var>.map</code> and a serialised detector in
     * <code><var>output</var>.detector</code>.
     *
     * @param strArr the command-line arguments.
     * @throws IllegalArgumentException if the term file contains fewer lines than the requested
     * number of terms.
     * @throws Exception if parsing, reading or writing fails.
     */
    public static void main(String[] strArr) throws Exception {
        // NOTE(review): HTMLElementName.OUTPUT is used as the identifier of the last option;
        // presumably it is just the string "output" substituted by a decompiler -- confirm.
        final SimpleJSAP jsap = new SimpleJSAP(
                SztakiSpamDetector.class.getName(),
                "Creates map and model file starting from configuration data.",
                new Parameter[] {
                    new FlaggedOption("avgDocSize", JSAP.DOUBLE_PARSER, JSAP.NO_DEFAULT, true, 'a', "avg-doc-size", "The average size of a document."),
                    new FlaggedOption("numTerms", JSAP.INTEGER_PARSER, JSAP.NO_DEFAULT, true, 'n', "num-terms", "The number of terms."),
                    new UnflaggedOption("terms", JSAP.STRING_PARSER, true, "The term file (UTF-8 file containing one term per line; the first term is term zero)."),
                    new UnflaggedOption("idf", JSAP.STRING_PARSER, true, "The IDF scores, parallel to the score file."),
                    new UnflaggedOption("model", JSAP.STRING_PARSER, true, "The model."),
                    new UnflaggedOption(HTMLElementName.OUTPUT, JSAP.STRING_PARSER, true, "The basename of the output files.")
                });

        final JSAPResult arguments = jsap.parse(strArr);
        if (jsap.messagePrinted()) {
            System.exit(1);
        }

        final double avgDocSize = arguments.getDouble("avgDocSize");
        final int numTerms = arguments.getInt("numTerms");
        final String termsFile = arguments.getString("terms");
        final String idfFile = arguments.getString("idf");
        final String modelFile = arguments.getString("model");
        final String outputBasename = arguments.getString(HTMLElementName.OUTPUT);

        // Maps each (trimmed, lower-cased) term to its zero-based line number in the term file.
        final Object2LongOpenHashMap<CharSequence> termToIndex = new Object2LongOpenHashMap<CharSequence>();
        // NOTE(review): the third constructor argument is passed as true; check its meaning
        // against the FileLinesCollection documentation.
        final FileLinesCollection.FileLinesIterator lines = new FileLinesCollection(termsFile, "UTF-8", true).iterator();
        try {
            for (int index = 0; index < numTerms; index++) {
                if (!lines.hasNext()) {
                    throw new IllegalArgumentException("The term file " + termsFile + " contains only " + index
                            + " terms, but " + numTerms + " were requested");
                }
                // The line is copied before being stored as a key.
                termToIndex.put(lines.next().trim().toLowerCase().copy(), index);
            }
            if (lines.hasNext()) {
                System.err.println("WARNING: ignoring terms after " + numTerms);
            }
        } finally {
            // Release the underlying file even when reading fails.
            lines.close();
        }

        BinIO.storeObject(termToIndex, outputBasename + ".map");
        BinIO.storeObject(
                new SztakiSpamDetector(SVMModel.readFromFile(modelFile), new BM25TermWeighting(idfFile, avgDocSize, numTerms), numTerms),
                outputBasename + ".detector");
    }
}
