package it.unimi.di.law.warc.processors;

import it.unimi.di.law.bubing.parser.HTMLParser;
import it.unimi.di.law.warc.processors.ParallelFilteredProcessorRunner;
import it.unimi.di.law.warc.records.HttpResponseWarcRecord;
import it.unimi.di.law.warc.records.WarcHeader;
import it.unimi.di.law.warc.records.WarcRecord;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import java.io.IOException;
import java.net.URI;
import java.util.Arrays;
import java.util.Iterator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/processors/GraphBuilder.class */
/**
 * A processor that turns a WARC record into the sorted list of successors of the
 * corresponding page.
 *
 * <p>For every record that is not filtered out, the record is parsed with an
 * {@link HTMLParser}, each collected URL is looked up in {@link #map}, and every URL
 * for which the lookup does not return {@code -1} is translated through the
 * {@link #position} big array. The resulting values are deduplicated, sorted and
 * returned as a big array.
 *
 * <p>Each instance owns its own parser and link receiver; {@link #copy()} creates a new
 * instance that shares the (read-only here) repeated set, map and position array.
 */
public class GraphBuilder implements ParallelFilteredProcessorRunner.Processor<long[][]> {
    private static final Logger LOGGER = LoggerFactory.getLogger(GraphBuilder.class);

    /** Number of bits the store index is shifted left to build {@link #storeIndexMask}. */
    private static final int STORE_INDEX_SHIFT = 48;

    /** Maps the string form of a URL to an index into {@link #position}; {@code -1} means "skip". */
    private final Object2LongFunction<CharSequence> map;
    /** Big array translating an index returned by {@link #map} into the value emitted as successor. */
    private final long[][] position;
    /** Parser used to extract links from each processed record. */
    private final HTMLParser<Void> htmlParser;
    /** Value OR-ed with the per-record value passed to {@link #process} before querying {@link #repeatedSet}. */
    private final long storeIndexMask;
    /** Keys of the records that must be skipped. */
    private final LongOpenHashSet repeatedSet;
    /** Receiver that accumulates the URLs found by {@link #htmlParser}. */
    private final HTMLParser.SetLinkReceiver setLinkReceiver;

    /**
     * Creates a graph builder, loading its data structures from disk.
     *
     * <p>The store index is widened to {@code long} <em>before</em> being shifted. Shifting
     * the {@code int} directly would use only the low five bits of the shift distance
     * (i.e. {@code << 16}), placing the index in the wrong bits of the mask. The serialised
     * repeated set must be keyed with the same layout (store index in the bits from
     * {@value #STORE_INDEX_SHIFT} upwards).
     *
     * @param storeIndex the store index, as a decimal integer.
     * @param repeatedSetFile the file containing a serialised {@link LongOpenHashSet}.
     * @param mapFile the file containing a serialised {@link Object2LongFunction}.
     * @param positionFile the file containing the big array of longs used as position table.
     * @throws ClassNotFoundException if a serialised object refers to an unknown class.
     * @throws IOException if any of the files cannot be read.
     * @throws NumberFormatException if {@code storeIndex} is not a parsable integer.
     */
    @SuppressWarnings("unchecked") // The serialised object is trusted to be an Object2LongFunction<CharSequence>.
    public GraphBuilder(String storeIndex, String repeatedSetFile, String mapFile, String positionFile) throws ClassNotFoundException, IOException {
        this(((long) Integer.parseInt(storeIndex)) << STORE_INDEX_SHIFT,
                (LongOpenHashSet) BinIO.loadObject(repeatedSetFile),
                (Object2LongFunction<CharSequence>) BinIO.loadObject(mapFile),
                BinIO.loadLongsBig(positionFile));
    }

    /**
     * Creates a graph builder from already loaded data structures. A fresh parser and a
     * fresh link receiver are allocated for the new instance.
     *
     * @param storeIndexMask the value OR-ed with the per-record value in {@link #process}.
     * @param repeatedSet the set of keys of records to be skipped.
     * @param map the function from URL strings to indices into {@code position}.
     * @param position the big array translating indices into emitted values.
     */
    protected GraphBuilder(long storeIndexMask, LongOpenHashSet repeatedSet, Object2LongFunction<CharSequence> map, long[][] position) {
        this.repeatedSet = repeatedSet;
        this.map = map;
        this.position = position;
        this.htmlParser = new HTMLParser<>();
        this.setLinkReceiver = new HTMLParser.SetLinkReceiver();
        this.storeIndexMask = storeIndexMask;
    }

    /**
     * Computes the sorted, duplicate-free successor list of a record.
     *
     * @param warcRecord the record to process.
     * @param recordId the value supplied by the runner for this record; it is OR-ed with the
     *        store index mask to obtain the key looked up in the repeated set.
     * @return {@code null} if the key is in the repeated set or the record carries a
     *         {@code BUBING_IS_DUPLICATE} header; otherwise a big array wrapping the sorted
     *         distinct values obtained from the collected links (possibly empty).
     */
    @Override
    public long[][] process(WarcRecord warcRecord, long recordId) {
        final boolean repeated = this.repeatedSet.contains(this.storeIndexMask | recordId);
        if (repeated || warcRecord.getWarcHeader(WarcHeader.Name.BUBING_IS_DUPLICATE) != null) {
            return null;
        }

        try {
            // The cast is inside the try block on purpose: a record of the wrong type is
            // logged like any other parsing failure instead of aborting the run.
            this.htmlParser.parse(warcRecord.getWarcTargetURI(), (HttpResponseWarcRecord) warcRecord, this.setLinkReceiver);
        } catch (Exception e) {
            // Best effort: log and go on with whatever the receiver currently holds.
            // NOTE(review): after a failure the receiver may contain partial (or, if the parser
            // failed before resetting it, stale) links -- confirm against HTMLParser.parse().
            LOGGER.error("Unexpected exception during parsing", e);
        }

        // A set is used so that multiple links to the same target count once.
        final LongOpenHashSet successors = new LongOpenHashSet();
        for (URI url : this.setLinkReceiver.urls) {
            final long index = this.map.getLong(url.toString());
            if (index != -1) {
                successors.add(LongBigArrays.get(this.position, index));
            }
        }

        final long[] sorted = successors.toLongArray();
        Arrays.sort(sorted);
        return LongBigArrays.wrap(sorted);
    }

    /**
     * Returns a new builder sharing this builder's mask, repeated set, map and position
     * array, but with its own parser and link receiver.
     *
     * @return a copy of this builder.
     */
    @Override
    public GraphBuilder copy() {
        return new GraphBuilder(this.storeIndexMask, this.repeatedSet, this.map, this.position);
    }

    /** Does nothing: this class holds no resource that needs to be released. */
    @Override
    public void close() throws IOException {
    }
}
