package it.unimi.di.law.warc.processors;

import it.unimi.di.law.warc.processors.ParallelFilteredProcessorRunner;
import it.unimi.di.law.warc.records.HttpResponseWarcRecord;
import it.unimi.di.law.warc.records.WarcHeader;
import it.unimi.di.law.warc.records.WarcRecord;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.util.List;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;

/* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/processors/PageExtractorProcessorFromListOfHosts.class */
/**
 * A processor that produces a textual dump of every record whose target-URI host is
 * equal to one of the hosts listed in {@link #hostFile}.
 *
 * <p>The host list is read once, when this class is initialized; changing
 * {@link #hostFile} afterwards does not reload {@link #targetHosts}.
 */
public class PageExtractorProcessorFromListOfHosts implements ParallelFilteredProcessorRunner.Processor<String> {
    /** Path of the file listing the target hosts, one per line. */
    public static String hostFile = "targethosts.txt";

    /**
     * The target hosts read from {@link #hostFile}. Set to an empty array (never left
     * {@code null} by the initializer) when the file cannot be read.
     */
    public static String[] targetHosts;

    static {
        // Must run after the hostFile initializer above, hence placed after the field declarations.
        targetHosts = loadTargetHosts();
    }

    /**
     * Reads the lines of {@link #hostFile} into an array.
     *
     * @return the lines of the host file, or an empty array if the file cannot be read
     *         (the error is reported on standard error).
     */
    private static String[] loadTargetHosts() {
        try {
            List<String> lines = FileUtils.readLines(new File(hostFile));
            return lines.toArray(new String[lines.size()]);
        } catch (IOException e) {
            System.err.println("ERROR while reading host file " + hostFile);
            e.printStackTrace();
            // Fall back to "no hosts" so that process() filters everything out instead of
            // failing with a NullPointerException on every record.
            return new String[0];
        }
    }

    /** Does nothing: this processor holds no resources. */
    @Override
    public void close() throws IOException {
    }

    /**
     * Returns a new instance of this processor.
     *
     * @return a fresh {@code PageExtractorProcessorFromListOfHosts}.
     */
    @Override
    public ParallelFilteredProcessorRunner.Processor<String> copy() {
        return new PageExtractorProcessorFromListOfHosts();
    }

    /**
     * Checks whether a host is equal (exact, case-sensitive match) to one of the target hosts.
     *
     * @param str the host to look up; may be {@code null}.
     * @return {@code true} if {@code str} equals an element of {@link #targetHosts};
     *         {@code false} otherwise, including when {@code str} or the host list is {@code null}.
     */
    private boolean hostEqualToOneOfTargetHosts(String str) {
        final String[] hosts = targetHosts;
        // URI.getHost() yields null for URIs without a host component; the public
        // targetHosts field may also have been reset to null by external code.
        if (str == null || hosts == null) {
            return false;
        }
        for (String host : hosts) {
            if (str.equals(host)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Builds the textual dump of a record if its target-URI host is one of the target hosts.
     *
     * @param warcRecord the record to process.
     * @param j unused by this processor.
     * @return the empty string if the record has no target URI or its host is not a target
     *         host; otherwise a string containing the target URI, whether the record carries
     *         the {@code BUBING_IS_DUPLICATE} header, the record's string representation and,
     *         for HTTP response records, the entity content (empty if it cannot be read).
     */
    @Override
    public String process(WarcRecord warcRecord, long j) {
        final URI warcTargetURI = warcRecord.getWarcTargetURI();
        if (warcTargetURI == null || !hostEqualToOneOfTargetHosts(warcTargetURI.getHost())) {
            return "";
        }
        String content = "";
        // Only HTTP responses carry an entity; other record types are dumped with empty content.
        if (warcRecord instanceof HttpResponseWarcRecord) {
            try {
                content = IOUtils.toString(((HttpResponseWarcRecord) warcRecord).getEntity().getContent());
            } catch (Exception e) {
                // Best effort: report the failure and still emit the record with empty content.
                System.err.println("ERROR while reading content of " + warcTargetURI);
                e.printStackTrace();
            }
        }
        final boolean isDuplicate = warcRecord.getWarcHeader(WarcHeader.Name.BUBING_IS_DUPLICATE) != null;
        return "targetURI: " + warcTargetURI + "\nisDuplicate: " + isDuplicate + "\n" + warcRecord.toString() + "\nCONTENT:\n" + content + "\n";
    }
}
