package it.unimi.di.law.warc.tool;

import cern.colt.Arrays;
import it.unimi.di.big.mg4j.index.DiskBasedIndex;
import it.unimi.di.law.bubing.util.BURL;
import it.unimi.di.law.warc.io.CompressedWarcCachingReader;
import it.unimi.di.law.warc.io.WarcCachingReader;
import it.unimi.di.law.warc.records.HttpResponseWarcRecord;
import it.unimi.di.law.warc.records.WarcRecord;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.io.FastBufferedInputStream;
import it.unimi.dsi.fastutil.longs.LongBigList;
import it.unimi.dsi.fastutil.objects.Object2LongFunction;
import it.unimi.dsi.lang.ObjectParser;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.UnknownHostException;
import java.util.Locale;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.RejectedExecutionException;
import org.apache.http.ConnectionClosedException;
import org.apache.http.Header;
import org.apache.http.HttpConnectionFactory;
import org.apache.http.HttpException;
import org.apache.http.HttpHost;
import org.apache.http.HttpRequest;
import org.apache.http.HttpResponse;
import org.apache.http.HttpServerConnection;
import org.apache.http.MethodNotSupportedException;
import org.apache.http.ProtocolVersion;
import org.apache.http.entity.ContentType;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.DefaultBHttpClientConnection;
import org.apache.http.impl.DefaultBHttpClientConnectionFactory;
import org.apache.http.impl.DefaultBHttpServerConnection;
import org.apache.http.impl.DefaultBHttpServerConnectionFactory;
import org.apache.http.message.BasicHttpRequest;
import org.apache.http.message.BasicStatusLine;
import org.apache.http.protocol.BasicHttpContext;
import org.apache.http.protocol.HttpContext;
import org.apache.http.protocol.HttpCoreContext;
import org.apache.http.protocol.HttpProcessor;
import org.apache.http.protocol.HttpProcessorBuilder;
import org.apache.http.protocol.HttpRequestExecutor;
import org.apache.http.protocol.HttpRequestHandler;
import org.apache.http.protocol.HttpService;
import org.apache.http.protocol.RequestConnControl;
import org.apache.http.protocol.RequestContent;
import org.apache.http.protocol.RequestExpectContinue;
import org.apache.http.protocol.RequestTargetHost;
import org.apache.http.protocol.RequestUserAgent;
import org.apache.http.protocol.UriHttpRequestHandlerMapper;
import org.apache.log4j.varia.ExternallyRolledFileAppender;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/tool/WarcHttpServer.class */
public class WarcHttpServer {
    /** Logger shared by the server and by all of its nested fetcher and handler classes. */
    private static final Logger LOGGER = LoggerFactory.getLogger((Class<?>) WarcHttpServer.class);
    /** HTTP/1.1 protocol version, used when a synthetic status line is built (redirect masking in WarcFetcher). */
    private static final ProtocolVersion PROTOCOL_VERSION = new ProtocolVersion("HTTP", 1, 1);

    /* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/tool/WarcHttpServer$ExternalFetcher.class */
    /**
     * A fetcher that retrieves the requested URI directly from the host named in the URI,
     * using a plain (non-TLS) socket and a single {@code GET} request.
     */
    public static class ExternalFetcher implements HttpResponseFetcher {
        /** Client-side request interceptors applied to every outgoing request. */
        private final HttpProcessor httpProcessor = HttpProcessorBuilder.create()
                .add(new RequestContent())
                .add(new RequestTargetHost())
                .add(new RequestConnControl())
                .add(new RequestUserAgent("Test/1.1"))
                .add(new RequestExpectContinue(true))
                .build();
        private final HttpRequestExecutor httpExecutor = new HttpRequestExecutor();
        private final DefaultBHttpClientConnectionFactory connFactory = new DefaultBHttpClientConnectionFactory();

        /**
         * Fetches {@code uri} from its own host and copies status line, headers and entity into {@code httpResponse}.
         *
         * @param uri the URI to fetch; it must carry a host, otherwise nothing is fetched.
         * @param httpResponse the response to fill on success.
         * @return {@code true} if the response was filled; {@code false} if the URI has no host or the remote
         * server answered 404.
         * @throws UnknownHostException if the host cannot be resolved.
         * @throws IOException on I/O errors while connecting or talking to the remote server.
         * @throws HttpException on HTTP protocol violations.
         */
        @Override // it.unimi.di.law.warc.tool.WarcHttpServer.HttpResponseFetcher
        public boolean fetch(URI uri, HttpResponse httpResponse) throws UnknownHostException, IOException, HttpException {
            final String host = uri.getHost();
            if (host == null) {
                // A relative (or otherwise host-less) URI cannot be fetched externally: let the next fetcher try.
                WarcHttpServer.LOGGER.info(toString() + " cannot fetch '" + uri + "': no host");
                return false;
            }
            final int port = uri.getPort();
            final HttpCoreContext context = HttpCoreContext.create();
            context.setTargetHost(new HttpHost(host, port));

            // Use the raw (still percent-encoded) components: the decoded ones may contain spaces or
            // other characters that are illegal in a request line. An empty path is sent as "/".
            final String rawPath = uri.getRawPath();
            final String rawQuery = uri.getRawQuery();
            final String target = (rawPath == null || rawPath.isEmpty() ? "/" : rawPath)
                    + (rawQuery != null ? "?" + rawQuery : "");

            final Socket socket = new Socket(host, port == -1 ? 80 : port);
            boolean handedOff = false;
            try {
                final DefaultBHttpClientConnection connection = this.connFactory.createConnection(socket);
                final BasicHttpRequest request = new BasicHttpRequest("GET", target);
                this.httpExecutor.preProcess(request, this.httpProcessor, context);
                final HttpResponse remote = this.httpExecutor.execute(request, connection, context);
                this.httpExecutor.postProcess(remote, this.httpProcessor, context);
                if (remote.getStatusLine().getStatusCode() == 404) {
                    WarcHttpServer.LOGGER.info(toString() + " got '" + remote.getStatusLine() + "' for '" + target + "' at '" + uri.getHost() + "'");
                    return false;
                }
                WarcHttpServer.LOGGER.info(toString() + " fetched " + uri);
                httpResponse.setStatusLine(remote.getStatusLine());
                httpResponse.setHeaders(remote.getAllHeaders());
                // The entity is handed to the caller together with the still-open connection.
                // NOTE(review): nothing visible here closes the socket after the entity has been consumed.
                httpResponse.setEntity(remote.getEntity());
                handedOff = true;
                return true;
            } finally {
                if (!handedOff) {
                    // 404 or failure: nobody will read from this connection, so release the socket now.
                    try {
                        socket.close();
                    } catch (IOException ignored) {
                        // Best-effort cleanup; the primary outcome (return value or exception) wins.
                    }
                }
            }
        }

        /** Returns the simple class name, used as a prefix in log messages. */
        @Override
        public String toString() {
            return ExternalFetcher.class.getSimpleName();
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/tool/WarcHttpServer$HttpResponseFetcher.class */
    /**
     * A strategy that tries to produce an HTTP response for a given URI.
     * The request handler in this file tries fetchers in order until one returns {@code true}.
     */
    public interface HttpResponseFetcher {
        /**
         * Tries to fill {@code httpResponse} with the content associated with {@code uri}.
         *
         * @param uri the requested URI.
         * @param httpResponse the response to fill (status line, headers, entity) on success.
         * @return {@code true} if the response was filled, {@code false} if this fetcher has nothing for {@code uri}.
         * @throws IOException on I/O errors.
         * @throws HttpException on HTTP protocol errors.
         */
        boolean fetch(URI uri, HttpResponse httpResponse) throws IOException, HttpException;
    }

    /* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/tool/WarcHttpServer$RequestListener.class */
    public static class RequestListener {
        private final ExecutorService exec = Executors.newCachedThreadPool();
        private final HttpConnectionFactory<DefaultBHttpServerConnection> connFactory;
        private final ServerSocket serverSocket;
        private final HttpService httpService;

        public RequestListener(int i, HttpResponseFetcher[] httpResponseFetcherArr) throws IOException {
            HttpProcessor build = HttpProcessorBuilder.create().build();
            UriHttpRequestHandlerMapper uriHttpRequestHandlerMapper = new UriHttpRequestHandlerMapper();
            uriHttpRequestHandlerMapper.register("*", new SequentialFetchersRequestHandler(httpResponseFetcherArr));
            this.connFactory = DefaultBHttpServerConnectionFactory.INSTANCE;
            this.serverSocket = new ServerSocket(i);
            this.httpService = new HttpService(build, uriHttpRequestHandlerMapper);
        }

        public void stop() throws IOException {
            this.exec.shutdown();
            this.serverSocket.close();
        }

        public void start() throws IOException {
            while (!this.exec.isShutdown()) {
                try {
                    final Socket accept = this.serverSocket.accept();
                    this.exec.execute(new Runnable() { // from class: it.unimi.di.law.warc.tool.WarcHttpServer.RequestListener.1
                        @Override // java.lang.Runnable
                        public void run() {
                            try {
                                WarcHttpServer.LOGGER.info("incoming connection from " + accept.getInetAddress());
                                HttpServerConnection httpServerConnection = (HttpServerConnection) RequestListener.this.connFactory.createConnection(accept);
                                BasicHttpContext basicHttpContext = new BasicHttpContext(null);
                                while (!RequestListener.this.exec.isShutdown() && httpServerConnection.isOpen()) {
                                    try {
                                        try {
                                            RequestListener.this.httpService.handleRequest(httpServerConnection, basicHttpContext);
                                        } finally {
                                            try {
                                                httpServerConnection.shutdown();
                                            } catch (IOException e) {
                                            }
                                        }
                                    } catch (ConnectionClosedException e2) {
                                        WarcHttpServer.LOGGER.info("client closed connection");
                                        try {
                                            httpServerConnection.shutdown();
                                        } catch (IOException e3) {
                                        }
                                    } catch (IOException e4) {
                                        WarcHttpServer.LOGGER.error("I/O error", (Throwable) e4);
                                        try {
                                            httpServerConnection.shutdown();
                                        } catch (IOException e5) {
                                        }
                                    } catch (HttpException e6) {
                                        WarcHttpServer.LOGGER.error("unrecoverable HTTP protocol violation", (Throwable) e6);
                                        try {
                                            httpServerConnection.shutdown();
                                        } catch (IOException e7) {
                                        }
                                    }
                                }
                            } catch (IOException e8) {
                                WarcHttpServer.LOGGER.error("exception while handling request", (Throwable) e8);
                            }
                        }
                    });
                } catch (RejectedExecutionException e) {
                    if (!this.exec.isShutdown()) {
                        WarcHttpServer.LOGGER.error("task submission rejected", (Throwable) e);
                    }
                }
            }
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/tool/WarcHttpServer$ReverseProxyFetcher.class */
    /**
     * A fetcher that forwards every request, as a {@code GET} for the full URI, to a fixed target host and port.
     */
    public static class ReverseProxyFetcher implements HttpResponseFetcher {
        /** The host all requests are forwarded to. */
        private final HttpHost target;
        /** Client-side request interceptors applied to every outgoing request. */
        private final HttpProcessor httpProcessor = HttpProcessorBuilder.create()
                .add(new RequestContent())
                .add(new RequestTargetHost())
                .add(new RequestConnControl())
                .add(new RequestUserAgent("Test/1.1"))
                .add(new RequestExpectContinue(true))
                .build();
        private final HttpRequestExecutor httpExecutor = new HttpRequestExecutor();
        private final DefaultBHttpClientConnectionFactory connFactory = new DefaultBHttpClientConnectionFactory();

        /**
         * Creates a fetcher forwarding to the given target.
         *
         * @param str the target host name.
         * @param str2 the target port, as a decimal string.
         * @throws NumberFormatException if {@code str2} is not a valid integer.
         */
        public ReverseProxyFetcher(String str, String str2) {
            this.target = new HttpHost(str, Integer.parseInt(str2));
        }

        /**
         * Forwards a {@code GET} for {@code uri} to the target and copies status line, headers and entity
         * into {@code httpResponse}.
         *
         * @param uri the requested URI, sent verbatim as the request target.
         * @param httpResponse the response to fill on success.
         * @return {@code true} if the response was filled; {@code false} if the target answered 404.
         * @throws UnknownHostException if the target host cannot be resolved.
         * @throws IOException on I/O errors while connecting or talking to the target.
         * @throws HttpException on HTTP protocol violations.
         */
        @Override // it.unimi.di.law.warc.tool.WarcHttpServer.HttpResponseFetcher
        public boolean fetch(URI uri, HttpResponse httpResponse) throws UnknownHostException, IOException, HttpException {
            final HttpCoreContext context = HttpCoreContext.create();
            context.setTargetHost(this.target);
            final Socket socket = new Socket(this.target.getHostName(), this.target.getPort());
            boolean handedOff = false;
            try {
                final DefaultBHttpClientConnection connection = this.connFactory.createConnection(socket);
                final BasicHttpRequest request = new BasicHttpRequest("GET", uri.toString());
                this.httpExecutor.preProcess(request, this.httpProcessor, context);
                final HttpResponse remote = this.httpExecutor.execute(request, connection, context);
                this.httpExecutor.postProcess(remote, this.httpProcessor, context);
                if (remote.getStatusLine().getStatusCode() == 404) {
                    return false;
                }
                WarcHttpServer.LOGGER.info(toString() + " fetched " + uri);
                httpResponse.setStatusLine(remote.getStatusLine());
                httpResponse.setHeaders(remote.getAllHeaders());
                // The entity is handed to the caller together with the still-open connection.
                // NOTE(review): nothing visible here closes the socket after the entity has been consumed.
                httpResponse.setEntity(remote.getEntity());
                handedOff = true;
                return true;
            } finally {
                if (!handedOff) {
                    // 404 or failure: nobody will read from this connection, so release the socket now.
                    try {
                        socket.close();
                    } catch (IOException ignored) {
                        // Best-effort cleanup; the primary outcome (return value or exception) wins.
                    }
                }
            }
        }

        /** Returns {@code ReverseProxyFetcher(host,port)}, used as a prefix in log messages. */
        @Override
        public String toString() {
            return String.format("%s(%s,%d)", ReverseProxyFetcher.class.getSimpleName(), this.target.getHostName(), this.target.getPort());
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/tool/WarcHttpServer$SequentialFetchersRequestHandler.class */
    /**
     * A request handler that asks each configured fetcher, in order, to serve the requested URI
     * and answers 404 when none of them does.
     */
    static class SequentialFetchersRequestHandler implements HttpRequestHandler {
        /** The fetchers, in the order in which they are tried. */
        private final HttpResponseFetcher[] fetchers;

        /**
         * @param httpResponseFetcherArr the fetchers to try, in order; the array is stored as is.
         */
        public SequentialFetchersRequestHandler(HttpResponseFetcher[] httpResponseFetcherArr) {
            this.fetchers = httpResponseFetcherArr;
        }

        /**
         * Serves a request by delegating to the first fetcher that accepts its URI.
         *
         * @throws HttpException if the request URI is malformed, or (as a
         * {@link MethodNotSupportedException}) if the method is not GET, HEAD or POST.
         * @throws IOException if a fetcher fails with an I/O error.
         */
        @Override // org.apache.http.protocol.HttpRequestHandler
        public void handle(HttpRequest httpRequest, HttpResponse httpResponse, HttpContext httpContext) throws HttpException, IOException {
            final String method = httpRequest.getRequestLine().getMethod().toUpperCase(Locale.ENGLISH);

            // The URI is validated before the method, so a malformed URI is reported first.
            final URI requested;
            try {
                requested = new URI(httpRequest.getRequestLine().getUri());
            } catch (URISyntaxException e) {
                throw new HttpException("Malformed URI", e);
            }

            final boolean supported = method.equals("GET") || method.equals("HEAD") || method.equals("POST");
            if (!supported) {
                WarcHttpServer.LOGGER.warn(method + " method not supported for " + requested);
                throw new MethodNotSupportedException(method + " method not supported");
            }

            for (final HttpResponseFetcher fetcher : this.fetchers) {
                if (fetcher.fetch(requested, httpResponse)) {
                    WarcHttpServer.LOGGER.info("Served " + requested);
                    return;
                }
            }

            // No fetcher could serve the URI.
            httpResponse.setStatusCode(404);
            final String body = "<html><body><h1>Record with URI " + requested + " not found</h1></body></html>";
            httpResponse.setEntity(new StringEntity(body, ContentType.create("text/html", "UTF-8")));
            WarcHttpServer.LOGGER.warn("Not found " + requested);
        }
    }

    /* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/tool/WarcHttpServer$WarcFetcher.class */
    /**
     * A fetcher that serves responses stored in a compressed WARC file, located through a
     * URL-to-position map and an offset index loaded from files sharing a common prefix.
     */
    public static class WarcFetcher implements HttpResponseFetcher {
        /** The common basename of the {@code .mph}, index and {@code .warc.gz} files. */
        private final String prefix;
        /** Whether 3xx responses are turned into 200 responses without their {@code Location} header. */
        private final boolean maskRedirects;
        /** Maps a normalized URL to its position in {@link #index}; -1 is treated as "not present". */
        private final Object2LongFunction<CharSequence> map;
        /** Positions handed to the reader, indexed by the values of {@link #map}. */
        private final LongBigList index;
        /** The reader over the WARC file; shared by all requests, hence accessed under a lock. */
        private final WarcCachingReader reader;

        /**
         * Loads the map ({@code str + ".mph"}), the index and opens the WARC file ({@code str + ".warc.gz"}).
         *
         * @param str the common file-name prefix.
         * @param z whether redirects must be masked.
         * @throws ClassNotFoundException if a serialized object cannot be deserialized.
         * @throws IOException if any of the files cannot be read.
         */
        @SuppressWarnings("unchecked") // the type of the deserialized map cannot be checked at runtime
        public WarcFetcher(String str, boolean z) throws ClassNotFoundException, IOException {
            this.prefix = str;
            this.maskRedirects = z;
            this.map = (Object2LongFunction<CharSequence>) BinIO.loadObject(str + ".mph");
            this.index = (LongBigList) BinIO.loadObject(str + DiskBasedIndex.INDEX_EXTENSION);
            this.reader = new CompressedWarcCachingReader(new FastBufferedInputStream(new FileInputStream(new File(str + ".warc.gz"))));
        }

        /**
         * String-only variant of {@link #WarcFetcher(String, boolean)}.
         *
         * @param str the common file-name prefix.
         * @param str2 parsed with {@link Boolean#parseBoolean(String)} to decide whether redirects are masked.
         */
        public WarcFetcher(String str, String str2) throws ClassNotFoundException, IOException {
            this(str, Boolean.parseBoolean(str2));
        }

        /**
         * Creates a fetcher that does not mask redirects.
         *
         * @param str the common file-name prefix.
         */
        public WarcFetcher(String str) throws ClassNotFoundException, IOException {
            this(str, false);
        }

        /**
         * Looks {@code uri} up in the WARC file and, if an HTTP response record is found, copies it into
         * {@code httpResponse}, adding every WARC header as an {@code X-WarcHttpServer-}-prefixed header.
         *
         * @param uri the requested URI.
         * @param httpResponse the response to fill on success.
         * @return {@code true} if a stored HTTP response was found and copied, {@code false} otherwise.
         * @throws IOException on I/O errors while reading the WARC file.
         * @throws HttpException on HTTP protocol errors.
         */
        @Override // it.unimi.di.law.warc.tool.WarcHttpServer.HttpResponseFetcher
        public boolean fetch(URI uri, HttpResponse httpResponse) throws IOException, HttpException {
            final URI normalized = BURL.parse(uri.toString());
            if (normalized == null) {
                // Presumably BURL.parse signals an unparsable URL with null; treat it as "not found" rather than fail.
                return false;
            }
            final long position = this.map.getLong(normalized.toString());
            if (position == -1) {
                return false;
            }

            final WarcRecord record;
            // Fetchers are invoked concurrently from the listener's thread pool: positioning the shared
            // reader and reading from it must happen atomically.
            // NOTE(review): assumes the cached record no longer depends on the reader's position once read — confirm.
            synchronized (this.reader) {
                this.reader.position(this.index.getLong(position));
                record = this.reader.cache().read();
            }
            if (!(record instanceof HttpResponseWarcRecord)) {
                return false;
            }
            final HttpResponseWarcRecord stored = (HttpResponseWarcRecord) record;
            WarcHttpServer.LOGGER.info(toString() + " fetched " + uri);

            String maskedLocation = null;
            if (stored.getStatusLine().getStatusCode() / 100 == 3 && this.maskRedirects) {
                // Present the redirect as a plain 200 and move its target to a custom header (added below).
                httpResponse.setStatusLine(new BasicStatusLine(WarcHttpServer.PROTOCOL_VERSION, 200, "OK"));
                for (Header header : stored.getAllHeaders()) {
                    if (header.getName().equalsIgnoreCase("location")) {
                        maskedLocation = header.getValue();
                    } else {
                        httpResponse.addHeader(header);
                    }
                }
                WarcHttpServer.LOGGER.info(toString() + " masked redirect to " + maskedLocation);
            } else {
                httpResponse.setStatusLine(stored.getStatusLine());
                httpResponse.setHeaders(stored.getAllHeaders());
            }
            for (Header warcHeader : stored.getWarcHeaders().getAllHeaders()) {
                httpResponse.addHeader("X-WarcHttpServer-" + warcHeader.getName(), warcHeader.getValue());
            }
            if (maskedLocation != null) {
                httpResponse.addHeader("X-WarcHttpServer-Location", maskedLocation);
            }
            httpResponse.setEntity(stored.getEntity());
            return true;
        }

        /** Returns {@code WarcFetcher(prefix)}, used as a prefix in log messages. */
        @Override
        public String toString() {
            return String.format("%s(%s)", WarcFetcher.class.getSimpleName(), this.prefix);
        }
    }

    /**
     * Command-line entry point: starts a server on the given port using the given fetchers.
     *
     * @param strArr the port number followed by one or more {@code ObjectParser.fromSpec}-style
     * specifications of nested {@code HttpResponseFetcher} classes (e.g. {@code WarcFetcher(prefix)}).
     * @throws IOException if the server socket cannot be opened or accepting a connection fails.
     */
    public static void main(String[] strArr) throws IOException {
        // Fewer than two arguments (which includes a lone --help / -h) cannot start a server: print usage.
        if (strArr.length < 2) {
            System.err.println("You must specify a port number followed by a list (of at least one) 'ObjectParser.fromSpec' parseable 'HttpResponseFetcher' objects.");
            System.exit(1);
            return;
        }

        final int port;
        try {
            port = Integer.parseInt(strArr[0]);
        } catch (NumberFormatException e) {
            System.err.println("Invalid port number '" + strArr[0] + "'.");
            System.exit(1);
            return;
        }

        // Fetcher specs are resolved as nested classes of WarcHttpServer in this package.
        final String[] packages = {WarcHttpServer.class.getPackage().getName()};
        final String outerName = WarcHttpServer.class.getSimpleName();
        final HttpResponseFetcher[] fetchers = new HttpResponseFetcher[strArr.length - 1];
        for (int i = 1; i < strArr.length; i++) {
            try {
                fetchers[i - 1] = (HttpResponseFetcher) ObjectParser.fromSpec(outerName + "$" + strArr[i], HttpResponseFetcher.class, packages);
            } catch (Exception e) {
                throw new RuntimeException("Impossible to instantiate '" + strArr[i] + "' fetcher.", e);
            }
        }

        final RequestListener listener = new RequestListener(port, fetchers);
        LOGGER.info("serving on port " + port + " using fetchers: " + Arrays.toString(fetchers));
        listener.start();
    }
}
