package it.unimi.di.law.warc.io;

import it.unimi.di.law.warc.records.AbstractWarcRecord;
import it.unimi.di.law.warc.records.WarcHeader;
import it.unimi.di.law.warc.records.WarcRecord;
import it.unimi.di.law.warc.util.BoundSessionInputBuffer;
import java.io.IOException;
import java.io.InputStream;
import org.apache.http.Header;
import org.apache.http.HttpException;
import org.apache.http.ParseException;
import org.apache.http.ProtocolVersion;
import org.apache.http.impl.io.AbstractMessageParser;
import org.apache.http.impl.io.HttpTransportMetricsImpl;
import org.apache.http.impl.io.SessionInputBufferImpl;
import org.apache.http.io.SessionInputBuffer;
import org.apache.http.message.BasicLineParser;
import org.apache.http.message.HeaderGroup;
import org.apache.http.message.LineParser;
import org.apache.http.message.ParserCursor;
import org.apache.http.util.CharArrayBuffer;
import org.apache.pdfbox.contentstream.operator.OperatorName;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/bubing-0.9.11.jar:it/unimi/di/law/warc/io/AbstractWarcReader.class */
/**
 * Base class for WARC readers: parses the version line and the WARC headers of
 * each record from a session input buffer and exposes the record payload as a
 * length-bounded buffer.
 *
 * <p>Instances keep mutable parsing state (the current input buffer, a reusable
 * line buffer and the payload of the last record returned), so a reader must
 * be given an input with {@link #setInput(InputStream)} before
 * {@link #read(boolean)} is called.
 */
public abstract class AbstractWarcReader implements WarcReader {
    /**
     * Whether {@link #read(boolean)} rejects records whose version is not 1.0.
     * Read once from the system property {@code it.unimi.di.law.warc.io.version}
     * (defaults to {@code true}).
     */
    private static final boolean VERSION = Boolean.parseBoolean(System.getProperty("it.unimi.di.law.warc.io.version", "true"));
    private static final Logger LOGGER = LoggerFactory.getLogger(AbstractWarcReader.class);
    /** Size used both for the session input buffer and for the initial capacity of the line buffer. */
    private static final int BUFFER_SIZE = 1024;
    /** The buffer records are read from; {@code null} until {@link #setInput(InputStream)} is called. */
    private SessionInputBuffer buffer;
    /** Reusable buffer holding the line currently being parsed. */
    private final CharArrayBuffer line = new CharArrayBuffer(BUFFER_SIZE);
    private final HttpTransportMetricsImpl metrics = new HttpTransportMetricsImpl();
    private final LineParser parser = new BasicLineParser(WarcRecord.PROTOCOL_VERSION);
    /** Payload of the record most recently returned by {@link #read(boolean)}, or {@code null} if there is none. */
    private BoundSessionInputBuffer payload = null;

    /**
     * Binds this reader to a new input stream and forgets the payload of any
     * previously read record.
     *
     * @param inputStream the stream the next records will be read from.
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public void setInput(InputStream inputStream) {
        SessionInputBufferImpl sessionBuffer = new SessionInputBufferImpl(this.metrics, BUFFER_SIZE, 0, null, null);
        sessionBuffer.bind(inputStream);
        this.buffer = sessionBuffer;
        this.payload = null;
    }

    /**
     * Reads the next line from the input and parses it as a protocol version.
     *
     * @return the parsed version, or {@code null} if the input returned no line
     *         (end of stream).
     * @throws WarcFormatException if the line cannot be parsed as a protocol version.
     * @throws IOException if reading from the underlying buffer fails.
     */
    private ProtocolVersion parseHead() throws IOException {
        this.line.clear();
        int read = this.buffer.readLine(this.line);
        if (LOGGER.isTraceEnabled()) {
            LOGGER.trace("Protocol header '{}'.", this.line.toString());
        }
        if (read == -1) {
            return null;
        }
        try {
            return this.parser.parseProtocolVersion(this.line, new ParserCursor(0, this.line.length()));
        } catch (ParseException e) {
            throw new WarcFormatException("Can't parse WARC version header.", e);
        }
    }

    /**
     * Parses the value of a {@code Content-Length} WARC header.
     *
     * @param contentLengthHeader the header to parse.
     * @return the non-negative length declared by the header.
     * @throws WarcFormatException if the value is not a valid {@code long} or is negative.
     */
    private static long parseContentLength(Header contentLengthHeader) throws WarcFormatException {
        String value = contentLengthHeader.getValue();
        long contentLength;
        try {
            contentLength = Long.parseLong(value);
        } catch (NumberFormatException e) {
            throw new WarcFormatException("Can't parse 'Content-Length' WARC header (is \"" + value + "\")", e);
        }
        if (contentLength < 0) {
            // A negative length is malformed input, not a programming error: report it as a format problem.
            throw new WarcFormatException("Negative 'Content-Length' WARC header (is \"" + value + "\")");
        }
        return contentLength;
    }

    /**
     * Reads the next WARC record from the current input.
     *
     * @param z if {@code true} and a previous record was returned, the remainder
     *        of that record's payload and the two line terminators that close the
     *        record are consumed before the next record is parsed; if
     *        {@code false} parsing starts at the current position of the input.
     * @return the next record, or {@code null} if the input has no further
     *         version line.
     * @throws WarcFormatException if the record terminator, the version line, the
     *         WARC headers or the {@code Content-Length} header are missing or
     *         malformed.
     * @throws IllegalArgumentException if version checking is enabled and the
     *         record version is not 1.0.
     * @throws IOException if reading from the underlying buffer fails.
     */
    /* JADX INFO: Access modifiers changed from: protected */
    public WarcRecord read(boolean z) throws IOException, WarcFormatException {
        if (z && this.payload != null) {
            this.payload.consume();
            this.payload = null;
            this.line.clear();
            // readLine appends to the line buffer, so after reading the two
            // terminating lines the buffer is empty only if both were empty.
            this.buffer.readLine(this.line);
            this.buffer.readLine(this.line);
            if (this.line.length() != 0) {
                throw new WarcFormatException("Missing CRLFs at WARC record end, got \"" + this.line + "\"");
            }
            this.line.clear();
        }
        ProtocolVersion version = parseHead();
        if (version == null) {
            return null;
        }
        if (VERSION && (version.getMajor() != 1 || version.getMinor() != 0)) {
            throw new IllegalArgumentException("Unsupported WARC version " + version);
        }
        HeaderGroup warcHeaders = new HeaderGroup();
        try {
            warcHeaders.setHeaders(AbstractMessageParser.parseHeaders(this.buffer, -1, -1, null));
            Header contentLengthHeader = WarcHeader.getFirstHeader(warcHeaders, WarcHeader.Name.CONTENT_LENGTH);
            if (contentLengthHeader == null) {
                throw new WarcFormatException("Missing 'Content-Length' WARC header");
            }
            // Only the numeric conversion is guarded against NumberFormatException, so a
            // failure while building the record is never misreported as a bad Content-Length.
            long contentLength = parseContentLength(contentLengthHeader);
            this.payload = new BoundSessionInputBuffer(this.buffer, contentLength);
            return AbstractWarcRecord.fromPayload(warcHeaders, this.payload);
        } catch (HttpException e) {
            throw new WarcFormatException("Can't parse WARC headers", e);
        }
    }
}
