package gate.mimir.index;

import it.unimi.dsi.fastutil.longs.Long2ObjectLinkedOpenHashMap;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilenameFilter;
import java.io.IOException;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.ObjectStreamClass;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipFile;
import java.util.zip.ZipOutputStream;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/mimir-core-6.2-SNAPSHOT.jar:gate/mimir/index/DocumentCollection.class */
public class DocumentCollection {
    /**
     * Size limit of {@link #documentCache}: getDocumentData drops the last
     * cache entry once the cache holds more than 100 entries.
     */
    protected static final int DOCUMENT_DATA_CACHE_SIZE = 100;
    /**
     * The zip collection files opened for reading. Sorted (by first document
     * ID) when the collection is constructed; files completed later are
     * appended. Code in this class uses this list as the lock object when
     * modifying it.
     */
    protected List<CollectionFile> collectionFiles;
    /** Writer for the zip file currently being filled; created on the first write. */
    protected CollectionFileWriter collectionFileWriter;
    /** Logger shared by this class and its nested classes. */
    private static Logger logger = LoggerFactory.getLogger((Class<?>) DocumentCollection.class);
    /** Directory in which the collection zip files are looked up and created. */
    protected File indexDirectory;
    /** Cache of document data read from the zip files, keyed by document ID. */
    protected Long2ObjectLinkedOpenHashMap<DocumentData> documentCache;
    /** Set by close(); getDocumentData refuses to work once this is true. */
    private volatile boolean closed = false;
    /**
     * Size threshold for a single collection file: a writer rejects a document
     * when its running length plus the serialized document exceeds this value.
     */
    public static final long ZIP_FILE_MAX_SIZE = 2000000000;
    /** Entry-count threshold (250000) used when filling and compacting collection files. */
    public static final int ZIP_FILE_MAX_ENTRIES = 250000;
    /** ID that will be given to the next document written to this collection. */
    protected long nextDocumentId;

    /* loaded from: input_file:WEB-INF/lib/mimir-core-6.2-SNAPSHOT.jar:gate/mimir/index/DocumentCollection$CollectionFile.class */
    /**
     * A single zip file of the collection, opened for reading. Each zip entry
     * is named after the (decimal) ID of the document it stores.
     */
    public static class CollectionFile implements Comparable<CollectionFile> {
        public static final String MIMIR_COLLECTION_BASENAME = "mimir-collection-";
        public static final String MIMIR_COLLECTION_EXTENSION = ".zip";
        /**
         * Matches collection file names. Group 1 is the whole file ID (e.g.
         * "3" or "0-3"), group 2 the leading number, group 3 the optional suffix.
         */
        protected static final Pattern MIMIR_COLLECTION_PATTERN = Pattern.compile("\\Qmimir-collection-\\E((\\d+)(?:-([-0-9a-zA-Z]+))?)\\Q.zip\\E");
        /** Accepts exactly the file names matched by {@link #MIMIR_COLLECTION_PATTERN}. */
        public static FilenameFilter FILENAME_FILTER = new FilenameFilter() { // from class: gate.mimir.index.DocumentCollection.CollectionFile.1
            @Override // java.io.FilenameFilter
            public boolean accept(File file, String str) {
                return CollectionFile.MIMIR_COLLECTION_PATTERN.matcher(str).matches();
            }
        };
        /** The zip file on disk. */
        protected File file;
        /** The same file, opened for random access to its entries. */
        protected ZipFile zipFile;
        /** Smallest document ID found among the entries, or -1 if there is none. */
        protected long firstEntry;
        /** Largest document ID found among the entries, or -1 if there is none. */
        protected long lastEntry;
        /** Leading number parsed from the file name, or -1 if the name does not match. */
        protected int collectionFileNumber;
        /** Length of the file on disk, sampled when the file was opened. */
        protected long length;
        /** Number of entries whose name parsed as a document ID. */
        protected int documentCount;

        /**
         * Extracts the ID part of a collection file name.
         *
         * @param str a file name
         * @return the text between the base name and the extension, or
         *         {@code null} if the name is not a collection file name
         */
        protected static String getCollectionFileId(String str) {
            Matcher matcher = MIMIR_COLLECTION_PATTERN.matcher(str);
            return matcher.matches() ? matcher.group(1) : null;
        }

        /**
         * Extracts the leading number of a collection file name.
         *
         * @param str a file name
         * @return the leading number, or -1 if the name is not a collection
         *         file name
         */
        protected static int getCollectionFileNumber(String str) {
            Matcher matcher = MIMIR_COLLECTION_PATTERN.matcher(str);
            return matcher.matches() ? Integer.parseInt(matcher.group(2)) : -1;
        }

        /**
         * Builds the file name for a given collection file ID.
         *
         * @param str the collection file ID
         * @return base name + ID + extension
         */
        public static String getCollectionFileName(String str) {
            return MIMIR_COLLECTION_BASENAME + str + MIMIR_COLLECTION_EXTENSION;
        }

        /**
         * Opens a collection file and scans its entry names to find the range
         * of document IDs and the number of documents it holds.
         *
         * @param file the zip file to open
         * @throws ZipException if the file is not a valid zip file
         * @throws IOException if the file cannot be read
         */
        public CollectionFile(File file) throws ZipException, IOException {
            this.file = file;
            this.zipFile = new ZipFile(file);
            try {
                this.collectionFileNumber = getCollectionFileNumber(file.getName());
                long smallest = Long.MAX_VALUE;
                long largest = -1L;
                int documents = 0;
                Enumeration<? extends ZipEntry> entries = this.zipFile.entries();
                while (entries.hasMoreElements()) {
                    String name = entries.nextElement().getName();
                    try {
                        long documentId = Long.parseLong(name);
                        largest = Math.max(largest, documentId);
                        smallest = Math.min(smallest, documentId);
                        documents++;
                    } catch (NumberFormatException e) {
                        // Entries that are not document IDs are skipped, not fatal.
                        DocumentCollection.logger.warn("Unparseable zip entry name: {}", name);
                    }
                }
                // No parseable entry at all: report -1 rather than Long.MAX_VALUE.
                this.firstEntry = smallest == Long.MAX_VALUE ? -1L : smallest;
                this.lastEntry = largest;
                this.documentCount = documents;
                this.length = file.length();
            } catch (RuntimeException e) {
                // Do not leave the zip file open when the object is never handed out.
                try {
                    this.zipFile.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
                throw e;
            }
        }

        /** Orders collection files by the smallest document ID they contain. */
        @Override // java.lang.Comparable
        public int compareTo(CollectionFile collectionFile) {
            return Long.compare(this.firstEntry, collectionFile.firstEntry);
        }

        /**
         * Tells whether this file stores the given document.
         *
         * @param j the document ID
         * @return {@code true} if the ID is inside this file's ID range and a
         *         zip entry with that name exists
         */
        public boolean containsDocument(long j) {
            return this.firstEntry <= j && j <= this.lastEntry && this.zipFile.getEntry(Long.toString(j)) != null;
        }

        /**
         * Reads and deserializes the data of one document.
         *
         * @param l the document ID
         * @return the document data stored under that ID
         * @throws NoSuchElementException if this file has no entry for the ID
         * @throws IOException if the entry cannot be read, or if it contains a
         *         class that cannot be resolved
         */
        public DocumentData getDocumentData(Long l) throws IOException {
            ZipEntry entry = this.zipFile.getEntry(Long.toString(l.longValue()));
            if (entry == null) {
                throw new NoSuchElementException("No entry found for document ID " + l);
            }
            // The raw stream is a resource of its own so that it is released even
            // when the object stream cannot be created (e.g. corrupt stream header).
            try (InputStream entryStream = this.zipFile.getInputStream(entry);
                    CustomObjectInputStream objectStream = new CustomObjectInputStream(entryStream)) {
                return (DocumentData) objectStream.readObject();
            } catch (ClassNotFoundException e) {
                throw new IOException("Invalid data read from zip file!", e);
            }
        }

        /**
         * Closes the underlying zip file.
         *
         * @throws IOException if closing fails
         */
        public void close() throws IOException {
            this.zipFile.close();
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:WEB-INF/lib/mimir-core-6.2-SNAPSHOT.jar:gate/mimir/index/DocumentCollection$CollectionFileWriter.class */
    /**
     * Writes documents into a new collection zip file, one zip entry per
     * document, and keeps the documents written so far in an in-memory buffer
     * so they can be retrieved before the file is closed.
     */
    public static class CollectionFileWriter {
        /** Maximum number of documents kept in {@link #inputBuffer}. */
        protected static final int INPUT_BUFFER_SIZE = 1000;
        /** Documents written through this writer, keyed by document ID. */
        protected Long2ObjectLinkedOpenHashMap<DocumentData> inputBuffer;
        /** Stream writing to {@link #zipFile}. */
        protected ZipOutputStream zipOuputStream;
        /** The zip file being written. */
        protected File zipFile;
        /** Number of entries written so far. */
        protected int currentEntries;
        /** Sum of the compressed sizes of the entries written so far. */
        protected long currentLength;
        /** Scratch buffer holding the serialized form of the document being written. */
        protected ByteArrayOutputStream byteArrayOS;

        /**
         * Creates a writer for a new zip file.
         *
         * @param file the file to create; it must not exist yet
         * @throws IndexException if the file already exists or cannot be
         *         opened for writing
         */
        public CollectionFileWriter(File file) throws IndexException {
            this.zipFile = file;
            if (this.zipFile.exists()) {
                throw new IndexException("Collection zip file (" + file.getAbsolutePath() + ") already exists!");
            }
            this.byteArrayOS = new ByteArrayOutputStream();
            try {
                this.zipOuputStream = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(this.zipFile)));
                this.currentEntries = 0;
                this.currentLength = 0L;
                this.inputBuffer = new Long2ObjectLinkedOpenHashMap<>();
            } catch (FileNotFoundException e) {
                // Plain ")" literal instead of an unrelated third-party string constant.
                throw new IndexException("Cannot write to collection zip file (" + this.zipFile.getAbsolutePath() + ")", e);
            }
        }

        /**
         * Serializes a document and appends it to the zip file as an entry
         * named after its ID.
         *
         * @param j the document ID
         * @param documentData the document to store
         * @return {@code true} if the document was written; {@code false} if
         *         this file cannot take it (size limit, entry limit or input
         *         buffer limit reached), in which case nothing was written
         * @throws IOException if serialization or writing fails
         */
        public boolean writeDocumentData(long j, DocumentData documentData) throws IOException {
            // Start from a clean scratch buffer: a previous call that was rejected
            // or failed midway must not leak its bytes into this entry.
            this.byteArrayOS.reset();
            try (ObjectOutputStream objectOutputStream = new ObjectOutputStream(this.byteArrayOS)) {
                objectOutputStream.writeObject(documentData);
            }
            boolean tooLarge = this.currentLength + this.byteArrayOS.size() > DocumentCollection.ZIP_FILE_MAX_SIZE;
            boolean tooManyEntries = this.currentEntries >= DocumentCollection.ZIP_FILE_MAX_ENTRIES;
            boolean bufferFull = this.inputBuffer.size() >= INPUT_BUFFER_SIZE;
            if (tooLarge || tooManyEntries || bufferFull) {
                return false;
            }
            ZipEntry zipEntry = new ZipEntry(Long.toString(j));
            this.zipOuputStream.putNextEntry(zipEntry);
            this.byteArrayOS.writeTo(this.zipOuputStream);
            this.zipOuputStream.closeEntry();
            // The compressed size is read after the entry has been closed.
            this.currentLength += zipEntry.getCompressedSize();
            this.byteArrayOS.reset();
            this.currentEntries++;
            this.inputBuffer.put(j, documentData);
            return true;
        }

        /**
         * Closes the zip file and empties the input buffer. The buffer is
         * emptied even when closing the stream fails.
         *
         * @throws IOException if closing the zip stream fails
         */
        public void close() throws IOException {
            try {
                if (this.zipOuputStream != null) {
                    this.zipOuputStream.close();
                }
            } finally {
                this.inputBuffer.clear();
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:WEB-INF/lib/mimir-core-6.2-SNAPSHOT.jar:gate/mimir/index/DocumentCollection$CustomObjectInputStream.class */
    /**
     * Object input stream that maps the class name
     * {@code gate.mimir.index.mg4j.zipcollection.DocumentData} found in a
     * stream onto {@code gate.mimir.index.DocumentData}; every other class is
     * resolved in the standard way.
     */
    public static class CustomObjectInputStream extends ObjectInputStream {
        /**
         * Creates a stream without an underlying input, as allowed by
         * {@link ObjectInputStream#ObjectInputStream()}.
         */
        public CustomObjectInputStream() throws IOException, SecurityException {
            super();
        }

        /**
         * Creates a stream reading serialized objects from the given input.
         *
         * @param inputStream the stream to read from
         * @throws IOException if the stream header cannot be read
         */
        public CustomObjectInputStream(InputStream inputStream) throws IOException {
            super(inputStream);
        }

        /**
         * Resolves a class descriptor, substituting the descriptor of the
         * current DocumentData class for the one carrying the old class name.
         */
        @Override // java.io.ObjectInputStream
        protected Class<?> resolveClass(ObjectStreamClass objectStreamClass) throws IOException, ClassNotFoundException {
            ObjectStreamClass descriptor = objectStreamClass;
            boolean oldName = "gate.mimir.index.mg4j.zipcollection.DocumentData".equals(objectStreamClass.getName());
            if (oldName) {
                Class<?> currentClass = Class.forName("gate.mimir.index.DocumentData");
                descriptor = ObjectStreamClass.lookup(currentClass);
            }
            return super.resolveClass(descriptor);
        }
    }

    /**
     * Opens the document collection stored in a directory. All collection zip
     * files found there are opened and sorted by their first document ID, and
     * the ID for the next document is set to one past the largest ID found
     * (0 for an empty collection).
     *
     * @param file the index directory containing the collection zip files
     * @throws IOException if the directory cannot be listed, a zip file cannot
     *         be opened, or the document ID ranges of the files overlap
     */
    public DocumentCollection(File file) throws IOException {
        this.indexDirectory = file;
        this.collectionFiles = new ArrayList<>();
        // listFiles returns null (not an empty array) when the path is not a
        // readable directory.
        File[] archives = file.listFiles(CollectionFile.FILENAME_FILTER);
        if (archives == null) {
            throw new IOException("Cannot list the contents of index directory " + file.getAbsolutePath());
        }
        try {
            for (File archive : archives) {
                this.collectionFiles.add(new CollectionFile(archive));
            }
            Collections.sort(this.collectionFiles);
            // After sorting, each file must end before the next one starts.
            for (int i = 0; i < this.collectionFiles.size() - 1; i++) {
                CollectionFile previous = this.collectionFiles.get(i);
                CollectionFile following = this.collectionFiles.get(i + 1);
                if (previous.lastEntry >= following.firstEntry) {
                    throw new IOException("Invalid entries distribution: collection file " + following.zipFile.getName() + " contains an entry named \"" + following.firstEntry + "\", but an entry with a larger-or-equal ID was already seen in a previous collection file!");
                }
            }
        } catch (IOException | RuntimeException e) {
            // Construction failed: release the zip files opened so far.
            for (CollectionFile opened : this.collectionFiles) {
                try {
                    opened.close();
                } catch (IOException closeFailure) {
                    e.addSuppressed(closeFailure);
                }
            }
            this.collectionFiles.clear();
            throw e;
        }
        this.documentCache = new Long2ObjectLinkedOpenHashMap<>();
        this.nextDocumentId = this.collectionFiles.isEmpty() ? 0L : this.collectionFiles.get(this.collectionFiles.size() - 1).lastEntry + 1;
    }

    /**
     * Returns the data of a document. IDs covered by the closed zip files are
     * served from the cache or read from the file that contains them; larger
     * IDs are looked up in the input buffer of the current writer.
     *
     * @param j the document ID
     * @return the document data, never {@code null}
     * @throws IllegalStateException if this collection has been closed
     * @throws NoSuchElementException if no document with that ID is stored
     * @throws IOException if reading from a zip file fails
     */
    public DocumentData getDocumentData(long j) throws IOException {
        // Local copy: close() sets the field to null.
        List<CollectionFile> files = this.collectionFiles;
        if (this.closed || files == null) {
            throw new IllegalStateException("This document collection has already been closed!");
        }
        DocumentData documentData = null;
        if (!files.isEmpty() && j <= files.get(files.size() - 1).lastEntry) {
            // The cache lookup reorders the cache, so it takes the same lock as
            // the code that inserts into it.
            synchronized (files) {
                documentData = this.documentCache.getAndMoveToFirst(j);
                if (documentData == null) {
                    for (CollectionFile candidate : files) {
                        if (candidate.containsDocument(j)) {
                            documentData = candidate.getDocumentData(Long.valueOf(j));
                            this.documentCache.putAndMoveToFirst(j, documentData);
                            if (this.documentCache.size() > DOCUMENT_DATA_CACHE_SIZE) {
                                this.documentCache.removeLast();
                            }
                            // Found: no need to probe the remaining files.
                            break;
                        }
                    }
                }
            }
        } else if (this.collectionFileWriter != null) {
            // Not in any closed file: it may still be buffered by the writer.
            documentData = this.collectionFileWriter.inputBuffer.get(j);
        }
        if (documentData == null) {
            throw new NoSuchElementException("No entry found for document ID " + j);
        }
        return documentData;
    }

    /**
     * Appends a document to the collection under the next free document ID.
     * When the current zip file cannot take the document, that file is closed
     * and registered for reading, a new file is started and the write is
     * retried there.
     *
     * <p>The document ID counter advances by exactly one per call once the
     * write has been attempted, whether or not the write succeeds.
     *
     * @param documentData the document to store
     * @throws IndexException if a collection file cannot be created or
     *         written, or if the document does not fit even in an empty file
     */
    public void writeDocument(DocumentData documentData) throws IndexException {
        if (this.collectionFileWriter == null) {
            openCollectionWriter();
        }
        try {
            boolean rolledOver = false;
            while (!this.collectionFileWriter.writeDocumentData(this.nextDocumentId, documentData)) {
                if (rolledOver) {
                    // A brand-new file rejected the document too, so retrying
                    // would loop forever.
                    throw new IndexException("Document " + this.nextDocumentId + " does not fit in an empty collection file");
                }
                // The current file is full: close it and make it readable.
                this.collectionFileWriter.close();
                synchronized (this.collectionFiles) {
                    this.collectionFiles.add(new CollectionFile(this.collectionFileWriter.zipFile));
                }
                openCollectionWriter();
                rolledOver = true;
            }
        } catch (IOException e) {
            throw new IndexException("Problem while accessing the collection file", e);
        } finally {
            // Once per call, not once per attempt: a retry after rolling over
            // must reuse the same ID.
            this.nextDocumentId++;
        }
    }

    /**
     * Starts a new collection file writer. The new file is numbered one past
     * the number of the last collection file in the list, or 0 when the list
     * is empty.
     *
     * @throws IndexException if the writer cannot be created
     */
    protected void openCollectionWriter() throws IndexException {
        int nextNumber = 0;
        synchronized (this.collectionFiles) {
            int fileCount = this.collectionFiles.size();
            if (fileCount > 0) {
                CollectionFile newest = this.collectionFiles.get(fileCount - 1);
                nextNumber = newest.collectionFileNumber + 1;
            }
        }
        String fileName = CollectionFile.getCollectionFileName(Integer.toString(nextNumber));
        File target = new File(this.indexDirectory, fileName);
        this.collectionFileWriter = new CollectionFileWriter(target);
    }

    /**
     * Closes the collection: the current writer (if any), every open zip file
     * and the document cache. The zip files and the cache are released even
     * when closing the writer fails; a failure to close an individual zip file
     * is logged and does not stop the others from being closed.
     *
     * @throws IOException if closing the writer fails
     */
    public void close() throws IOException {
        try {
            if (this.collectionFileWriter != null) {
                this.collectionFileWriter.close();
            }
        } finally {
            this.closed = true;
            if (this.collectionFiles != null) {
                for (CollectionFile collectionFile : this.collectionFiles) {
                    try {
                        collectionFile.close();
                    } catch (IOException e) {
                        // Best effort: keep closing the remaining files.
                        logger.warn("Could not close collection file " + collectionFile.file, e);
                    }
                }
                this.collectionFiles.clear();
                this.collectionFiles = null;
            }
            this.documentCache.clear();
        }
    }

    /**
     * Returns the number of collection files currently in the list of files
     * opened for reading.
     *
     * @return the size of the collection file list
     */
    public int getArchiveCount() {
        final List<CollectionFile> archives = this.collectionFiles;
        return archives.size();
    }

    /**
     * Merges runs of adjacent collection files into single files. Files are
     * examined from the last to the first. A run starts at a file that is
     * below both the entry and size limits and grows backwards while the
     * combined entry count and length stay below the limits. Runs of two or
     * more files are rewritten as one zip file that replaces them.
     *
     * <p>If the list of collection files no longer matches the snapshot taken
     * at the start, the operation logs a warning and stops.
     *
     * @throws ZipException if a zip file cannot be read or written
     * @throws IOException if copying the data fails
     * @throws IndexException if an old file cannot be deleted or the merged
     *         file cannot be moved into place
     */
    public synchronized void compact() throws ZipException, IOException, IndexException {
        logger.debug("Starting collection compact.");
        CollectionFile[] snapshot;
        synchronized (this.collectionFiles) {
            snapshot = this.collectionFiles.toArray(new CollectionFile[this.collectionFiles.size()]);
        }
        int runEnd = -1;        // index of the last file of the current run, -1 if none
        int runSize = 0;        // number of files in the current run
        int runDocuments = 0;   // documents accumulated in the current run
        long runLength = 0;     // bytes accumulated in the current run
        for (int index = snapshot.length - 1; index >= 0; index--) {
            CollectionFile current = snapshot[index];
            if (runEnd < 0) {
                // No run in progress: start one if this file has room to spare.
                if (current.documentCount < ZIP_FILE_MAX_ENTRIES && current.length < ZIP_FILE_MAX_SIZE) {
                    runEnd = index;
                    runSize = 1;
                    runDocuments = current.documentCount;
                    runLength = current.length;
                }
                continue;
            }
            boolean fits = runDocuments + current.documentCount < ZIP_FILE_MAX_ENTRIES
                    && runLength + current.length < ZIP_FILE_MAX_SIZE;
            if (fits) {
                runDocuments += current.documentCount;
                runLength += current.length;
                runSize = (runEnd - index) + 1;
            }
            if (!fits || index == 0) {
                // The run cannot grow any further: merge it if it has 2+ files.
                if (runSize > 1) {
                    int runStart = (runEnd - runSize) + 1;
                    if (!mergeCollectionFiles(snapshot, runStart, runEnd)) {
                        return;
                    }
                }
                runEnd = -1;
                runSize = 0;
                runDocuments = 0;
                runLength = 0;
            }
        }
    }

    /**
     * Copies all entries of {@code snapshot[first..last]} into one new zip
     * file and swaps it in for those files in the collection file list.
     *
     * @return {@code false} if the live list no longer matches the snapshot,
     *         in which case nothing was replaced
     */
    private boolean mergeCollectionFiles(CollectionFile[] snapshot, int first, int last) throws IOException, IndexException {
        File tempFile = new File(this.indexDirectory, "temp-mimir-collection-" + snapshot[first].collectionFileNumber + "-" + snapshot[last].collectionFileNumber + ".zip");
        try (ZipOutputStream zipOutputStream = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(tempFile)))) {
            byte[] buffer = new byte[1048576];
            for (int k = first; k <= last; k++) {
                ZipFile source = snapshot[k].zipFile;
                Enumeration<? extends ZipEntry> entries = source.entries();
                while (entries.hasMoreElements()) {
                    ZipEntry entry = entries.nextElement();
                    zipOutputStream.putNextEntry(new ZipEntry(entry));
                    try (InputStream inputStream = source.getInputStream(entry)) {
                        int read;
                        while ((read = inputStream.read(buffer)) >= 0) {
                            zipOutputStream.write(buffer, 0, read);
                        }
                    }
                    zipOutputStream.closeEntry();
                }
            }
        } catch (IOException | RuntimeException e) {
            // Best effort: do not leave a partial temp file behind.
            tempFile.delete();
            throw e;
        }
        // The list is modified under the same lock the readers and the writer use.
        synchronized (this.collectionFiles) {
            for (int k = first; k <= last; k++) {
                if (snapshot[k] != this.collectionFiles.get(k)) {
                    logger.warn("Collection files have changed since the compacting operation started. Compact aborted.Details: " + snapshot[k].file.getAbsolutePath() + " not the same as " + this.collectionFiles.get(k).file.getAbsolutePath());
                    tempFile.delete();
                    return false;
                }
            }
            File mergedFile = new File(this.indexDirectory, CollectionFile.getCollectionFileName(Integer.toString(snapshot[first].collectionFileNumber) + "-" + Integer.toString(snapshot[last].collectionFileNumber)));
            for (int k = first; k <= last; k++) {
                CollectionFile obsolete = this.collectionFiles.remove(first);
                try {
                    // Release the file handle before deleting the file.
                    obsolete.close();
                } catch (IOException e) {
                    logger.warn("Could not close old collection file " + obsolete.file, e);
                }
                if (!obsolete.file.delete()) {
                    throw new IndexException("Could not delete old collection file " + obsolete.file + "! Document collection now inconsistent.");
                }
            }
            if (!tempFile.renameTo(mergedFile)) {
                throw new IndexException("Could not rename " + tempFile + " to " + mergedFile + "! Document collection now inconsistent.");
            }
            this.collectionFiles.add(first, new CollectionFile(mergedFile));
        }
        return true;
    }
}
