package gate.mimir.index;

import com.google.common.io.PatternFilenameFilter;
import gate.Annotation;
import gate.mimir.MimirIndex;
import gate.util.Files;
import it.unimi.di.big.mg4j.index.CompressionFlags;
import it.unimi.di.big.mg4j.index.DiskBasedIndex;
import it.unimi.di.big.mg4j.index.Index;
import it.unimi.di.big.mg4j.index.IndexIterator;
import it.unimi.di.big.mg4j.index.IndexReader;
import it.unimi.di.big.mg4j.index.IndexWriter;
import it.unimi.di.big.mg4j.index.NullTermProcessor;
import it.unimi.di.big.mg4j.index.QuasiSuccinctIndexWriter;
import it.unimi.di.big.mg4j.index.TermProcessor;
import it.unimi.di.big.mg4j.index.cluster.ContiguousDocumentalStrategy;
import it.unimi.di.big.mg4j.index.cluster.ContiguousLexicalStrategy;
import it.unimi.di.big.mg4j.index.cluster.DocumentalConcatenatedCluster;
import it.unimi.di.big.mg4j.index.cluster.IndexCluster;
import it.unimi.di.big.mg4j.index.cluster.LexicalCluster;
import it.unimi.di.big.mg4j.io.IOFactory;
import it.unimi.di.big.mg4j.tool.Combine;
import it.unimi.di.big.mg4j.tool.Concatenate;
import it.unimi.di.big.mg4j.tool.Scan;
import it.unimi.dsi.big.io.FileLinesCollection;
import it.unimi.dsi.big.util.ShiftAddXorSignedStringMap;
import it.unimi.dsi.bits.Fast;
import it.unimi.dsi.bits.TransformationStrategies;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.Swapper;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntBigArrayBigList;
import it.unimi.dsi.fastutil.ints.IntComparator;
import it.unimi.dsi.fastutil.ints.IntList;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.io.FastBufferedOutputStream;
import it.unimi.dsi.fastutil.objects.Object2LongAVLTreeMap;
import it.unimi.dsi.fastutil.objects.Object2LongMap;
import it.unimi.dsi.fastutil.objects.Object2ReferenceOpenHashMap;
import it.unimi.dsi.fastutil.objects.ObjectBigArrayBigList;
import it.unimi.dsi.fastutil.objects.ObjectBigList;
import it.unimi.dsi.fastutil.objects.ObjectBigListIterator;
import it.unimi.dsi.fastutil.objects.ObjectIterator;
import it.unimi.dsi.io.OutputBitStream;
import it.unimi.dsi.lang.MutableString;
import it.unimi.dsi.lang.ObjectParser;
import it.unimi.dsi.sux4j.mph.LcpMonotoneMinimalPerfectHashFunction;
import it.unimi.dsi.util.BloomFilter;
import it.unimi.dsi.util.Properties;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.Callable;
import java.util.concurrent.Future;
import java.util.concurrent.FutureTask;
import java.util.concurrent.RunnableFuture;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.commons.configuration.tree.DefaultExpressionEngine;
import org.apache.commons.math3.geometry.VectorFormat;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:WEB-INF/lib/mimir-core-6.2-SNAPSHOT.jar:gate/mimir/index/AtomicIndex.class */
public abstract class AtomicIndex implements Runnable {
    /** Name of the sub-directory (inside the index directory) that holds the head batch. */
    public static final String HEAD_FILE_NAME = "head";
    /** Suffix appended to the head name for the directory a compacted head is built in ("head.new"). */
    public static final String HEAD_NEW_EXT = ".new";
    /** Suffix for a superseded head; NOTE(review): not used in the visible part of the file — confirm. */
    public static final String HEAD_OLD_EXT = ".old";
    /** Prefix of tail batch directory names; a non-negative integer follows it. */
    public static final String TAIL_FILE_NAME_PREFIX = "tail-";
    /** Name of the UTF-8 file in the index directory listing the direct terms, one per line. */
    public static final String DIRECT_TERMS_FILENAME = "direct.terms";
    /** Suffix added to the index name to form the base name of the direct index files. */
    public static final String DIRECT_INDEX_NAME_SUFFIX = "-dir";
    /** NOTE(review): presumably the file used to persist queued documents — not used in the visible part of the file. */
    public static final String DOCUMENTS_QUEUE_FILE_NAME = "queued-documents";
    /** Initial capacity of the in-RAM term map (newBatch() uses the same value). */
    private static final int INITIAL_TERM_MAP_SIZE = 1024;
    /** Name of this atomic index; also the name of its directory and the base name of its files. */
    protected String name;
    /** Directory of this atomic index: a child of the parent's index directory, named after {@link #name}. */
    protected File indexDirectory;
    /** Term processor recorded in the batch properties and handed to the inverted index cluster. */
    protected TermProcessor termProcessor;
    /** The Mimir index this atomic index belongs to. */
    protected MimirIndex parent;
    /** The batches (head first, then tails in numeric order) currently opened. */
    protected List<MG4JIndex> batches;
    /** Inverted index over all {@link #batches}; null when there are no batches. */
    protected Index invertedIndex;
    /** Direct index over all {@link #batches}; only maintained when {@link #hasDirectIndex} is true. */
    protected Index directIndex;
    /** Extra properties merged into each direct index properties file; only created when a direct index is kept. */
    protected Properties additionalDirectProperties;
    /** Whether a direct index is built alongside the inverted one. */
    protected boolean hasDirectIndex;
    /** Maps each direct term to its numeric ID; returns -1 for unknown terms. */
    protected Object2LongMap<String> directTermIds;
    /** The direct terms, positioned at the index equal to their ID. */
    protected ObjectBigList<String> directTerms;
    /** NOTE(review): presumably the thread executing run() — assignment is outside the visible part of the file. */
    protected Thread indexingThread;
    /** Queue supplied at construction; NOTE(review): presumably the source of documents to index — confirm. */
    protected BlockingQueue<GATEDocument> inputQueue;
    /** Queue supplied at construction; NOTE(review): presumably receives documents once indexed — confirm. */
    protected BlockingQueue<GATEDocument> outputQueue;
    /** NOTE(review): presumably the position of the token currently being indexed — confirm. */
    protected int tokenPosition;
    /** Number of documents held by the batch currently in RAM; reset by newBatch(). */
    protected int documentsInRAM;
    /** Postings accumulated in RAM for the current batch, keyed by term. */
    protected Object2ReferenceOpenHashMap<MutableString, PostingsList> termMap;
    /** Sizes of the documents of the current in-RAM batch; written to the batch's sizes file. */
    protected IntArrayList documentSizesInRAM;
    /** NOTE(review): presumably the pending compaction request, if any — usage is outside the visible part of the file. */
    protected RunnableFuture<Void> compactIndexTask;
    /** NOTE(review): presumably the pending batch-dump request, if any — usage is outside the visible part of the file. */
    protected RunnableFuture<Long> batchWriteTask;
    /** A callable that does nothing and returns null. */
    private static final Callable<Void> noOpVoid = new Callable<Void>() { // from class: gate.mimir.index.AtomicIndex.1
        /* JADX WARN: Can't rename method to resolve collision */
        @Override // java.util.concurrent.Callable
        public Void call() throws Exception {
            return null;
        }
    };
    /** Marker document; NOTE(review): presumably queued to request a batch dump — confirm against run(). */
    private static final GATEDocument DUMP_BATCH = new GATEDocument() { // from class: gate.mimir.index.AtomicIndex.2
    };
    /** Marker document; NOTE(review): presumably queued to request an index compaction — confirm against run(). */
    private static final GATEDocument COMPACT_INDEX = new GATEDocument() { // from class: gate.mimir.index.AtomicIndex.3
    };
    /** Logger for this class. */
    private static Logger logger = LoggerFactory.getLogger((Class<?>) AtomicIndex.class);
    /** Accepts file names made of the literal prefix "tail-" followed by one or more digits. */
    protected static final PatternFilenameFilter TAILS_FILENAME_FILTER = new PatternFilenameFilter("\\Qtail-\\E\\d+");
    /** Value stored as the MAXDOCSIZE property of a written batch; reset to -1 by newBatch(). */
    protected int maxDocSizeInRAM = -1;
    /** Number of occurrences held by the batch currently in RAM; reset by newBatch(). */
    protected long occurrencesInRAM = 0;
    /** Reusable term buffer; NOTE(review): usage is outside the visible part of the file. */
    protected MutableString currentTerm = new MutableString();
    /** Extra properties merged into each batch's inverted index properties file. */
    protected Properties additionalProperties = new Properties();

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:WEB-INF/lib/mimir-core-6.2-SNAPSHOT.jar:gate/mimir/index/AtomicIndex$MG4JIndex.class */
    /**
     * Simple holder bundling what is known about one on-disk batch (the head
     * or a tail): where it lives, its inverted index, its optional direct
     * index, and the Bloom filters for the terms of each.
     */
    public static class MG4JIndex {
        /** Directory that contains the files of this batch. */
        protected File indexDir;
        /** Name given to this batch at construction time. */
        protected String indexName;
        /** Inverted index of this batch. */
        protected Index invertedIndex;
        /** Bloom filter for the terms of {@link #invertedIndex}. */
        protected BloomFilter<Void> invertedTermFilter;
        /** Direct index of this batch. */
        protected Index directIndex;
        /** Bloom filter for the terms of {@link #directIndex}. */
        protected BloomFilter<Void> directTermFilter;

        /**
         * Stores the supplied values; nothing is opened or validated here.
         *
         * @param file the batch directory
         * @param str the batch name
         * @param index the inverted index
         * @param bloomFilter term filter for the inverted index
         * @param index2 the direct index
         * @param bloomFilter2 term filter for the direct index
         */
        public MG4JIndex(File file, String str, Index index, BloomFilter<Void> bloomFilter, Index index2, BloomFilter<Void> bloomFilter2) {
            indexDir = file;
            indexName = str;
            invertedIndex = index;
            invertedTermFilter = bloomFilter;
            directIndex = index2;
            directTermFilter = bloomFilter2;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /* loaded from: input_file:WEB-INF/lib/mimir-core-6.2-SNAPSHOT.jar:gate/mimir/index/AtomicIndex$PostingsList.class */
    /**
     * In-RAM postings for a single term: the documents it occurs in (stored as
     * differences between consecutive document pointers), the per-document
     * counts and, optionally, the positions inside each document.
     */
    public static class PostingsList {
        /** Pointer of the first document added, or -1 while empty. */
        private long firstDocumentPointer = -1;
        /** Positions of all documents, concatenated; null when positions are not stored. */
        private IntArrayList positions;
        /** Pointer of the document currently being filled, or -1 while empty. */
        private long lastDocumentPointer = -1;
        /** Last position added for the current document, or -1. */
        private int lastPosition = -1;
        /** Count for the current document, not yet moved to {@link #counts}. */
        private int count = 0;
        /** Largest per-document count flushed so far. */
        private int maxCount = 0;
        /** Number of distinct documents added. */
        private long frequency = 0;
        /** Sum of all flushed counts. */
        private long occurrences = 0;
        /** Sum, over flushed documents, of the last position seen in each. */
        private long sumMaxPos = 0;
        /** Gap between each document pointer and the previous one (0 for the first). */
        private IntList documentPointersDifferential = new IntArrayList();
        /** Flushed per-document counts. */
        private IntList counts = new IntArrayList();

        /**
         * @param z true to also record positions; when false {@link #addPosition(int)}
         *          must not be used and counts are supplied through {@link #setCount(int)}
         */
        public PostingsList(boolean z) {
            if (z) {
                this.positions = new IntArrayList();
            }
        }

        /**
         * Starts a new document, flushing the count of the previous one. Calling
         * it again with the pointer of the current document does nothing.
         *
         * @param j the document pointer
         */
        public void newDocumentPointer(long j) {
            if (j == this.lastDocumentPointer) {
                return;
            }
            if (this.firstDocumentPointer < 0) {
                this.firstDocumentPointer = j;
            }
            if (this.lastDocumentPointer == -1) {
                // first document: its gap is measured from firstDocumentPointer itself
                this.documentPointersDifferential.add(0);
            } else {
                flush();
                this.documentPointersDifferential.add((int) (j - this.lastDocumentPointer));
            }
            this.lastDocumentPointer = j;
            this.lastPosition = -1;
            this.frequency++;
        }

        /**
         * Records a position for the current document; a repeat of the last
         * position is ignored. Requires positions to be stored.
         *
         * @param i the position
         */
        public void addPosition(int i) {
            if (i != this.lastPosition) {
                this.positions.add(i);
                this.count++;
                this.lastPosition = i;
            }
        }

        /**
         * Overwrites the count of the current document.
         *
         * @param i the new count
         */
        public void setCount(int i) {
            this.count = i;
        }

        /**
         * @param i a candidate position
         * @return true if {@code i} is greater than the last position recorded
         *         for the current document
         */
        public boolean checkPosition(int i) {
            return i > this.lastPosition;
        }

        /**
         * Moves the count of the current document into the list of counts and
         * updates the aggregated statistics. A zero count is not recorded.
         */
        public void flush() {
            if (this.count > 0) {
                this.counts.add(this.count);
                if (this.count > this.maxCount) {
                    this.maxCount = this.count;
                }
                this.sumMaxPos += this.lastPosition;
                this.occurrences += this.count;
            }
            this.count = 0;
        }

        /** Returns this list to its empty state, keeping the allocated storage. */
        public void clear() {
            this.documentPointersDifferential.clear();
            this.count = 0;
            this.counts.clear();
            this.maxCount = 0;
            this.occurrences = 0L;
            if (this.positions != null) {
                this.positions.clear();
                this.lastPosition = -1;
                this.sumMaxPos = 0L;
            }
            this.firstDocumentPointer = -1L;
            this.lastDocumentPointer = -1L;
            this.frequency = 0L;
        }

        /**
         * Flushes the current document and writes the whole list as one
         * inverted list.
         *
         * @param indexWriter the writer to emit to
         * @throws IOException if the writer fails
         */
        public void write(IndexWriter indexWriter) throws IOException {
            flush();
            if (indexWriter instanceof QuasiSuccinctIndexWriter) {
                // the quasi-succinct writer needs the list statistics up front
                ((QuasiSuccinctIndexWriter) indexWriter).newInvertedList(this.frequency, this.occurrences, this.positions != null ? this.sumMaxPos : 0L);
            } else {
                indexWriter.newInvertedList();
            }
            indexWriter.writeFrequency(this.frequency);
            long docPointer = this.firstDocumentPointer;
            int positionsStart = 0;
            for (int doc = 0; doc < this.documentPointersDifferential.size(); doc++) {
                docPointer += this.documentPointersDifferential.getInt(doc);
                int docCount = this.counts.getInt(doc);
                OutputBitStream record = indexWriter.newDocumentRecord();
                indexWriter.writeDocumentPointer(record, docPointer);
                indexWriter.writePositionCount(record, docCount);
                if (this.positions != null) {
                    indexWriter.writeDocumentPositions(record, this.positions.elements(), positionsStart, docCount, -1);
                    positionsStart += docCount;
                }
            }
        }

        /**
         * Debug rendering: {@code pointer(pos, pos, ...) } for each document,
         * separated by {@code "; "}. The parentheses are empty when positions
         * are not stored.
         */
        @Override
        public String toString() {
            StringBuilder sb = new StringBuilder();
            long docPointer = this.firstDocumentPointer;
            int positionsStart = 0;
            for (int doc = 0; doc < this.documentPointersDifferential.size(); doc++) {
                docPointer += this.documentPointersDifferential.getInt(doc);
                // the count of the document being filled only reaches `counts` on flush()
                int docCount = doc < this.counts.size() ? this.counts.getInt(doc) : this.count;
                if (doc > 0) {
                    sb.append("; ");
                }
                sb.append(docPointer).append("(");
                if (this.positions != null) {
                    for (int p = positionsStart; p < positionsStart + docCount; p++) {
                        if (p > positionsStart) {
                            sb.append(", ");
                        }
                        sb.append(this.positions.getInt(p));
                    }
                    // each document owns the next docCount entries of the shared positions list
                    positionsStart += docCount;
                }
                sb.append(") ");
            }
            return sb.toString();
        }
    }

    /**
     * Builds the auxiliary lookup structures for a terms file.
     *
     * @param file the terms file, read as UTF-8 with one term per line
     * @param file2 where to store the signed term map, or null to skip it
     * @param file3 where to store the Bloom filter of the terms, or null to skip it
     * @throws IOException if reading the terms or storing a structure fails
     */
    public static void generateTermMap(File file, File file2, File file3) throws IOException {
        final FileLinesCollection terms = new FileLinesCollection(file.getAbsolutePath(), "UTF-8");

        if (file2 != null) {
            FileLinesCollection.FileLinesIterator termIterator = terms.iterator();
            LcpMonotoneMinimalPerfectHashFunction hashFunction = new LcpMonotoneMinimalPerfectHashFunction.Builder()
                    .keys(terms)
                    .transform(TransformationStrategies.prefixFreeUtf16())
                    .build();
            BinIO.storeObject(new ShiftAddXorSignedStringMap(termIterator, hashFunction), file2);
        }

        if (file3 != null) {
            BloomFilter<Void> termFilter = BloomFilter.create(terms.size64());
            for (FileLinesCollection.FileLinesIterator lines = terms.iterator(); lines.hasNext();) {
                termFilter.add(lines.next());
            }
            BinIO.storeObject(termFilter, file3);
        }
    }

    /**
     * Combines the inverted indexes of the given batches into one index.
     *
     * @param list the batches, in document order
     * @param termProcessor the term processor handed to the resulting cluster
     * @return null for a null or empty list, the single inverted index when
     *         there is only one batch, otherwise a documental cluster
     *         concatenating all of them
     */
    protected static final Index openInvertedIndexCluster(List<MG4JIndex> list, TermProcessor termProcessor) {
        if (list == null || list.isEmpty()) {
            return null;
        }
        final int batchCount = list.size();
        if (batchCount == 1) {
            return list.get(0).invertedIndex;
        }

        Index[] subIndexes = new Index[batchCount];
        BloomFilter[] termFilters = new BloomFilter[batchCount];
        // cutPoints[k] is the number of documents preceding batch k
        long[] cutPoints = new long[batchCount + 1];
        cutPoints[0] = 0;
        IntBigArrayBigList allSizes = new IntBigArrayBigList();

        // every total starts at -1 and is turned back into a real total (or left at -1) below
        int termTotal = -1;
        int documentTotal = -1;
        long postingTotal = -1;
        long occurrenceTotal = -1;
        int largestCount = -1;

        for (int pos = 0; pos < batchCount; pos++) {
            MG4JIndex batch = list.get(pos);
            Index inverted = batch.invertedIndex;
            subIndexes[pos] = inverted;
            cutPoints[pos + 1] = cutPoints[pos] + inverted.numberOfDocuments;
            termTotal = (int) (termTotal + inverted.numberOfTerms);
            documentTotal = (int) (documentTotal + inverted.numberOfDocuments);
            postingTotal += inverted.numberOfPostings;
            occurrenceTotal += inverted.numberOfOccurrences;
            largestCount = Math.max(largestCount, inverted.maxCount);
            termFilters[pos] = batch.invertedTermFilter;
            allSizes.addAll(inverted.sizes);
        }

        int numberOfDocuments = documentTotal == -1 ? -1 : documentTotal + 1;
        int numberOfTerms = termTotal == -1 ? -1 : termTotal + 1;
        long numberOfPostings = postingTotal == -1 ? -1L : postingTotal + 1;
        long numberOfOccurrences = occurrenceTotal == -1 ? -1L : occurrenceTotal + 1;
        return new DocumentalConcatenatedCluster(subIndexes, new ContiguousDocumentalStrategy(cutPoints), false, termFilters, numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, largestCount, null, true, true, termProcessor, null, allSizes, null);
    }

    /**
     * Combines the direct indexes of the given batches into one index. The
     * batches are split lexically: the boundary of each batch is the running
     * total of documents in the inverted indexes, rendered by
     * {@code longToTerm}.
     *
     * @param list the batches, in document order
     * @return null for a null or empty list, the single direct index when
     *         there is only one batch, otherwise a lexical cluster over all of
     *         them
     */
    protected static final Index openDirectIndexCluster(List<MG4JIndex> list) {
        if (list == null || list.isEmpty()) {
            return null;
        }
        final int batchCount = list.size();
        if (batchCount == 1) {
            return list.get(0).directIndex;
        }

        Index[] subIndexes = new Index[batchCount];
        BloomFilter[] termFilters = new BloomFilter[batchCount];
        int[] cutPoints = new int[batchCount + 1];
        cutPoints[0] = 0;
        String[] cutPointTerms = new String[batchCount + 1];
        cutPointTerms[0] = longToTerm(0L);

        // every total starts at -1 and is turned back into a real total (or left at -1) below
        int termTotal = -1;
        int documentTotal = -1;
        long postingTotal = -1;
        long occurrenceTotal = -1;
        int largestCount = -1;

        for (int pos = 0; pos < batchCount; pos++) {
            MG4JIndex batch = list.get(pos);
            Index direct = batch.directIndex;
            subIndexes[pos] = direct;
            // boundaries come from the inverted index: its documents are the direct index's terms
            cutPoints[pos + 1] = cutPoints[pos] + ((int) batch.invertedIndex.numberOfDocuments);
            cutPointTerms[pos + 1] = longToTerm(cutPoints[pos + 1]);
            termTotal = (int) (termTotal + direct.numberOfTerms);
            documentTotal = (int) (documentTotal + direct.numberOfDocuments);
            postingTotal += direct.numberOfPostings;
            occurrenceTotal += direct.numberOfOccurrences;
            largestCount = Math.max(largestCount, direct.maxCount);
            termFilters[pos] = batch.directTermFilter;
        }
        // the last boundary term is passed as null rather than as a term
        cutPointTerms[cutPointTerms.length - 1] = null;

        int numberOfDocuments = documentTotal == -1 ? -1 : documentTotal + 1;
        int numberOfTerms = termTotal == -1 ? -1 : termTotal + 1;
        long numberOfPostings = postingTotal == -1 ? -1L : postingTotal + 1;
        long numberOfOccurrences = occurrenceTotal == -1 ? -1L : occurrenceTotal + 1;
        return new LexicalCluster(subIndexes, new ContiguousLexicalStrategy(cutPoints, cutPointTerms), termFilters, numberOfDocuments, numberOfTerms, numberOfPostings, numberOfOccurrences, largestCount, null, true, false, NullTermProcessor.getInstance(), null, null, null);
    }

    /**
     * Renders a long as a fixed-width term: 16 lower-case hexadecimal digits,
     * left-padded with zeros. Negative values appear in their unsigned
     * (two's complement) form.
     *
     * @param j the value to render
     * @return the 16-character hexadecimal string
     */
    public static final String longToTerm(long j) {
        final String hex = Long.toHexString(j);
        final StringBuilder padded = new StringBuilder(16);
        for (int missing = 16 - hex.length(); missing > 0; missing--) {
            padded.append('0');
        }
        return padded.append(hex).toString();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Sets up an atomic index stored in a directory named after the index,
     * inside the parent's index directory, then opens (or creates) it through
     * {@link #initIndex()}.
     *
     * @param mimirIndex the index this atomic index belongs to
     * @param str the name of this atomic index
     * @param z whether a direct index is kept as well
     * @param termProcessor the term processor recorded in the index properties
     * @param blockingQueue stored as the input queue
     * @param blockingQueue2 stored as the output queue
     * @throws IOException if the index files cannot be read
     * @throws IndexException if the existing index cannot be opened
     */
    public AtomicIndex(MimirIndex mimirIndex, String str, boolean z, TermProcessor termProcessor, BlockingQueue<GATEDocument> blockingQueue, BlockingQueue<GATEDocument> blockingQueue2) throws IOException, IndexException {
        parent = mimirIndex;
        name = str;
        indexDirectory = new File(mimirIndex.getIndexDirectory(), str);
        hasDirectIndex = z;
        this.termProcessor = termProcessor;
        inputQueue = blockingQueue;
        outputQueue = blockingQueue2;

        additionalProperties.setProperty(Index.PropertyKeys.TERMPROCESSOR, ObjectParser.toSpec(termProcessor));
        if (z) {
            // terms of the direct index are stored unprocessed
            Properties directProperties = new Properties();
            directProperties.setProperty(Index.PropertyKeys.TERMPROCESSOR, ObjectParser.toSpec(NullTermProcessor.getInstance()));
            additionalDirectProperties = directProperties;
        }
        initIndex();
    }

    /* JADX WARN: Type inference failed for: r0v40, types: [it.unimi.dsi.fastutil.objects.Object2LongMap, long, it.unimi.dsi.fastutil.objects.Object2LongMap<java.lang.String>] */
    protected void initIndex() throws IOException, IndexException {
        this.batches = new ArrayList();
        if (this.indexDirectory.exists()) {
            ArrayList arrayList = new ArrayList();
            if (new File(this.indexDirectory, "head").exists()) {
                arrayList.add("head");
            }
            TreeMap treeMap = new TreeMap();
            for (String str : this.indexDirectory.list(TAILS_FILENAME_FILTER)) {
                treeMap.put(Integer.valueOf(Integer.parseInt(str.substring(TAIL_FILE_NAME_PREFIX.length()))), str);
            }
            arrayList.addAll(treeMap.values());
            synchronized (this) {
                Iterator it2 = arrayList.iterator();
                while (it2.hasNext()) {
                    this.batches.add(openSubIndex((String) it2.next()));
                }
            }
        } else {
            this.indexDirectory.mkdirs();
        }
        synchronized (this) {
            this.invertedIndex = openInvertedIndexCluster(this.batches, this.termProcessor);
        }
        if (this.hasDirectIndex) {
            this.directTerms = new ObjectBigArrayBigList();
            this.directTermIds = new Object2LongAVLTreeMap();
            this.directTermIds.defaultReturnValue(-1L);
            File file = new File(this.indexDirectory, DIRECT_TERMS_FILENAME);
            if (file.exists()) {
                FileLinesCollection.FileLinesIterator it3 = new FileLinesCollection(file.getAbsolutePath(), "UTF-8").iterator();
                long j = 0;
                while (it3.hasNext()) {
                    String mutableString = it3.next().toString();
                    this.directTerms.add(mutableString);
                    ?? r0 = this.directTermIds;
                    long j2 = j;
                    j = r0 + 1;
                    r0.put(mutableString, j2);
                }
            }
            synchronized (this) {
                this.directIndex = openDirectIndexCluster(this.batches);
            }
        }
    }

    /**
     * @return the name of this atomic index
     */
    public String getName() {
        return name;
    }

    /**
     * @return true if this atomic index also keeps a direct index
     */
    public boolean hasDirectIndex() {
        return hasDirectIndex;
    }

    /**
     * Resets the in-RAM state so that a new batch can be accumulated: counters
     * go back to their initial values, and the term map and document sizes are
     * emptied (or created on first use).
     */
    protected void newBatch() {
        occurrencesInRAM = 0L;
        maxDocSizeInRAM = -1;
        documentsInRAM = 0;

        if (termMap != null) {
            termMap.clear();
            // shrink the table back so one large batch does not pin memory
            termMap.trim(1024);
        } else {
            termMap = new Object2ReferenceOpenHashMap<>(1024, 0.5f);
        }

        if (documentSizesInRAM != null) {
            documentSizesInRAM.clear();
        } else {
            documentSizesInRAM = new IntArrayList();
        }
    }

    /**
     * Writes the batch currently held in RAM to a new directory inside the
     * index directory, then opens it and rebuilds the cluster indexes. The
     * batch becomes the head when no head exists yet; otherwise it becomes the
     * tail numbered one above the highest existing tail. Apart from the
     * inverted index itself, the terms file, the term map, the Bloom filter,
     * the sizes file, the properties and the stats file are written, plus the
     * direct index when one is kept.
     *
     * @return the number of occurrences in the batch written, or 0 if there
     *         were no documents in RAM (nothing is written in that case)
     * @throws IOException if writing any of the files fails
     * @throws IndexException if the properties cannot be saved or the new
     *         batch cannot be opened
     */
    protected long writeCurrentBatch() throws IOException, IndexException {
        if (this.documentsInRAM == 0) {
            return 0L;
        }
        // choose the batch name: head if there is none yet, else the next free tail
        int tailNumber = -1;
        if (new File(this.indexDirectory, "head").exists()) {
            String[] tailNames = this.indexDirectory.list(TAILS_FILENAME_FILTER);
            if (tailNames == null) {
                // File.list() returns null when the path is not a directory or cannot be read
                throw new IOException("Could not list the contents of index directory " + this.indexDirectory.getAbsolutePath());
            }
            for (String tailName : tailNames) {
                tailNumber = Math.max(tailNumber, Integer.parseInt(tailName.substring(TAIL_FILE_NAME_PREFIX.length())));
            }
            tailNumber++;
        }
        String batchName = tailNumber == -1 ? "head" : TAIL_FILE_NAME_PREFIX + Integer.toString(tailNumber);
        File batchDir = new File(this.indexDirectory, batchName);
        if (!batchDir.mkdir() && !batchDir.isDirectory()) {
            throw new IOException("Could not create batch directory " + batchDir.getAbsolutePath());
        }
        String basename = new File(batchDir, this.name).getAbsolutePath();
        QuasiSuccinctIndexWriter indexWriter = new QuasiSuccinctIndexWriter(IOFactory.FILESYSTEM_FACTORY, basename, this.documentsInRAM, Fast.mostSignificantBit(256), 16777216, CompressionFlags.DEFAULT_QUASI_SUCCINCT_INDEX, ByteOrder.nativeOrder());
        int termCount = this.termMap.size();
        logger.info("Generating index for batch {}; documents: {}; terms:{}; occurrences: {} / {}", batchName, this.documentsInRAM, termCount, this.occurrencesInRAM, this.parent.getOccurrencesInRam());

        // inverted lists must be written in term order
        MutableString[] sortedTerms = this.termMap.keySet().toArray(new MutableString[termCount]);
        java.util.Arrays.sort(sortedTerms);

        // terms file and Bloom filter
        BloomFilter<Void> termFilter = BloomFilter.create(Math.max(termCount, 1));
        try (PrintWriter termsWriter = new PrintWriter(new OutputStreamWriter(new FastBufferedOutputStream(new FileOutputStream(basename + ".terms"), 65536), "UTF-8"))) {
            for (MutableString term : sortedTerms) {
                term.println(termsWriter);
                termFilter.add(term);
            }
        }
        generateTermMap(new File(basename + ".terms"), new File(basename + DiskBasedIndex.TERMMAP_EXTENSION), null);
        BinIO.storeObject(termFilter, new File(basename + IndexCluster.BLOOM_EXTENSION));

        // sizes file: only the live entries, not the spare capacity of the backing array
        try (OutputBitStream sizesStream = new OutputBitStream(new File(basename + DiskBasedIndex.SIZES_EXTENSION))) {
            int sizeCount = this.documentSizesInRAM.size();
            for (int doc = 0; doc < sizeCount; doc++) {
                sizesStream.writeGamma(this.documentSizesInRAM.getInt(doc));
            }
        }

        // the postings themselves
        int maxCount = 0;
        for (MutableString term : sortedTerms) {
            PostingsList postingsList = this.termMap.get(term);
            postingsList.write(indexWriter);
            // read after write(): it flushes the last document, which may carry the largest count
            if (maxCount < postingsList.maxCount) {
                maxCount = postingsList.maxCount;
            }
        }
        indexWriter.close();

        try {
            Properties properties = indexWriter.properties();
            this.additionalProperties.setProperty((Enum<?>) Index.PropertyKeys.SIZE, indexWriter.writtenBits());
            this.additionalProperties.setProperty((Enum<?>) Index.PropertyKeys.MAXDOCSIZE, this.maxDocSizeInRAM);
            this.additionalProperties.setProperty((Enum<?>) Index.PropertyKeys.MAXCOUNT, maxCount);
            this.additionalProperties.setProperty((Enum<?>) Index.PropertyKeys.OCCURRENCES, this.occurrencesInRAM);
            properties.addAll(this.additionalProperties);
            Scan.saveProperties(IOFactory.FILESYSTEM_FACTORY, properties, basename + ".properties");
            try (PrintStream statsStream = new PrintStream(new File(basename + ".stats"))) {
                indexWriter.printStats(statsStream);
            }
            if (this.hasDirectIndex) {
                // must run before newBatch(): it reads the term map of this batch
                writeDirectIndex(batchDir);
            }
            long occurrencesWritten = this.occurrencesInRAM;
            newBatch();
            try {
                synchronized (this) {
                    this.batches.add(openSubIndex(batchName));
                    this.invertedIndex = openInvertedIndexCluster(this.batches, this.termProcessor);
                    if (this.hasDirectIndex) {
                        this.directIndex = openDirectIndexCluster(this.batches);
                    }
                }
                return occurrencesWritten;
            } catch (Exception e) {
                throw new IndexException("Could not open the index just written to " + basename, e);
            }
        } catch (ConfigurationException e2) {
            throw new IndexException("Error while saving tail properties", e2);
        }
    }

    /**
     * Writes the direct index for the batch currently in RAM by inverting its
     * term map. In the direct index each document becomes a term (its
     * document number shifted by the documents already held in existing
     * batches, rendered with {@code longToTerm}) and each direct term ID
     * becomes a document pointer. Terms not seen before are appended to the
     * direct terms list, and the direct terms file is rewritten through a
     * temporary file.
     *
     * @param file the batch directory to write the direct index into
     * @throws IOException if writing any of the files fails
     * @throws IndexException if the properties or the direct terms file cannot
     *         be saved
     */
    protected void writeDirectIndex(File file) throws IOException, IndexException {
        // number of documents already held by the existing batches
        long documentOffset = 0;
        for (MG4JIndex batch : this.batches) {
            documentOffset += batch.invertedIndex.numberOfDocuments;
        }

        // give an ID to every term of this batch that does not have one yet
        for (MutableString term : this.termMap.keySet()) {
            String termString = term.toString();
            if (this.directTermIds.getLong(termString) == this.directTermIds.defaultReturnValue()) {
                this.directTerms.add(termString);
                this.directTermIds.put(termString, this.directTerms.size64() - 1);
            }
        }

        // invert: walk the direct terms in ID order so that the pointers of each direct list increase
        Object2ReferenceOpenHashMap<MutableString, PostingsList> directPostings = new Object2ReferenceOpenHashMap<>(1024, 0.5f);
        MutableString documentTerm = new MutableString();
        MutableString lookupTerm = new MutableString();
        for (long termId = 0; termId < this.directTerms.size64(); termId++) {
            lookupTerm.replace(this.directTerms.get(termId));
            PostingsList termPostings = this.termMap.get(lookupTerm);
            if (termPostings == null) {
                // known term that does not occur in this batch
                continue;
            }
            long documentId = documentOffset + termPostings.firstDocumentPointer;
            for (int doc = 0; doc < termPostings.documentPointersDifferential.size(); doc++) {
                documentId += termPostings.documentPointersDifferential.getInt(doc);
                int termCount = termPostings.counts.getInt(doc);
                documentTerm.replace(longToTerm(documentId));
                PostingsList documentPostings = directPostings.get(documentTerm);
                if (documentPostings == null) {
                    documentPostings = new PostingsList(false);
                    directPostings.put(documentTerm.copy(), documentPostings);
                }
                documentPostings.newDocumentPointer(termId);
                documentPostings.setCount(termCount);
                documentPostings.flush();
            }
        }

        String basename = new File(file, this.name + DIRECT_INDEX_NAME_SUFFIX).getAbsolutePath();
        // the direct index stores no positions
        Map<CompressionFlags.Component, CompressionFlags.Coding> flags = new HashMap<>(CompressionFlags.DEFAULT_QUASI_SUCCINCT_INDEX);
        flags.remove(CompressionFlags.Component.POSITIONS);
        QuasiSuccinctIndexWriter indexWriter = new QuasiSuccinctIndexWriter(IOFactory.FILESYSTEM_FACTORY, basename, this.directTerms.size64(), Fast.mostSignificantBit(256), 16777216, flags, ByteOrder.nativeOrder());

        // inverted lists must be written in term order
        MutableString[] sortedDocumentTerms = directPostings.keySet().toArray(new MutableString[directPostings.size()]);
        java.util.Arrays.sort(sortedDocumentTerms);

        // terms file and Bloom filter
        BloomFilter<Void> termFilter = BloomFilter.create(Math.max(sortedDocumentTerms.length, 1));
        try (PrintWriter termsWriter = new PrintWriter(new OutputStreamWriter(new FastBufferedOutputStream(new FileOutputStream(basename + ".terms"), 65536), "UTF-8"))) {
            for (MutableString term : sortedDocumentTerms) {
                term.println(termsWriter);
                termFilter.add(term);
            }
        }
        generateTermMap(new File(basename + ".terms"), new File(basename + DiskBasedIndex.TERMMAP_EXTENSION), null);
        BinIO.storeObject(termFilter, new File(basename + IndexCluster.BLOOM_EXTENSION));

        // sizes file: one entry per direct term ID, holding that term's frequency in this batch
        int maxSize = -1;
        try (OutputBitStream sizesStream = new OutputBitStream(new File(basename + DiskBasedIndex.SIZES_EXTENSION))) {
            for (long termId = 0; termId < this.directTerms.size64(); termId++) {
                lookupTerm.replace(this.directTerms.get(termId));
                PostingsList termPostings = this.termMap.get(lookupTerm);
                int size = termPostings != null ? (int) termPostings.frequency : 0;
                sizesStream.writeGamma(size);
                if (size > maxSize) {
                    maxSize = size;
                }
            }
        }

        // the postings themselves
        int maxCount = 0;
        long occurrences = 0;
        for (MutableString term : sortedDocumentTerms) {
            PostingsList documentPostings = directPostings.get(term);
            if (maxCount < documentPostings.maxCount) {
                maxCount = documentPostings.maxCount;
            }
            documentPostings.write(indexWriter);
            occurrences += documentPostings.occurrences;
        }
        indexWriter.close();

        try {
            Properties properties = indexWriter.properties();
            this.additionalDirectProperties.setProperty((Enum<?>) Index.PropertyKeys.SIZE, indexWriter.writtenBits());
            this.additionalDirectProperties.setProperty((Enum<?>) Index.PropertyKeys.MAXDOCSIZE, maxSize);
            this.additionalDirectProperties.setProperty((Enum<?>) Index.PropertyKeys.MAXCOUNT, maxCount);
            this.additionalDirectProperties.setProperty((Enum<?>) Index.PropertyKeys.OCCURRENCES, occurrences);
            properties.addAll(this.additionalDirectProperties);
            Scan.saveProperties(IOFactory.FILESYSTEM_FACTORY, properties, basename + ".properties");
            try (PrintStream statsStream = new PrintStream(new File(basename + ".stats"))) {
                indexWriter.printStats(statsStream);
            }

            // write the full list of direct terms to a temporary file, then swap it in
            File newTermsFile = new File(this.indexDirectory, "direct.terms.new");
            try (PrintWriter directTermsWriter = new PrintWriter(new OutputStreamWriter(new FastBufferedOutputStream(new FileOutputStream(newTermsFile), 65536), "UTF-8"))) {
                for (String term : this.directTerms) {
                    directTermsWriter.println(term);
                }
            }
            File termsFile = new File(this.indexDirectory, DIRECT_TERMS_FILENAME);
            File oldTermsFile = new File(this.indexDirectory, "direct.terms.old");
            if (termsFile.exists() && !termsFile.renameTo(oldTermsFile)) {
                // leaving the old file in place would pair a stale terms list with the index just written
                throw new IndexException("Unable to save direct terms file");
            }
            if (!newTermsFile.renameTo(termsFile)) {
                throw new IndexException("Unable to save direct terms file");
            }
            oldTermsFile.delete();
        } catch (ConfigurationException e) {
            throw new IndexException("Error while saving tail properties", e);
        }
    }

    /**
     * Concatenates every batch currently held in {@code batches} into a fresh
     * sub-index built under {@code head.new}, then swaps that directory in as
     * the new {@code head} and removes the directories of the merged batches.
     *
     * <p>The merge itself runs outside the monitor; only the swap of the
     * in-memory state and the directory renames/deletions happen while holding
     * the lock on {@code this}.
     *
     * @throws IndexException if {@code head.new} cannot be created, or wrapping
     *         any exception raised while merging, renaming or cleaning up
     *         (including {@code IndexException}s thrown inside the merge section)
     * @throws IOException declared by the signature
     * @throws ConfigurationException declared by the signature
     */
    protected void compactIndex() throws IndexException, IOException, ConfigurationException {
        File newHeadDir = new File(this.indexDirectory, "head.new");
        // Work on a snapshot so batches added later are not affected by this merge.
        ArrayList<MG4JIndex> mergedBatches = new ArrayList<MG4JIndex>(this.batches);
        if (!newHeadDir.mkdir()) {
            throw new IndexException("Could not create new head directory at " + newHeadDir.getAbsolutePath() + "!");
        }
        Map<CompressionFlags.Component, CompressionFlags.Coding> writerFlags = CompressionFlags.DEFAULT_QUASI_SUCCINCT_INDEX;
        String outputBasename = new File(newHeadDir, this.name).getAbsolutePath();
        String[] inputBasenames = new String[mergedBatches.size()];
        int slot = 0;
        for (MG4JIndex batch : mergedBatches) {
            inputBasenames[slot++] = new File(batch.indexDir, this.name).getAbsolutePath();
        }
        try {
            Concatenate concatenate = new Concatenate(IOFactory.FILESYSTEM_FACTORY, outputBasename, inputBasenames, false, 1048576, writerFlags, Combine.IndexType.QUASI_SUCCINCT, true, -5, 16, 33554432, 10000L);
            concatenate.run();
            File termsFile = new File(outputBasename + ".terms");
            File termMapFile = new File(outputBasename + DiskBasedIndex.TERMMAP_EXTENSION);
            File bloomFile = new File(outputBasename + IndexCluster.BLOOM_EXTENSION);
            generateTermMap(termsFile, termMapFile, bloomFile);
            if (hasDirectIndex()) {
                String directBasename = new File(newHeadDir, this.name + DIRECT_INDEX_NAME_SUFFIX).getAbsolutePath();
                combineDirectIndexes(mergedBatches, directBasename);
            }
            synchronized (this) {
                // Drop the merged batches first; the new head is re-inserted at the front below.
                this.batches.removeAll(mergedBatches);
                File headDir = new File(this.indexDirectory, "head");
                File oldHeadDir = new File(this.indexDirectory, "head.old");
                boolean headMovedAside = headDir.exists() && headDir.renameTo(oldHeadDir);
                if (!headMovedAside) {
                    throw new IndexException("Cold not rename head at " + headDir.getAbsolutePath() + " to " + oldHeadDir);
                }
                boolean newHeadInPlace = newHeadDir.renameTo(headDir);
                if (!newHeadInPlace) {
                    throw new IndexException("Cold not rename new head at " + newHeadDir.getAbsolutePath() + " to " + headDir);
                }
                this.batches.add(0, openSubIndex("head"));
                this.invertedIndex = openInvertedIndexCluster(this.batches, this.termProcessor);
                if (this.hasDirectIndex) {
                    this.directIndex = openDirectIndexCluster(this.batches);
                }
                // Clean-up: the previous head, then every merged batch directory
                // other than the path that now holds the new head.
                if (!Files.rmdir(oldHeadDir)) {
                    throw new IndexException("Could not fully delete old sub-index at: " + oldHeadDir);
                }
                for (MG4JIndex batch : mergedBatches) {
                    boolean isHeadPath = batch.indexDir.equals(headDir);
                    if (!isHeadPath && !Files.rmdir(batch.indexDir)) {
                        throw new IndexException("Could not fully delete old sub-index at: " + batch.indexDir);
                    }
                }
            }
        } catch (Exception e) {
            throw new IndexException("Exception while combining sub-indexes", e);
        }
    }

    /**
     * Writes a single direct index at basename {@code str} by appending, in list
     * order, the postings of the direct index of every sub-index in {@code list}.
     *
     * <p>For each input sub-index the method walks its direct-index reader and its
     * {@code .terms} file in lock-step: every term is appended to the output
     * {@code .terms} file and added to a Bloom filter, and the (document, count)
     * pairs of its postings are copied to the output writer. Afterwards the term
     * map, the Bloom filter, the {@code .properties} file and the {@code .stats}
     * file are written next to the output index.
     *
     * @param list the sub-indexes whose direct indexes are combined
     * @param str  basename (absolute path without extension) of the output index
     * @throws IOException if reading an input or writing an output fails
     * @throws ConfigurationException if the index properties cannot be saved
     */
    protected static void combineDirectIndexes(List<MG4JIndex> list, String str) throws IOException, ConfigurationException {
        long totalDocuments = 0;
        long totalTerms = 0;
        for (MG4JIndex batch : list) {
            totalDocuments += batch.directIndex.numberOfDocuments;
            totalTerms += batch.directIndex.numberOfTerms;
        }
        // Default quasi-succinct flags with the POSITIONS component removed:
        // only pointers and counts are written to the combined index.
        Map<CompressionFlags.Component, CompressionFlags.Coding> flags = new HashMap<CompressionFlags.Component, CompressionFlags.Coding>(CompressionFlags.DEFAULT_QUASI_SUCCINCT_INDEX);
        flags.remove(CompressionFlags.Component.POSITIONS);
        QuasiSuccinctIndexWriter quasiSuccinctIndexWriter = new QuasiSuccinctIndexWriter(IOFactory.FILESYSTEM_FACTORY, str, totalDocuments, Fast.mostSignificantBit(256), 16777216, flags, ByteOrder.nativeOrder());
        BloomFilter<Void> termFilter = BloomFilter.create(totalTerms);
        PrintWriter termsWriter = new PrintWriter(new OutputStreamWriter(new FastBufferedOutputStream(new FileOutputStream(str + ".terms"), 65536), "UTF-8"));
        long occurrences = 0;
        int maxCount = 0;
        // A single postings buffer is reused (and cleared) for every term.
        PostingsList postingsList = new PostingsList(false);
        for (MG4JIndex batch : list) {
            IndexReader reader = batch.directIndex.getReader();
            try {
                FileLinesCollection.FileLinesIterator termLines = new FileLinesCollection(new File(batch.indexDir, batch.indexName + DIRECT_INDEX_NAME_SUFFIX + ".terms").getAbsolutePath(), "UTF-8").iterator();
                IndexIterator termPostings = reader.nextIterator();
                // Stop as soon as either the postings or the term lines run out;
                // without this condition the loop would never terminate.
                while (termPostings != null && termLines.hasNext()) {
                    MutableString term = termLines.next();
                    termFilter.add(term);
                    term.println(termsWriter);
                    for (long pointer = termPostings.nextDocument(); pointer != Long.MAX_VALUE; pointer = termPostings.nextDocument()) {
                        postingsList.newDocumentPointer(pointer);
                        postingsList.setCount(termPostings.count());
                    }
                    postingsList.flush();
                    occurrences += postingsList.occurrences;
                    if (maxCount < postingsList.maxCount) {
                        maxCount = postingsList.maxCount;
                    }
                    postingsList.write(quasiSuccinctIndexWriter);
                    postingsList.clear();
                    termPostings = reader.nextIterator();
                }
            } finally {
                // Release the input reader even if copying this sub-index fails.
                reader.close();
            }
        }
        quasiSuccinctIndexWriter.close();
        termsWriter.close();
        generateTermMap(new File(str + ".terms"), new File(str + DiskBasedIndex.TERMMAP_EXTENSION), null);
        BinIO.storeObject(termFilter, new File(str + IndexCluster.BLOOM_EXTENSION));
        Properties properties = quasiSuccinctIndexWriter.properties();
        properties.setProperty(Index.PropertyKeys.TERMPROCESSOR, ObjectParser.toSpec(NullTermProcessor.getInstance()));
        properties.setProperty((Enum<?>) Index.PropertyKeys.SIZE, quasiSuccinctIndexWriter.writtenBits());
        properties.setProperty((Enum<?>) Index.PropertyKeys.MAXDOCSIZE, -1);
        properties.setProperty((Enum<?>) Index.PropertyKeys.MAXCOUNT, maxCount);
        properties.setProperty((Enum<?>) Index.PropertyKeys.OCCURRENCES, occurrences);
        Scan.saveProperties(IOFactory.FILESYSTEM_FACTORY, properties, str + ".properties");
        PrintStream printStream = new PrintStream(new File(str + ".stats"));
        quasiSuccinctIndexWriter.printStats(printStream);
        printStream.close();
    }

    /**
     * Asks the indexing thread to write the current in-RAM batch to disk.
     *
     * <p>If no batch write is pending, a new task wrapping
     * {@code writeCurrentBatch()} is stored in {@code batchWriteTask} and the
     * {@code DUMP_BATCH} marker is queued on the input queue; otherwise the
     * already pending task is returned.
     *
     * @return the future of the pending batch write, yielding the value returned
     *         by {@code writeCurrentBatch()}
     * @throws InterruptedException if interrupted while queueing the marker
     */
    public Future<Long> requestSyncToDisk() throws InterruptedException {
        // Work on a local reference: the consumer of the queue resets the field to
        // null after running the task, so re-reading the field after put() could
        // hand a null back to the caller.
        Future<Long> pending = this.batchWriteTask;
        if (pending == null) {
            FutureTask<Long> created = new FutureTask<Long>(new Callable<Long>() {
                @Override
                public Long call() throws Exception {
                    return Long.valueOf(AtomicIndex.this.writeCurrentBatch());
                }
            });
            this.batchWriteTask = created;
            this.inputQueue.put(DUMP_BATCH);
            pending = created;
        }
        return pending;
    }

    /**
     * Asks the indexing thread to compact the index.
     *
     * <p>If no compaction is pending, a new task wrapping {@code compactIndex()}
     * is stored in {@code compactIndexTask} and the {@code COMPACT_INDEX} marker
     * is queued on the input queue; otherwise the already pending task is
     * returned.
     *
     * @return the future of the pending compaction (its result is always null)
     * @throws InterruptedException if interrupted while queueing the marker
     */
    public Future<Void> requestCompactIndex() throws InterruptedException {
        // Work on a local reference: the consumer of the queue resets the field to
        // null after running the task, so re-reading the field after put() could
        // hand a null back to the caller.
        Future<Void> pending = this.compactIndexTask;
        if (pending == null) {
            FutureTask<Void> created = new FutureTask<Void>(new Callable<Void>() {
                @Override
                public Void call() throws Exception {
                    AtomicIndex.this.compactIndex();
                    return null;
                }
            });
            this.compactIndexTask = created;
            this.inputQueue.put(COMPACT_INDEX);
            pending = created;
        }
        return pending;
    }

    /**
     * Opens the sub-index stored in the directory {@code str} under the index
     * directory.
     *
     * <p>The inverted index (and, when {@code hasDirectIndex} is set, the direct
     * index) is first requested with the {@code mapped=1} URI option; if that
     * attempt fails with an {@link IOException}, the index is loaded again
     * without the option. A Bloom filter file next to each index is loaded when
     * it exists; a {@link ClassNotFoundException} while loading it is only logged
     * and leaves the filter null.
     *
     * @param str name of the sub-index directory, relative to the index directory
     * @return a holder for the opened index or indexes and their Bloom filters
     * @throws IOException if the inverted index's Bloom filter cannot be read
     * @throws IndexException if either index cannot be opened, or the direct
     *         index's Bloom filter cannot be read
     */
    protected MG4JIndex openSubIndex(String str) throws IOException, IndexException {
        File subIndexDir = new File(this.indexDirectory, str);
        String invertedBasename = new File(subIndexDir, this.name).getAbsolutePath();
        Index inverted;
        try {
            try {
                inverted = Index.getInstance(invertedBasename + "?" + Index.UriKeys.MAPPED.name().toLowerCase() + "=1;", true, true);
            } catch (IOException e) {
                // The IOException must be handled before the generic handler below,
                // otherwise the non-mapped fallback can never run.
                logger.info("Memory mapping failed for index " + invertedBasename + ". Loading as file index instead");
                inverted = Index.getInstance(invertedBasename, true, true);
            }
        } catch (Exception e) {
            throw new IndexException("Could not open the sub-index at" + invertedBasename, e);
        }
        File invertedBloomFile = new File(invertedBasename + IndexCluster.BLOOM_EXTENSION);
        BloomFilter invertedBloom = null;
        try {
            if (invertedBloomFile.exists()) {
                invertedBloom = (BloomFilter) BinIO.loadObject(invertedBloomFile);
            }
        } catch (ClassNotFoundException e) {
            logger.warn("Exception wile loading stre Bloom Filter", (Throwable) e);
        }
        Index direct = null;
        BloomFilter directBloom = null;
        if (this.hasDirectIndex) {
            String directBasename = new File(subIndexDir, this.name + DIRECT_INDEX_NAME_SUFFIX).getAbsolutePath();
            try {
                try {
                    direct = Index.getInstance(directBasename + "?" + Index.UriKeys.MAPPED.name().toLowerCase() + "=1;", true, false);
                } catch (IOException e) {
                    logger.info("Memory mapping failed for index " + directBasename + ". Loading as file index instead");
                    direct = Index.getInstance(directBasename, true, false);
                }
                File directBloomFile = new File(directBasename + IndexCluster.BLOOM_EXTENSION);
                try {
                    if (directBloomFile.exists()) {
                        directBloom = (BloomFilter) BinIO.loadObject(directBloomFile);
                    }
                } catch (ClassNotFoundException e) {
                    logger.warn("Exception wile loading stre Bloom Filter", (Throwable) e);
                }
            } catch (Exception e) {
                throw new IndexException("Could not open the sub-index at" + directBasename, e);
            }
        }
        return new MG4JIndex(subIndexDir, this.name, inverted, invertedBloom, direct, directBloom);
    }

    /* JADX WARN: Code restructure failed: missing block: B:10:0x0031, code lost:
    
        if (r6.batchWriteTask == null) goto L12;
     */
    /* JADX WARN: Code restructure failed: missing block: B:11:0x0034, code lost:
    
        r6.batchWriteTask.run();
     */
    /* JADX WARN: Code restructure failed: missing block: B:12:0x003d, code lost:
    
        r6.batchWriteTask = null;
     */
    /* JADX WARN: Code restructure failed: missing block: B:23:0x0049, code lost:
    
        if (r0 != gate.mimir.index.AtomicIndex.COMPACT_INDEX) goto L43;
     */
    /* JADX WARN: Code restructure failed: missing block: B:25:0x0050, code lost:
    
        if (r6.compactIndexTask == null) goto L18;
     */
    /* JADX WARN: Code restructure failed: missing block: B:26:0x0053, code lost:
    
        r6.compactIndexTask.run();
     */
    /* JADX WARN: Code restructure failed: missing block: B:27:0x005c, code lost:
    
        r6.compactIndexTask = null;
     */
    /* JADX WARN: Code restructure failed: missing block: B:29:0x0064, code lost:
    
        r0 = r6.occurrencesInRAM;
        processDocument(r0);
        r0.addOccurrences(r6.occurrencesInRAM - r0);
     */
    /* JADX WARN: Code restructure failed: missing block: B:31:0x007b, code lost:
    
        r8 = move-exception;
     */
    /* JADX WARN: Code restructure failed: missing block: B:32:0x007c, code lost:
    
        gate.mimir.index.AtomicIndex.logger.error("Problem while indexing document!", r8);
     */
    /* JADX WARN: Code restructure failed: missing block: B:33:0x008b, code lost:
    
        writeCurrentBatch();
        flush();
     */
    /* JADX WARN: Code restructure failed: missing block: B:4:0x000f, code lost:
    
        if (r6.inputQueue != null) goto L5;
     */
    /* JADX WARN: Code restructure failed: missing block: B:5:0x0012, code lost:
    
        r0 = r6.inputQueue.take();
     */
    /* JADX WARN: Code restructure failed: missing block: B:6:0x0023, code lost:
    
        if (r0 == gate.mimir.index.GATEDocument.END_OF_QUEUE) goto L22;
     */
    /* JADX WARN: Code restructure failed: missing block: B:8:0x002a, code lost:
    
        if (r0 != gate.mimir.index.AtomicIndex.DUMP_BATCH) goto L13;
     */
    /**
     * {@link Runnable} entry point of this index.
     *
     * <p>NOTE(review): the decompiler failed to reconstruct this method, so as
     * written it only throws {@link UnsupportedOperationException}. The JADX
     * fragments in the comment preceding this method suggest that the real
     * implementation takes items from {@code inputQueue} until
     * {@code GATEDocument.END_OF_QUEUE} is seen, runs and then clears
     * {@code batchWriteTask} on {@code DUMP_BATCH} and {@code compactIndexTask}
     * on {@code COMPACT_INDEX}, passes anything else to {@code processDocument},
     * and calls {@code writeCurrentBatch()} and {@code flush()} at some point -
     * confirm against the original source before relying on any of this.
     */
    @Override // java.lang.Runnable
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    public void run() {
        /*
            Method dump skipped, instructions count: 238
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: gate.mimir.index.AtomicIndex.run():void");
    }

    /**
     * Subclass hook with no implementation in this class.
     *
     * <p>NOTE(review): the decompiler fragments for {@code run()} show it being
     * called right after {@code writeCurrentBatch()}; what exactly must be
     * flushed is not visible here - confirm against the implementations.
     *
     * @throws IOException if flushing fails
     */
    protected abstract void flush() throws IOException;

    /**
     * Queues the {@code GATEDocument.END_OF_QUEUE} marker on the input queue
     * and, when an indexing thread is set, blocks until that thread terminates.
     *
     * @throws InterruptedException if interrupted while queueing the marker or
     *         while waiting for the indexing thread
     */
    public void close() throws InterruptedException {
        this.inputQueue.put(GATEDocument.END_OF_QUEUE);
        if (this.indexingThread == null) {
            // No thread to wait for.
            return;
        }
        this.indexingThread.join();
    }

    /**
     * Hook invoked by {@code processDocument} after the token position has been
     * reset and before the document's annotations are fetched. This default
     * implementation does nothing.
     *
     * @param gATEDocument the document about to be indexed
     * @throws IndexException declared so that overriding methods may report failures
     */
    protected void documentStarting(GATEDocument gATEDocument) throws IndexException {
    }

    /**
     * Hook invoked by {@code processDocument} once it is done with a document,
     * on both the normal and the failure path. This default implementation does
     * nothing.
     *
     * @param gATEDocument the document that was being indexed
     * @throws IndexException declared so that overriding methods may report failures
     */
    protected void documentEnding(GATEDocument gATEDocument) throws IndexException {
    }

    /**
     * Supplies the annotations of a document that {@code processDocument} passes,
     * in array order, to {@code processAnnotation}.
     *
     * @param gATEDocument the document being indexed
     * @return the annotations to index; {@code processDocument} reads its length,
     *         so it must not be null
     * @throws IndexException if the annotations cannot be obtained
     */
    protected abstract Annotation[] getAnnotsToProcess(GATEDocument gATEDocument) throws IndexException;

    /**
     * Called by {@code processAnnotation} for every annotation, before its term
     * strings are computed.
     *
     * <p>NOTE(review): presumably sets {@code tokenPosition} to the position at
     * which the annotation's terms are indexed - confirm against the
     * implementations.
     *
     * @param annotation   the annotation about to be indexed
     * @param gATEDocument the document that contains the annotation
     * @throws IndexException if the position cannot be determined
     */
    protected abstract void calculateStartPositionForAnnotation(Annotation annotation, GATEDocument gATEDocument) throws IndexException;

    /**
     * Computes the term strings to index for an annotation.
     *
     * <p>{@code processAnnotation} interprets the result as follows: {@code null}
     * means "index {@code currentTerm} as it currently stands"; an empty array
     * means "index nothing"; otherwise every element is copied into
     * {@code currentTerm} (a null element is treated as the empty string) and
     * indexed.
     *
     * @param annotation   the annotation being indexed
     * @param gATEDocument the document that contains the annotation
     * @return the term strings, an empty array, or {@code null} as described above
     * @throws IndexException if the terms cannot be computed
     */
    protected abstract String[] calculateTermStringForAnnotation(Annotation annotation, GATEDocument gATEDocument) throws IndexException;

    /**
     * Indexes one document into the in-RAM batch.
     *
     * <p>Resets the token position, calls {@code documentStarting}, fetches the
     * annotations to process and indexes each of them. When all annotations were
     * processed, the document size ({@code tokenPosition + 1}) is recorded and
     * {@code maxDocSizeInRAM} is updated if needed. Whether or not indexing
     * succeeded, {@code documentEnding} is then called exactly once and
     * {@code documentsInRAM} is incremented.
     *
     * @param gATEDocument the document to index
     * @throws IndexException if any of the hooks or the annotation processing fails
     */
    protected void processDocument(GATEDocument gATEDocument) throws IndexException {
        this.tokenPosition = 0;
        documentStarting(gATEDocument);
        Annotation[] annotsToProcess = getAnnotsToProcess(gATEDocument);
        logger.debug("Starting document " + gATEDocument.getDocument().getName() + ". " + annotsToProcess.length + " annotations to process");
        try {
            for (Annotation annotation : annotsToProcess) {
                processAnnotation(annotation, gATEDocument);
            }
            int documentSize = this.tokenPosition + 1;
            if (documentSize > this.maxDocSizeInRAM) {
                this.maxDocSizeInRAM = documentSize;
            }
            this.documentSizesInRAM.add(documentSize);
        } finally {
            // Kept out of the try body so that a failing documentEnding() is not
            // caught and invoked a second time.
            documentEnding(gATEDocument);
            this.documentsInRAM++;
        }
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Indexes a single annotation of a document.
     *
     * <p>After {@code calculateStartPositionForAnnotation} has run, the term
     * strings for the annotation are computed. A {@code null} result indexes
     * {@code currentTerm} as it stands; otherwise each returned string (null
     * elements become the empty string) is copied into {@code currentTerm} and
     * indexed, so an empty array indexes nothing.
     *
     * @param annotation   the annotation to index
     * @param gATEDocument the document that contains the annotation
     * @throws IndexException if computing the position or the terms fails
     */
    public void processAnnotation(Annotation annotation, GATEDocument gATEDocument) throws IndexException {
        calculateStartPositionForAnnotation(annotation, gATEDocument);
        String[] terms = calculateTermStringForAnnotation(annotation, gATEDocument);
        if (terms != null) {
            for (String term : terms) {
                this.currentTerm.replace(term != null ? term : "");
                indexCurrentTerm();
            }
        } else {
            indexCurrentTerm();
        }
    }

    /**
     * Records an occurrence of {@code currentTerm} at {@code tokenPosition} in
     * the document currently being indexed.
     *
     * <p>The term's postings list is looked up in {@code termMap} and created
     * (keyed by a copy of the term) if absent. The position is added, and
     * {@code occurrencesInRAM} incremented, only when the postings list accepts
     * it via {@code checkPosition}; otherwise a debug message is logged.
     */
    protected void indexCurrentTerm() {
        PostingsList postings = this.termMap.get(this.currentTerm);
        if (postings == null) {
            // currentTerm is reused between terms, so the map key must be a copy.
            MutableString key = this.currentTerm.copy();
            postings = new PostingsList(true);
            this.termMap.put(key, postings);
        }
        postings.newDocumentPointer(this.documentsInRAM);
        if (postings.checkPosition(this.tokenPosition)) {
            postings.addPosition(this.tokenPosition);
            this.occurrencesInRAM++;
        } else {
            logger.debug("Duplicate position");
        }
    }

    /**
     * Returns the directory under which this index keeps its sub-index
     * directories.
     *
     * @return the {@code indexDirectory} field
     */
    public File getIndexDirectory() {
        return this.indexDirectory;
    }

    /**
     * Returns the {@link MimirIndex} stored in the {@code parent} field.
     *
     * @return the {@code parent} field
     */
    public MimirIndex getParent() {
        return this.parent;
    }

    /**
     * Returns the queue on which this index receives documents and control
     * markers.
     *
     * @return the {@code inputQueue} field
     */
    public BlockingQueue<GATEDocument> getInputQueue() {
        return this.inputQueue;
    }

    /**
     * Returns the output queue of this index.
     *
     * @return the {@code outputQueue} field
     */
    public BlockingQueue<GATEDocument> getOutputQueue() {
        return this.outputQueue;
    }

    /**
     * Returns the inverted index currently in use.
     *
     * @return the {@code invertedIndex} field
     */
    public Index getIndex() {
        return this.invertedIndex;
    }

    /**
     * Returns the direct index currently in use.
     *
     * @return the {@code directIndex} field
     */
    public Index getDirectIndex() {
        return this.directIndex;
    }

    /**
     * Returns the direct term stored at a given position of the direct terms
     * list.
     *
     * @param j position of the term in {@code directTerms}
     * @return the element of {@code directTerms} at position {@code j}
     */
    public CharSequence getDirectTerm(long j) {
        return this.directTerms.get(j);
    }

    /**
     * Returns the list of direct terms. The internal list itself is returned,
     * not a copy.
     *
     * @return the {@code directTerms} field
     */
    public ObjectBigList<? extends CharSequence> getDirectTerms() {
        return this.directTerms;
    }

    /**
     * Sums, over all documents of the inverted index, the count of the direct
     * term found at position {@code j} of the direct terms list.
     *
     * @param j position of the term in {@code directTerms}
     * @return the sum of the per-document counts for that term
     * @throws IOException if the inverted index cannot be read
     */
    public long getDirectTermOccurenceCount(long j) throws IOException {
        IndexIterator postings = this.invertedIndex.documents(this.directTerms.get(j));
        long total = 0;
        // nextDocument() yields Long.MAX_VALUE once the postings are exhausted.
        while (postings.nextDocument() != Long.MAX_VALUE) {
            total += postings.count();
        }
        return total;
    }

    /**
     * Returns the number of sub-indexes currently held in {@code batches}.
     *
     * @return the size of {@code batches}
     */
    public int getBatchCount() {
        return this.batches.size();
    }
}
