/*
 * Decompiled with CFR 0.152.
 */
package it.unimi.dsi.mg4j.tool;

import com.martiansoftware.jsap.FlaggedOption;
import com.martiansoftware.jsap.JSAP;
import com.martiansoftware.jsap.JSAPException;
import com.martiansoftware.jsap.JSAPResult;
import com.martiansoftware.jsap.Parameter;
import com.martiansoftware.jsap.SimpleJSAP;
import com.martiansoftware.jsap.StringParser;
import com.martiansoftware.jsap.Switch;
import com.martiansoftware.jsap.UnflaggedOption;
import it.unimi.dsi.fastutil.ints.Int2IntArrayMap;
import it.unimi.dsi.fastutil.ints.Int2IntMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectArrayMap;
import it.unimi.dsi.fastutil.ints.Int2ObjectMap;
import it.unimi.dsi.fastutil.ints.IntRBTreeSet;
import it.unimi.dsi.fastutil.ints.IntSortedSet;
import it.unimi.dsi.fastutil.io.BinIO;
import it.unimi.dsi.fastutil.objects.ObjectArrays;
import it.unimi.dsi.mg4j.document.DocumentFactory;
import it.unimi.dsi.mg4j.document.DocumentSequence;
import it.unimi.dsi.mg4j.document.IdentityDocumentFactory;
import it.unimi.dsi.mg4j.index.CompressionFlags;
import it.unimi.dsi.mg4j.index.DowncaseTermProcessor;
import it.unimi.dsi.mg4j.index.Index;
import it.unimi.dsi.mg4j.index.NullTermProcessor;
import it.unimi.dsi.mg4j.index.TermMap;
import it.unimi.dsi.mg4j.index.TermMaps;
import it.unimi.dsi.mg4j.index.TermProcessor;
import it.unimi.dsi.mg4j.index.cluster.IndexCluster;
import it.unimi.dsi.mg4j.io.FileLinesCollection;
import it.unimi.dsi.mg4j.tool.Concatenate;
import it.unimi.dsi.mg4j.tool.Merge;
import it.unimi.dsi.mg4j.tool.Paste;
import it.unimi.dsi.mg4j.tool.Scan;
import it.unimi.dsi.mg4j.tool.VirtualDocumentResolver;
import it.unimi.dsi.mg4j.util.Fast;
import it.unimi.dsi.mg4j.util.ImmutableExternalTriePrefixDictionary;
import it.unimi.dsi.mg4j.util.MG4JClassParser;
import it.unimi.dsi.mg4j.util.Properties;
import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Map;
import org.apache.commons.configuration.ConfigurationException;
import org.apache.log4j.Logger;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class IndexBuilder {
    static final Logger LOGGER = Fast.getLogger(IndexBuilder.class);
    private final String basename;
    private final DocumentSequence documentSequence;
    private TermProcessor termProcessor = DowncaseTermProcessor.getInstance();
    private int documentsPerBatch = 100000;
    private boolean keepBatches;
    private String zipCollectionBasename;
    private Map<CompressionFlags.Component, CompressionFlags.Coding> standardWriterFlags = CompressionFlags.DEFAULT_STANDARD_INDEX;
    private Map<CompressionFlags.Component, CompressionFlags.Coding> payloadWriterFlags = CompressionFlags.DEFAULT_PAYLOAD_INDEX;
    private boolean skips;
    private boolean interleaved;
    private int quantum = 64;
    private int height = 8;
    private int scanBufferSize = 65536;
    private int combineBufferSize = 0x100000;
    private int skipBufferSize = 0x4000000;
    private int pasteBufferSize = 0x1000000;
    private String batchDirName;
    public IntSortedSet indexedFields = new IntRBTreeSet();
    public Int2ObjectMap<VirtualDocumentResolver> virtualDocumentResolvers = new Int2ObjectArrayMap();
    private Class<? extends TermMap> termMapClass = ImmutableExternalTriePrefixDictionary.class;
    public Int2IntMap virtualDocumentGaps = new Int2IntArrayMap();
    private String mapFile;
    private long logInterval;

    public IndexBuilder(String basename, DocumentSequence documentSequence) {
        this.virtualDocumentGaps.defaultReturnValue(64);
        this.logInterval = 10000L;
        this.basename = basename;
        this.documentSequence = documentSequence;
    }

    public IndexBuilder termProcessor(TermProcessor termProcessor) {
        this.termProcessor = termProcessor;
        return this;
    }

    public IndexBuilder indexedFields(int ... field) {
        this.indexedFields.clear();
        for (int f : field) {
            this.indexedFields.add(f);
        }
        return this;
    }

    public IndexBuilder virtualDocumentResolver(int field, VirtualDocumentResolver virtualDocumentResolver) {
        this.virtualDocumentResolvers.put(field, (Object)virtualDocumentResolver);
        return this;
    }

    public IndexBuilder scanBufferSize(int bufferSize) {
        this.scanBufferSize = bufferSize;
        return this;
    }

    public IndexBuilder combineBufferSize(int bufferSize) {
        this.combineBufferSize = bufferSize;
        return this;
    }

    public IndexBuilder bufferSize(int bufferSize) {
        this.scanBufferSize(bufferSize);
        this.combineBufferSize(bufferSize);
        return this;
    }

    public IndexBuilder skipBufferSize(int bufferSize) {
        this.skipBufferSize = bufferSize;
        return this;
    }

    public IndexBuilder pasteBufferSize(int bufferSize) {
        this.pasteBufferSize = bufferSize;
        return this;
    }

    public IndexBuilder zipCollectionBasename(String zipCollectionBasename) {
        this.zipCollectionBasename = zipCollectionBasename;
        return this;
    }

    public IndexBuilder documentsPerBatch(int documentsPerBatch) {
        this.documentsPerBatch = documentsPerBatch;
        return this;
    }

    public IndexBuilder keepBatches(boolean keepBatches) {
        this.keepBatches = keepBatches;
        return this;
    }

    public IndexBuilder standardWriterFlags(Map<CompressionFlags.Component, CompressionFlags.Coding> standardWriterFlags) {
        this.standardWriterFlags = standardWriterFlags;
        return this;
    }

    public IndexBuilder payloadWriterFlags(Map<CompressionFlags.Component, CompressionFlags.Coding> payloadWriterFlags) {
        this.payloadWriterFlags = payloadWriterFlags;
        return this;
    }

    public IndexBuilder skips(boolean skips) {
        this.skips = skips;
        return this;
    }

    public IndexBuilder interleaved(boolean interleaved) {
        this.interleaved = interleaved;
        return this;
    }

    public IndexBuilder quantum(int quantum) {
        this.quantum = quantum;
        return this;
    }

    public IndexBuilder height(int height) {
        this.height = height;
        return this;
    }

    public IndexBuilder mapFile(String mapFile) {
        this.mapFile = mapFile;
        return this;
    }

    public IndexBuilder logInterval(long logInterval) {
        this.logInterval = logInterval;
        return this;
    }

    public IndexBuilder batchDirName(String batchDirName) {
        this.batchDirName = batchDirName;
        return this;
    }

    public IndexBuilder termMapClass(Class<? extends TermMap> termMapClass) {
        this.termMapClass = termMapClass;
        try {
            termMapClass.getConstructor(Iterable.class);
        }
        catch (Exception e) {
            throw new IllegalArgumentException("Class " + termMapClass + " have no constructor accepting an Iterable");
        }
        return this;
    }

    public void run() throws ConfigurationException, SecurityException, IOException, URISyntaxException, ClassNotFoundException, InstantiationException, IllegalAccessException, InvocationTargetException, NoSuchMethodException {
        int i;
        DocumentFactory factory = this.documentSequence.factory();
        if (this.indexedFields.isEmpty()) {
            for (int i2 = 0; i2 < factory.numberOfFields(); ++i2) {
                if (factory.fieldType(i2) == DocumentFactory.FieldType.VIRTUAL && !this.virtualDocumentResolvers.containsKey(i2)) continue;
                this.indexedFields.add(i2);
            }
        }
        int[] indexedField = this.indexedFields.toIntArray();
        Object[] basenameField = new String[indexedField.length];
        for (int i3 = 0; i3 < indexedField.length; ++i3) {
            basenameField[i3] = this.basename + "-" + factory.fieldName(indexedField[i3]);
        }
        LOGGER.info((Object)("Creating indices " + Arrays.toString(basenameField) + "..."));
        int[] virtualDocumentGap = new int[indexedField.length];
        for (int i4 = 0; i4 < indexedField.length; ++i4) {
            virtualDocumentGap[i4] = this.virtualDocumentGaps.get(i4);
        }
        Object[] virtualDocumentResolver = new VirtualDocumentResolver[indexedField.length];
        Iterator i$ = this.virtualDocumentResolvers.keySet().iterator();
        while (i$.hasNext()) {
            i = (Integer)i$.next();
            virtualDocumentResolver[i] = (VirtualDocumentResolver)this.virtualDocumentResolvers.get(i);
        }
        Scan.run(this.basename, this.documentSequence, this.termProcessor, this.zipCollectionBasename, this.scanBufferSize, this.documentsPerBatch, indexedField, (VirtualDocumentResolver[])virtualDocumentResolver, virtualDocumentGap, this.mapFile, this.logInterval, this.batchDirName);
        if (virtualDocumentResolver != null) {
            ObjectArrays.fill((Object[])virtualDocumentResolver, null);
        }
        File batchDir = this.batchDirName == null ? null : new File(this.batchDirName);
        for (i = 0; i < indexedField.length; ++i) {
            String[] inputBasename;
            int batches;
            if (factory.fieldType(indexedField[i]) == DocumentFactory.FieldType.VIRTUAL) {
                batches = new Properties((String)basenameField[i] + ".properties").getInt(Index.PropertyKeys.BATCHES);
                inputBasename = new String[batches];
                for (int j = 0; j < inputBasename.length; ++j) {
                    inputBasename[j] = Scan.batchBasename(j, (String)basenameField[i], batchDir);
                }
                new Paste((String)basenameField[i], inputBasename, false, this.combineBufferSize, batchDir, this.pasteBufferSize, this.standardWriterFlags, this.interleaved, this.skips, this.quantum, this.height, this.skipBufferSize, this.logInterval).run();
            } else {
                inputBasename = new Properties((String)basenameField[i] + ".cluster.properties").getStringArray(IndexCluster.PropertyKeys.LOCALINDEX);
                batches = inputBasename.length;
                if (factory.fieldType(indexedField[i]) == DocumentFactory.FieldType.TEXT) {
                    if (this.mapFile != null) {
                        new Merge((String)basenameField[i], inputBasename, false, this.combineBufferSize, this.standardWriterFlags, this.interleaved, this.skips, this.quantum, this.height, this.skipBufferSize, this.logInterval).run();
                    } else {
                        new Concatenate((String)basenameField[i], inputBasename, false, this.combineBufferSize, this.standardWriterFlags, this.interleaved, this.skips, this.quantum, this.height, this.skipBufferSize, this.logInterval).run();
                    }
                } else if (this.mapFile != null) {
                    new Merge((String)basenameField[i], inputBasename, false, this.combineBufferSize, this.payloadWriterFlags, this.interleaved, this.skips, this.quantum, this.height, this.skipBufferSize, this.logInterval).run();
                } else {
                    new Concatenate((String)basenameField[i], inputBasename, false, this.combineBufferSize, this.payloadWriterFlags, this.interleaved, this.skips, this.quantum, this.height, this.skipBufferSize, this.logInterval).run();
                }
            }
            if (this.keepBatches) continue;
            Scan.cleanup((String)basenameField[i], batches, batchDir);
        }
        LOGGER.info((Object)("Creating term maps (class: " + this.termMapClass.getSimpleName() + ")..."));
        for (i = 0; i < indexedField.length; ++i) {
            BinIO.storeObject((Object)TermMaps.synchronize(this.termMapClass.getConstructor(Iterable.class).newInstance(new FileLinesCollection((String)basenameField[i] + ".terms", "UTF-8"))), (CharSequence)((String)basenameField[i] + ".termmap"));
        }
        LOGGER.info((Object)"Indexing completed.");
    }

    public static void main(String[] arg) throws JSAPException, InvocationTargetException, NoSuchMethodException, IllegalAccessException, ConfigurationException, ClassNotFoundException, IOException, InstantiationException, URISyntaxException {
        int i;
        SimpleJSAP jsap = new SimpleJSAP(IndexBuilder.class.getName(), "Builds an index (creates batches, combines them, and builds a term map).", new Parameter[]{new FlaggedOption("sequence", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'S', "sequence", "A serialised document sequence that will be used instead of stdin."), new FlaggedOption("factory", (StringParser)MG4JClassParser.getParser(), IdentityDocumentFactory.class.getName(), false, 'f', "factory", "A document factory with a standard constructor."), new FlaggedOption("property", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'p', "property", "A 'key=value' specification, or the name of a property file").setAllowMultipleDeclarations(true), new FlaggedOption("termProcessor", (StringParser)MG4JClassParser.getParser(), NullTermProcessor.class.getName(), false, 't', "term-processor", "Sets the term processor to the given class."), new FlaggedOption("termMap", (StringParser)MG4JClassParser.getParser(), ImmutableExternalTriePrefixDictionary.class.getName(), false, 'm', "term-map", "Sets the term map class."), new Switch("downcase", '\u0000', "downcase", "A shortcut for setting the term processor to the downcasing processor."), new FlaggedOption("indexedField", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'I', "indexed-field", "The field(s) of the document factory that will be indexed. (default: all fields)").setAllowMultipleDeclarations(true), new Switch("allFields", 'a', "all-fields", "Index also all virtual fields; has no effect if indexedField has been used at least once."), new FlaggedOption("batchSize", (StringParser)JSAP.INTSIZE_PARSER, Integer.toString(100000), false, 's', "batch-size", "The size of a batch, in documents. (default: 100000)"), new Switch("keepBatches", '\u0000', "keep-batches", "Do not delete intermediate batch files."), new FlaggedOption("virtualDocumentResolver", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'v', "virtual-document-resolver", "The virtual document resolver. It can be specified several times in the form [<field>:]<filename>. If the field is omitted, it sets the document resolver for all virtual fields.").setAllowMultipleDeclarations(true), new FlaggedOption("virtualDocumentGap", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'g', "virtual-document-gap", "The virtual document gap. It can be specified several times in the form [<field>:]<gap>. If the field is omitted, it sets the document gap for all virtual fields; the default gap is 64").setAllowMultipleDeclarations(true), new FlaggedOption("scanBufferSize", (StringParser)JSAP.INTSIZE_PARSER, Fast.formatBinarySize(65536L), false, 'b', "scan-buffer-size", "The size of an I/O buffer for the scanning phase."), new FlaggedOption("combineBufferSize", (StringParser)JSAP.INTSIZE_PARSER, Fast.formatBinarySize(0x100000L), false, 'B', "combine-buffer-size", "The size of an I/O buffer for the combination phase."), new FlaggedOption("pasteBufferSize", (StringParser)JSAP.INTSIZE_PARSER, Fast.formatBinarySize(0x100000L), false, '\u0000', "paste-buffer-size", "The size of the internal temporary buffer used while pasting indices."), new FlaggedOption("skipBufferSize", (StringParser)JSAP.INTSIZE_PARSER, Fast.formatBinarySize(0x4000000L), false, '\u0000', "skip-buffer-size", "The size of the internal temporary buffer used while creating an index with skips."), new FlaggedOption("delimiter", (StringParser)JSAP.INTEGER_PARSER, Integer.toString(10), false, 'd', "delimiter", "The document delimiter."), new FlaggedOption("renumber", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'r', "renumber", "The filename of a document renumbering."), new FlaggedOption("zipCollection", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'z', "zip", "Creates a support ZipDocumentCollection with given basename."), new FlaggedOption("comp", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'c', "comp", "A compression flag for textual indices (may be specified several times).").setAllowMultipleDeclarations(true), new FlaggedOption("payloadComp", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'C', "comp-payload", "A compression flag for payload indices (may be specified several times).").setAllowMultipleDeclarations(true), new Switch("skips", '\u0000', "skips", "Requires skips (which however are present by default, unless you required an interleaved index)."), new Switch("interleaved", '\u0000', "interleaved", "Forces an interleaved index."), new FlaggedOption("quantum", (StringParser)JSAP.INTSIZE_PARSER, Integer.toString(64), false, 'Q', "quantum", "Enable skips with given quantum."), new FlaggedOption("height", (StringParser)JSAP.INTSIZE_PARSER, Integer.toString(8), false, 'H', "height", "Enable skips with given height."), new FlaggedOption("logInterval", (StringParser)JSAP.LONG_PARSER, Long.toString(10000L), false, 'l', "log-interval", "The minimum time interval between activity logs in milliseconds."), new FlaggedOption("tempDir", (StringParser)JSAP.STRING_PARSER, JSAP.NO_DEFAULT, false, 'T', "temp-dir", "A directory for all temporary batch files."), new UnflaggedOption("basename", (StringParser)JSAP.STRING_PARSER, true, "The basename of the resulting index.")});
        JSAPResult jsapResult = jsap.parse(arg);
        if (jsap.messagePrinted()) {
            return;
        }
        DocumentSequence documentSequence = Scan.getSequence(jsapResult.getString("sequence"), jsapResult.getClass("factory"), jsapResult.getStringArray("property"), jsapResult.getInt("delimiter"), LOGGER);
        DocumentFactory factory = documentSequence.factory();
        int[] indexedField = Scan.parseFieldNames(jsapResult.getStringArray("indexedField"), factory, jsapResult.getBoolean("allFields"));
        VirtualDocumentResolver[] virtualDocumentResolver = Scan.parseVirtualDocumentResolver(jsapResult.getStringArray("virtualDocumentResolver"), indexedField, factory);
        int[] virtualDocumentGap = Scan.parseVirtualDocumentGap(jsapResult.getStringArray("virtualDocumentGap"), indexedField, factory);
        Class termProcessorClass = jsapResult.getClass("termProcessor");
        TermProcessor termProcessor = jsapResult.getBoolean("downcase") ? DowncaseTermProcessor.getInstance() : (TermProcessor)termProcessorClass.getMethod("getInstance", null).invoke((Object)termProcessorClass, (Object[])null);
        boolean skips = jsapResult.getBoolean("skips");
        boolean interleaved = jsapResult.getBoolean("interleaved");
        if (interleaved && !skips && (jsapResult.userSpecified("quantum") || jsapResult.userSpecified("height"))) {
            System.err.println("You specified quantum or height, but did not turn on skips.");
            return;
        }
        IndexBuilder indexBuilder = new IndexBuilder(jsapResult.getString("basename"), documentSequence).termProcessor(termProcessor).zipCollectionBasename(jsapResult.getString("zipCollection")).scanBufferSize(jsapResult.getInt("scanBufferSize")).skipBufferSize(jsapResult.getInt("skipBufferSize")).pasteBufferSize(jsapResult.getInt("pasteBufferSize")).combineBufferSize(jsapResult.getInt("combineBufferSize")).documentsPerBatch(jsapResult.getInt("batchSize")).keepBatches(jsapResult.getBoolean("keepBatches")).termMapClass(jsapResult.getClass("termMap")).indexedFields(indexedField).skips(skips).interleaved(interleaved).quantum(jsapResult.getInt("quantum")).height(jsapResult.getInt("height")).logInterval(jsapResult.getLong("logInterval")).batchDirName(jsapResult.getString("tempDir"));
        for (i = 0; i < virtualDocumentResolver.length; ++i) {
            if (virtualDocumentResolver[i] == null) continue;
            indexBuilder.virtualDocumentResolvers.put(i, (Object)virtualDocumentResolver[i]);
        }
        for (i = 0; i < virtualDocumentGap.length; ++i) {
            indexBuilder.virtualDocumentGaps.put(i, virtualDocumentGap[i]);
        }
        if (jsapResult.userSpecified("comp")) {
            indexBuilder.standardWriterFlags(CompressionFlags.valueOf(jsapResult.getStringArray("comp"), CompressionFlags.DEFAULT_STANDARD_INDEX));
        }
        if (jsapResult.userSpecified("compPayload")) {
            indexBuilder.payloadWriterFlags(CompressionFlags.valueOf(jsapResult.getStringArray("compPayload"), CompressionFlags.DEFAULT_PAYLOAD_INDEX));
        }
        if (jsapResult.userSpecified("renumber")) {
            indexBuilder.mapFile(jsapResult.getString("renumber"));
        }
        indexBuilder.run();
    }
}

