/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.util;

import cc.mallet.pipe.NGramPreprocessor;
import cc.mallet.pipe.iterator.CsvIterator;
import cc.mallet.types.Instance;
import cc.mallet.util.CommandOption;
import cc.mallet.util.MalletLogger;
import java.io.File;
import java.io.FileReader;
import java.io.PrintWriter;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.logging.Logger;

/**
 * Command-line tool that reads instances from one or more line-oriented input
 * files, passes them through an {@link NGramPreprocessor} configured with the
 * requested replacement and deletion files, and writes each resulting instance
 * to the output file as a tab-separated {@code name, target, data} line.
 */
public class Replacer {
    protected static Logger logger = MalletLogger.getLogger(Replacer.class.getName());

    // Command-line options; populated by CommandOption.process in main.
    static CommandOption.SpacedStrings inputFiles = new CommandOption.SpacedStrings(Replacer.class, "input", "FILE [FILE ...]", true, null, "The file(s) containing data, one instance per line", null);
    static CommandOption.File outputFile = new CommandOption.File(Replacer.class, "output", "FILE", true, new File("mallet.data"), "Write the strings with replacements applied to this file", null);
    static CommandOption.SpacedStrings replacementFiles = new CommandOption.SpacedStrings(Replacer.class, "replacement-files", "FILE [FILE ...]", true, null, "files containing string replacements, one per line:\n    'A B [tab] C' replaces A B with C,\n    'A B' replaces A B with A_B", null);
    static CommandOption.SpacedStrings deletionFiles = new CommandOption.SpacedStrings(Replacer.class, "deletion-files", "FILE [FILE ...]", true, null, "files containing strings to delete after replacements but before tokenization (ie multiword stop terms)", null);
    static CommandOption.String lineRegex = new CommandOption.String(Replacer.class, "line-regex", "REGEX", true, "^([^\\t]*)\\t([^\\t]*)\\t(.*)", "Regular expression containing regex-groups for label, name and data.", null);
    static CommandOption.Integer nameGroup = new CommandOption.Integer(Replacer.class, "name", "INTEGER", true, 1, "The index of the group containing the instance name.\n   Use 0 to indicate that this field is not used.", null);
    static CommandOption.Integer labelGroup = new CommandOption.Integer(Replacer.class, "label", "INTEGER", true, 2, "The index of the group containing the label string.\n   Use 0 to indicate that this field is not used.", null);
    static CommandOption.Integer dataGroup = new CommandOption.Integer(Replacer.class, "data", "INTEGER", true, 3, "The index of the group containing the data.", null);

    /**
     * Entry point: parses the command-line options, loads replacement and
     * deletion lists into the preprocessor, then streams every instance of
     * every input file through it and writes the result to the output file.
     *
     * @param args command-line arguments, handed to {@link CommandOption#process}
     * @throws IllegalArgumentException if no input files were specified
     * @throws Exception if option processing or any file read/write fails
     */
    public static void main(String[] args) throws Exception {
        CommandOption.setSummary(Replacer.class, "Tool for modifying text with n-gram preprocessing");
        CommandOption.process(Replacer.class, args);

        // Fail before the output file is created/truncated if there is nothing to read.
        if (Replacer.inputFiles.value == null) {
            throw new IllegalArgumentException("No input files specified; use --input FILE [FILE ...]");
        }

        NGramPreprocessor preprocessor = new NGramPreprocessor();
        if (Replacer.replacementFiles.value != null) {
            for (String filename : Replacer.replacementFiles.value) {
                System.out.println("including replacements from " + filename);
                preprocessor.loadReplacements(filename);
            }
        }
        if (Replacer.deletionFiles.value != null) {
            for (String filename : Replacer.deletionFiles.value) {
                System.out.println("including deletions from " + filename);
                preprocessor.loadDeletions(filename);
            }
        }

        PrintWriter out = new PrintWriter(Replacer.outputFile.value);
        try {
            for (String filename : Replacer.inputFiles.value) {
                logger.info("Loading " + filename);
                Reader input = new FileReader(filename);
                try {
                    CsvIterator reader = new CsvIterator(input, Replacer.lineRegex.value, Replacer.dataGroup.value, Replacer.labelGroup.value, Replacer.nameGroup.value);
                    Iterator<Instance> iterator = preprocessor.newIteratorFrom(reader);
                    int count = 0;
                    // Exactly one next() per hasNext(): every instance is written,
                    // none are skipped.
                    while (iterator.hasNext()) {
                        Instance instance = iterator.next();
                        out.println(instance.getName() + "\t" + instance.getTarget() + "\t" + instance.getData());
                        if (++count % 10000 == 0) {
                            logger.info("instance " + count);
                        }
                    }
                } finally {
                    // Release the input file handle even if processing fails.
                    input.close();
                }
            }
        } finally {
            // Flush and close the output even when an input file fails part-way.
            out.close();
        }
    }
}

