package cc.mallet.share.upenn;

import cc.mallet.classify.Classification;
import cc.mallet.classify.Classifier;
import cc.mallet.classify.MaxEnt;
import cc.mallet.classify.MaxEntTrainer;
import cc.mallet.pipe.CharSequence2TokenSequence;
import cc.mallet.pipe.CharSequenceArray2TokenSequence;
import cc.mallet.pipe.FeatureSequence2FeatureVector;
import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SerialPipes;
import cc.mallet.pipe.Target2Label;
import cc.mallet.pipe.TokenSequence2FeatureSequence;
import cc.mallet.pipe.iterator.ArrayDataAndTargetIterator;
import cc.mallet.pipe.iterator.ArrayIterator;
import cc.mallet.pipe.iterator.LineIterator;
import cc.mallet.pipe.iterator.PipeExtendedIterator;
import cc.mallet.types.Alphabet;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.LabelAlphabet;
import cc.mallet.types.Labeling;
import cc.mallet.types.TokenSequence;
import cc.mallet.util.CharSequenceLexer;
import cc.mallet.util.CommandOption;
import cc.mallet.util.MalletLogger;
import com.ctc.wstx.cfg.XmlConsts;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.Iterator;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import org.restlet.data.Disposition;

/* loaded from: input_file:cc/mallet/share/upenn/MaxEntShell.class */
/**
 * Command-line and programmatic wrapper around a MALLET maximum-entropy classifier.
 *
 * <p>The static methods train a {@link MaxEnt} model, measure its accuracy on labeled data,
 * classify unlabeled data, and load a previously serialized classifier. {@link #main} wires
 * these operations to the command-line options declared below.
 *
 * <p>This class holds only static members and cannot be instantiated.
 */
public class MaxEntShell {
    private static final Logger logger = MalletLogger.getLogger(MaxEntShell.class.getName());

    // NOTE(review): Disposition.NAME_FILENAME (Restlet) and XmlConsts.XML_SA_YES / XML_SA_NO
    // (Woodstox) belong to unrelated libraries. Presumably a decompiler substituted them for
    // equal string literals; confirm their values and replace them with plain literals.
    private static final CommandOption.Double gaussianVarianceOption = new CommandOption.Double(MaxEntShell.class, "gaussian-variance", "decimal", true, 1.0d, "The gaussian prior variance used for training.", null);
    private static final CommandOption.File trainOption = new CommandOption.File(MaxEntShell.class, "train", "FILENAME", true, null, "Training datafile", null);
    private static final CommandOption.File testOption = new CommandOption.File(MaxEntShell.class, "test", Disposition.NAME_FILENAME, true, null, "Test datafile", null);
    private static final CommandOption.File classifyOption = new CommandOption.File(MaxEntShell.class, "classify", Disposition.NAME_FILENAME, true, null, "Datafile to classify", null);
    private static final CommandOption.File modelOption = new CommandOption.File(MaxEntShell.class, "model", Disposition.NAME_FILENAME, true, null, "Model file", null);
    private static final CommandOption.String encodingOption = new CommandOption.String(MaxEntShell.class, "encoding", "character-encoding-name", true, null, "Input character encoding", null);
    private static final CommandOption.Boolean internalTestOption = new CommandOption.Boolean(MaxEntShell.class, "internal-test", "true|false", true, false, "Run internal tests", null);
    private static final CommandOption.List commandOptions = new CommandOption.List("Training, testing and running a generic tagger.", new CommandOption[]{gaussianVarianceOption, trainOption, testOption, modelOption, classifyOption, encodingOption, internalTestOption});

    /** Tiny built-in data set used by {@link #internalTest()}. */
    private static final String[][] internalData = {new String[]{"a", "b"}, new String[]{"b", "c"}, new String[]{"a", "c"}};
    private static final String[] internalTargets = {XmlConsts.XML_SA_YES, XmlConsts.XML_SA_NO, XmlConsts.XML_SA_NO};
    private static final String[] internalInstance = {"a", "b", "c"};

    /** Line format of training/test files: first non-blank token (group 1), then the rest (group 2). */
    private static final Pattern LABELED_LINE = Pattern.compile("^\\s*(\\S+)\\s*(.*)\\s*$");

    /** Line format of files to classify: the whole line after leading whitespace (group 1). */
    private static final Pattern UNLABELED_LINE = Pattern.compile("^\\s*(.*)\\s*$");

    private MaxEntShell() {
    }

    /**
     * Trains a classifier from in-memory string arrays.
     *
     * @param strArr  one string array per training instance
     * @param strArr2 the target for each instance, paired with {@code strArr} by the iterator
     * @param d       value handed to the {@link MaxEntTrainer} constructor (the command line
     *                describes it as the gaussian prior variance)
     * @param file    where to serialize the trained classifier, or {@code null} to skip saving
     * @return the trained classifier
     * @throws IOException if the model file cannot be written
     */
    public static Classifier train(String[][] strArr, String[] strArr2, double d, File file) throws IOException {
        return train(new PipeExtendedIterator(new ArrayDataAndTargetIterator(strArr, strArr2), new CharSequenceArray2TokenSequence()), d, file);
    }

    /**
     * Trains a classifier from an iterator of instances.
     *
     * <p>Instances are passed through a target-to-label, token-to-feature-sequence and
     * feature-vector pipeline before training. Feature, label and instance counts as well as
     * the training accuracy are logged.
     *
     * @param it   the training instances
     * @param d    value handed to the {@link MaxEntTrainer} constructor
     * @param file where to serialize the trained classifier, or {@code null} to skip saving
     * @return the trained classifier
     * @throws IOException if the model file cannot be written
     */
    public static Classifier train(Iterator<Instance> it, double d, File file) throws IOException {
        Alphabet features = new Alphabet();
        LabelAlphabet labels = new LabelAlphabet();
        Pipe pipeline = new SerialPipes(new Pipe[]{new Target2Label(labels), new TokenSequence2FeatureSequence(features), new FeatureSequence2FeatureVector()});
        InstanceList trainingSet = new InstanceList(pipeline);
        trainingSet.addThruPipe(it);
        logger.info("# features = " + features.size());
        logger.info("# labels = " + labels.size());
        logger.info("# training instances = " + trainingSet.size());
        MaxEnt classifier = new MaxEntTrainer(d).train(trainingSet);
        logger.info("The training accuracy is " + classifier.getAccuracy(trainingSet));
        features.stopGrowth();
        if (file != null) {
            // try-with-resources closes both streams even when serialization fails.
            try (FileOutputStream fileOut = new FileOutputStream(file);
                 ObjectOutputStream objectOut = new ObjectOutputStream(fileOut)) {
                objectOut.writeObject(classifier);
            }
        }
        return classifier;
    }

    /**
     * Measures a classifier's accuracy on in-memory labeled data.
     *
     * @param classifier the classifier to evaluate
     * @param strArr     one string array per test instance
     * @param strArr2    the target for each instance
     * @return the value of {@code classifier.getAccuracy} on the resulting instance list
     */
    public static double test(Classifier classifier, String[][] strArr, String[] strArr2) {
        return test(classifier, new PipeExtendedIterator(new ArrayDataAndTargetIterator(strArr, strArr2), new CharSequenceArray2TokenSequence()));
    }

    /**
     * Measures a classifier's accuracy on an iterator of labeled instances.
     *
     * @param classifier the classifier to evaluate; its instance pipe processes the instances
     * @param it         the labeled test instances
     * @return the value of {@code classifier.getAccuracy} on the resulting instance list
     */
    public static double test(Classifier classifier, Iterator<Instance> it) {
        InstanceList testSet = new InstanceList(classifier.getInstancePipe());
        testSet.addThruPipe(it);
        logger.info("# test instances = " + testSet.size());
        return classifier.getAccuracy(testSet);
    }

    /**
     * Classifies a single instance given as an array of strings.
     *
     * @param classifier the classifier to apply
     * @param strArr     the strings wrapped into a {@link TokenSequence} for the instance
     * @return the classification of the instance
     */
    public static Classification classify(Classifier classifier, String[] strArr) {
        Instance instance = new Instance(new TokenSequence(strArr), null, null, null);
        return classifier.classify(instance);
    }

    /**
     * Classifies several instances, each given as an array of strings.
     *
     * @param classifier the classifier to apply
     * @param strArr     one string array per instance
     * @return one classification per instance
     */
    public static Classification[] classify(Classifier classifier, String[][] strArr) {
        return classify(classifier, new PipeExtendedIterator(new ArrayIterator(strArr), new CharSequenceArray2TokenSequence()));
    }

    /**
     * Classifies every instance produced by an iterator.
     *
     * @param classifier the classifier to apply; its instance pipe processes the instances
     * @param it         the unlabeled instances
     * @return one classification per instance
     */
    public static Classification[] classify(Classifier classifier, Iterator<Instance> it) {
        InstanceList unlabeled = new InstanceList(classifier.getInstancePipe());
        unlabeled.addThruPipe(it);
        logger.info("# unlabeled instances = " + unlabeled.size());
        return (Classification[]) classifier.classify(unlabeled).toArray(new Classification[0]);
    }

    /**
     * Loads a classifier previously serialized to a file (for example by {@link #train}).
     *
     * <p>This uses Java native deserialization, so only load model files from trusted sources.
     *
     * @param file the serialized model file
     * @return the deserialized classifier
     * @throws IOException            if the file cannot be read
     * @throws ClassNotFoundException if a serialized class is not available
     */
    public static Classifier load(File file) throws IOException, ClassNotFoundException {
        // try-with-resources closes both streams even when deserialization fails.
        try (FileInputStream fileIn = new FileInputStream(file);
             ObjectInputStream objectIn = new ObjectInputStream(fileIn)) {
            return (Classifier) objectIn.readObject();
        }
    }

    /** Trains on the built-in data, prints the training accuracy and one sample labeling. */
    private static void internalTest() throws IOException {
        Classifier classifier = train(internalData, internalTargets, 1.0d, null);
        System.out.println("Training accuracy = " + test(classifier, internalData, internalTargets));
        printLabeling(classify(classifier, internalInstance).getLabeling());
    }

    /** Prints every label of the labeling's alphabet followed by its value, on one line. */
    private static void printLabeling(Labeling labeling) {
        LabelAlphabet labels = labeling.getLabelAlphabet();
        for (int index = 0; index < labels.size(); index++) {
            System.out.print(labels.lookupObject(index) + " " + labeling.value(index) + " ");
        }
        System.out.println();
    }

    /**
     * Opens a character reader on a file.
     *
     * @param file     the file to read
     * @param encoding the character encoding name, or {@code null} to use {@link FileReader}
     * @return a reader the caller is responsible for closing
     * @throws IOException if the file cannot be opened or the encoding is unsupported
     */
    private static InputStreamReader getReader(File file, String encoding) throws IOException {
        if (encoding == null) {
            return new FileReader(file);
        }
        FileInputStream stream = new FileInputStream(file);
        try {
            return new InputStreamReader(stream, encoding);
        } catch (IOException | RuntimeException failure) {
            // Do not leak the file handle when the encoding name is rejected.
            try {
                stream.close();
            } catch (IOException closeFailure) {
                failure.addSuppressed(closeFailure);
            }
            throw failure;
        }
    }

    /**
     * Builds an instance iterator that reads lines from a reader and tokenizes the data.
     *
     * <p>NOTE(review): the group arguments are assumed to follow LineIterator's
     * (data group, target group, name group) order, with -1 meaning "absent" - confirm
     * against the LineIterator documentation.
     */
    private static Iterator<Instance> lineInstances(InputStreamReader input, Pattern lineFormat, int dataGroup, int targetGroup, Pipe tokenizer) {
        return new PipeExtendedIterator(new LineIterator(input, lineFormat, dataGroup, targetGroup, -1), tokenizer);
    }

    /**
     * Command-line entry point.
     *
     * <p>Optionally runs the internal test, then obtains a classifier either by training on the
     * {@code --train} file (saving it to {@code --model} if given) or by loading {@code --model}.
     * With a classifier available, it prints the accuracy on the {@code --test} file and the
     * label values for each line of the {@code --classify} file.
     *
     * @param strArr the command-line arguments
     * @throws Exception if option processing, file access or model loading fails
     */
    public static void main(String[] strArr) throws Exception {
        CharSequence2TokenSequence tokenizer = new CharSequence2TokenSequence(new CharSequenceLexer(CharSequenceLexer.LEX_NONWHITESPACE_TOGETHER));
        commandOptions.process(strArr);
        if (internalTestOption.value) {
            internalTest();
        }

        Classifier classifier = null;
        if (trainOption.value != null) {
            // Each input file is fully consumed inside the call, so it is safe to close it here.
            try (InputStreamReader input = getReader(trainOption.value, encodingOption.value)) {
                classifier = train(lineInstances(input, LABELED_LINE, 2, 1, tokenizer), gaussianVarianceOption.value, modelOption.value);
            }
        } else if (modelOption.value != null) {
            classifier = load(modelOption.value);
        }
        if (classifier == null) {
            return;
        }

        if (testOption.value != null) {
            try (InputStreamReader input = getReader(testOption.value, encodingOption.value)) {
                System.out.println("The testing accuracy is " + test(classifier, lineInstances(input, LABELED_LINE, 2, 1, tokenizer)));
            }
        }
        if (classifyOption.value != null) {
            // Lines to classify carry no label, so target processing is switched off.
            classifier.getInstancePipe().setTargetProcessing(false);
            Classification[] results;
            try (InputStreamReader input = getReader(classifyOption.value, encodingOption.value)) {
                results = classify(classifier, lineInstances(input, UNLABELED_LINE, 1, -1, tokenizer));
            }
            for (Classification result : results) {
                printLabeling(result.getLabeling());
            }
        }
    }
}
