/*
 * Decompiled with CFR 0.152.
 */
package de.datexis.ner.exec;

import de.datexis.common.CommandLineParser;
import de.datexis.common.Resource;
import de.datexis.common.WordHelpers;
import de.datexis.encoder.Encoder;
import de.datexis.encoder.impl.PositionEncoder;
import de.datexis.encoder.impl.SurfaceEncoder;
import de.datexis.encoder.impl.TrigramEncoder;
import de.datexis.model.Annotation;
import de.datexis.model.Dataset;
import de.datexis.ner.MentionAnnotator;
import de.datexis.ner.reader.CoNLLDatasetReader;
import java.io.IOException;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.ParseException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Command-line entry point that trains a {@link MentionAnnotator} on a CoNLL-annotated
 * dataset and writes the trained model to an output location.
 */
public class TrainMentionAnnotatorCoNLL {
    protected static final Logger log = LoggerFactory.getLogger(TrainMentionAnnotatorCoNLL.class);

    /**
     * Parses the command line, runs the training and terminates the JVM.
     *
     * <p>Exits with status 0 once training has returned. If the arguments cannot be parsed,
     * the parse error and the usage text are printed and the JVM exits with status 1.
     *
     * @param args raw command-line arguments
     * @throws IOException propagated from {@link #runTraining(ExecParams)}
     */
    public static void main(String[] args) throws IOException {
        ExecParams params = new ExecParams();
        CommandLineParser parser = new CommandLineParser(params);
        try {
            parser.parse(args);
            new TrainMentionAnnotatorCoNLL().runTraining(params);
            System.exit(0);
        }
        catch (ParseException e) {
            // Tell the user WHICH argument was rejected; the usage text alone does not say.
            System.err.println(e.getMessage());
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp("texoo-train-ner", "TeXoo: train MentionAnnotator with CoNLL annotations", params.setUpCliOptions(), "", true);
            System.exit(1);
        }
    }

    /**
     * Reads the training dataset, builds a {@link MentionAnnotator} with position, surface and
     * trigram encoders, trains it on the gold annotations and writes the model to the output path.
     *
     * <p>NOTE(review): only {@code trainingPath}, {@code outputPath}, {@code language} and
     * {@code trainingUI} are read here; {@code validationPath} and {@code testPath} are parsed
     * from the command line but not used by this method.
     *
     * @param params parsed command-line parameters
     * @throws IOException declared for I/O failures while reading the data or writing the model
     */
    protected void runTraining(ExecParams params) throws IOException {
        Resource trainingPath = Resource.fromDirectory(params.trainingPath);
        Resource output = Resource.fromDirectory(params.outputPath);
        WordHelpers.Language lang = WordHelpers.getLanguage(params.language);

        // The file name of the training resource is passed as the second reader argument
        // (presumably the dataset name - confirm against CoNLLDatasetReader).
        Dataset train = CoNLLDatasetReader.readDataset(trainingPath, trainingPath.getFileName(), CoNLLDatasetReader.Charset.UTF_8);

        MentionAnnotator ner = new MentionAnnotator.Builder()
                .withEncoders("tri", new Encoder[]{new PositionEncoder(), new SurfaceEncoder(), new TrigramEncoder()})
                .enableTrainingUI(params.trainingUI)
                .pretrain(train)
                .build();

        // The trailing arguments (-1, false, true) are forwarded unchanged; their meaning is
        // defined by MentionAnnotator#trainModel and is not visible from this file.
        ner.trainModel(train, Annotation.Source.GOLD, lang, -1, false, true);
        ner.writeModel(output);
    }

    /**
     * Holder for the command-line parameters of this tool. It declares the supported options
     * in {@link #setUpCliOptions()} and copies parsed values into its fields in
     * {@link #setParams(CommandLine)}.
     */
    protected static class ExecParams
    implements CommandLineParser.Options {
        /** Value of the required {@code -i/--input} option. */
        protected String trainingPath;
        /** Value of the optional {@code -v/--validation} option; {@code null} when absent. */
        protected String validationPath;
        /** Value of the optional {@code -t/--test} option; {@code null} when absent. */
        protected String testPath;
        /** Value of the required {@code -o/--output} option. */
        protected String outputPath;
        /** Value of the {@code -l/--language} option; set to {@code "en"} when the option is absent. */
        protected String language;
        /** Whether the {@code -u/--ui} flag was given. */
        protected boolean trainingUI = false;

        protected ExecParams() {
        }

        /**
         * Copies the parsed option values into this object's fields.
         *
         * @param parse the parsed command line
         */
        public void setParams(CommandLine parse) {
            this.trainingPath = parse.getOptionValue("i");
            this.validationPath = parse.getOptionValue("v");
            this.testPath = parse.getOptionValue("t");
            this.outputPath = parse.getOptionValue("o");
            this.trainingUI = parse.hasOption("u");
            this.language = parse.getOptionValue("l", "en");
        }

        /**
         * No-op method kept for compatibility.
         * NOTE(review): it carries the outer class's name and an empty body, so it looks like a
         * misplaced constructor - confirm whether anything calls it before removing.
         */
        protected void TrainMentionAnnotatorCoNLL() {
        }

        /**
         * Declares the command-line options understood by this tool.
         *
         * @return a new {@link Options} instance with the required {@code -i} and {@code -o}
         *         options and the optional {@code -v}, {@code -t}, {@code -l} and {@code -u} options
         */
        public Options setUpCliOptions() {
            Options op = new Options();
            op.addRequiredOption("i", "input", true, "path to input training data (CoNLL format)");
            op.addOption("v", "validation", true, "path to validation data (CoNLL format)");
            op.addOption("t", "test", true, "path to test data (CoNLL format)");
            op.addRequiredOption("o", "output", true, "path to create and store the model");
            op.addOption("l", "language", true, "language to use for sentence splitting and stopwords (EN or DE)");
            op.addOption("u", "ui", false, "enable training UI (http://127.0.0.1:9000)");
            return op;
        }
    }
}

