/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.tokenize;

import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import opennlp.tools.formats.ResourceAsStreamFactory;
import opennlp.tools.tokenize.TokenSample;
import opennlp.tools.tokenize.TokenSampleStream;
import opennlp.tools.tokenize.TokenizerFactory;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.util.CollectionObjectStream;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import opennlp.tools.util.TrainingParameters;

/**
 * Test helper that trains small {@link TokenizerModel} instances, either from a
 * handful of hard-coded samples or from the {@code token.train} classpath resource.
 *
 * <p>Both factory methods train with the same settings: {@code "Iterations"} = 100,
 * {@code "Cutoff"} = 0 and a {@link TokenizerFactory} created for language code
 * {@code "eng"}.
 */
public class TokenizerTestUtil {
    /**
     * Trains a tokenizer model from six in-memory samples: the words
     * {@code "year"}, {@code "it"} and {@code "yes"}, each once on its own and once
     * followed by a comma that is marked as a separate token.
     *
     * @return the trained model
     * @throws IOException if training or closing the sample stream fails
     */
    static TokenizerModel createSimpleMaxentTokenModel() throws IOException {
        ArrayList<TokenSample> samples = new ArrayList<>();
        samples.add(new TokenSample("year", new Span[]{new Span(0, 4)}));
        samples.add(new TokenSample("year,", new Span[]{new Span(0, 4), new Span(4, 5)}));
        samples.add(new TokenSample("it,", new Span[]{new Span(0, 2), new Span(2, 3)}));
        samples.add(new TokenSample("it", new Span[]{new Span(0, 2)}));
        samples.add(new TokenSample("yes", new Span[]{new Span(0, 3)}));
        samples.add(new TokenSample("yes,", new Span[]{new Span(0, 3), new Span(3, 4)}));

        // Closed for consistency with createMaxentTokenModel(); the stream only wraps
        // the in-memory list built above.
        try (ObjectStream<TokenSample> sampleStream = new CollectionObjectStream<TokenSample>(samples)) {
            return train(sampleStream);
        }
    }

    /**
     * Trains a tokenizer model from the classpath resource
     * {@code /opennlp/tools/tokenize/token.train}, read line by line as UTF-8.
     *
     * @return the trained model
     * @throws IOException if the resource cannot be read, or training or closing the
     *     sample stream fails
     */
    static TokenizerModel createMaxentTokenModel() throws IOException {
        InputStreamFactory trainDataIn =
                new ResourceAsStreamFactory(TokenizerModel.class, "/opennlp/tools/tokenize/token.train");

        // try-with-resources so the underlying resource stream is released once
        // training has finished (or failed) instead of being left open.
        try (ObjectStream<TokenSample> sampleStream =
                new TokenSampleStream(new PlainTextByLineStream(trainDataIn, StandardCharsets.UTF_8))) {
            return train(sampleStream);
        }
    }

    /**
     * Trains a model on the given samples using the settings shared by both public
     * entry points. The caller keeps ownership of {@code samples} and must close it.
     *
     * @param samples the training samples
     * @return the trained model
     * @throws IOException if training fails
     */
    private static TokenizerModel train(ObjectStream<TokenSample> samples) throws IOException {
        TrainingParameters mlParams = new TrainingParameters();
        mlParams.put("Iterations", 100);
        mlParams.put("Cutoff", 0);

        // NOTE(review): the positional arguments presumably map to (subclass name,
        // language code, abbreviation dictionary, alphanumeric optimization flag,
        // alphanumeric pattern) -- confirm against the TokenizerFactory.create Javadoc.
        TokenizerFactory factory = TokenizerFactory.create(null, "eng", null, true, null);
        return TokenizerME.train(samples, factory, mlParams);
    }
}

