/*
 * Decompiled with CFR 0.152.
 */
package opennlp.tools.eval;

import java.io.File;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.cmdline.parser.ParserTool;
import opennlp.tools.eval.AbstractEvalTest;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.Parser;
import opennlp.tools.parser.ParserFactory;
import opennlp.tools.parser.ParserModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSTagFormat;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

public class SourceForgeModelEval
extends AbstractEvalTest {
    /**
     * Checks the Leipzig corpus file against a recorded checksum before any test runs.
     *
     * <p>Builds a stream over {@code leipzig/eng_news_2010_300K-sentences.txt} that groups up
     * to 25 lines per sample (tokenized with {@link SimpleTokenizer}) and passes it, together
     * with the expected checksum, to the two-argument {@code verifyTrainingData} overload.
     * That overload is not declared in this file (presumably inherited from
     * {@code AbstractEvalTest}).
     *
     * <p>The stream is managed by try-with-resources so the corpus file is released even when
     * the helper throws, matching how every other method in this class handles the stream.
     * NOTE(review): if the helper also closes the stream it will be closed twice; this is
     * presumably harmless for the file-backed line stream - confirm.
     *
     * @throws Exception if the corpus cannot be opened or read, or if the helper fails
     */
    @BeforeAll
    static void verifyTrainingData() throws Exception {
        File corpusFile = new File(getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt");
        try (LeipzigTestSampleStream samples = new LeipzigTestSampleStream(
                25, SimpleTokenizer.INSTANCE, new MarkableFileInputStreamFactory(corpusFile))) {
            verifyTrainingData(samples, new BigInteger("172812413483919324675263268750583851712"));
        }
    }

    /**
     * Runs the {@code models-sf/en-sent.bin} sentence detector over the Leipzig corpus and
     * compares an MD5 digest of the detected sentences with a recorded value.
     *
     * <p>The corpus is read in samples of up to 25 lines. The tokens of each sample are joined
     * with single spaces and appended, followed by one space, to a single text buffer. The
     * whole buffer is then split by the detector in one call, and the UTF-8 bytes of every
     * resulting sentence are fed to the digest in order.
     *
     * @throws Exception if the model or corpus cannot be loaded, or MD5 is unavailable
     */
    @Test
    void evalSentenceModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-sent.bin");
        SentenceModel sentenceModel = new SentenceModel(modelFile);
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        SentenceDetectorME detector = new SentenceDetectorME(sentenceModel);

        // Concatenate the complete corpus into one text before running the detector.
        StringBuilder corpusText = new StringBuilder();
        File corpusFile = new File(getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt");
        try (LeipzigTestSampleStream batches = new LeipzigTestSampleStream(
                25, SimpleTokenizer.INSTANCE, new MarkableFileInputStreamFactory(corpusFile))) {
            for (LeipzigTestSample batch = batches.read(); batch != null; batch = batches.read()) {
                corpusText.append(String.join(" ", batch.getText()));
                corpusText.append(" ");
            }
        }

        String[] detected = detector.sentDetect(corpusText.toString());
        for (String sentence : detected) {
            md5.update(sentence.getBytes(StandardCharsets.UTF_8));
        }

        BigInteger expected = new BigInteger("228544068397077998410949364710969159291");
        Assertions.assertEquals(expected, new BigInteger(1, md5.digest()));
    }

    /**
     * Runs the {@code models-sf/en-token.bin} tokenizer over the Leipzig corpus line by line
     * and compares an MD5 digest of the produced tokens with a recorded value.
     *
     * <p>Each corpus line is first split with {@link WhitespaceTokenizer} by the sample
     * stream, re-joined with single spaces, and then tokenized by the model under test. The
     * UTF-8 bytes of every resulting token are fed to the digest in order.
     *
     * @throws Exception if the model or corpus cannot be loaded, or MD5 is unavailable
     */
    @Test
    void evalTokenModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-token.bin");
        TokenizerModel tokenizerModel = new TokenizerModel(modelFile);
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        TokenizerME learnedTokenizer = new TokenizerME(tokenizerModel);

        File corpusFile = new File(getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt");
        try (LeipzigTestSampleStream samples = new LeipzigTestSampleStream(
                1, WhitespaceTokenizer.INSTANCE, new MarkableFileInputStreamFactory(corpusFile))) {
            for (LeipzigTestSample sample = samples.read(); sample != null; sample = samples.read()) {
                String sentence = String.join(" ", sample.getText());
                for (String token : learnedTokenizer.tokenize(sentence)) {
                    md5.update(token.getBytes(StandardCharsets.UTF_8));
                }
            }
        }

        BigInteger expected = new BigInteger("180602607571756839321060482558626151930");
        Assertions.assertEquals(expected, new BigInteger(1, md5.digest()));
    }

    /**
     * Opens the Leipzig corpus as a stream that yields one sample per corpus line, tokenized
     * with {@link SimpleTokenizer}.
     *
     * @return a new stream over {@code leipzig/eng_news_2010_300K-sentences.txt}; the caller
     *     is responsible for closing it
     * @throws IOException if the stream cannot be created
     */
    private ObjectStream<LeipzigTestSample> createLineWiseStream() throws IOException {
        File corpusFile = new File(getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt");
        InputStreamFactory corpusSource = new MarkableFileInputStreamFactory(corpusFile);
        return new LeipzigTestSampleStream(1, SimpleTokenizer.INSTANCE, corpusSource);
    }

    /**
     * Runs a name finder built from {@code model} over every corpus line and asserts that the
     * MD5 digest of the detected spans equals {@code expectedHash}.
     *
     * <p>Each span contributes the UTF-8 bytes of its type, start and end concatenated as one
     * string with no separators, in the order the finder returns the spans.
     *
     * @param model the name finder model under evaluation
     * @param expectedHash the recorded digest, as a non-negative integer
     * @throws Exception if the corpus cannot be read or MD5 is unavailable
     */
    private void evalNameFinder(TokenNameFinderModel model, BigInteger expectedHash) throws Exception {
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        NameFinderME finder = new NameFinderME(model);

        try (ObjectStream<LeipzigTestSample> samples = createLineWiseStream()) {
            for (LeipzigTestSample sample = samples.read(); sample != null; sample = samples.read()) {
                for (Span span : finder.find(sample.getText())) {
                    // Concatenation order and the absence of delimiters are part of the recorded hash.
                    String fingerprint = span.getType() + span.getStart() + span.getEnd();
                    md5.update(fingerprint.getBytes(StandardCharsets.UTF_8));
                }
            }
        }

        BigInteger actualHash = new BigInteger(1, md5.digest());
        Assertions.assertEquals(expectedHash, actualHash);
    }

    /**
     * Evaluates {@code models-sf/en-ner-date.bin} against its recorded span digest.
     *
     * @throws Exception if the model cannot be loaded or the evaluation fails
     */
    @Test
    void evalNerDateModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-date.bin");
        TokenNameFinderModel dateModel = new TokenNameFinderModel(modelFile);
        evalNameFinder(dateModel, new BigInteger("116570003910213570906062355532299200317"));
    }

    /**
     * Evaluates {@code models-sf/en-ner-location.bin} against its recorded span digest.
     *
     * @throws Exception if the model cannot be loaded or the evaluation fails
     */
    @Test
    void evalNerLocationModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-location.bin");
        TokenNameFinderModel locationModel = new TokenNameFinderModel(modelFile);
        evalNameFinder(locationModel, new BigInteger("44810593886021404716125849669208680993"));
    }

    /**
     * Evaluates {@code models-sf/en-ner-money.bin} against its recorded span digest.
     *
     * @throws Exception if the model cannot be loaded or the evaluation fails
     */
    @Test
    void evalNerMoneyModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-money.bin");
        TokenNameFinderModel moneyModel = new TokenNameFinderModel(modelFile);
        evalNameFinder(moneyModel, new BigInteger("65248897509365807977219790824670047287"));
    }

    /**
     * Evaluates {@code models-sf/en-ner-organization.bin} against its recorded span digest.
     *
     * @throws Exception if the model cannot be loaded or the evaluation fails
     */
    @Test
    void evalNerOrganizationModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-organization.bin");
        TokenNameFinderModel organizationModel = new TokenNameFinderModel(modelFile);
        evalNameFinder(organizationModel, new BigInteger("50454559690338630659278005157657197233"));
    }

    /**
     * Evaluates {@code models-sf/en-ner-percentage.bin} against its recorded span digest.
     *
     * @throws Exception if the model cannot be loaded or the evaluation fails
     */
    @Test
    void evalNerPercentageModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-percentage.bin");
        TokenNameFinderModel percentageModel = new TokenNameFinderModel(modelFile);
        evalNameFinder(percentageModel, new BigInteger("320996882594215344113023719117249515343"));
    }

    /**
     * Evaluates {@code models-sf/en-ner-person.bin} against its recorded span digest.
     *
     * @throws Exception if the model cannot be loaded or the evaluation fails
     */
    @Test
    void evalNerPersonModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-person.bin");
        TokenNameFinderModel nerPersonModel = new TokenNameFinderModel(modelFile);
        evalNameFinder(nerPersonModel, new BigInteger("143619582249937129618340838626447763744"));
    }

    /**
     * Evaluates {@code models-sf/en-ner-time.bin} against its recorded span digest.
     *
     * @throws Exception if the model cannot be loaded or the evaluation fails
     */
    @Test
    void evalNerTimeModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-time.bin");
        TokenNameFinderModel timeModel = new TokenNameFinderModel(modelFile);
        evalNameFinder(timeModel, new BigInteger("282941772380683328816791801782579055940"));
    }

    /**
     * Runs the {@code models-sf/en-chunker.bin} chunker over the Leipzig corpus and compares
     * an MD5 digest of the chunk tags with a recorded value.
     *
     * <p>Every corpus line is tagged with the {@code models-sf/en-pos-perceptron.bin} tagger
     * (Penn tag format). Tokens and tags are wrapped in a {@link POSSample}, and the sample's
     * sentence and tags are passed to the chunker. The UTF-8 bytes of each chunk tag are fed
     * to the digest in order.
     *
     * @throws Exception if a model or the corpus cannot be loaded, or MD5 is unavailable
     */
    @Test
    void evalChunkerModel() throws Exception {
        MessageDigest md5 = MessageDigest.getInstance("MD5");

        File posModelFile = new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin");
        POSTaggerME posTagger = new POSTaggerME(new POSModel(posModelFile), POSTagFormat.PENN);

        File chunkerModelFile = new File(getOpennlpDataDir(), "models-sf/en-chunker.bin");
        ChunkerME chunkerUnderTest = new ChunkerME(new ChunkerModel(chunkerModelFile));

        try (ObjectStream<LeipzigTestSample> samples = createLineWiseStream()) {
            for (LeipzigTestSample sample = samples.read(); sample != null; sample = samples.read()) {
                String[] tokens = sample.getText();
                String[] posTags = posTagger.tag(sample.getText());
                POSSample tagged = new POSSample(tokens, posTags);
                String[] chunkTags = chunkerUnderTest.chunk(tagged.getSentence(), tagged.getTags());
                for (String chunkTag : chunkTags) {
                    md5.update(chunkTag.getBytes(StandardCharsets.UTF_8));
                }
            }
        }

        BigInteger expected = new BigInteger("304922886851384639120257052245406261332");
        Assertions.assertEquals(expected, new BigInteger(1, md5.digest()));
    }

    /**
     * Tags every corpus line with a tagger built from {@code model} (Penn tag format) and
     * asserts that the MD5 digest of all produced tags equals {@code expectedHash}.
     *
     * @param model the part-of-speech model under evaluation
     * @param expectedHash the recorded digest, as a non-negative integer
     * @throws Exception if the corpus cannot be read or MD5 is unavailable
     */
    private void evalPosModel(POSModel model, BigInteger expectedHash) throws Exception {
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        POSTaggerME posTagger = new POSTaggerME(model, POSTagFormat.PENN);

        try (ObjectStream<LeipzigTestSample> samples = createLineWiseStream()) {
            for (LeipzigTestSample sample = samples.read(); sample != null; sample = samples.read()) {
                for (String posTag : posTagger.tag(sample.getText())) {
                    md5.update(posTag.getBytes(StandardCharsets.UTF_8));
                }
            }
        }

        BigInteger actualHash = new BigInteger(1, md5.digest());
        Assertions.assertEquals(expectedHash, actualHash);
    }

    /**
     * Evaluates {@code models-sf/en-pos-maxent.bin} against its recorded tag digest.
     *
     * @throws Exception if the model cannot be loaded or the evaluation fails
     */
    @Test
    void evalMaxentModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-pos-maxent.bin");
        POSModel posModel = new POSModel(modelFile);
        evalPosModel(posModel, new BigInteger("231995214522232523777090597594904492687"));
    }

    /**
     * Evaluates {@code models-sf/en-pos-perceptron.bin} against its recorded tag digest.
     *
     * @throws Exception if the model cannot be loaded or the evaluation fails
     */
    @Test
    void evalPerceptronModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin");
        POSModel posModel = new POSModel(modelFile);
        evalPosModel(posModel, new BigInteger("209440430718727101220960491543652921728"));
    }

    /**
     * Runs the {@code models-sf/en-parser-chunking.bin} parser over the Leipzig corpus and
     * compares an MD5 digest of the rendered parses with a recorded value.
     *
     * <p>Each corpus line is re-joined with single spaces and parsed with a request for one
     * parse. The digest receives the text written by the first parse's {@code show} method,
     * or the literal {@code "empty"} when no parse is returned.
     *
     * @throws Exception if the model or corpus cannot be loaded, or MD5 is unavailable
     */
    @Test
    void evalParserModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-parser-chunking.bin");
        ParserModel parserModel = new ParserModel(modelFile);
        MessageDigest md5 = MessageDigest.getInstance("MD5");
        Parser parser = ParserFactory.create(parserModel);

        try (ObjectStream<LeipzigTestSample> samples = createLineWiseStream()) {
            for (LeipzigTestSample sample = samples.read(); sample != null; sample = samples.read()) {
                String sentence = String.join(" ", sample.getText());
                Parse[] parses = ParserTool.parseLine(sentence, parser, 1);

                String rendered;
                if (parses.length > 0) {
                    // show() is called with a StringBuffer, as in the original code.
                    StringBuffer buffer = new StringBuffer();
                    parses[0].show(buffer);
                    rendered = buffer.toString();
                } else {
                    rendered = "empty";
                }
                md5.update(rendered.getBytes(StandardCharsets.UTF_8));
            }
        }

        BigInteger expected = new BigInteger("68039262350771988792233880373220954061");
        Assertions.assertEquals(expected, new BigInteger(1, md5.digest()));
    }

    private static class LeipzigTestSampleStream
    extends FilterObjectStream<String, LeipzigTestSample> {
        private final int sentencePerDocument;
        private final Tokenizer tokenizer;

        private LeipzigTestSampleStream(int sentencePerDocument, Tokenizer tokenizer, InputStreamFactory in) throws IOException {
            super((ObjectStream)new PlainTextByLineStream(in, StandardCharsets.UTF_8));
            this.sentencePerDocument = sentencePerDocument;
            this.tokenizer = tokenizer;
        }

        public LeipzigTestSample read() throws IOException {
            String line;
            ArrayList<String> tokensList = new ArrayList<String>();
            for (int count = 0; count < this.sentencePerDocument && (line = (String)this.samples.read()) != null; ++count) {
                String[] tokens = this.tokenizer.tokenize(line);
                if (tokens.length == 0) {
                    throw new IOException("Empty lines are not allowed!");
                }
                tokensList.addAll(Arrays.asList(tokens).subList(1, tokens.length));
            }
            if (!tokensList.isEmpty()) {
                return new LeipzigTestSample(tokensList.toArray(new String[0]));
            }
            return null;
        }
    }

    /**
     * Immutable holder for the tokens of one corpus sample.
     */
    private static class LeipzigTestSample {
        private final List<String> text;

        /**
         * @param text the tokens of the sample; copied into an unmodifiable list
         * @throws NullPointerException if {@code text} or any of its elements is {@code null}
         */
        private LeipzigTestSample(String[] text) {
            Objects.requireNonNull(text, "text must not be null");
            this.text = List.of(text);
        }

        /**
         * @return a fresh array containing the tokens of this sample
         */
        public String[] getText() {
            return this.text.toArray(new String[0]);
        }

        /**
         * Renders the sample as {@code "eng"}, a tab, and the tokens separated by single
         * spaces.
         *
         * <p>The previous implementation appended a space after every token and then always
         * removed the last character. Its guard could never be false because the buffer
         * started with {@code "eng\t"}, so with no tokens it removed the tab separator.
         * Joining the tokens gives the same text for non-empty samples and keeps the tab for
         * empty ones.
         *
         * @return the textual form of this sample
         */
        @Override
        public String toString() {
            return "eng\t" + String.join(" ", this.text);
        }
    }
}

