package opennlp.tools.eval;

import java.io.File;
import java.io.IOException;
import java.math.BigInteger;
import java.nio.charset.StandardCharsets;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Objects;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.cmdline.parser.ParserTool;
import opennlp.tools.namefind.NameFinderME;
import opennlp.tools.namefind.TokenNameFinderModel;
import opennlp.tools.parser.Parse;
import opennlp.tools.parser.Parser;
import opennlp.tools.parser.ParserFactory;
import opennlp.tools.parser.ParserModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSSample;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.tokenize.TokenizerME;
import opennlp.tools.tokenize.TokenizerModel;
import opennlp.tools.tokenize.WhitespaceTokenizer;
import opennlp.tools.util.FilterObjectStream;
import opennlp.tools.util.InputStreamFactory;
import opennlp.tools.util.MarkableFileInputStreamFactory;
import opennlp.tools.util.ObjectStream;
import opennlp.tools.util.PlainTextByLineStream;
import opennlp.tools.util.Span;
import org.junit.jupiter.api.Assertions;
import org.junit.jupiter.api.BeforeAll;
import org.junit.jupiter.api.Test;

/* loaded from: input_file:opennlp/tools/eval/SourceForgeModelEval.class */
public class SourceForgeModelEval extends AbstractEvalTest {

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:opennlp/tools/eval/SourceForgeModelEval$LeipzigTestSample.class */
    public static class LeipzigTestSample {
        private final List<String> text;

        private LeipzigTestSample(String[] strArr) {
            Objects.requireNonNull(strArr, "text must not be null");
            this.text = Collections.unmodifiableList(new ArrayList(Arrays.asList(strArr)));
        }

        public String[] getText() {
            return (String[]) this.text.toArray(new String[0]);
        }

        public String toString() {
            StringBuilder sb = new StringBuilder("eng");
            sb.append('\t');
            Iterator<String> it = this.text.iterator();
            while (it.hasNext()) {
                sb.append(it.next()).append(' ');
            }
            if (sb.length() > 0) {
                sb.setLength(sb.length() - 1);
            }
            return sb.toString();
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:opennlp/tools/eval/SourceForgeModelEval$LeipzigTestSampleStream.class */
    public static class LeipzigTestSampleStream extends FilterObjectStream<String, LeipzigTestSample> {
        private final int sentencePerDocument;
        private final Tokenizer tokenizer;

        private LeipzigTestSampleStream(int i, Tokenizer tokenizer, InputStreamFactory inputStreamFactory) throws IOException {
            super(new PlainTextByLineStream(inputStreamFactory, StandardCharsets.UTF_8));
            this.sentencePerDocument = i;
            this.tokenizer = tokenizer;
        }

        /* renamed from: read, reason: merged with bridge method [inline-methods] */
        public LeipzigTestSample m6read() throws IOException {
            String str;
            ArrayList arrayList = new ArrayList();
            for (int i = 0; i < this.sentencePerDocument && (str = (String) this.samples.read()) != null; i++) {
                String[] strArr = this.tokenizer.tokenize(str);
                if (strArr.length == 0) {
                    throw new IOException("Empty lines are not allowed!");
                }
                arrayList.addAll(Arrays.asList(strArr).subList(1, strArr.length));
            }
            if (arrayList.size() > 0) {
                return new LeipzigTestSample((String[]) arrayList.toArray(new String[0]));
            }
            return null;
        }
    }

    /**
     * Runs once before all tests. Builds a stream over the Leipzig sentences file that merges up
     * to 25 lines per sample and passes it, together with the expected checksum, to the
     * two-argument {@code verifyTrainingData} overload (not defined in this file; presumably
     * provided by {@code AbstractEvalTest}).
     *
     * @throws Exception if the corpus cannot be opened or the delegated check fails
     */
    @BeforeAll
    static void verifyTrainingData() throws Exception {
        File corpus = new File(getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt");
        InputStreamFactory corpusFactory = new MarkableFileInputStreamFactory(corpus);
        LeipzigTestSampleStream documents =
                new LeipzigTestSampleStream(25, SimpleTokenizer.INSTANCE, corpusFactory);
        BigInteger expectedChecksum = new BigInteger("172812413483919324675263268750583851712");
        verifyTrainingData(documents, expectedChecksum);
    }

    /**
     * Evaluates the English sentence detector model.
     *
     * <p>All samples (up to 25 corpus lines each) are joined with single spaces into one large
     * string, the sentence detector is run over it, and every detected sentence is fed into a
     * message digest that is compared with the recorded value.
     *
     * @throws Exception if a model or the corpus cannot be read, or the digest is unavailable
     */
    @Test
    void evalSentenceModel() throws Exception {
        SentenceModel sentenceModel = new SentenceModel(new File(getOpennlpDataDir(), "models-sf/en-sent.bin"));
        MessageDigest messageDigest = MessageDigest.getInstance(AbstractEvalTest.HASH_ALGORITHM);
        SentenceDetectorME sentenceDetectorME = new SentenceDetectorME(sentenceModel);
        StringBuilder corpusText = new StringBuilder();
        // try-with-resources closes the stream on every path, including failures while reading.
        try (ObjectStream<LeipzigTestSample> documents = new LeipzigTestSampleStream(25, SimpleTokenizer.INSTANCE,
                new MarkableFileInputStreamFactory(
                        new File(getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt")))) {
            LeipzigTestSample document;
            while ((document = documents.read()) != null) {
                corpusText.append(String.join(" ", document.getText())).append(" ");
            }
        }
        for (String sentence : sentenceDetectorME.sentDetect(corpusText.toString())) {
            messageDigest.update(sentence.getBytes(StandardCharsets.UTF_8));
        }
        Assertions.assertEquals(new BigInteger("228544068397077998410949364710969159291"), new BigInteger(1, messageDigest.digest()));
    }

    /**
     * Evaluates the English tokenizer model.
     *
     * <p>Each corpus line is split on whitespace by the sample stream, re-joined with single
     * spaces, and tokenized by the model. Every resulting token is fed into a message digest
     * that is compared with the recorded value.
     *
     * @throws Exception if a model or the corpus cannot be read, or the digest is unavailable
     */
    @Test
    void evalTokenModel() throws Exception {
        TokenizerModel tokenizerModel = new TokenizerModel(new File(getOpennlpDataDir(), "models-sf/en-token.bin"));
        MessageDigest messageDigest = MessageDigest.getInstance(AbstractEvalTest.HASH_ALGORITHM);
        TokenizerME tokenizerME = new TokenizerME(tokenizerModel);
        try (ObjectStream<LeipzigTestSample> lines = new LeipzigTestSampleStream(1, WhitespaceTokenizer.INSTANCE,
                new MarkableFileInputStreamFactory(
                        new File(getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt")))) {
            LeipzigTestSample line;
            while ((line = lines.read()) != null) {
                for (String token : tokenizerME.tokenize(String.join(" ", line.getText()))) {
                    messageDigest.update(token.getBytes(StandardCharsets.UTF_8));
                }
            }
        }
        // The comparison runs only after the stream has been closed exactly once, so a failing
        // assertion can never cause a second close() on the stream.
        Assertions.assertEquals(new BigInteger("180602607571756839321060482558626151930"), new BigInteger(1, messageDigest.digest()));
    }

    /**
     * Opens a stream over the Leipzig sentences file that yields one sample per corpus line,
     * tokenized with {@code SimpleTokenizer}.
     *
     * @return a new stream; the caller is responsible for closing it
     * @throws IOException if the stream cannot be created
     */
    private ObjectStream<LeipzigTestSample> createLineWiseStream() throws IOException {
        File corpus = new File(getOpennlpDataDir(), "leipzig/eng_news_2010_300K-sentences.txt");
        InputStreamFactory corpusFactory = new MarkableFileInputStreamFactory(corpus);
        return new LeipzigTestSampleStream(1, SimpleTokenizer.INSTANCE, corpusFactory);
    }

    /**
     * Runs a name finder model over every corpus line and checks the digest of its output.
     *
     * <p>For each span found, the concatenation of its type, start index and end index is fed
     * into the message digest.
     *
     * @param tokenNameFinderModel the model to evaluate
     * @param bigInteger the expected digest, interpreted as a non-negative integer
     * @throws Exception if the corpus cannot be read or the digest is unavailable
     */
    private void evalNameFinder(TokenNameFinderModel tokenNameFinderModel, BigInteger bigInteger) throws Exception {
        MessageDigest messageDigest = MessageDigest.getInstance(AbstractEvalTest.HASH_ALGORITHM);
        NameFinderME nameFinderME = new NameFinderME(tokenNameFinderModel);
        try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
            LeipzigTestSample line;
            while ((line = lines.read()) != null) {
                for (Span name : nameFinderME.find(line.getText())) {
                    messageDigest.update((name.getType() + name.getStart() + name.getEnd()).getBytes(StandardCharsets.UTF_8));
                }
            }
        }
        Assertions.assertEquals(bigInteger, new BigInteger(1, messageDigest.digest()));
    }

    /** Checks the output of the {@code en-ner-date} model against its recorded digest. */
    @Test
    void evalNerDateModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-date.bin");
        TokenNameFinderModel dateModel = new TokenNameFinderModel(modelFile);
        BigInteger expectedDigest = new BigInteger("116570003910213570906062355532299200317");
        evalNameFinder(dateModel, expectedDigest);
    }

    /** Checks the output of the {@code en-ner-location} model against its recorded digest. */
    @Test
    void evalNerLocationModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-location.bin");
        TokenNameFinderModel locationModel = new TokenNameFinderModel(modelFile);
        BigInteger expectedDigest = new BigInteger("44810593886021404716125849669208680993");
        evalNameFinder(locationModel, expectedDigest);
    }

    /** Checks the output of the {@code en-ner-money} model against its recorded digest. */
    @Test
    void evalNerMoneyModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-money.bin");
        TokenNameFinderModel moneyModel = new TokenNameFinderModel(modelFile);
        BigInteger expectedDigest = new BigInteger("65248897509365807977219790824670047287");
        evalNameFinder(moneyModel, expectedDigest);
    }

    /** Checks the output of the {@code en-ner-organization} model against its recorded digest. */
    @Test
    void evalNerOrganizationModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-organization.bin");
        TokenNameFinderModel organizationModel = new TokenNameFinderModel(modelFile);
        BigInteger expectedDigest = new BigInteger("50454559690338630659278005157657197233");
        evalNameFinder(organizationModel, expectedDigest);
    }

    /** Checks the output of the {@code en-ner-percentage} model against its recorded digest. */
    @Test
    void evalNerPercentageModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-percentage.bin");
        TokenNameFinderModel percentageModel = new TokenNameFinderModel(modelFile);
        BigInteger expectedDigest = new BigInteger("320996882594215344113023719117249515343");
        evalNameFinder(percentageModel, expectedDigest);
    }

    /** Checks the output of the {@code en-ner-person} model against its recorded digest. */
    @Test
    void evalNerPersonModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-person.bin");
        TokenNameFinderModel personModel = new TokenNameFinderModel(modelFile);
        BigInteger expectedDigest = new BigInteger("143619582249937129618340838626447763744");
        evalNameFinder(personModel, expectedDigest);
    }

    /** Checks the output of the {@code en-ner-time} model against its recorded digest. */
    @Test
    void evalNerTimeModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-ner-time.bin");
        TokenNameFinderModel timeModel = new TokenNameFinderModel(modelFile);
        BigInteger expectedDigest = new BigInteger("282941772380683328816791801782579055940");
        evalNameFinder(timeModel, expectedDigest);
    }

    /**
     * Evaluates the English chunker model.
     *
     * <p>Every corpus line is tagged with the perceptron POS model, the tagged sentence is
     * chunked, and each chunk tag is fed into a message digest that is compared with the
     * recorded value.
     *
     * @throws Exception if a model or the corpus cannot be read, or the digest is unavailable
     */
    @Test
    void evalChunkerModel() throws Exception {
        MessageDigest messageDigest = MessageDigest.getInstance(AbstractEvalTest.HASH_ALGORITHM);
        POSTaggerME pOSTaggerME = new POSTaggerME(new POSModel(new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin")));
        ChunkerME chunkerME = new ChunkerME(new ChunkerModel(new File(getOpennlpDataDir(), "models-sf/en-chunker.bin")));
        try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
            LeipzigTestSample line;
            while ((line = lines.read()) != null) {
                POSSample tagged = new POSSample(line.getText(), pOSTaggerME.tag(line.getText()));
                for (String chunkTag : chunkerME.chunk(tagged.getSentence(), tagged.getTags())) {
                    messageDigest.update(chunkTag.getBytes(StandardCharsets.UTF_8));
                }
            }
        }
        Assertions.assertEquals(new BigInteger("304922886851384639120257052245406261332"), new BigInteger(1, messageDigest.digest()));
    }

    /**
     * Runs a POS tagger model over every corpus line and checks the digest of its output.
     *
     * <p>Each tag produced for a line is fed into the message digest in order.
     *
     * @param pOSModel the model to evaluate
     * @param bigInteger the expected digest, interpreted as a non-negative integer
     * @throws Exception if the corpus cannot be read or the digest is unavailable
     */
    private void evalPosModel(POSModel pOSModel, BigInteger bigInteger) throws Exception {
        MessageDigest messageDigest = MessageDigest.getInstance(AbstractEvalTest.HASH_ALGORITHM);
        POSTaggerME pOSTaggerME = new POSTaggerME(pOSModel);
        try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
            LeipzigTestSample line;
            while ((line = lines.read()) != null) {
                for (String tag : pOSTaggerME.tag(line.getText())) {
                    messageDigest.update(tag.getBytes(StandardCharsets.UTF_8));
                }
            }
        }
        Assertions.assertEquals(bigInteger, new BigInteger(1, messageDigest.digest()));
    }

    /** Checks the output of the {@code en-pos-maxent} tagger model against its recorded digest. */
    @Test
    void evalMaxentModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-pos-maxent.bin");
        POSModel maxentModel = new POSModel(modelFile);
        BigInteger expectedDigest = new BigInteger("231995214522232523777090597594904492687");
        evalPosModel(maxentModel, expectedDigest);
    }

    /** Checks the output of the {@code en-pos-perceptron} tagger model against its recorded digest. */
    @Test
    void evalPerceptronModel() throws Exception {
        File modelFile = new File(getOpennlpDataDir(), "models-sf/en-pos-perceptron.bin");
        POSModel perceptronModel = new POSModel(modelFile);
        BigInteger expectedDigest = new BigInteger("209440430718727101220960491543652921728");
        evalPosModel(perceptronModel, expectedDigest);
    }

    /**
     * Evaluates the English chunking parser model.
     *
     * <p>Every corpus line is re-joined with single spaces and parsed, requesting one parse.
     * The textual rendering of the first parse, or the literal {@code "empty"} when no parse is
     * returned, is fed into a message digest that is compared with the recorded value.
     *
     * @throws Exception if a model or the corpus cannot be read, or the digest is unavailable
     */
    @Test
    void evalParserModel() throws Exception {
        ParserModel parserModel = new ParserModel(new File(getOpennlpDataDir(), "models-sf/en-parser-chunking.bin"));
        MessageDigest messageDigest = MessageDigest.getInstance(AbstractEvalTest.HASH_ALGORITHM);
        Parser create = ParserFactory.create(parserModel);
        try (ObjectStream<LeipzigTestSample> lines = createLineWiseStream()) {
            LeipzigTestSample line;
            while ((line = lines.read()) != null) {
                Parse[] parses = ParserTool.parseLine(String.join(" ", line.getText()), create, 1);
                if (parses.length > 0) {
                    // Parse.show is called with a StringBuffer here, so StringBuilder is not used.
                    StringBuffer rendered = new StringBuffer();
                    parses[0].show(rendered);
                    messageDigest.update(rendered.toString().getBytes(StandardCharsets.UTF_8));
                } else {
                    messageDigest.update("empty".getBytes(StandardCharsets.UTF_8));
                }
            }
        }
        Assertions.assertEquals(new BigInteger("68039262350771988792233880373220954061"), new BigInteger(1, messageDigest.digest()));
    }
}
