package org.apache.stanbol.commons.opennlp;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.NoSuchElementException;
import opennlp.tools.chunker.ChunkerME;
import opennlp.tools.chunker.ChunkerModel;
import opennlp.tools.postag.POSModel;
import opennlp.tools.postag.POSTaggerME;
import opennlp.tools.sentdetect.SentenceDetector;
import opennlp.tools.sentdetect.SentenceDetectorME;
import opennlp.tools.sentdetect.SentenceModel;
import opennlp.tools.tokenize.SimpleTokenizer;
import opennlp.tools.tokenize.Tokenizer;
import opennlp.tools.util.Sequence;
import opennlp.tools.util.Span;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/stanbol/commons/opennlp/TextAnalyzer.class */
public class TextAnalyzer {
    /** Class-wide SLF4J logger. */
    private static final Logger log = LoggerFactory.getLogger(TextAnalyzer.class);
    /** Provider of the tokenizer and of the POS, chunker and sentence models; checked for null in the constructor. */
    private final OpenNLP openNLP;
    /** Active configuration; a default instance is created when the constructor receives null. */
    private final TextAnalyzerConfig config;
    /** POS tagger, created on first use by getPosTagger(). */
    private POSTaggerME posTagger;
    /** Set once the POS model lookup returned null or failed, so the lookup is not repeated. */
    private boolean posTaggerNotAvailable;
    /** Sentence detector, created on first use by getSentenceDetector(). */
    private SentenceDetector sentenceDetector;
    /** Set once the sentence model lookup returned null or failed, so the lookup is not repeated. */
    private boolean sentenceDetectorNotAvailable;
    /** Model based chunker, created on first use by getChunker(). */
    private ChunkerME chunker;
    /** Set once the chunker model lookup returned null or failed, so the lookup is not repeated. */
    private boolean chunkerNotAvailable;
    /** POS type based chunker, obtained on first use by getPosTypeChunker(). */
    private PosTypeChunker posTypeChunker;
    /** Set once PosTypeChunker.getInstance(..) returned null, so the lookup is not repeated. */
    private boolean posTypeChunkerNotAvailable;
    /** Tokenizer selected on first use by getTokenizer(). */
    private Tokenizer tokenizer;
    /** Language passed to the constructor; used as key for all model lookups. */
    private final String language;

    /* loaded from: input_file:org/apache/stanbol/commons/opennlp/TextAnalyzer$AnalysedText.class */
    public class AnalysedText {
        /** The analysed text; the constructor rejects null and empty values. */
        protected final String sentence;
        /** Unmodifiable list of the tokens of {@link #sentence}. */
        protected final List<Token> tokens;
        /** Unmodifiable list of the detected chunks, or null if the constructor obtained no chunk spans. */
        protected final List<Chunk> chunks;
        /** Non-negative offset passed to the constructor (0 for the two-argument constructor). */
        private final int offset;
        /** Language passed to the constructor; the constructor rejects null and empty values. */
        protected String language;

        /* loaded from: input_file:org/apache/stanbol/commons/opennlp/TextAnalyzer$AnalysedText$Chunk.class */
        public class Chunk {
            /** Character span of the chunk within the sentence, built from the first and last token. */
            protected final Span span;
            /** Token-index span exactly as handed to the constructor. */
            protected final Span chunkSpan;
            /** Probability value handed to the constructor. */
            protected final double probability;
            /** Lazily resolved text covered by {@link #span}. */
            private String cachedText;
            /** Lazily created sub-list view of the tokens of this chunk. */
            private List<Token> cachedTokens;

            /**
             * Creates a chunk for the given token-index span.
             * <p>
             * NOTE(review): the token at index {@code getEnd()} is dereferenced here (end treated
             * as inclusive) while {@link #getTokens()} passes the same index to {@code subList},
             * which excludes it. Which convention the chunkers deliver is not visible in this
             * file - confirm.
             *
             * @param tokenSpan token-index span of the chunk
             * @param chunkProbability probability assigned to the chunk
             */
            private Chunk(Span tokenSpan, double chunkProbability) {
                List<Token> sentenceTokens = AnalysedText.this.tokens;
                Token firstToken = sentenceTokens.get(tokenSpan.getStart());
                Token endToken = sentenceTokens.get(tokenSpan.getEnd());
                this.chunkSpan = tokenSpan;
                this.span = new Span(firstToken.getStart(), endToken.getEnd());
                this.probability = chunkProbability;
            }

            /**
             * @return the tokens between the start and end index of the chunk span, as a
             *         cached sub-list view of the sentence tokens
             */
            public List<Token> getTokens() {
                if (this.cachedTokens != null) {
                    return this.cachedTokens;
                }
                int from = this.chunkSpan.getStart();
                int to = this.chunkSpan.getEnd();
                this.cachedTokens = AnalysedText.this.tokens.subList(from, to);
                return this.cachedTokens;
            }

            /** @return the start index of the token-index span */
            public int getStart() {
                return chunkSpan.getStart();
            }

            /** @return the end index of the token-index span */
            public int getEnd() {
                return chunkSpan.getEnd();
            }

            /** @return the length of the token-index span */
            public int getSize() {
                return chunkSpan.length();
            }

            /** @return the probability handed to the constructor */
            public double getProbability() {
                return probability;
            }

            /** @return the sentence text covered by the character span (resolved once, then cached) */
            public String getText() {
                if (this.cachedText != null) {
                    return this.cachedText;
                }
                CharSequence covered = this.span.getCoveredText(AnalysedText.this.sentence);
                this.cachedText = covered.toString();
                return this.cachedText;
            }

            /** @return same as {@link #getText()} */
            public String toString() {
                return getText();
            }
        }

        /* loaded from: input_file:org/apache/stanbol/commons/opennlp/TextAnalyzer$AnalysedText$Token.class */
        public class Token {
            /** Character span of the token within the sentence. */
            protected final Span span;
            /** Token text; when null it is resolved from the span by {@link #getText()}. */
            protected String token;
            /** POS tags of the token, or null if none were provided. */
            protected final String[] posTags;
            /** Probabilities parallel to {@link #posTags}, or null if there are no tags. */
            protected final double[] posProbabilities;
            /** True if the token text contains at least one letter or digit. */
            protected final boolean hasAlphaNumeric;

            /**
             * Convenience constructor for a token with exactly one POS tag.
             *
             * @param analysedText not used by this constructor
             * @param span character span of the token
             * @param str token text
             * @param str2 the single POS tag
             * @param d probability of the POS tag
             */
            private Token(AnalysedText analysedText, Span span, String str, String str2, double d) {
                this(span, str, new String[]{str2}, new double[]{d});
            }

            /**
             * Creates a token.
             *
             * @param span character span of the token within the sentence
             * @param str token text; if null the text covered by the span is used for the
             *        alphanumeric check and resolved lazily by {@link #getText()}
             * @param strArr POS tags; null or empty means "no tags"
             * @param dArr probabilities parallel to the tags; ignored when there are no tags
             * @throws IllegalStateException if tags are present but the probability array is
             *         missing or of a different length
             */
            private Token(Span span, String str, String[] strArr, double[] dArr) {
                this.span = span;
                this.token = str;
                if (strArr == null || strArr.length < 1) {
                    this.posTags = null;
                    this.posProbabilities = null;
                } else {
                    // A missing probability array is reported like a size mismatch
                    // instead of surfacing as a NullPointerException.
                    if (dArr == null || strArr.length != dArr.length) {
                        throw new IllegalStateException("POS Tag array and POS probability array MUST BE of the same size!");
                    }
                    this.posTags = strArr;
                    this.posProbabilities = dArr;
                }
                // getText() allows a null token, so the scan must not dereference it.
                String text = str != null ? str : span.getCoveredText(AnalysedText.this.sentence).toString();
                // Iterate code points: letters/digits outside the BMP are surrogate pairs
                // and Character.isLetterOrDigit(char) is false for each surrogate half.
                boolean found = false;
                int index = 0;
                while (!found && index < text.length()) {
                    int codePoint = text.codePointAt(index);
                    found = Character.isLetterOrDigit(codePoint);
                    index += Character.charCount(codePoint);
                }
                this.hasAlphaNumeric = found;
            }

            /** @return the start of the character span */
            public int getStart() {
                return this.span.getStart();
            }

            /** @return the end of the character span */
            public int getEnd() {
                return this.span.getEnd();
            }

            /** @return the first POS tag, or null if the token has no tags */
            public String getPosTag() {
                if (this.posTags == null) {
                    return null;
                }
                return this.posTags[0];
            }

            /** @return the internal POS tag array (not copied), or null if the token has no tags */
            public String[] getPosTags() {
                return this.posTags;
            }

            /** @return the probability of the first POS tag, or -1 if the token has no tags */
            public double getPosProbability() {
                if (this.posProbabilities == null) {
                    return -1.0d;
                }
                return this.posProbabilities[0];
            }

            /** @return the internal probability array (not copied), or null if the token has no tags */
            public double[] getPosProbabilities() {
                return this.posProbabilities;
            }

            /** @return the token text; resolved from the span and cached if it was not provided */
            public String getText() {
                if (this.token == null) {
                    this.token = this.span.getCoveredText(AnalysedText.this.sentence).toString();
                }
                return this.token;
            }

            /**
             * @return true if the token text contains at least one letter or digit
             *         (the misspelled method name is kept for compatibility)
             */
            public boolean hasAplhaNumericChar() {
                return this.hasAlphaNumeric;
            }

            /**
             * @return the token text, followed by '_' and the POS tag (or the array form of
             *         all tags when there is more than one) if tags are present
             */
            public String toString() {
                StringBuilder out = new StringBuilder().append(getText());
                if (this.posTags != null) {
                    out.append('_');
                    out.append(this.posTags.length == 1 ? this.posTags[0] : Arrays.toString(this.posTags));
                }
                return out.toString();
            }
        }

        /**
         * Analyses a single sentence with offset 0.
         * <p>
         * The signature takes exactly the two arguments passed by
         * {@code TextAnalyzer.analyseSentence(String)}; the enclosing analyzer is
         * available implicitly because this is an inner class.
         *
         * @param str the sentence to analyse
         * @param str2 the language of the sentence
         */
        private AnalysedText(String str, String str2) {
            this(str, str2, 0);
        }

        /**
         * Tokenizes the sentence and, depending on which components the enclosing
         * analyzer provides, adds POS tags and chunks.
         * <p>
         * Without a POS tagger the tokens carry no tags and {@link #chunks} is null.
         * With a POS tagger, chunks come from the model based chunker if present,
         * otherwise from the POS type chunker if present, otherwise {@link #chunks}
         * is null.
         *
         * @param str the sentence to analyse
         * @param str2 the language of the sentence
         * @param i the offset reported by {@link #getOffset()}
         * @throws IllegalArgumentException if the sentence or language is null or empty,
         *         or if the offset is negative
         */
        private AnalysedText(String str, String str2, int i) {
            if (str == null || str.isEmpty()) {
                throw new IllegalArgumentException("The parsed Sentence MUST NOT be NULL nor empty!");
            }
            this.sentence = str;
            if (str2 == null || str2.isEmpty()) {
                throw new IllegalArgumentException("The parsed language MUST NOT be NULL nor empty");
            }
            this.language = str2;
            if (i < 0) {
                throw new IllegalArgumentException("The parsed offset MUST NOT be a negative number (offset=" + i + ")");
            }
            this.offset = i;

            Span[] tokenSpans = TextAnalyzer.this.getTokenizer().tokenizePos(str);
            POSTaggerME tagger = TextAnalyzer.this.getPosTagger();
            ChunkerME modelChunker = TextAnalyzer.this.getChunker();
            PosTypeChunker typeChunker = TextAnalyzer.this.getPosTypeChunker();

            String[] tokenTexts = new String[tokenSpans.length];
            for (int t = 0; t < tokenSpans.length; t++) {
                tokenTexts[t] = tokenSpans[t].getCoveredText(str).toString();
            }

            String[][] tags = null;
            double[][] tagProbabilities = null;
            Span[] chunkSpans = null;
            double[] chunkProbabilities = null;
            if (tagger != null) {
                // One row per token; each row is sized to the number of tags kept for that token.
                tags = new String[tokenTexts.length][];
                tagProbabilities = new double[tokenTexts.length][];
                Sequence[] sequences = tagger.topKSequences(tokenTexts);
                String[] tagBuffer = new String[sequences.length];
                double[] probabilityBuffer = new double[sequences.length];
                for (int t = 0; t < tokenSpans.length; t++) {
                    int kept = 0;
                    while (kept < sequences.length) {
                        String tag = (String) sequences[kept].getOutcomes().get(t);
                        // Stop at the first lower ranked sequence that repeats the tag
                        // of the top ranked sequence for this token.
                        if (kept > 0 && tag.equals(tagBuffer[0])) {
                            break;
                        }
                        tagBuffer[kept] = tag;
                        probabilityBuffer[kept] = sequences[kept].getProbs()[t];
                        kept++;
                    }
                    tags[t] = Arrays.copyOf(tagBuffer, kept);
                    tagProbabilities[t] = Arrays.copyOf(probabilityBuffer, kept);
                }
                if (modelChunker != null) {
                    // The model based chunker only sees the tags of the top ranked sequence.
                    String[] bestTags = (String[]) sequences[0].getOutcomes().toArray(new String[tokenTexts.length]);
                    chunkSpans = modelChunker.chunkAsSpans(tokenTexts, bestTags);
                    // NOTE(review): probs() is indexed per chunk below - confirm that the
                    // chunker reports one value per chunk rather than one per token.
                    chunkProbabilities = modelChunker.probs();
                } else if (typeChunker != null) {
                    chunkSpans = typeChunker.chunkAsSpans(tokenTexts, tags, tagProbabilities);
                    chunkProbabilities = new double[chunkSpans.length];
                    Arrays.fill(chunkProbabilities, 1.0d);
                }
            }

            List<Token> tokenList = new ArrayList<Token>(tokenSpans.length);
            for (int t = 0; t < tokenSpans.length; t++) {
                String[] tokenTags = tags == null ? null : tags[t];
                double[] tokenTagProbabilities = tagProbabilities == null ? null : tagProbabilities[t];
                tokenList.add(new Token(tokenSpans[t], tokenTexts[t], tokenTags, tokenTagProbabilities));
            }
            // Must be assigned before chunks are created: the Chunk constructor reads this list.
            this.tokens = Collections.unmodifiableList(tokenList);

            if (chunkSpans == null) {
                this.chunks = null;
            } else {
                List<Chunk> chunkList = new ArrayList<Chunk>(chunkSpans.length);
                for (int c = 0; c < chunkSpans.length; c++) {
                    chunkList.add(new Chunk(chunkSpans[c], chunkProbabilities[c]));
                }
                this.chunks = Collections.unmodifiableList(chunkList);
            }
        }

        /** @return the unmodifiable list of tokens of the analysed sentence */
        public List<Token> getTokens() {
            return tokens;
        }

        /** @return the unmodifiable list of chunks, or null if no chunks were determined */
        public List<Chunk> getChunks() {
            return chunks;
        }

        /** @return the analysed sentence */
        public String getText() {
            return sentence;
        }

        /** @return the language this text was analysed with */
        public String getLanguage() {
            return language;
        }

        /** @return the offset passed at construction time */
        public int getOffset() {
            return offset;
        }
    }

    /* loaded from: input_file:org/apache/stanbol/commons/opennlp/TextAnalyzer$TextAnalysisIterator.class */
    /**
     * Iterates over the sentences of a text and analyses each one on demand.
     * If no sentence detector is available the whole text is treated as a single
     * sentence; a null or empty text yields no elements.
     */
    private final class TextAnalysisIterator implements Iterator<AnalysedText> {
        /** The complete text to analyse. */
        private final String text;
        /** Spans of the sentences within {@link #text}. */
        private final Span[] sentenceSpans;
        /** Index of the sentence returned by the next call to {@link #next()}. */
        private int current;
        /** Language handed to every created {@link AnalysedText}. */
        private final String language;

        /**
         * @param str the text to split into sentences (may be null or empty)
         * @param str2 the language of the text
         */
        private TextAnalysisIterator(String str, String str2) {
            this.current = 0;
            this.text = str;
            this.language = str2;
            if (str == null || str.isEmpty()) {
                this.sentenceSpans = new Span[0];
                return;
            }
            SentenceDetector sentenceDetector = TextAnalyzer.this.getSentenceDetector();
            if (sentenceDetector != null) {
                this.sentenceSpans = sentenceDetector.sentPosDetect(str);
            } else {
                // No detector: analyse the complete text as one sentence.
                this.sentenceSpans = new Span[]{new Span(0, str.length())};
            }
        }

        @Override // java.util.Iterator
        public boolean hasNext() {
            return this.sentenceSpans.length > this.current;
        }

        /**
         * Analyses and returns the next sentence; the sentence start within the text
         * is used as offset of the result.
         *
         * @throws NoSuchElementException if all sentences have already been returned
         */
        @Override // java.util.Iterator
        public AnalysedText next() {
            // Iterator contract: signal exhaustion with NoSuchElementException rather
            // than letting the array access fail with an index error.
            if (!hasNext()) {
                throw new NoSuchElementException("No further sentence available (sentences: "
                        + this.sentenceSpans.length + ")");
            }
            Span span = this.sentenceSpans[this.current];
            String obj = span.getCoveredText(this.text).toString();
            this.current++;
            return new AnalysedText(obj, this.language, span.getStart());
        }

        /** Not supported. */
        @Override // java.util.Iterator
        public void remove() {
            throw new UnsupportedOperationException("Removal of Sentences of the prsed Text is not supported!");
        }
    }

    /* loaded from: input_file:org/apache/stanbol/commons/opennlp/TextAnalyzer$TextAnalyzerConfig.class */
    /**
     * Mutable configuration of a text analyzer: which tokenizer to use, which
     * analysis components are enabled and the minimum POS tag probability.
     */
    public static final class TextAnalyzerConfig {
        protected boolean forceSimpleTokenizer = false;
        protected boolean forceKeywordTokenizer = false;
        protected boolean enablePosTagger = true;
        protected boolean enableChunker = true;
        protected boolean enableSentenceDetector = true;
        protected boolean enablePosTypeChunker = true;
        protected boolean forcePosTypeChunker = true;
        private double minPosTagProbability = 0.75d;

        /** @return true if the simple tokenizer is forced */
        public final boolean isSimpleTokenizerForced() {
            return this.forceSimpleTokenizer;
        }

        /**
         * Forces (or stops forcing) the simple tokenizer. Forcing it clears the
         * keyword tokenizer flag, as only one tokenizer can be forced.
         */
        public final void forceSimpleTokenizer(boolean z) {
            this.forceSimpleTokenizer = z;
            if (z) {
                this.forceKeywordTokenizer = false;
            }
        }

        /** @return true if the keyword tokenizer is forced */
        public final boolean isKeywordTokenizerForced() {
            return this.forceKeywordTokenizer;
        }

        /**
         * Forces (or stops forcing) the keyword tokenizer. Forcing it clears the
         * simple tokenizer flag, as only one tokenizer can be forced.
         */
        public final void forceKeywordTokenizer(boolean z) {
            this.forceKeywordTokenizer = z;
            if (z) {
                this.forceSimpleTokenizer = false;
            }
        }

        /** @return true if the POS tagger is enabled */
        public final boolean isPosTaggerEnable() {
            return this.enablePosTagger;
        }

        /** Enables or disables the POS tagger. */
        public final void enablePosTagger(boolean z) {
            this.enablePosTagger = z;
        }

        /** @return true if the chunker is enabled */
        public final boolean isChunkerEnabled() {
            return this.enableChunker;
        }

        /** Enables or disables the chunker. */
        public final void enableChunker(boolean z) {
            this.enableChunker = z;
        }

        /** @return true if the sentence detector is enabled */
        public final boolean isSentenceDetectorEnabled() {
            return this.enableSentenceDetector;
        }

        /** Enables or disables the sentence detector. */
        public final void enableSentenceDetector(boolean z) {
            this.enableSentenceDetector = z;
        }

        /** @return true if the POS type chunker is enabled */
        public final boolean isPosTypeChunkerEnabled() {
            return this.enablePosTypeChunker;
        }

        /**
         * Enables or disables the POS type chunker. Disabling it also clears the
         * "forced" flag, because a disabled chunker cannot be forced.
         */
        public final void enablePosTypeChunker(boolean z) {
            this.enablePosTypeChunker = z;
            if (z) {
                return;
            }
            forcePosTypeChunker(z);
        }

        /** @return true if the POS type chunker is forced */
        public final boolean isPosTypeChunkerForced() {
            return this.forcePosTypeChunker;
        }

        /**
         * Forces (or stops forcing) the POS type chunker. Forcing it also enables it.
         */
        public final void forcePosTypeChunker(boolean z) {
            this.forcePosTypeChunker = z;
            if (z) {
                enablePosTypeChunker(true);
            }
        }

        /** @return the minimum POS tag probability (0.75 unless changed) */
        public final double getMinPosTypeProbability() {
            return this.minPosTagProbability;
        }

        /**
         * Sets the minimum POS tag probability.
         *
         * @param d the new minimum; values greater than 1 are rejected
         * @throws IllegalArgumentException if {@code d} is greater than 1; the current
         *         value is left unchanged in that case
         */
        public final void setMinPosTagProbability(double d) {
            if (d > 1.0d) {
                // Report the rejected argument, not the value currently stored.
                throw new IllegalArgumentException("The minimum POS tag probability MUST be set to a value <= 1 (parsed:" + d + ")");
            }
            this.minPosTagProbability = d;
        }
    }

    /**
     * Creates an analyzer for the given language using the default configuration.
     *
     * @param openNLP the component providing tokenizers and models; must not be null
     * @param str the language of the texts to analyse
     */
    public TextAnalyzer(OpenNLP openNLP, String str) {
        this(openNLP, str, null);
    }

    /**
     * Creates an analyzer for the given language.
     *
     * @param openNLP the component providing tokenizers and models; must not be null
     * @param str the language of the texts to analyse
     * @param textAnalyzerConfig the configuration, or null to use a default instance
     * @throws IllegalArgumentException if {@code openNLP} is null
     */
    public TextAnalyzer(OpenNLP openNLP, String str, TextAnalyzerConfig textAnalyzerConfig) {
        if (openNLP == null) {
            throw new IllegalArgumentException("The OpenNLP component MUST NOT be NULL");
        }
        if (textAnalyzerConfig != null) {
            this.config = textAnalyzerConfig;
        } else {
            this.config = new TextAnalyzerConfig();
        }
        this.openNLP = openNLP;
        this.language = str;
    }

    /**
     * Returns the POS tagger for the configured language, creating it on first use.
     *
     * @return the tagger, or null if POS tagging is disabled, no model exists for the
     *         language, or loading the model failed (a failed lookup is not retried)
     */
    protected final POSTaggerME getPosTagger() {
        if (!this.config.enablePosTagger) {
            return null;
        }
        if (this.posTagger != null || this.posTaggerNotAvailable) {
            return this.posTagger;
        }
        try {
            POSModel model = this.openNLP.getPartOfSpeachModel(this.language);
            if (model == null) {
                log.debug("No POS Model for language '{}'", this.language);
                this.posTaggerNotAvailable = true;
            } else {
                this.posTagger = new POSTaggerME(model);
            }
        } catch (IOException e) {
            log.info("Unable to load POS Model for language '" + this.language + "'", e);
            this.posTaggerNotAvailable = true;
        }
        return this.posTagger;
    }

    /**
     * Returns the tokenizer, selecting it on first use: the simple tokenizer if forced,
     * else the keyword tokenizer if forced, else the one provided for the language,
     * falling back to the simple tokenizer if none is provided.
     *
     * @return the tokenizer; never null
     */
    public final Tokenizer getTokenizer() {
        if (this.tokenizer != null) {
            return this.tokenizer;
        }
        Tokenizer selected;
        if (this.config.forceSimpleTokenizer) {
            selected = SimpleTokenizer.INSTANCE;
        } else if (this.config.forceKeywordTokenizer) {
            selected = KeywordTokenizer.INSTANCE;
        } else {
            selected = this.openNLP.getTokenizer(this.language);
            if (selected == null) {
                log.debug("No Tokenizer for Language '{}': fall back to SimpleTokenizer!", this.language);
                selected = SimpleTokenizer.INSTANCE;
            }
        }
        this.tokenizer = selected;
        return selected;
    }

    /**
     * Returns the model based chunker for the configured language, creating it on first use.
     *
     * @return the chunker, or null if chunking is disabled, the POS type chunker is
     *         forced, no model exists for the language, or loading the model failed
     *         (a failed lookup is not retried)
     */
    protected final ChunkerME getChunker() {
        boolean modelChunkerWanted = this.config.enableChunker && !this.config.forcePosTypeChunker;
        if (!modelChunkerWanted) {
            return null;
        }
        if (this.chunker != null || this.chunkerNotAvailable) {
            return this.chunker;
        }
        try {
            ChunkerModel model = this.openNLP.getChunkerModel(this.language);
            if (model == null) {
                log.debug("No Chunker Model for language {}", this.language);
                this.chunkerNotAvailable = true;
            } else {
                this.chunker = new ChunkerME(model);
            }
        } catch (IOException e) {
            log.info("Unable to load Chunker Model for language " + this.language, e);
            this.chunkerNotAvailable = true;
        }
        return this.chunker;
    }

    /**
     * Returns the POS type based chunker for the configured language, obtaining it on
     * first use.
     *
     * @return the chunker, or null if chunking, POS tagging or the POS type chunker is
     *         disabled, or if no instance is available for the language (a failed
     *         lookup is not retried)
     */
    protected final PosTypeChunker getPosTypeChunker() {
        // enablePosTypeChunker must be honoured here: otherwise disabling it has no
        // effect whenever no model based chunker is available.
        if (!this.config.enableChunker || !this.config.enablePosTagger || !this.config.enablePosTypeChunker) {
            return null;
        }
        if (this.posTypeChunker == null && !this.posTypeChunkerNotAvailable) {
            this.posTypeChunker = PosTypeChunker.getInstance(this.language, this.config.minPosTagProbability);
            this.posTypeChunkerNotAvailable = this.posTypeChunker == null;
        }
        return this.posTypeChunker;
    }

    /**
     * Returns the sentence detector for the configured language, creating it on first use.
     *
     * @return the detector, or null if sentence detection is disabled, no model exists
     *         for the language, or loading the model failed (a failed lookup is not retried)
     */
    protected final SentenceDetector getSentenceDetector() {
        if (!this.config.enableSentenceDetector) {
            return null;
        }
        if (this.sentenceDetector != null || this.sentenceDetectorNotAvailable) {
            return this.sentenceDetector;
        }
        try {
            SentenceModel model = this.openNLP.getSentenceModel(this.language);
            if (model == null) {
                log.debug("No Sentence Detection Model for language '{}'", this.language);
                this.sentenceDetectorNotAvailable = true;
            } else {
                this.sentenceDetector = new SentenceDetectorME(model);
            }
        } catch (IOException e) {
            log.info("Unable to load Sentence Detection Model for language '" + this.language + "'", e);
            this.sentenceDetectorNotAvailable = true;
        }
        return this.sentenceDetector;
    }

    /** @return the OpenNLP component this analyzer was created with */
    public final OpenNLP getOpenNLP() {
        return openNLP;
    }

    /** @return the configuration used by this analyzer */
    public final TextAnalyzerConfig getConfig() {
        return config;
    }

    /** @return the language this analyzer was created for */
    public final String getLanguage() {
        return language;
    }

    /**
     * Analyses the given text as one single sentence.
     *
     * @param str the sentence to analyse
     * @return the analysis result for the sentence
     */
    public AnalysedText analyseSentence(String str) {
        AnalysedText analysed = new AnalysedText(str, this.language);
        return analysed;
    }

    /**
     * Analyses the given text sentence by sentence.
     *
     * @param str the text to analyse
     * @return an iterator creating the analysis result of each sentence on demand
     */
    public Iterator<AnalysedText> analyse(String str) {
        Iterator<AnalysedText> sentences = new TextAnalysisIterator(str, this.language);
        return sentences;
    }
}
