package org.apache.tika.langdetect.opennlp;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import opennlp.tools.langdetect.Language;
import opennlp.tools.langdetect.LanguageDetectorModel;
import opennlp.tools.util.normalizer.CharSequenceNormalizer;
import opennlp.tools.util.normalizer.EmojiCharSequenceNormalizer;
import opennlp.tools.util.normalizer.NumberCharSequenceNormalizer;
import opennlp.tools.util.normalizer.ShrinkCharSequenceNormalizer;
import opennlp.tools.util.normalizer.TwitterCharSequenceNormalizer;
import org.apache.tika.language.detect.LanguageConfidence;
import org.apache.tika.language.detect.LanguageDetector;
import org.apache.tika.language.detect.LanguageResult;

/* loaded from: input_file:org/apache/tika/langdetect/opennlp/OpenNLPDetector.class */
/**
 * Language detector that accumulates text in an internal buffer and delegates
 * prediction to a {@link ProbingLanguageDetector} built on a shared, statically
 * loaded OpenNLP {@link LanguageDetectorModel}.
 *
 * <p>The built-in model is loaded once when this class is initialized; a failure
 * to load it aborts class initialization with a {@link RuntimeException}.
 */
public class OpenNLPDetector extends LanguageDetector {
    /** Classpath location of the bundled language detection model. */
    private static final String MODEL_PATH = "/opennlp_langdetect_model_20190626.bin";

    /** Model shared by every detector instance; assigned by {@link #loadBuiltInModels()}. */
    static LanguageDetectorModel LANG_MODEL;

    static {
        try {
            loadBuiltInModels();
        } catch (IOException e) {
            // Keep the original IOException as the cause so the root problem is not lost.
            throw new RuntimeException("Can't find built-in language models", e);
        }
    }

    private final ProbingLanguageDetector detector = new ProbingLanguageDetector(LANG_MODEL, getNormalizers());
    private final StringBuilder buffer = new StringBuilder();

    /**
     * Normalizer that replaces every run of characters that are neither alphabetic
     * nor ideographic with a single space.
     */
    public static class AlphaIdeographSequenceNormalizer implements CharSequenceNormalizer {
        private static final Pattern REGEX = Pattern.compile("[^\\p{IsAlphabetic}\\p{IsIdeographic}]+");
        private static final AlphaIdeographSequenceNormalizer INSTANCE = new AlphaIdeographSequenceNormalizer();

        /** @return the shared singleton instance */
        public static AlphaIdeographSequenceNormalizer getInstance() {
            return INSTANCE;
        }

        private AlphaIdeographSequenceNormalizer() {
        }

        /**
         * @param charSequence text to normalize
         * @return the text with each non-alphabetic, non-ideographic run replaced by one space
         */
        public CharSequence normalize(CharSequence charSequence) {
            return REGEX.matcher(charSequence).replaceAll(" ");
        }
    }

    /**
     * Normalizer that blanks out http(s) URLs and e-mail-like tokens. The patterns use
     * bounded quantifiers, so a single match can never span an unbounded amount of text.
     */
    public static class TikaUrlCharSequenceNormalizer implements CharSequenceNormalizer {
        private static final Pattern URL_REGEX = Pattern.compile("https?://[-_.?&~;+=/#0-9A-Za-z]{10,10000}");
        private static final Pattern MAIL_REGEX = Pattern.compile("[-_.0-9A-Za-z]{1,100}@[-_0-9A-Za-z]{1,100}[-_.0-9A-Za-z]{1,100}");
        private static final TikaUrlCharSequenceNormalizer INSTANCE = new TikaUrlCharSequenceNormalizer();

        /** @return the shared singleton instance */
        public static TikaUrlCharSequenceNormalizer getInstance() {
            return INSTANCE;
        }

        private TikaUrlCharSequenceNormalizer() {
        }

        /**
         * @param charSequence text to normalize
         * @return the text with URL matches, then e-mail matches, each replaced by one space
         */
        public CharSequence normalize(CharSequence charSequence) {
            // URLs are removed first; the e-mail pattern then runs on the URL-free text.
            String withoutUrls = URL_REGEX.matcher(charSequence).replaceAll(" ");
            return MAIL_REGEX.matcher(withoutUrls).replaceAll(" ");
        }
    }

    /**
     * Loads the bundled model from the classpath into {@link #LANG_MODEL}.
     *
     * @throws IOException if the model resource is missing or cannot be read
     */
    static void loadBuiltInModels() throws IOException {
        try (InputStream modelStream = OpenNLPDetector.class.getResourceAsStream(MODEL_PATH)) {
            if (modelStream == null) {
                // getResourceAsStream signals a missing resource with null, not an exception.
                throw new IOException("Built-in language model not found on classpath: " + MODEL_PATH);
            }
            LANG_MODEL = new LanguageDetectorModel(modelStream);
        }
    }

    /** @return the normalizers handed to the underlying detector, in application order */
    private static CharSequenceNormalizer[] getNormalizers() {
        return new CharSequenceNormalizer[] {
            TikaUrlCharSequenceNormalizer.getInstance(),
            AlphaIdeographSequenceNormalizer.getInstance(),
            EmojiCharSequenceNormalizer.getInstance(),
            TwitterCharSequenceNormalizer.getInstance(),
            NumberCharSequenceNormalizer.getInstance(),
            ShrinkCharSequenceNormalizer.getInstance()
        };
    }

    /**
     * Returns a fresh detector; the model itself is already loaded statically.
     *
     * @return a new {@code OpenNLPDetector}, not this instance
     */
    public LanguageDetector loadModels() throws IOException {
        return new OpenNLPDetector();
    }

    /**
     * Not supported by this detector.
     *
     * @throws UnsupportedOperationException always
     */
    public LanguageDetector loadModels(Set<String> set) throws IOException {
        throw new UnsupportedOperationException("This lang detector doesn't allow subsetting models");
    }

    /**
     * @param str language code to look up
     * @return {@code true} if the code is among the supported languages;
     *         {@code false} otherwise, including for {@code null}
     */
    public boolean hasModel(String str) {
        if (str == null) {
            return false;
        }
        for (String supported : this.detector.getSupportedLanguages()) {
            if (str.equals(supported)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Not supported by this detector.
     *
     * @throws UnsupportedOperationException always
     */
    public LanguageDetector setPriors(Map<String, Float> map) throws IOException {
        throw new UnsupportedOperationException();
    }

    /** Discards all text accumulated so far. */
    public void reset() {
        this.buffer.setLength(0);
    }

    /**
     * Appends text to the internal buffer, never growing it past the detector's
     * maximum length.
     *
     * @param cArr source characters
     * @param i    offset of the first character to append
     * @param i2   number of characters offered
     */
    public void addText(char[] cArr, int i, int i2) {
        int remaining = this.detector.getMaxLength() - this.buffer.length();
        int toAppend = Math.min(i2, remaining);
        // Guard on the clamped length: it is <= i2, so this also covers i2 <= 0, and it
        // prevents a negative length reaching StringBuilder.append when the buffer is
        // already at or beyond the maximum (e.g. after the maximum was lowered).
        if (toAppend <= 0) {
            return;
        }
        this.buffer.append(cArr, i, toAppend);
    }

    /**
     * Runs detection on the buffered text.
     *
     * @return one result per language returned by the underlying detector, in the same order
     */
    public List<LanguageResult> detectAll() {
        Language[] predictions = this.detector.predictLanguages(this.buffer.toString());
        List<LanguageResult> results = new ArrayList<>(predictions.length);
        for (Language prediction : predictions) {
            double score = prediction.getConfidence();
            results.add(new LanguageResult(prediction.getLang(), getConfidence(score), (float) score));
        }
        return results;
    }

    /** @param i new maximum length, forwarded to the underlying detector */
    public void setMaxLength(int i) {
        this.detector.setMaxLength(i);
    }

    /** @return the language codes reported by the underlying detector */
    public String[] getSupportedLanguages() {
        return this.detector.getSupportedLanguages();
    }

    /**
     * Maps a raw score to a confidence bucket: above 0.9 is HIGH, above 0.85 is
     * MEDIUM, above 0.2 is LOW, anything else is NONE.
     */
    private static LanguageConfidence getConfidence(double d) {
        if (d > 0.9d) {
            return LanguageConfidence.HIGH;
        }
        if (d > 0.85d) {
            return LanguageConfidence.MEDIUM;
        }
        if (d > 0.2d) {
            return LanguageConfidence.LOW;
        }
        return LanguageConfidence.NONE;
    }
}
