package org.apache.tika.eval.core.tokens;

import java.io.IOException;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Path;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang3.mutable.MutableInt;
import org.apache.commons.lang3.tuple.Pair;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/tika/eval/core/tokens/CommonTokenCountManager.class */
/**
 * Keeps a cache of per-language "common token" models and counts how many of a
 * document's tokens appear in the model for a requested language.
 *
 * <p>Models are stored in {@link #commonTokenMap}, keyed by language code. When a
 * language code is requested that is not cached yet, {@link #tryToLoad(String)} is
 * invoked; if no model ends up registered for that code, the default language code
 * supplied at construction time is used instead.
 *
 * <p>NOTE(review): this source was produced by a decompiler and the body of
 * {@code tryToLoad} was not recovered (see the note on that method).
 */
public class CommonTokenCountManager {
    private static final Logger LOG = LoggerFactory.getLogger(CommonTokenCountManager.class);

    // The two constants below, as well as commonTokensDir, alreadyTriedToLoad and
    // digitsMatcher, are not read by any code that survived decompilation;
    // presumably they are used by tryToLoad -- verify against the original source.
    private static final Charset COMMON_TOKENS_CHARSET = StandardCharsets.UTF_8;
    private static final String TERM_FREQS = "#SUM_TERM_FREQS\t";

    /** Directory handed to the constructor; may be {@code null}. */
    private final Path commonTokensDir;

    /** Language code used when the caller passes none or when no model is registered; never {@code null}. */
    private final String defaultLangCode;

    /** Language code to model. Always seeded with an entry for {@link #defaultLangCode} by the constructor. */
    Map<String, LangModel> commonTokenMap = new ConcurrentHashMap<>();

    Set<String> alreadyTriedToLoad = new HashSet<>();

    private Matcher digitsMatcher = Pattern.compile("(\\d+)").matcher("");

    /**
     * Creates a manager with no common-tokens directory and an empty default
     * language code; equivalent to {@code new CommonTokenCountManager(null, null)}.
     */
    public CommonTokenCountManager() {
        this(null, null);
    }

    /**
     * Creates a manager and makes sure a model is registered for the default language.
     *
     * @param path directory stored as the common-tokens directory; may be {@code null}
     * @param str  default language code; {@code null} is treated as the empty string.
     *             For the empty string {@code LangModel.EMPTY_MODEL} is registered
     *             directly. For any other value a load is attempted and, if that
     *             leaves no model registered, a warning is logged and
     *             {@code LangModel.EMPTY_MODEL} is registered instead.
     */
    public CommonTokenCountManager(Path path, String str) {
        final String langCode = (str == null) ? "" : str;
        this.defaultLangCode = langCode;
        this.commonTokensDir = path;

        if (langCode.isEmpty()) {
            // No default language requested: nothing to load.
            this.commonTokenMap.put(langCode, LangModel.EMPTY_MODEL);
            return;
        }

        tryToLoad(langCode);
        if (this.commonTokenMap.get(langCode) == null) {
            LOG.warn("No common tokens for default language: '{}'", langCode);
            // Guarantee that lookups for the default language never come back null.
            this.commonTokenMap.put(langCode, LangModel.EMPTY_MODEL);
        }
    }

    /**
     * Counts how the supplied token counts overlap with the common-token model of a language.
     *
     * <p>For every entry of {@code map}: if the token is alphabetic according to
     * {@code AlphaIdeographFilterFactory.isAlphabetic}, it contributes to the
     * alphabetic totals; independently, if the language model contains the token,
     * it contributes to the common-token totals.
     *
     * @param str language code; {@code null}, empty, or a code for which no model
     *            is registered falls back to the default language code
     * @param map token to number of occurrences
     * @return a result built from, in order: the language code actually used, the
     *         number of unique common tokens, the summed occurrences of common
     *         tokens, the number of unique alphabetic tokens, and the summed
     *         occurrences of alphabetic tokens
     * @throws IOException declared by the signature; nothing in the visible body throws it
     * @deprecated marked deprecated in the original class; no replacement is indicated in this file
     */
    @Deprecated
    public CommonTokenResult countTokenOverlaps(String str, Map<String, MutableInt> map) throws IOException {
        String actualLangCode = getActualLangCode(str);
        LangModel langModel = this.commonTokenMap.get(actualLangCode);

        int uniqueCommonTokens = 0;
        int commonTokenOccurrences = 0;
        int uniqueAlphabeticTokens = 0;
        int alphabeticTokenOccurrences = 0;

        for (Map.Entry<String, MutableInt> entry : map.entrySet()) {
            String token = entry.getKey();
            int occurrences = entry.getValue().intValue();
            if (AlphaIdeographFilterFactory.isAlphabetic(token.toCharArray(), token.length())) {
                alphabeticTokenOccurrences += occurrences;
                uniqueAlphabeticTokens++;
            }
            if (langModel.contains(token)) {
                commonTokenOccurrences += occurrences;
                uniqueCommonTokens++;
            }
        }
        return new CommonTokenResult(actualLangCode, uniqueCommonTokens, commonTokenOccurrences,
                uniqueAlphabeticTokens, alphabeticTokenOccurrences);
    }

    /**
     * Returns an unmodifiable snapshot copy of the tokens in the model used for a language.
     *
     * @param str language code; resolved with the same fallback rules as
     *            {@link #countTokenOverlaps(String, Map)}
     * @return unmodifiable copy of the model's tokens
     */
    public Set<String> getTokens(String str) {
        LangModel langModel = this.commonTokenMap.get(getActualLangCode(str));
        return Collections.unmodifiableSet(new HashSet<>(langModel.getTokens()));
    }

    /**
     * Returns the language codes that currently have a registered model.
     *
     * @return a read-only view of the keys of the model map; it reflects later
     *         additions but cannot be used to remove cached models
     */
    public Set<String> getLangs() {
        // Wrap the key set so callers cannot mutate the internal cache through it.
        return Collections.unmodifiableSet(this.commonTokenMap.keySet());
    }

    /**
     * Resolves a language code and returns it together with its registered model.
     *
     * @param str requested language code; may be {@code null} or empty
     * @return pair of (language code actually used, model registered for that code)
     */
    public Pair<String, LangModel> getLangTokens(String str) {
        String actualLangCode = getActualLangCode(str);
        return Pair.of(actualLangCode, this.commonTokenMap.get(actualLangCode));
    }

    /**
     * Maps a requested language code to the code whose model should be used.
     *
     * @param str requested language code; may be {@code null} or empty
     * @return {@code str} if a model is registered for it (after attempting a
     *         load when it is not cached yet); otherwise the default language code
     */
    private String getActualLangCode(String str) {
        if (str == null || str.isEmpty()) {
            return this.defaultLangCode;
        }
        if (this.commonTokenMap.containsKey(str)) {
            return str;
        }
        tryToLoad(str);
        return this.commonTokenMap.get(str) == null ? this.defaultLangCode : str;
    }

    /**
     * Removes every registered model from the cache, including the entry for the
     * default language.
     *
     * @throws IOException declared by the signature; nothing in the visible body throws it
     */
    public void close() throws IOException {
        this.commonTokenMap.clear();
    }

    /* JADX WARN: Removed duplicated region for block: B:17:0x006c  */
    /* JADX WARN: Removed duplicated region for block: B:24:0x00c8 A[Catch: IOException -> 0x0203, all -> 0x022c, TRY_ENTER, TryCatch #3 {IOException -> 0x0203, blocks: (B:86:0x0036, B:88:0x005f, B:19:0x0070, B:20:0x0091, B:23:0x0086, B:24:0x00c8, B:26:0x00e2, B:29:0x00f9, B:47:0x010a, B:49:0x0114, B:51:0x0128, B:52:0x013c, B:32:0x0146, B:34:0x0160, B:40:0x016c, B:41:0x0175, B:38:0x0176, B:43:0x018e, B:61:0x01a2, B:59:0x01b6, B:64:0x01ac, B:65:0x01ef, B:70:0x01c6, B:79:0x01d3, B:77:0x01e7, B:82:0x01dd, B:84:0x01ee, B:14:0x0041), top: B:85:0x0036, outer: #5 }] */
    /*
        Code decompiled incorrectly, please refer to instructions dump.
        To view partially-correct add '--show-bad-code' argument
    */
    /**
     * Attempts to load the common-token model for a language.
     *
     * <p>NOTE(review): the decompiler could not recover this method's body, so as
     * written it always throws. Consequently, constructing the manager with a
     * non-empty default language code, or resolving a language code that is not
     * cached yet, fails with {@link UnsupportedOperationException}. Restore the real
     * implementation from the original source before using this class.
     *
     * @param r8 language code to load
     * @throws UnsupportedOperationException always, in this decompiled form
     */
    private synchronized void tryToLoad(java.lang.String r8) {
        /*
            Method dump skipped, instructions count: 566
            To view this dump add '--comments-level debug' option
        */
        throw new UnsupportedOperationException("Method not decompiled: org.apache.tika.eval.core.tokens.CommonTokenCountManager.tryToLoad(java.lang.String):void");
    }
}
