package org.apache.solr.update.processor;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrInputDocument;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.update.AddUpdateCommand;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/solr/update/processor/LanguageIdentifierUpdateProcessor.class */
/**
 * Base update processor that resolves a language code for each added document, stores it in
 * the configured language field(s) and, when mapping is enabled, copies configured fields to
 * language-specific field names.
 *
 * <p>Language detection itself is delegated to subclasses through
 * {@link #detectLanguage(SolrInputDocument)}. This class reads the {@code langid.*} request
 * parameters, applies whitelist / threshold / fallback rules to the detection result and
 * performs the field mapping.
 */
public abstract class LanguageIdentifierUpdateProcessor extends UpdateRequestProcessor implements LangIdParams {
    private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

    /** Whether {@link #processAdd} runs language identification; from {@link LangIdParams#LANGUAGE_ID}, default {@code true}. */
    protected boolean enabled;
    /** Comma-split value of {@link LangIdParams#FIELDS_PARAM}; must be non-empty or construction fails. */
    protected String[] inputFields = new String[0];
    /** Comma-split value of {@link LangIdParams#MAP_FL}; falls back to {@link #inputFields} when not given. */
    protected String[] mapFields = new String[0];
    /** Regex applied to a source field name when building the mapped field name. */
    protected Pattern mapPattern;
    /** Replacement applied with {@link #mapPattern}; a <code>{lang}</code> token in the result is replaced by the language code. */
    protected String mapReplaceStr;
    /** Field receiving the resolved document language. */
    protected String langField;
    /** Field receiving all language codes resolved while processing the document. */
    protected String langsField;
    /** Document id field name; defaults to the schema's unique key field. Used only for logging in this class. */
    protected String docIdField;
    /** Language used when neither detection nor any fallback field yields a value; may be {@code null}. */
    protected String fallbackValue;
    /** Document fields consulted, in order, for a fallback language. */
    protected String[] fallbackFields = new String[0];
    /** Whether fields are copied to language-specific field names. */
    protected boolean enableMapping;
    /** Whether the source field is kept after it has been mapped. */
    protected boolean mapKeepOrig;
    /** Whether an existing value in {@link #langField} is replaced by the detected language. */
    protected boolean overwrite;
    /** Read from {@link LangIdParams#MAP_OVERWRITE}; not consulted anywhere in this class. */
    protected boolean mapOverwrite;
    /** Whether fields in {@link #mapIndividualFieldsSet} get their own detection pass during mapping. */
    protected boolean mapIndividual;
    /** Whether a mapped field name must exist in the schema. */
    protected boolean enforceSchema;
    /** Minimum certainty a detected language needs to be accepted. */
    protected double threshold;
    /** Accepted (normalized) language codes; empty means every code is accepted. */
    protected HashSet<String> langWhitelist;
    /** Fields subject to individual detection when {@link #mapIndividual} is set. */
    protected HashSet<String> mapIndividualFieldsSet;
    /** Fields iterated by the mapping step of {@link #process}. */
    protected HashSet<String> allMapFieldsSet;
    /** Normalization map applied to detected language codes. */
    protected HashMap<String, String> lcMap;
    /** Language-code map applied only when building mapped field names. */
    protected HashMap<String, String> mapLcMap;
    /** Schema of the request's core, used for the unique key and for {@link #enforceSchema} checks. */
    protected IndexSchema schema;
    /** Per-field character limit read from the parameters; not used by this class itself. */
    protected int maxFieldValueChars;
    /** Total character limit read from the parameters; not used by this class itself. */
    protected int maxTotalChars;
    /** Captures the text inside the last pair of parentheses of a string; not used by this class itself. */
    protected final Pattern tikaSimilarityPattern = Pattern.compile(".*\\((.*?)\\)");
    /** Matches the literal <code>{lang}</code> placeholder. */
    protected final Pattern langPattern = Pattern.compile("\\{lang\\}");

    /**
     * Creates the processor and reads its configuration from the request parameters.
     *
     * @param solrQueryRequest request supplying the schema and the {@code langid.*} parameters
     * @param solrQueryResponse response of the request (not used by this class)
     * @param updateRequestProcessor next processor in the chain
     * @throws SolrException with {@code BAD_REQUEST} if no input fields are configured
     */
    public LanguageIdentifierUpdateProcessor(SolrQueryRequest solrQueryRequest, SolrQueryResponse solrQueryResponse, UpdateRequestProcessor updateRequestProcessor) {
        super(updateRequestProcessor);
        this.schema = solrQueryRequest.getSchema();
        initParams(solrQueryRequest.getParams());
    }

    /**
     * Populates every configuration field from {@code params}.
     *
     * @param params request parameters; when {@code null} nothing is read and the method fails
     *     because no input fields are configured
     * @throws SolrException with {@code BAD_REQUEST} if the input field list is empty
     */
    private void initParams(SolrParams params) {
        if (params != null) {
            // Document-level parameters.
            setEnabled(params.getBool(LangIdParams.LANGUAGE_ID, true));
            String fieldsParam = params.get(LangIdParams.FIELDS_PARAM, "");
            if (fieldsParam.length() > 0) {
                this.inputFields = fieldsParam.split(",");
            }
            this.langField = params.get(LangIdParams.LANG_FIELD, DOCID_LANGFIELD_DEFAULT);
            this.langsField = params.get(LangIdParams.LANGS_FIELD, DOCID_LANGSFIELD_DEFAULT);
            SchemaField uniqueKeyField = this.schema.getUniqueKeyField();
            String defaultIdField = uniqueKeyField == null ? LangIdParams.DOCID_FIELD_DEFAULT : uniqueKeyField.getName();
            this.docIdField = params.get(LangIdParams.DOCID_PARAM, defaultIdField);
            this.fallbackValue = params.get(LangIdParams.FALLBACK);
            String fallbackFieldsParam = params.get(LangIdParams.FALLBACK_FIELDS, "");
            if (fallbackFieldsParam.length() > 0) {
                this.fallbackFields = fallbackFieldsParam.split(",");
            }
            this.overwrite = params.getBool(LangIdParams.OVERWRITE, false);
            this.langWhitelist = new HashSet<>();
            this.threshold = params.getDouble(LangIdParams.THRESHOLD, DOCID_THRESHOLD_DEFAULT.doubleValue());
            String whitelistParam = params.get(LangIdParams.LANG_WHITELIST, "");
            if (whitelistParam.length() > 0) {
                this.langWhitelist.addAll(Arrays.asList(whitelistParam.split(",")));
            }

            // Field-mapping parameters.
            this.enableMapping = params.getBool(LangIdParams.MAP_ENABLE, false);
            String mapFlParam = params.get(LangIdParams.MAP_FL, "");
            this.mapFields = mapFlParam.length() > 0 ? mapFlParam.split(",") : this.inputFields;
            this.mapKeepOrig = params.getBool(LangIdParams.MAP_KEEP_ORIG, false);
            this.mapOverwrite = params.getBool(LangIdParams.MAP_OVERWRITE, false);
            this.mapIndividual = params.getBool(LangIdParams.MAP_INDIVIDUAL, false);

            String individualFlParam = params.get(LangIdParams.MAP_INDIVIDUAL_FL, "");
            String[] mapIndividualFields = individualFlParam.length() > 0 ? individualFlParam.split(",") : this.mapFields;
            this.mapIndividualFieldsSet = new HashSet<>(Arrays.asList(mapIndividualFields));
            this.allMapFieldsSet = new HashSet<>(Arrays.asList(this.mapFields));
            // NOTE(review): when the two arrays are equal this addAll adds nothing, so
            // allMapFieldsSet always equals the set of mapFields and fields listed only in the
            // individual list are never mapped. A union may have been intended; left unchanged
            // because it would alter which fields get mapped -- confirm before changing.
            if (Arrays.equals(this.mapFields, mapIndividualFields)) {
                this.allMapFieldsSet.addAll(this.mapIndividualFieldsSet);
            }

            this.lcMap = parseLangCodeMap(params, LangIdParams.LCMAP);
            this.mapLcMap = parseLangCodeMap(params, LangIdParams.MAP_LCMAP);
            this.enforceSchema = params.getBool(LangIdParams.ENFORCE_SCHEMA, true);
            this.mapPattern = Pattern.compile(params.get(LangIdParams.MAP_PATTERN, LangIdParams.MAP_PATTERN_DEFAULT));
            this.mapReplaceStr = params.get(LangIdParams.MAP_REPLACE, LangIdParams.MAP_REPLACE_DEFAULT);

            // Character limits: the per-field limit may not exceed the total limit.
            this.maxFieldValueChars = params.getInt(LangIdParams.MAX_FIELD_VALUE_CHARS, LangIdParams.MAX_FIELD_VALUE_CHARS_DEFAULT);
            this.maxTotalChars = params.getInt(LangIdParams.MAX_TOTAL_CHARS, LangIdParams.MAX_TOTAL_CHARS_DEFAULT);
            if (this.maxFieldValueChars > this.maxTotalChars) {
                if (this.maxTotalChars == LangIdParams.MAX_TOTAL_CHARS_DEFAULT) {
                    // Total limit is still at its default: raise it to the per-field limit.
                    log.warn("{} ({}) is greater than {} ({}).  Setting {} to {}.",
                            LangIdParams.MAX_FIELD_VALUE_CHARS, this.maxFieldValueChars,
                            LangIdParams.MAX_TOTAL_CHARS, this.maxTotalChars,
                            LangIdParams.MAX_TOTAL_CHARS, this.maxFieldValueChars);
                    this.maxTotalChars = this.maxFieldValueChars;
                } else {
                    // Total limit was set to a non-default value: lower the per-field limit to it.
                    log.warn("{} ({}) is greater than {} ({}).  Setting {} to {}.",
                            LangIdParams.MAX_FIELD_VALUE_CHARS, this.maxFieldValueChars,
                            LangIdParams.MAX_TOTAL_CHARS, this.maxTotalChars,
                            LangIdParams.MAX_FIELD_VALUE_CHARS, this.maxTotalChars);
                    this.maxFieldValueChars = this.maxTotalChars;
                }
            }
        }
        log.debug("LangId configured");
        if (this.inputFields.length == 0) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Missing or faulty configuration of LanguageIdentifierUpdateProcessor. Input fields must be specified as a comma separated list");
        }
    }

    /**
     * Parses a {@code from:to} mapping list (entries separated by commas or spaces) from the
     * parameter {@code paramName}. Entries that do not split into exactly two parts on
     * {@code ':'} are logged and skipped.
     *
     * @return the parsed map; empty when the parameter is absent
     */
    private static HashMap<String, String> parseLangCodeMap(SolrParams params, String paramName) {
        HashMap<String, String> result = new HashMap<>();
        String raw = params.get(paramName);
        if (raw != null) {
            for (String mapping : raw.split("[, ]")) {
                String[] keyVal = mapping.split(":");
                if (keyVal.length == 2) {
                    result.put(keyVal[0], keyVal[1]);
                } else {
                    log.error("Unsupported format for {}: {}. Skipping this mapping.", paramName, mapping);
                }
            }
        }
        return result;
    }

    /**
     * Runs language identification on the command's document when the processor is enabled,
     * then always hands the command to the next processor in the chain.
     *
     * @param addUpdateCommand the add command being processed
     * @throws IOException if thrown by the rest of the chain
     */
    @Override
    public void processAdd(AddUpdateCommand addUpdateCommand) throws IOException {
        if (isEnabled()) {
            process(addUpdateCommand.getSolrInputDocument());
        } else {
            log.debug("Processor not enabled, not running");
        }
        super.processAdd(addUpdateCommand);
    }

    /**
     * Resolves the document language, writes it to {@link #langField}, optionally maps fields
     * to language-specific names and writes all resolved languages to {@link #langsField}.
     *
     * @param solrInputDocument the document to process; modified in place
     * @return the same document instance
     * @throws SolrException with {@code BAD_REQUEST} if a field cannot be mapped to a valid
     *     output field
     */
    protected SolrInputDocument process(SolrInputDocument solrInputDocument) {
        HashSet<String> docLangs = new HashSet<>();
        String fallbackLang = getFallbackLang(solrInputDocument, this.fallbackFields, this.fallbackValue);
        String docLang;

        // Detect when there is no usable existing value, or when overwriting is requested.
        if (this.langField == null || !solrInputDocument.containsKey(this.langField) || this.overwrite) {
            docLang = resolveLanguage(detectLanguage(solrInputDocument), fallbackLang);
            docLangs.add(docLang);
            log.debug("Detected main document language from fields {}: {}", Arrays.toString(this.inputFields), docLang);
            if (solrInputDocument.containsKey(this.langField) && this.overwrite) {
                log.debug("Overwritten old value {}", solrInputDocument.getFieldValue(this.langField));
            }
            if (this.langField != null && this.langField.length() != 0) {
                solrInputDocument.setField(this.langField, docLang);
            }
        } else {
            // Keep the existing value, but still run it through whitelist/normalization rules.
            docLang = resolveLanguage((String) solrInputDocument.getFieldValue(this.langField), fallbackLang);
            docLangs.add(docLang);
            log.debug("Field {} already contained value {}, not overwriting.", this.langField, docLang);
        }

        if (this.enableMapping) {
            for (String fieldName : this.allMapFieldsSet) {
                if (!solrInputDocument.containsKey(fieldName)) {
                    continue;
                }
                String fieldLang;
                if (this.mapIndividual && this.mapIndividualFieldsSet.contains(fieldName)) {
                    // NOTE(review): the whole document is handed to detectLanguage here, not
                    // just this field -- confirm whether per-field detection was intended.
                    fieldLang = resolveLanguage(detectLanguage(solrInputDocument), docLang);
                    docLangs.add(fieldLang);
                    log.debug("Mapping field {} using individually detected language {}", fieldName, fieldLang);
                } else {
                    fieldLang = docLang;
                    log.debug("Mapping field {} using document global language {}", fieldName, fieldLang);
                }
                String mappedField = getMappedField(fieldName, fieldLang);
                if (mappedField == null) {
                    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Invalid output field mapping for " + fieldName + " field and language: " + fieldLang);
                }
                log.debug("Mapping field {} to {} for document {}", fieldName, mappedField, solrInputDocument.getFieldValue(this.docIdField));
                solrInputDocument.setField(mappedField, solrInputDocument.getField(fieldName).getValue());
                if (!this.mapKeepOrig) {
                    log.debug("Removing old field {}", fieldName);
                    solrInputDocument.removeField(fieldName);
                }
            }
        }

        if (this.langsField != null && this.langsField.length() != 0) {
            solrInputDocument.setField(this.langsField, docLangs.toArray());
        }
        return solrInputDocument;
    }

    /**
     * Picks the fallback language: the value of the first candidate field present in the
     * document, or {@code defaultLang} when none is present or that value is {@code null}.
     */
    private String getFallbackLang(SolrInputDocument doc, String[] candidateFields, String defaultLang) {
        String lang = null;
        for (String field : candidateFields) {
            if (doc.containsKey(field)) {
                lang = (String) doc.getFieldValue(field);
                log.debug("Language fallback to field {}", field);
                break; // only the first field present in the document is considered
            }
        }
        if (lang == null) {
            log.debug("Language fallback to value {}", defaultLang);
            lang = defaultLang;
        }
        return lang;
    }

    /**
     * Detects the language(s) of the given document.
     *
     * @param solrInputDocument the document to inspect
     * @return detected languages; only the first entry is used by
     *     {@link #resolveLanguage(List, String)}, and an empty list means nothing was detected
     */
    protected abstract List<DetectedLanguage> detectLanguage(SolrInputDocument solrInputDocument);

    /**
     * Resolves a single, already known language code by treating it as a detection with
     * certainty {@code 1.0}.
     *
     * @param str the language code
     * @param str2 fallback language
     * @return the resolved language, never {@code null}
     */
    protected String resolveLanguage(String str, String str2) {
        List<DetectedLanguage> single = new ArrayList<>();
        single.add(new DetectedLanguage(str, Double.valueOf(1.0d)));
        return resolveLanguage(single, str2);
    }

    /**
     * Chooses the language from a detection result. The first entry is normalized and accepted
     * if it passes the whitelist (when non-empty) and reaches {@link #threshold}; otherwise the
     * fallback is used.
     *
     * @param list detection result; only the first element is considered
     * @param str fallback language, may be {@code null}
     * @return the resolved language, or the empty string when nothing could be resolved
     */
    protected String resolveLanguage(List<DetectedLanguage> list, String str) {
        String resolved;
        if (list.isEmpty()) {
            log.debug("No language detected, using fallback {}", str);
            resolved = str;
        } else {
            DetectedLanguage best = list.get(0);
            String normalized = normalizeLangCode(best.getLangCode());
            if (this.langWhitelist.isEmpty() || this.langWhitelist.contains(normalized)) {
                log.debug("Language detected {} with certainty {}", normalized, best.getCertainty());
                if (best.getCertainty().doubleValue() >= this.threshold) {
                    resolved = normalized;
                } else {
                    log.debug("Detected language below threshold {}, using fallback {}", this.threshold, str);
                    resolved = str;
                }
            } else {
                log.debug("Detected a language not in whitelist ({}), using fallback {}", best.getLangCode(), str);
                resolved = str;
            }
        }
        if (resolved == null || resolved.length() == 0) {
            log.warn("Language resolved to null or empty string. Fallback not configured?");
            resolved = "";
        }
        return resolved;
    }

    /**
     * Applies the {@link #lcMap} normalization to a language code.
     *
     * @param str the detected language code
     * @return the mapped code, or {@code str} itself when no mapping exists
     */
    protected String normalizeLangCode(String str) {
        if (!this.lcMap.containsKey(str)) {
            return str;
        }
        String normalized = this.lcMap.get(str);
        log.debug("Doing langcode normalization mapping from {} to {}", str, normalized);
        return normalized;
    }

    /**
     * Builds the language-specific field name for a source field: {@link #mapPattern} /
     * {@link #mapReplaceStr} are applied to the field name, then the first <code>{lang}</code>
     * placeholder is replaced by the (optionally {@link #mapLcMap}-mapped) language code.
     *
     * @param str source field name
     * @param str2 language code
     * @return the mapped field name, or {@code null} when {@link #enforceSchema} is set and the
     *     schema has no such field
     */
    protected String getMappedField(String str, String str2) {
        String targetLang = this.mapLcMap.containsKey(str2) ? this.mapLcMap.get(str2) : str2;
        String template = this.mapPattern.matcher(str).replaceFirst(this.mapReplaceStr);
        // The language code is data, not a replacement pattern: quote it so '$' or '\' in a
        // code (e.g. one taken from a document field) cannot be read as a group reference.
        String mapped = this.langPattern.matcher(template).replaceFirst(Matcher.quoteReplacement(targetLang));
        if (this.enforceSchema && this.schema.getFieldOrNull(mapped) == null) {
            log.warn("Unsuccessful field name mapping from {} to {}, field does not exist and enforceSchema=true; skipping mapping.", str, mapped);
            return null;
        }
        log.debug("Doing mapping from {} with language {} to field {}", str, str2, mapped);
        return mapped;
    }

    /** @return whether language identification runs for added documents */
    public boolean isEnabled() {
        return this.enabled;
    }

    /** @param z {@code true} to run language identification for added documents */
    public void setEnabled(boolean z) {
        this.enabled = z;
    }
}
