package uk.ac.shef.dcs.jate.solr;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.lang.math.NumberUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.Pair;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.RequestHandlerBase;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.response.SolrQueryResponse;
import org.apache.solr.schema.CopyField;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.update.CommitUpdateCommand;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.JATEProperties;
import uk.ac.shef.dcs.jate.app.App;
import uk.ac.shef.dcs.jate.app.AppParams;
import uk.ac.shef.dcs.jate.model.JATETerm;
import uk.ac.shef.dcs.jate.util.SolrUtil;

/* loaded from: input_file:uk/ac/shef/dcs/jate/solr/TermRecognitionRequestHandler.class */
public class TermRecognitionRequestHandler extends RequestHandlerBase {
    public static final String FIELD_CONTENT_NGRAM = "solr_field_content_ngrams";

    @Deprecated
    public static final String FIELD_DOMAIN_TERMS = "field_domain_terms";
    public static final String TERM_RANKING_ALGORITHM = "algorithm";
    public static final String CANDIDATE_EXTRACTION = "extraction";
    public static final String BOOSTING = "boosting";
    public static final String INDEX_TERM = "indexTerm";
    public static final String JATE_PROPERTY_FILE = AppParams.JATE_PROPERTIES_FILE.getParamKey();
    public static final String PREFILTER_MIN_TERM_TOTAL_FREQUENCY = AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey();
    public static final String PREFILTER_MIN_TERM_CONTEXT_FREQUENCY = AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey();
    public static final String CUTOFF_THRESHOLD = AppParams.CUTOFF_THRESHOLD.getParamKey();
    public static final String CUTOFF_TOP_K = AppParams.CUTOFF_TOP_K.getParamKey();
    public static final String CUTOFF_TOP_K_PERCENT = AppParams.CUTOFF_TOP_K_PERCENT.getParamKey();
    public static final String REFERENCE_FREQUENCY_FILE = AppParams.REFERENCE_FREQUENCY_FILE.getParamKey();
    public static final Float DEFAULT_BOOST_VALUE = Float.valueOf(1.0f);
    private final Logger log = LoggerFactory.getLogger(getClass());
    private final TermRecognitionProcessor generalTRProcessor = TermRecognitionProcessorFactory.createTermRecognitionProcessor();

    /* loaded from: input_file:uk/ac/shef/dcs/jate/solr/TermRecognitionRequestHandler$Algorithm.class */
    public enum Algorithm {
        C_VALUE("CValue"),
        ATTF("ATTF"),
        CHI_SQUARE("ChiSquare"),
        GLOSSEX("GlossEx"),
        RAKE("RAKE"),
        RIDF("RIDF"),
        TERM_EX("TermEx"),
        TF_IDF("TTF-IDF"),
        TTF("TTF"),
        WEIRDNESS("Weirdness");

        private final String algorithmName;

        Algorithm(String str) {
            this.algorithmName = str;
        }

        public String getAlgorithmName() {
            return this.algorithmName;
        }
    }

    @Override // org.apache.solr.handler.RequestHandlerBase
    public void handleRequestBody(SolrQueryRequest solrQueryRequest, SolrQueryResponse solrQueryResponse) throws Exception {
        this.log.info("Term recognition request handler...");
        setTopInitArgsAsInvariants(solrQueryRequest);
        String str = solrQueryRequest.getParams().get(JATE_PROPERTY_FILE);
        String str2 = solrQueryRequest.getParams().get(TERM_RANKING_ALGORITHM);
        Boolean bool = solrQueryRequest.getParams().getBool(CANDIDATE_EXTRACTION);
        String str3 = solrQueryRequest.getParams().get(AppParams.OUTPUT_FILE.getParamKey());
        Boolean bool2 = solrQueryRequest.getParams().getBool(INDEX_TERM);
        Boolean bool3 = solrQueryRequest.getParams().getBool(BOOSTING);
        Algorithm algorithm = getAlgorithm(str2);
        JATEProperties jateProperties = App.getJateProperties(str);
        SolrIndexSearcher searcher = solrQueryRequest.getSearcher();
        try {
            if (bool.booleanValue()) {
                this.log.info("start candidate extraction (i.e., re-index of whole corpus) ...");
                this.generalTRProcessor.candidateExtraction(searcher.getCore(), str);
                this.log.info("complete candidate terms indexing.");
            }
            List<JATETerm> rankingAndFiltering = this.generalTRProcessor.rankingAndFiltering(searcher.getCore(), str, initialiseTRRunTimeParams(solrQueryRequest), algorithm);
            this.log.info(String.format("complete term recognition extraction! Finalized Term size [%s]", Integer.valueOf(rankingAndFiltering.size())));
            if (isExport(str3)) {
                this.generalTRProcessor.export(rankingAndFiltering);
            }
            if (bool2.booleanValue()) {
                this.log.info("start to index filtered candidate terms ...");
                indexTerms(rankingAndFiltering, jateProperties, searcher, bool3.booleanValue(), bool.booleanValue());
                searcher.getCore().getUpdateHandler().commit(new CommitUpdateCommand(solrQueryRequest, true));
                this.log.info("complete the indexing of candidate terms.");
            }
        } finally {
            searcher.close();
        }
    }

    private boolean isExport(String str) {
        return str != null && StringUtils.isNotEmpty(str);
    }

    private Map<String, String> initialiseTRRunTimeParams(SolrQueryRequest solrQueryRequest) {
        HashMap hashMap = new HashMap();
        Float f = solrQueryRequest.getParams().getFloat(CUTOFF_THRESHOLD);
        if (f != null) {
            hashMap.put(AppParams.CUTOFF_THRESHOLD.getParamKey(), f.toString());
        }
        Integer num = solrQueryRequest.getParams().getInt(CUTOFF_TOP_K);
        if (num != null) {
            hashMap.put(AppParams.CUTOFF_TOP_K.getParamKey(), num.toString());
        }
        Float f2 = solrQueryRequest.getParams().getFloat(CUTOFF_TOP_K_PERCENT);
        if (f2 != null) {
            hashMap.put(AppParams.CUTOFF_TOP_K_PERCENT.getParamKey(), f2.toString());
        }
        Integer num2 = solrQueryRequest.getParams().getInt(PREFILTER_MIN_TERM_TOTAL_FREQUENCY);
        if (num2 != null) {
            hashMap.put(AppParams.PREFILTER_MIN_TERM_TOTAL_FREQUENCY.getParamKey(), num2.toString());
        }
        Integer num3 = solrQueryRequest.getParams().getInt(PREFILTER_MIN_TERM_CONTEXT_FREQUENCY);
        if (num3 != null) {
            hashMap.put(AppParams.PREFILTER_MIN_TERM_CONTEXT_FREQUENCY.getParamKey(), num3.toString());
        }
        String str = solrQueryRequest.getParams().get(REFERENCE_FREQUENCY_FILE);
        if (str != null) {
            hashMap.put(AppParams.REFERENCE_FREQUENCY_FILE.getParamKey(), str);
        }
        String str2 = solrQueryRequest.getParams().get(AppParams.OUTPUT_FILE.getParamKey());
        if (str2 != null) {
            hashMap.put(AppParams.OUTPUT_FILE.getParamKey(), str2);
        }
        Boolean bool = solrQueryRequest.getParams().getBool(AppParams.COLLECT_TERM_INFO.getParamKey());
        if (bool != null) {
            hashMap.put(AppParams.COLLECT_TERM_INFO.getParamKey(), bool.toString());
        }
        return hashMap;
    }

    @Override // org.apache.solr.handler.RequestHandlerBase, org.apache.solr.core.SolrInfoBean
    public String getDescription() {
        return "Automatic term recognition and indexing by whole corpus/index analysis.";
    }

    public void indexTerms(List<JATETerm> list, JATEProperties jATEProperties, SolrIndexSearcher solrIndexSearcher, boolean z, boolean z2) throws JATEException {
        int maxDoc = solrIndexSearcher.maxDoc();
        String solrFieldNameJATEDomainTerms = jATEProperties.getSolrFieldNameJATEDomainTerms();
        String solrFieldNameJATECTerms = jATEProperties.getSolrFieldNameJATECTerms();
        this.log.info(String.format("indexing [%s] terms into field [%s] for total [%s] documents ...", Integer.valueOf(list.size()), solrFieldNameJATEDomainTerms, Integer.valueOf(maxDoc)));
        if (list.size() == 0) {
            return;
        }
        SolrCore core = solrIndexSearcher.getCore();
        IndexSchema latestSchema = core.getLatestSchema();
        AutoCloseable autoCloseable = null;
        try {
            try {
                IndexWriter indexWriter = core.getSolrCoreState().getIndexWriter(core).get();
                Map<String, List<CopyField>> copyFieldsMap = latestSchema.getCopyFieldsMap();
                for (int i = 0; i < maxDoc; i++) {
                    try {
                        Document doc = solrIndexSearcher.doc(i);
                        if (z2) {
                            SolrUtil.copyFields(copyFieldsMap, DEFAULT_BOOST_VALUE.floatValue(), doc);
                        }
                        Terms termVector = SolrUtil.getTermVector(i, solrFieldNameJATECTerms, solrIndexSearcher);
                        if (termVector != null) {
                            iterateAddDomainTermFields(z, solrFieldNameJATEDomainTerms, latestSchema, doc, getSelectedWeightedCandidates(list, SolrUtil.getNormalisedTerms(termVector)));
                            this.log.debug(String.format("document [%s] version before debugging: %s", doc.get("id"), doc.get(CommonParams.VERSION_FIELD)));
                            String str = doc.get(CommonParams.VERSION_FIELD);
                            doc.removeField(CommonParams.VERSION_FIELD);
                            doc.add(latestSchema.getField(CommonParams.VERSION_FIELD).createField(versionIncrement(str)));
                            indexWriter.updateDocument(new Term("id", doc.get("id")), doc);
                        }
                    } catch (IOException e) {
                        throw new JATEException(String.format("Failed to retrieve current document (docId: [%s]) due to an unexpected I/O exception: %s", Integer.valueOf(i), e.toString()));
                    }
                }
                indexWriter.forceMerge(1, false);
                indexWriter.commit();
                if (indexWriter != null) {
                    try {
                        indexWriter.close();
                    } catch (IOException e2) {
                        this.log.error(e2.toString());
                    }
                }
                if (core != null) {
                    core.close();
                }
                this.log.info(String.format("finalised terms have been indexed into [%s] field for all documents", solrFieldNameJATEDomainTerms));
            } catch (IOException e3) {
                throw new JATEException(String.format("Failed to index filtered domain terms due to I/O exception when loading solr index writer: %s", e3.toString()));
            }
        } catch (Throwable th) {
            if (0 != 0) {
                try {
                    autoCloseable.close();
                } catch (IOException e4) {
                    this.log.error(e4.toString());
                }
            }
            if (core != null) {
                core.close();
            }
            throw th;
        }
    }

    private String versionIncrement(String str) {
        String str2 = str;
        if (NumberUtils.isNumber(str)) {
            str2 = String.valueOf(Long.parseLong(str) + 1);
        }
        return str2;
    }

    private void iterateAddDomainTermFields(boolean z, String str, IndexSchema indexSchema, Document document, List<Pair<String, Double>> list) {
        document.removeFields(str);
        for (Pair<String, Double> pair : list) {
            if (pair != null) {
                if (z) {
                    document.add(indexSchema.getField(str).createField(pair.first()));
                } else {
                    document.add(indexSchema.getField(str).createField(pair.first()));
                }
            }
        }
    }

    private List<Pair<String, Double>> getSelectedWeightedCandidates(List<JATETerm> list, List<String> list2) {
        ArrayList arrayList = new ArrayList();
        list2.parallelStream().forEach(str -> {
            list.parallelStream().forEach(jATETerm -> {
                if (jATETerm == null || str == null || jATETerm.getString() == null || !jATETerm.getString().equalsIgnoreCase(str)) {
                    return;
                }
                arrayList.add(new Pair(jATETerm.getString(), Double.valueOf(jATETerm.getScore())));
            });
        });
        return arrayList;
    }

    private void setTopInitArgsAsInvariants(SolrQueryRequest solrQueryRequest) {
        HashMap hashMap = new HashMap(this.initArgs.size());
        for (int i = 0; i < this.initArgs.size(); i++) {
            Object val = this.initArgs.getVal(i);
            if (val != null && !(val instanceof NamedList)) {
                hashMap.put(this.initArgs.getName(i), val.toString());
            }
        }
        if (hashMap.isEmpty()) {
            return;
        }
        solrQueryRequest.setParams(SolrParams.wrapDefaults(new MapSolrParams(hashMap), solrQueryRequest.getParams()));
    }

    private Algorithm getAlgorithm(String str) throws JATEException {
        if (StringUtils.isEmpty(str)) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "ATE algorithm is not specified. Please check API documentation for all the supported ATR algorithms.");
        }
        if (str.equalsIgnoreCase(Algorithm.C_VALUE.getAlgorithmName())) {
            this.log.debug(String.format("[%s] algorithm is set to rank term candidates. ", Algorithm.C_VALUE.getAlgorithmName()));
            return Algorithm.C_VALUE;
        }
        if (str.equalsIgnoreCase(Algorithm.ATTF.getAlgorithmName())) {
            this.log.debug(String.format("[%s] algorithm is set to rank term candidates. ", Algorithm.ATTF.getAlgorithmName()));
            return Algorithm.ATTF;
        }
        if (str.equalsIgnoreCase(Algorithm.CHI_SQUARE.getAlgorithmName())) {
            this.log.debug(String.format("[%s] algorithm is set to rank term candidates. ", Algorithm.CHI_SQUARE.getAlgorithmName()));
            return Algorithm.CHI_SQUARE;
        }
        if (str.equalsIgnoreCase(Algorithm.GLOSSEX.getAlgorithmName())) {
            this.log.debug(String.format("[%s] algorithm is set to rank term candidates. ", Algorithm.GLOSSEX.getAlgorithmName()));
            return Algorithm.GLOSSEX;
        }
        if (str.equalsIgnoreCase(Algorithm.RAKE.getAlgorithmName())) {
            this.log.debug(String.format("[%s] algorithm is set to rank term candidates. ", Algorithm.RAKE.getAlgorithmName()));
            return Algorithm.RAKE;
        }
        if (str.equalsIgnoreCase(Algorithm.RIDF.getAlgorithmName())) {
            this.log.debug(String.format("[%s] algorithm is set to rank term candidates. ", Algorithm.RIDF.getAlgorithmName()));
            return Algorithm.RIDF;
        }
        if (str.equalsIgnoreCase(Algorithm.TERM_EX.getAlgorithmName())) {
            this.log.debug(String.format("[%s] algorithm is set to rank term candidates. ", Algorithm.TERM_EX.getAlgorithmName()));
            return Algorithm.TERM_EX;
        }
        if (str.equalsIgnoreCase(Algorithm.TF_IDF.getAlgorithmName())) {
            this.log.debug(String.format("[%s] algorithm is set to rank term candidates. ", Algorithm.TF_IDF.getAlgorithmName()));
            return Algorithm.TF_IDF;
        }
        if (str.equalsIgnoreCase(Algorithm.TTF.getAlgorithmName())) {
            this.log.debug(String.format("[%s] algorithm is set to rank term candidates. ", Algorithm.TTF.getAlgorithmName()));
            return Algorithm.TTF;
        }
        if (!str.equalsIgnoreCase(Algorithm.WEIRDNESS.getAlgorithmName())) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, String.format("Current algorithm [%s] is not supported. Please check API documentation for all the supported ATR algorithms.", str));
        }
        this.log.debug(String.format("[%s] algorithm is set to rank term candidates. ", Algorithm.WEIRDNESS.getAlgorithmName()));
        return Algorithm.WEIRDNESS;
    }
}
