package uk.ac.shef.dcs.jate.feature;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ForkJoinPool;
import org.apache.hadoop.fs.CommonConfigurationKeys;
import org.apache.log4j.Logger;
import org.apache.solr.search.SolrIndexSearcher;
import uk.ac.shef.dcs.jate.JATEException;
import uk.ac.shef.dcs.jate.JATEProperties;

/* loaded from: input_file:uk/ac/shef/dcs/jate/feature/CooccurrenceFBMaster.class */
/**
 * Builds a {@link Cooccurrence} feature from context-based term frequencies.
 *
 * <p>The work is delegated to a {@link CooccurrenceFBWorker} that is run on a
 * {@link ForkJoinPool}; afterwards, counts for term pairs that appear together in
 * context overlap zones are adjusted through {@code Cooccurrence.deduce}.
 */
public class CooccurrenceFBMaster extends AbstractFeatureBuilder {
    private static final Logger LOG = Logger.getLogger(CooccurrenceFBMaster.class.getName());

    /** Lower bound on the number of contexts handed to a single worker. */
    private static final int MIN_TASKS_PER_WORKER = 100;
    /** Upper bound on the number of contexts handed to a single worker. */
    private static final int MAX_TASKS_PER_WORKER = 10000;

    private final FrequencyCtxBased frequencyCtxBased;
    private final FrequencyCtxBased ref_frequencyCtxBased;
    private final FrequencyTermBased frequencyTermBased;
    private final int minTTF;
    private final int minTCF;

    /**
     * @param solrIndexSearcher   passed through to {@link AbstractFeatureBuilder}
     * @param jATEProperties      passed through to {@link AbstractFeatureBuilder}; also supplies the
     *                            maximum number of CPU cores to use
     * @param frequencyTermBased  term-level frequencies, used for the {@code minTTF} filter
     * @param num                 minimum total term frequency ({@code min.ttf}); must not be null
     * @param frequencyCtxBased   context-level frequencies of the candidate terms
     * @param frequencyCtxBased2  context-level frequencies of the reference terms
     * @param num2                minimum number of contexts a term must appear in ({@code min.tcf});
     *                            must not be null
     */
    public CooccurrenceFBMaster(SolrIndexSearcher solrIndexSearcher, JATEProperties jATEProperties, FrequencyTermBased frequencyTermBased, Integer num, FrequencyCtxBased frequencyCtxBased, FrequencyCtxBased frequencyCtxBased2, Integer num2) {
        super(solrIndexSearcher, jATEProperties);
        this.frequencyCtxBased = frequencyCtxBased;
        this.frequencyTermBased = frequencyTermBased;
        this.ref_frequencyCtxBased = frequencyCtxBased2;
        this.minTTF = num.intValue();
        this.minTCF = num2.intValue();
    }

    /**
     * Builds the co-occurrence feature.
     *
     * @return the populated {@link Cooccurrence} instance
     * @throws JATEException declared by the overridden method
     */
    @Override // uk.ac.shef.dcs.jate.feature.AbstractFeatureBuilder
    public AbstractFeature build() throws JATEException {
        List<ContextWindow> contexts = new ArrayList<>(this.frequencyCtxBased.getMapCtx2TTF().keySet());
        Collections.sort(contexts);

        // Any non-positive configuration value falls back to a single core; ForkJoinPool
        // rejects a parallelism below 1.
        int configuredCores = this.properties.getMaxCPUCores();
        int cores = configuredCores < 1 ? 1 : configuredCores;
        int maxPerThread = getMaxPerThread(contexts, cores);

        StringBuilder startMsg = new StringBuilder("Building features using cpu cores=");
        startMsg.append(cores).append(", total ctx where reference terms appear =").append(contexts.size()).append(", max per worker=").append(maxPerThread);
        LOG.info(startMsg.toString());
        LOG.info("Filtering candidates with min.ttf=" + this.minTTF + " min.tcf=" + this.minTCF);

        HashSet<String> candidateTerms = collectCandidateTerms(contexts);

        Cooccurrence cooccurrence = new Cooccurrence(candidateTerms.size(), this.ref_frequencyCtxBased.getMapTerm2Ctx().size());
        LOG.info("Beginning building features. Total terms=" + candidateTerms.size() + ", total contexts=" + contexts.size());

        int totalProcessed;
        ForkJoinPool pool = new ForkJoinPool(cores);
        try {
            CooccurrenceFBWorker worker = new CooccurrenceFBWorker(cooccurrence, contexts, this.frequencyTermBased, this.minTTF, this.frequencyCtxBased, this.ref_frequencyCtxBased, this.minTCF, maxPerThread);
            totalProcessed = ((Integer) pool.invoke(worker)).intValue();
        } finally {
            // The pool is private to this call; release its worker threads once the task is done.
            pool.shutdown();
        }

        Map<String, ContextOverlap> overlapZones = this.frequencyCtxBased.getCtxOverlapZones();
        if (overlapZones.size() > 0) {
            LOG.info("Correcting double counted co-occurrences in context overlapping zones, total zones=" + overlapZones.size());
            correctOverlapZones(cooccurrence, overlapZones);
        }

        StringBuilder doneMsg = new StringBuilder("Complete building features, total contexts processed=" + totalProcessed);
        doneMsg.append("; total indexed candidate terms=").append(cooccurrence.termCounter).append(";").append(" total indexed reference terms=").append(cooccurrence.ctxTermCounter);
        LOG.info(doneMsg.toString());
        return cooccurrence;
    }

    /**
     * Collects the distinct terms found in the given contexts that satisfy the
     * {@code minTTF} / {@code minTCF} thresholds. When both thresholds are zero every term is kept.
     */
    private HashSet<String> collectCandidateTerms(List<ContextWindow> contexts) {
        boolean noFilter = this.minTTF == 0 && this.minTCF == 0;
        HashSet<String> terms = new HashSet<>();
        for (ContextWindow ctx : contexts) {
            Map<String, Integer> termsInCtx = this.frequencyCtxBased.getTFIC(ctx);
            if (noFilter) {
                terms.addAll(termsInCtx.keySet());
                continue;
            }
            for (String term : termsInCtx.keySet()) {
                if (this.frequencyTermBased.getTTF(term) >= this.minTTF && this.frequencyCtxBased.getContexts(term).size() >= this.minTCF) {
                    terms.add(term);
                }
            }
        }
        return terms;
    }

    /**
     * For every overlap zone, pairs each candidate term with each different reference term found
     * in the zone of the same key and calls {@code cooccurrence.deduce} with the smaller of the two
     * in-zone occurrence counts. Zones with fewer than two distinct terms on either side are skipped,
     * as are pairs where either term is not indexed in {@code cooccurrence}.
     */
    private void correctOverlapZones(Cooccurrence cooccurrence, Map<String, ContextOverlap> overlapZones) {
        Map<String, ContextOverlap> refOverlapZones = this.ref_frequencyCtxBased.getCtxOverlapZones();
        for (Map.Entry<String, ContextOverlap> zone : overlapZones.entrySet()) {
            Map<String, Integer> termCounts = countTerms(zone.getValue());
            if (termCounts.size() <= 1) {
                continue;
            }
            Map<String, Integer> refTermCounts = countTerms(refOverlapZones.get(zone.getKey()));
            if (refTermCounts.size() <= 1) {
                continue;
            }
            for (Map.Entry<String, Integer> termEntry : termCounts.entrySet()) {
                String term = termEntry.getKey();
                int termCount = termEntry.getValue().intValue();
                for (Map.Entry<String, Integer> refEntry : refTermCounts.entrySet()) {
                    String refTerm = refEntry.getKey();
                    if (term.equals(refTerm)) {
                        continue;
                    }
                    int correction = Math.min(termCount, refEntry.getValue().intValue());
                    int termIdx = cooccurrence.lookupTerm(term);
                    int refTermIdx = cooccurrence.lookupRefTerm(refTerm);
                    // -1 signals that the term is not indexed in the feature.
                    if (termIdx != -1 && refTermIdx != -1) {
                        cooccurrence.deduce(termIdx, refTermIdx, correction);
                    }
                }
            }
        }
    }

    /**
     * Counts how many times each term is listed in the given overlap zone.
     *
     * @param zone overlap zone, may be {@code null}
     * @return term to occurrence count; empty when {@code zone} is {@code null}
     */
    private static Map<String, Integer> countTerms(ContextOverlap zone) {
        Map<String, Integer> counts = new HashMap<>();
        if (zone == null) {
            return counts;
        }
        for (String term : zone.getTerms()) {
            Integer previous = counts.get(term);
            counts.put(term, Integer.valueOf(previous == null ? 1 : previous.intValue() + 1));
        }
        return counts;
    }

    /**
     * Computes how many contexts a single worker may process: the even share
     * {@code list.size() / i}, clamped to the range
     * [{@link #MIN_TASKS_PER_WORKER}, {@link #MAX_TASKS_PER_WORKER}].
     *
     * @param list all contexts to process
     * @param i    number of workers; must be non-zero
     */
    private int getMaxPerThread(List<ContextWindow> list, int i) {
        int share = list.size() / i;
        return Math.max(MIN_TASKS_PER_WORKER, Math.min(MAX_TASKS_PER_WORKER, share));
    }
}
