package edu.pitt.dbmi.nlp.noble.util;

import edu.pitt.dbmi.nlp.noble.terminology.impl.NobleCoderTerminology;
import edu.pitt.dbmi.nlp.noble.tools.TextTools;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;

/* loaded from: input_file:edu/pitt/dbmi/nlp/noble/util/BlacklistHandler.class */
/**
 * Builds, persists and loads a "blacklist" of very frequent words for a
 * {@link NobleCoderTerminology}.
 *
 * <p>The blacklist maps each high-frequency word to the subset of its entries in the
 * storage word map whose words (as produced by {@link TextTools#getWords}) are all
 * high-frequency words themselves.
 */
public class BlacklistHandler {
    /** Fraction of the total vocabulary size used as the maximum number of top words. */
    public static final double CUTOFF = 0.002d;
    /** Fraction of the top words that are treated as "super top" words. */
    public static final double TOP_CUTOFF = 0.1d;
    /** Name of the file (inside the storage location) whose presence signals a saved blacklist. */
    public static final String BLACKLIST = "table_blacklist.d.0";
    /** A word must have a term count strictly greater than this to be considered frequent. */
    private static final int MIN_TERM_COUNT = 100;

    private NobleCoderTerminology.Storage st;
    private Map<String, Set<String>> blacklist;
    private boolean debug = true;

    /**
     * Creates a handler operating on the storage of the given terminology.
     *
     * @param nobleCoderTerminology terminology whose storage is inspected
     */
    public BlacklistHandler(NobleCoderTerminology nobleCoderTerminology) {
        this.st = nobleCoderTerminology.getStorage();
    }

    /**
     * Collects the most frequent words of the terminology.
     *
     * <p>Only words with a term count above {@link #MIN_TERM_COUNT} are candidates; they are
     * ordered by descending term count (ties: reverse lexicographic order) and truncated
     * to {@code wordCount * CUTOFF} entries.
     *
     * @return the top words, most frequent first
     */
    private List<String> getTopWords() {
        final Map<String, NobleCoderTerminology.WordStat> wordStatMap = this.st.getWordStatMap();
        // Every key inserted below is present in wordStatMap, so the lookups in the
        // comparator never return null.
        TreeMap<String, Integer> frequent = new TreeMap<String, Integer>(new Comparator<String>() {
            @Override
            public int compare(String a, String b) {
                int byCount = Integer.compare(wordStatMap.get(b).termCount, wordStatMap.get(a).termCount);
                return byCount != 0 ? byCount : b.compareTo(a);
            }
        });
        for (Map.Entry<String, NobleCoderTerminology.WordStat> entry : wordStatMap.entrySet()) {
            int termCount = entry.getValue().termCount;
            if (termCount > MIN_TERM_COUNT) {
                frequent.put(entry.getKey(), Integer.valueOf(termCount));
            }
        }
        if (this.debug) {
            System.out.println("total words: " + wordStatMap.size() + ", frequent words (> " + MIN_TERM_COUNT
                    + "): " + frequent.size() + ", cutoff: " + CUTOFF);
        }
        return getTopWords(frequent.keySet(), (int) (wordStatMap.size() * CUTOFF));
    }

    /**
     * Returns the first {@code i} elements of the collection in iteration order.
     *
     * @param collection source words, already in the desired order
     * @param i maximum number of words to return; values {@code <= 0} yield an empty list
     * @return a new list with at most {@code i} leading elements
     */
    private List<String> getTopWords(Collection<String> collection, int i) {
        List<String> head = new ArrayList<String>();
        for (String word : collection) {
            if (head.size() >= i) {
                break;
            }
            head.add(word);
        }
        return head;
    }

    /**
     * Returns the blacklist, computing and caching it on first use.
     *
     * <p>For every top word, the entries the storage word map holds for that word are kept
     * only when all of their words belong to the reference vocabulary: the "super top"
     * words if the word itself is a super top word, otherwise all top words.
     *
     * @return map from top word to its retained entries, in top-word order
     */
    public Map<String, Set<String>> getBlacklist() {
        if (this.blacklist == null) {
            List<String> topWords = getTopWords();
            List<String> superTopWords = getTopWords(topWords, (int) (topWords.size() * TOP_CUTOFF));
            if (this.debug) {
                System.out.println("top words: " + topWords.size() + ", super top words: " + superTopWords.size()
                        + ", cutoff: " + TOP_CUTOFF);
            }
            // Set views give constant-time membership tests; membership results are the
            // same as with the lists.
            Set<String> topSet = new LinkedHashSet<String>(topWords);
            Set<String> superTopSet = new LinkedHashSet<String>(superTopWords);

            Map<String, Set<String>> result = new LinkedHashMap<String, Set<String>>();
            for (String word : topWords) {
                Set<String> vocabulary = superTopSet.contains(word) ? superTopSet : topSet;
                Set<String> retained = new LinkedHashSet<String>();
                for (String term : this.st.getWordMap().get(word)) {
                    if (contains(term, vocabulary)) {
                        retained.add(term);
                    }
                }
                result.put(word, retained);
            }
            this.blacklist = result;
        }
        return this.blacklist;
    }

    /**
     * Checks whether a saved blacklist file exists in the storage location.
     *
     * @return {@code true} if the {@link #BLACKLIST} file exists
     */
    public boolean hasBlacklist() {
        return new File(this.st.getLocation(), BLACKLIST).exists();
    }

    /**
     * Writes the blacklist (computing it if necessary) to the "blacklist" table in the
     * storage location.
     *
     * @throws IOException if the underlying store reports an I/O failure
     */
    public void save() throws IOException {
        // Compute first so that a failure here does not leave a store opened.
        Map<String, Set<String>> content = getBlacklist();
        JDBMMap store = new JDBMMap(tablePath(), "blacklist", false);
        try {
            store.putAll(content);
            store.commit();
            store.compact();
        } finally {
            // Release the store even when writing fails.
            store.dispose();
        }
    }

    /**
     * Loads a previously saved blacklist if one exists; otherwise leaves the current state
     * untouched, so {@link #getBlacklist()} will compute it on demand.
     *
     * @throws IOException if the underlying store reports an I/O failure
     */
    public void load() throws IOException {
        if (hasBlacklist()) {
            // The third constructor argument differs from save() (true here, false there);
            // presumably it selects read-only access - confirm against JDBMMap.
            this.blacklist = new JDBMMap(tablePath(), "blacklist", true);
        }
    }

    /** Builds the path prefix of the table files inside the storage location. */
    private String tablePath() {
        return this.st.getLocation().getAbsolutePath() + File.separator + "table";
    }

    /**
     * Tells whether every word of {@code str} is a member of {@code vocabulary}.
     *
     * @param str text to split into words with {@link TextTools#getWords}
     * @param vocabulary allowed words
     * @return {@code true} if all words are in the vocabulary (also when there are no words)
     */
    private boolean contains(String str, Collection<String> vocabulary) {
        Iterator<String> words = TextTools.getWords(str).iterator();
        while (words.hasNext()) {
            if (!vocabulary.contains(words.next())) {
                return false;
            }
        }
        return true;
    }

    /**
     * Computes and saves the blacklist for the "RadLex" terminology, printing a summary.
     *
     * @param strArr ignored
     * @throws Exception if the terminology cannot be opened or the blacklist cannot be saved
     */
    public static void main(String[] strArr) throws Exception {
        BlacklistHandler blacklistHandler = new BlacklistHandler(new NobleCoderTerminology("RadLex"));
        System.out.println("calculating blacklist ...");
        Map<String, Set<String>> blacklist = blacklistHandler.getBlacklist();
        System.out.println("top words: " + blacklist.keySet());
        int termTotal = 0;
        for (Set<String> terms : blacklist.values()) {
            termTotal += terms.size();
        }
        System.out.println("identified " + blacklist.size() + " high frequency words with " + termTotal + " associated terms ..");
        System.out.println("saving ..");
        blacklistHandler.save();
    }
}
