package net.clementlevallois.umigon.ngram.ops;

import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import net.clementlevallois.utils.Multiset;

/* loaded from: input_file:net/clementlevallois/umigon/ngram/ops/NGramDuplicatesCleaner.class */
/**
 * Prunes a frequency map of n-grams so that shorter n-grams which mostly occur
 * as part of a longer n-gram, as well as stop words, are dropped.
 *
 * <p>N-gram length is derived from the number of space characters in the
 * trimmed key: a key with {@code k} spaces is treated as a {@code (k + 1)}-gram.
 */
public class NGramDuplicatesCleaner {
    /** Maps with fewer entries than this use {@link #SMALL_MAP_RATIO}. */
    private static final int SMALL_MAP_SIZE = 500;
    /** Frequency ratio applied to maps smaller than {@link #SMALL_MAP_SIZE}. */
    private static final float SMALL_MAP_RATIO = 2.0f;
    /** Frequency ratio applied to all other maps. */
    private static final float LARGE_MAP_RATIO = 1.5f;

    /** Words that are never kept in the result; never {@code null} after construction. */
    Set<String> stopWords;

    /**
     * Creates a cleaner using the given stop words.
     *
     * @param set the stop words; {@code null} is treated as an empty set
     */
    public NGramDuplicatesCleaner(Set<String> set) {
        this.stopWords = set != null ? set : new HashSet<>();
    }

    /** Creates a cleaner with no stop words. */
    public NGramDuplicatesCleaner() {
        this.stopWords = new HashSet<>();
    }

    /**
     * Builds a cleaned frequency map from {@code map}.
     *
     * <p>A shorter n-gram is dropped when its count is lower than the count of a
     * longer n-gram containing it multiplied by a ratio (2.0 when the map has
     * fewer than 500 entries, 1.5 otherwise). Keys that are stop words are
     * dropped, and a bigram made of two stop words is dropped as well.
     *
     * <p><b>Side effect:</b> when {@code z} is {@code true}, entries whose count
     * is 1 are removed from {@code map} itself before any other processing.
     *
     * @param map n-gram to count; must not be {@code null} and must support
     *            removal through its entry-set iterator when {@code z} is
     *            {@code true} and a count-1 entry exists
     * @param i   the longest n-gram length (in words) to use as a container when
     *            looking for redundant shorter n-grams
     * @param z   whether to discard n-grams that occur exactly once
     * @return the internal map of the {@code Multiset} holding the surviving
     *         n-grams (trimmed) and their counts
     */
    public Map<String, Integer> removeDuplicates(Map<String, Integer> map, int i, boolean z) {
        float ratio = map.size() < SMALL_MAP_SIZE ? SMALL_MAP_RATIO : LARGE_MAP_RATIO;

        if (z) {
            // Explicit iterator (rather than removeIf) so that nothing is attempted
            // on the map unless an entry really has to go.
            Iterator<Map.Entry<String, Integer>> it = map.entrySet().iterator();
            while (it.hasNext()) {
                if (it.next().getValue().intValue() == 1) {
                    it.remove();
                }
            }
        }

        Set<String> wordsToBeRemoved = new HashSet<>();
        // Walk from the longest n-grams down to bigrams; "spaces" is the number of
        // separators, i.e. the n-gram length minus one.
        for (int spaces = i - 1; spaces > 0; spaces--) {
            for (Map.Entry<String, Integer> entry : map.entrySet()) {
                String ngram = entry.getKey().trim();
                if (countSpaces(ngram) != spaces) {
                    continue;
                }
                float threshold = entry.getValue().intValue() * ratio;
                if (spaces == 1) {
                    markBigramTerms(ngram, threshold, map, wordsToBeRemoved);
                } else {
                    markSubNGrams(ngram, spaces, threshold, map, wordsToBeRemoved);
                }
            }
        }

        Multiset<String> kept = new Multiset<>();
        for (Map.Entry<String, Integer> entry : map.entrySet()) {
            // Compare the trimmed key: the removal set only ever holds trimmed
            // strings, and the trimmed key is what ends up in the result.
            String ngram = entry.getKey().trim();
            if (!wordsToBeRemoved.contains(ngram) && !this.stopWords.contains(ngram)) {
                kept.addSeveral(ngram, entry.getValue());
            }
        }
        return kept.getInternalMap();
    }

    /** Returns the number of space characters (U+0020) in {@code text}. */
    private static long countSpaces(String text) {
        return text.chars().filter(c -> c == ' ').count();
    }

    /**
     * Marks for removal a stop-word-only bigram, any stop-word term of the
     * bigram, and any term whose own count is below {@code threshold}.
     */
    private void markBigramTerms(String bigram, float threshold, Map<String, Integer> map,
            Set<String> wordsToBeRemoved) {
        // The bigram is trimmed and holds exactly one space, so there are two parts.
        String[] terms = bigram.split(" ");
        String first = terms[0].trim();
        String second = terms[1].trim();
        boolean firstIsStopWord = this.stopWords.contains(first);
        boolean secondIsStopWord = this.stopWords.contains(second);

        if (firstIsStopWord && secondIsStopWord) {
            wordsToBeRemoved.add(bigram);
        }
        if (firstIsStopWord) {
            wordsToBeRemoved.add(first);
        }
        if (secondIsStopWord) {
            wordsToBeRemoved.add(second);
        }

        Integer firstCount = map.get(first);
        if (firstCount != null && firstCount.intValue() < threshold) {
            wordsToBeRemoved.add(first);
        }
        Integer secondCount = map.get(second);
        if (secondCount != null && secondCount.intValue() < threshold) {
            wordsToBeRemoved.add(second);
        }
    }

    /**
     * Marks for removal every sub-n-gram of {@code ngram} (as produced by
     * {@code NGramFinder.ngramsFinderJustAGivenLength(spaces, ngram)}) that is
     * present in {@code map} with a count below {@code threshold}. Nothing is
     * marked when the first word of {@code ngram} is a stop word.
     */
    private void markSubNGrams(String ngram, int spaces, float threshold, Map<String, Integer> map,
            Set<String> wordsToBeRemoved) {
        // ngram is trimmed and contains at least two spaces, so indexOf(' ') >= 1.
        String firstWord = ngram.substring(0, ngram.indexOf(' '));
        if (this.stopWords.contains(firstWord)) {
            // NOTE(review): assumes the NGramFinder call has no side effects, so
            // skipping it here is equivalent to calling it and ignoring the result.
            return;
        }
        Set<String> subNGrams = NGramFinder.ngramsFinderJustAGivenLength(spaces, ngram).getElementSet();
        for (String candidate : subNGrams) {
            String inner = candidate.trim();
            Integer innerCount = map.get(inner);
            if (innerCount != null && innerCount.intValue() < threshold) {
                wordsToBeRemoved.add(inner);
            }
        }
    }
}
