package net.loomchild.maligna.filter.modifier.modify.clean;

import java.util.List;
import net.loomchild.maligna.filter.modifier.modify.split.SplitAlgorithm;
import net.loomchild.maligna.model.vocabulary.Vocabulary;
import net.loomchild.maligna.model.vocabulary.VocabularyUtil;

/* loaded from: input_file:net/loomchild/maligna/filter/modifier/modify/clean/UnifyRareWordsCleanAlgorithm.class */
/**
 * Clean algorithm that keeps words found in a vocabulary and substitutes a
 * single placeholder word for every other word.
 *
 * <p>The input text is first divided into words by a {@link SplitAlgorithm};
 * the resulting words are then written back out separated by a space.
 */
public class UnifyRareWordsCleanAlgorithm extends CleanAlgorithm {
    /** Placeholder word used by the single-argument constructor. */
    public static final String DEFAULT_OTHER_WORD = "{OTHER}";

    /** Vocabulary consulted to decide whether a word is kept as is. */
    private Vocabulary vocabulary;

    /** Algorithm used to divide the input text into words. */
    private SplitAlgorithm splitAlgorithm;

    /** Word written in place of every word missing from the vocabulary. */
    private String otherWord;

    /**
     * Creates the algorithm with an explicit splitter and placeholder word.
     *
     * @param vocabulary vocabulary whose words are left unchanged
     * @param splitAlgorithm algorithm used to divide the text into words
     * @param str word substituted for each word not in the vocabulary
     */
    public UnifyRareWordsCleanAlgorithm(Vocabulary vocabulary, SplitAlgorithm splitAlgorithm, String str) {
        this.vocabulary = vocabulary;
        this.splitAlgorithm = splitAlgorithm;
        this.otherWord = str;
    }

    /**
     * Creates the algorithm using
     * {@link VocabularyUtil#DEFAULT_TOKENIZE_ALGORITHM} as the splitter and
     * {@link #DEFAULT_OTHER_WORD} as the placeholder word.
     *
     * @param vocabulary vocabulary whose words are left unchanged
     */
    public UnifyRareWordsCleanAlgorithm(Vocabulary vocabulary) {
        this(vocabulary, VocabularyUtil.DEFAULT_TOKENIZE_ALGORITHM, DEFAULT_OTHER_WORD);
    }

    /**
     * Splits the given text into words and rebuilds it, replacing each word
     * that the vocabulary does not contain with the placeholder word.
     *
     * @param str text to clean
     * @return the rebuilt text; an empty string when the split yields no words
     */
    @Override // net.loomchild.maligna.filter.modifier.modify.clean.CleanAlgorithm
    public String clean(String str) {
        StringBuilder result = new StringBuilder();
        for (String token : this.splitAlgorithm.split(str)) {
            // The separator is written only once the buffer already holds
            // text, so nothing precedes the first non-empty output.
            if (result.length() != 0) {
                result.append(" ");
            }
            result.append(this.vocabulary.containsWord(token) ? token : this.otherWord);
        }
        return result.toString();
    }
}
