/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.pipe;

import cc.mallet.pipe.Pipe;
import cc.mallet.pipe.SimpleTokenizer;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureCounter;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.IDSorter;
import cc.mallet.types.Instance;
import java.io.File;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.Arrays;

/**
 * A pass-through pipe that tallies how often each feature index occurs in the
 * {@link FeatureSequence} data of the instances it sees, and offers helpers
 * that use those tallies to build a pruned alphabet, a stop-list, or word
 * lists on disk.
 *
 * <p>Instances are returned unmodified by {@link #pipe(Instance)}; only the
 * internal {@link FeatureCounter} is updated.
 */
public class FeatureCountPipe
extends Pipe {
    /** Running per-feature counts, keyed by index in the data alphabet. */
    FeatureCounter counter;
    static final long serialVersionUID = 1L;

    /**
     * Creates a pipe with a fresh data alphabet and no target alphabet.
     */
    public FeatureCountPipe() {
        super(new Alphabet(), null);
        this.counter = new FeatureCounter(this.getDataAlphabet());
    }

    /**
     * Creates a pipe over the given alphabets.
     *
     * @param dataAlphabet   alphabet whose feature indices are counted
     * @param targetAlphabet target alphabet handed to the superclass
     */
    public FeatureCountPipe(Alphabet dataAlphabet, Alphabet targetAlphabet) {
        super(dataAlphabet, targetAlphabet);
        this.counter = new FeatureCounter(dataAlphabet);
    }

    /**
     * Increments the count of every feature index in the instance's data.
     *
     * @param instance instance whose data must be a {@link FeatureSequence}
     * @return the same instance, unmodified
     * @throws IllegalArgumentException if the data is not a FeatureSequence
     *         (including when it is {@code null})
     */
    @Override
    public Instance pipe(Instance instance) {
        Object data = instance.getData();
        if (!(data instanceof FeatureSequence)) {
            // Build the description null-safely so that missing data is reported
            // as a bad argument rather than failing with a NullPointerException.
            String found = data == null ? "null" : String.valueOf(data.getClass());
            throw new IllegalArgumentException("Looking for a FeatureSequence, found a " + found);
        }
        FeatureSequence features = (FeatureSequence)data;
        for (int position = 0; position < features.size(); ++position) {
            this.counter.increment(features.getIndexAtPosition(position));
        }
        return instance;
    }

    /**
     * Builds a new alphabet holding only the entries of the data alphabet
     * whose count is at least {@code minimumCount}. Growth is stopped on the
     * returned alphabet before it is handed back.
     *
     * @param minimumCount smallest count an entry needs in order to be kept
     * @return the pruned alphabet
     */
    public Alphabet getPrunedAlphabet(int minimumCount) {
        Alphabet currentAlphabet = this.getDataAlphabet();
        Alphabet prunedAlphabet = new Alphabet();
        for (int feature = 0; feature < currentAlphabet.size(); ++feature) {
            if (this.counter.get(feature) >= minimumCount) {
                // Resolve the index to its entry in the current alphabet, then
                // register that entry with the pruned alphabet.
                prunedAlphabet.lookupIndex(currentAlphabet.lookupObject(feature));
            }
        }
        prunedAlphabet.stopGrowth();
        return prunedAlphabet;
    }

    /**
     * Writes every data-alphabet entry whose count is below
     * {@code minimumCount} to {@code prunedFile}, one entry per line.
     *
     * @param prunedFile   destination file
     * @param minimumCount entries with a count strictly below this are written
     * @throws IOException if the file cannot be opened for writing
     */
    public void writePrunedWords(File prunedFile, int minimumCount) throws IOException {
        PrintWriter out = new PrintWriter(prunedFile);
        try {
            Alphabet currentAlphabet = this.getDataAlphabet();
            for (int feature = 0; feature < currentAlphabet.size(); ++feature) {
                if (this.counter.get(feature) < minimumCount) {
                    out.println(currentAlphabet.lookupObject(feature));
                }
            }
        } finally {
            // Release the file handle even if a lookup fails part-way through.
            out.close();
        }
    }

    /**
     * Adds every data-alphabet entry whose count is below
     * {@code minimumCount} to the tokenizer's stop-list. Entries are cast to
     * {@code String}, so the alphabet is expected to contain strings.
     *
     * @param tokenizer    tokenizer that receives the stop words
     * @param minimumCount entries with a count strictly below this are stopped
     */
    public void addPrunedWordsToStoplist(SimpleTokenizer tokenizer, int minimumCount) {
        Alphabet currentAlphabet = this.getDataAlphabet();
        for (int feature = 0; feature < currentAlphabet.size(); ++feature) {
            if (this.counter.get(feature) < minimumCount) {
                tokenizer.stop((String)currentAlphabet.lookupObject(feature));
            }
        }
    }

    /**
     * Writes the first {@code totalWords} entries of the data alphabet, taken
     * in the natural ordering of {@link IDSorter} over their counts, to
     * {@code commonFile}, one entry per line. If the alphabet has fewer
     * entries than {@code totalWords}, all of them are written.
     *
     * <p>NOTE(review): presumably IDSorter orders highest count first, which
     * would make this a "most common words" list; confirm against IDSorter.
     *
     * @param commonFile destination file
     * @param totalWords maximum number of entries to write
     * @throws IOException if the file cannot be opened for writing
     */
    public void writeCommonWords(File commonFile, int totalWords) throws IOException {
        PrintWriter out = new PrintWriter(commonFile);
        try {
            Alphabet currentAlphabet = this.getDataAlphabet();
            IDSorter[] sortedWords = new IDSorter[currentAlphabet.size()];
            for (int type = 0; type < sortedWords.length; ++type) {
                sortedWords[type] = new IDSorter(type, this.counter.get(type));
            }
            Arrays.sort(sortedWords);
            int max = Math.min(totalWords, sortedWords.length);
            for (int rank = 0; rank < max; ++rank) {
                out.println(currentAlphabet.lookupObject(sortedWords[rank].getID()));
            }
        } finally {
            // Release the file handle even if sorting or a lookup fails.
            out.close();
        }
    }
}

