/*
 * Decompiled with CFR 0.152.
 */
package dragon.nlp.tool.xtract;

import dragon.nlp.tool.xtract.WordPairStat;
import dragon.nlp.tool.xtract.WordPairStatList;
import dragon.util.FileUtil;
import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.Date;

/**
 * Filters the word pairs stored in {@code <workDir>/pairstat.list}.
 *
 * <p>A pair is kept when (1) its total frequency is at least {@code minStrength}
 * standard deviations above the mean pair frequency of its first or second word,
 * (2) the variance of its per-offset frequencies is at least {@code minSpread},
 * and (3) at least one offset has a z-score of at least {@code minZScore}.
 */
public class WordPairFilter {
    private final String workDir;
    private final int maxSpan;
    private final double minStrength;
    private final double minSpread;
    private final double minZScore;

    /**
     * @param workDir     directory containing {@code pairstat.list} and {@code wordkey.list}
     * @param maxSpan     largest offset (in either direction) for which frequencies are examined
     * @param minStrength minimum strength (z-score of the pair's total frequency) a pair must reach
     * @param minSpread   minimum variance of the per-offset frequencies of a pair
     * @param minZScore   minimum z-score an individual offset must reach to be retained
     */
    public WordPairFilter(String workDir, int maxSpan, double minStrength, double minSpread, double minZScore) {
        this.minStrength = minStrength;
        this.minSpread = minSpread;
        this.minZScore = minZScore;
        this.workDir = workDir;
        this.maxSpan = maxSpan;
    }

    /**
     * Loads the pair statistics, computes per-word statistics and returns the
     * pairs that pass all filters.
     *
     * @return the selected pairs (possibly an empty array, never {@code null})
     */
    public WordPairStat[] execute() {
        WordPairStatList list = new WordPairStatList(this.workDir + "/pairstat.list", this.maxSpan, false);
        int wordNum = this.readWordNum();
        double[][] arrWordStat = this.computeWordStat(wordNum, list);
        return this.filterWordPair(arrWordStat, list);
    }

    /**
     * Computes, for every word index, the mean and standard deviation of the
     * total frequencies of all pairs the word takes part in.
     *
     * @return a {@code wordNum x 3} table: column 0 is the mean, column 1 the
     *         standard deviation, column 2 the number of pairs. Rows of words
     *         without any pair stay all zero.
     */
    private double[][] computeWordStat(int wordNum, WordPairStatList list) {
        System.out.println(new Date().toString() + " Computing Word Stat...");
        // Java zero-initialises numeric arrays, so no explicit clearing is needed.
        double[][] arrWordStat = new double[wordNum][3];
        for (int i = 0; i < list.size(); ++i) {
            WordPairStat curPair = list.get(i);
            double freq = curPair.getTotalFrequency();
            accumulate(arrWordStat[curPair.getFirstWord()], freq);
            accumulate(arrWordStat[curPair.getSecondWord()], freq);
        }
        for (int i = 0; i < wordNum; ++i) {
            double[] stat = arrWordStat[i];
            if (!(stat[2] > 0.0)) {
                continue;
            }
            stat[0] = stat[0] / stat[2];
            double variance = stat[1] / stat[2] - Math.pow(stat[0], 2.0);
            // Rounding can push a (mathematically non-negative) variance slightly
            // below zero; clamp it so the square root never yields NaN.
            stat[1] = Math.sqrt(Math.max(0.0, variance));
        }
        return arrWordStat;
    }

    /** Adds one observation to a word's running sum, sum of squares and count. */
    private static void accumulate(double[] wordStat, double freq) {
        wordStat[0] = wordStat[0] + freq;
        // Square in double precision: an integer product could overflow.
        wordStat[1] = wordStat[1] + freq * freq;
        wordStat[2] = wordStat[2] + 1.0;
    }

    /**
     * Z-score of a pair frequency relative to one word's mean/deviation row;
     * defined as 0 when the deviation is 0.
     */
    private static double strength(double[] wordStat, double pairFreq) {
        return wordStat[1] == 0.0 ? 0.0 : (pairFreq - wordStat[0]) / wordStat[1];
    }

    /**
     * Applies the strength filter and then the spread/z-score filter to every
     * pair of the list.
     *
     * @param arrWordStat per-word statistics as produced by {@link #computeWordStat}
     * @return the pairs that passed both filters, in list order
     */
    private WordPairStat[] filterWordPair(double[][] arrWordStat, WordPairStatList list) {
        ArrayList<WordPairStat> selectedList = new ArrayList<WordPairStat>();
        for (int i = 0; i < list.size(); ++i) {
            if (i % 10000 == 0) {
                System.out.println(new Date().toString() + " processed: " + i);
            }
            WordPairStat curPair = list.get(i);
            double pairFreq = curPair.getTotalFrequency();
            // Try the first word's statistics; fall back to the second word's
            // statistics when the first does not reach the threshold.
            double strength = strength(arrWordStat[curPair.getFirstWord()], pairFreq);
            if (strength < this.minStrength) {
                strength = strength(arrWordStat[curPair.getSecondWord()], pairFreq);
            }
            if (!(strength >= this.minStrength)) {
                continue;
            }
            WordPairStat filteredPair = this.filterWordPair(curPair);
            if (filteredPair != null) {
                selectedList.add(filteredPair);
            }
        }
        return selectedList.toArray(new WordPairStat[selectedList.size()]);
    }

    /**
     * Applies the spread and z-score filters to a single pair.
     *
     * <p>The mean and variance are taken over the {@code 2 * maxSpan} offsets
     * {@code -maxSpan..-1} and {@code 1..maxSpan}. Note that this method modifies
     * {@code pair}: for every offset whose z-score is below {@code minZScore}
     * the current frequency is subtracted again via {@code addFrequency}.
     *
     * @return {@code pair} if its variance is at least {@code minSpread} and at
     *         least one offset reaches {@code minZScore}; {@code null} otherwise
     */
    private WordPairStat filterWordPair(WordPairStat pair) {
        double sum = 0.0;
        double squareSum = 0.0;
        for (int i = 1; i <= this.maxSpan; ++i) {
            int freq = pair.getFrequency(i);
            sum += (double) freq;
            // Cast before multiplying so the square cannot overflow int.
            squareSum += (double) freq * (double) freq;
            freq = pair.getFrequency(-i);
            sum += (double) freq;
            squareSum += (double) freq * (double) freq;
        }
        double mean = sum / 2.0 / (double) this.maxSpan;
        double spread = squareSum / 2.0 / (double) this.maxSpan - mean * mean;
        if (spread < this.minSpread) {
            return null;
        }
        boolean found = false;
        // From here on "spread" holds the standard deviation, not the variance.
        spread = Math.sqrt(spread);
        for (int i = 1; i <= this.maxSpan; ++i) {
            int freq = pair.getFrequency(i);
            if (((double) freq - mean) / spread >= this.minZScore) {
                found = true;
            } else {
                pair.addFrequency(i, -freq);
            }
            freq = pair.getFrequency(-i);
            if (((double) freq - mean) / spread >= this.minZScore) {
                found = true;
            } else {
                pair.addFrequency(-i, -freq);
            }
        }
        return found ? pair : null;
    }

    /**
     * Reads the number of words from the first line of
     * {@code <workDir>/wordkey.list}.
     *
     * @return the parsed number, or 0 if the file cannot be read or parsed
     *         (the failure is reported on standard error)
     */
    private int readWordNum() {
        String path = this.workDir + "/wordkey.list";
        BufferedReader br = null;
        try {
            br = FileUtil.getTextReader(path);
            if (br == null) {
                // NOTE(review): guards against a helper that reports failure by
                // returning null - confirm against FileUtil.getTextReader.
                System.err.println("Cannot open " + path);
                return 0;
            }
            String line = br.readLine();
            if (line == null) {
                System.err.println("Empty file: " + path);
                return 0;
            }
            return Integer.parseInt(line.trim());
        }
        catch (Exception e) {
            e.printStackTrace();
            return 0;
        }
        finally {
            // Always release the reader, including when reading or parsing failed.
            if (br != null) {
                try {
                    br.close();
                }
                catch (Exception ignored) {
                    // A failure while closing does not invalidate a value already read.
                }
            }
        }
    }
}

