/*
 * Decompiled with CFR 0.152.
 */
package dragon.config;

import dragon.config.BasicConfigureNode;
import dragon.config.CollectionReaderConfig;
import dragon.config.ConfigUtil;
import dragon.config.ConfigureNode;
import dragon.config.LemmatiserConfig;
import dragon.config.TaggerConfig;
import dragon.nlp.tool.Lemmatiser;
import dragon.nlp.tool.Tagger;
import dragon.nlp.tool.xtract.SimpleXtract;
import dragon.onlinedb.CollectionReader;
import dragon.util.FileUtil;
import dragon.util.SortedArray;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.util.ArrayList;
import java.util.Date;

/**
 * Command-line and programmatic entry point for configuration-driven phrase
 * extraction.
 *
 * <p>The settings are read from a {@link ConfigureNode}; the work itself is
 * delegated to {@link SimpleXtract}. Optionally, the extracted phrase file is
 * post-processed into a tab-separated vocabulary file.
 */
public class PhraseExtractAppConfig {
    /**
     * Runs the phrase extraction application described in a configuration file.
     *
     * @param args exactly two values: the configuration XML file and the numeric
     *             id of the {@code phraseextractapp} node to run
     */
    public static void main(String[] args) {
        if (args.length != 2) {
            System.out.println("Please input two parameters: configuration xml file and phrase extraction applicaiton id");
            return;
        }
        ConfigUtil util = new ConfigUtil();
        BasicConfigureNode root = new BasicConfigureNode(args[0]);
        ConfigureNode phraseAppNode = util.getConfigureNode(root, "phraseextractapp", Integer.parseInt(args[1]));
        if (phraseAppNode == null) {
            return;
        }
        PhraseExtractAppConfig phraseApp = new PhraseExtractAppConfig();
        phraseApp.phraseExtract(phraseAppNode);
    }

    /**
     * Reads the extraction settings from the given node and runs the extraction.
     *
     * <p>When the {@code indexing} option is true (its default), the lemmatiser,
     * tagger and collection readers named in the node are created and the
     * collections are indexed before extraction. Otherwise extraction runs
     * directly against {@code indexfolder}.
     *
     * @param phraseAppNode configuration node holding the application settings
     */
    public void phraseExtract(ConfigureNode phraseAppNode) {
        int maxSpan = phraseAppNode.getInt("maxspan", 4);
        String indexFolder = phraseAppNode.getString("indexfolder");
        String phraseFile = phraseAppNode.getString("phrasefile");
        boolean indexing = phraseAppNode.getBoolean("indexing", true);
        double strength = phraseAppNode.getDouble("strength", 1.0);
        double peakZScore = phraseAppNode.getDouble("peakzscore", 1.0);
        // The spread threshold defaults to the maximum span.
        double spread = phraseAppNode.getDouble("spread", maxSpan);
        double expandRatio = phraseAppNode.getDouble("expandratio", 0.75);
        String vobFile = phraseAppNode.getString("vobfile", null);
        int maxPhraseLength = phraseAppNode.getInt("maxphraselength", 4);

        if (!indexing) {
            this.phraseExtract(indexFolder, maxSpan, strength, peakZScore, spread, expandRatio, maxPhraseLength, phraseFile, vobFile);
            return;
        }

        CollectionReaderConfig collectionConfig = new CollectionReaderConfig();
        LemmatiserConfig lemmatiserConfig = new LemmatiserConfig();
        TaggerConfig taggerConfig = new TaggerConfig();
        int lemmatiserID = phraseAppNode.getInt("lemmatiser", 0);
        Lemmatiser lemmatiser = lemmatiserConfig.getLemmatiser(phraseAppNode, lemmatiserID);
        int taggerID = phraseAppNode.getInt("tagger", 0);
        Tagger tagger = taggerConfig.getTagger(phraseAppNode, taggerID);

        // "collectionreader" is a ';'-separated list of collection reader ids.
        // NOTE(review): presumably getString returns null when the option is
        // missing, which would fail on split() below -- confirm against ConfigureNode.
        String collectionIDs = phraseAppNode.getString("collectionreader");
        String[] arrCollection = collectionIDs.split(";");
        CollectionReader[] arrCollectionReader = new CollectionReader[arrCollection.length];
        for (int i = 0; i < arrCollection.length; ++i) {
            // Trim so that lists written as "1; 2" parse as well as "1;2".
            int readerID = Integer.parseInt(arrCollection[i].trim());
            arrCollectionReader[i] = collectionConfig.getCollectionReader(phraseAppNode, readerID);
        }
        String wordDelimitor = this.getWordDelimitor(phraseAppNode.getString("notworddelimitor", "."));
        this.phraseExtract(indexFolder, maxSpan, arrCollectionReader, lemmatiser, tagger, wordDelimitor, strength, peakZScore, spread, expandRatio, maxPhraseLength, phraseFile, vobFile);
    }

    /**
     * Extracts phrases from an index folder without indexing first.
     *
     * @param indexFolder     folder passed to {@link SimpleXtract}
     * @param maxSpan         maximum span passed to {@link SimpleXtract}
     * @param strength        strength threshold passed to the extractor
     * @param peakZScore      peak z-score threshold passed to the extractor
     * @param spread          spread threshold passed to the extractor
     * @param expandRatio     expand ratio passed to the extractor
     * @param maxPhraseLength maximum number of tokens of a phrase kept in the vocabulary
     * @param phraseFile      file the extractor writes the phrases to
     * @param vobFile         vocabulary file to generate, or {@code null} to skip it
     */
    public void phraseExtract(String indexFolder, int maxSpan, double strength, double peakZScore, double spread, double expandRatio, int maxPhraseLength, String phraseFile, String vobFile) {
        SimpleXtract xtract = new SimpleXtract(maxSpan, indexFolder);
        this.extractAndExport(xtract, strength, peakZScore, spread, expandRatio, maxPhraseLength, phraseFile, vobFile);
    }

    /**
     * Indexes the given collections and then extracts phrases from the index.
     *
     * @param indexFolder     folder passed to {@link SimpleXtract}
     * @param maxSpan         maximum span passed to {@link SimpleXtract}
     * @param crs             collection readers to index
     * @param lemmatiser      lemmatiser used while indexing
     * @param tagger          tagger used while indexing
     * @param wordDelimitor   characters treated as word delimiters while indexing
     * @param strength        strength threshold passed to the extractor
     * @param peakZScore      peak z-score threshold passed to the extractor
     * @param spread          spread threshold passed to the extractor
     * @param expandRatio     expand ratio passed to the extractor
     * @param maxPhraseLength maximum number of tokens of a phrase kept in the vocabulary
     * @param phraseFile      file the extractor writes the phrases to
     * @param vobFile         vocabulary file to generate, or {@code null} to skip it
     */
    public void phraseExtract(String indexFolder, int maxSpan, CollectionReader[] crs, Lemmatiser lemmatiser, Tagger tagger, String wordDelimitor, double strength, double peakZScore, double spread, double expandRatio, int maxPhraseLength, String phraseFile, String vobFile) {
        SimpleXtract xtract = new SimpleXtract(maxSpan, indexFolder);
        xtract.index(crs, tagger, lemmatiser, wordDelimitor);
        this.extractAndExport(xtract, strength, peakZScore, spread, expandRatio, maxPhraseLength, phraseFile, vobFile);
    }

    /**
     * Runs the extraction on a prepared extractor and, when a vocabulary file is
     * requested, derives it from the phrase file.
     */
    private void extractAndExport(SimpleXtract xtract, double strength, double peakZScore, double spread, double expandRatio, int maxPhraseLength, String phraseFile, String vobFile) {
        // Argument order follows the extractor's call: strength, spread, peak z-score.
        xtract.extract(strength, spread, peakZScore, expandRatio, phraseFile);
        if (vobFile != null) {
            this.generateVocabulary(phraseFile, maxPhraseLength, vobFile);
        }
    }

    /**
     * Builds a vocabulary file from a phrase file.
     *
     * <p>The phrases are post-processed first; if that step fails (it reports
     * the error itself and yields no list), no vocabulary file is written.
     *
     * @param phraseFile   phrase file to read
     * @param maxPhraseLen maximum number of tokens a phrase may have to be kept
     * @param vobFile      vocabulary file to write
     */
    public void generateVocabulary(String phraseFile, int maxPhraseLen, String vobFile) {
        SortedArray phrases = this.postProcessExtractedPhrase(phraseFile);
        if (phrases == null) {
            // Post-processing already printed the failure; there is nothing to write.
            return;
        }
        this.generateVocabulary(phrases, maxPhraseLen, vobFile);
    }

    /**
     * Writes the phrases having at most {@code maxLen} tokens to {@code outputFile}.
     *
     * <p>The first line holds the number of kept phrases and the smallest and
     * largest token count among them, tab-separated. Each following line holds
     * a phrase and its zero-based position, tab-separated. When no phrase is
     * kept, the smallest token count is written as {@code Integer.MAX_VALUE}.
     * Failures are printed to standard error and not propagated.
     */
    private void generateVocabulary(ArrayList phraseList, int maxLen, String outputFile) {
        BufferedWriter bw = null;
        try {
            System.out.println(new Date().toString() + " Printing vocabulary file...");
            bw = FileUtil.getTextWriter(outputFile);
            int min = Integer.MAX_VALUE;
            int max = 0;
            ArrayList newList = new ArrayList(phraseList.size());
            for (int i = 0; i < phraseList.size(); ++i) {
                String phrase = (String)phraseList.get(i);
                int num = this.getTokenNum(phrase);
                if (num > maxLen) {
                    continue;
                }
                newList.add(phrase);
                if (num > max) {
                    max = num;
                }
                if (num < min) {
                    min = num;
                }
            }
            bw.write(newList.size() + "\t" + min + "\t" + max + "\n");
            for (int i = 0; i < newList.size(); ++i) {
                bw.write((String)newList.get(i));
                bw.write('\t');
                bw.write(String.valueOf(i));
                bw.write('\n');
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
        finally {
            // Close on every path so a failed write does not leak the file handle.
            if (bw != null) {
                try {
                    bw.close();
                }
                catch (Exception closeError) {
                    closeError.printStackTrace();
                }
            }
        }
    }

    /**
     * Reads the phrase file and returns the cleaned multi-word phrases.
     *
     * <p>The first line of the file is skipped. For every other line only the
     * text before the first tab is used; it is normalised by
     * {@link #postProcessPhrase(String)} and kept only if it still contains a
     * space after its first character.
     *
     * @return the collected phrases, or {@code null} if reading or processing failed
     */
    private SortedArray postProcessExtractedPhrase(String phraseFile) {
        BufferedReader br = null;
        try {
            System.out.println(new Date().toString() + " Postprocessing Extracted Phrases...");
            SortedArray list = new SortedArray();
            br = FileUtil.getTextReader(phraseFile);
            // Skip the first line of the phrase file.
            br.readLine();
            String line;
            while ((line = br.readLine()) != null) {
                int pos = line.indexOf('\t');
                if (pos >= 0) {
                    line = line.substring(0, pos);
                }
                line = this.postProcessPhrase(line);
                if (line.indexOf(' ') <= 0) {
                    continue;
                }
                list.add(line);
            }
            return list;
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
        finally {
            // The reader was never closed before; release it on every path.
            if (br != null) {
                try {
                    br.close();
                }
                catch (Exception closeError) {
                    closeError.printStackTrace();
                }
            }
        }
    }

    /**
     * Normalises one phrase: hyphens, underscores and apostrophes become
     * spaces, runs of spaces collapse to one, leading title-like tokens are
     * removed and the result is lower-cased.
     *
     * @return the normalised phrase, or {@code null} if normalisation failed
     */
    private String postProcessPhrase(String content) {
        try {
            content = content.replace('-', ' ');
            content = content.replace('_', ' ');
            content = content.replace('\'', ' ');
            // Collapse every run of spaces; the earlier fixed sequence of
            // replacements left double spaces behind for long runs.
            content = content.replaceAll(" {2,}", " ");
            content = this.removePersonTitle(content);
            return content.toLowerCase();
        }
        catch (Exception e) {
            e.printStackTrace();
            return null;
        }
    }

    /**
     * Trims the text and strips leading tokens that end with a period and
     * contain no other period (for example {@code "Dr."}), repeating until the
     * first token no longer matches.
     */
    private String removePersonTitle(String content) {
        String rest = content.trim();
        while (true) {
            int pos = rest.indexOf(' ');
            boolean startsWithTitle = pos > 0
                    && rest.charAt(pos - 1) == '.'
                    && rest.lastIndexOf('.', pos - 2) < 0;
            if (!startsWithTitle) {
                return rest;
            }
            rest = rest.substring(pos + 1).trim();
        }
    }

    /**
     * Returns the token count of a term, computed as the number of whitespace
     * characters plus one.
     */
    private int getTokenNum(String term) {
        int count = 0;
        for (int i = 0; i < term.length(); ++i) {
            if (Character.isWhitespace(term.charAt(i))) {
                ++count;
            }
        }
        return count + 1;
    }

    /**
     * Returns the default word delimiter characters minus those listed in
     * {@code notWordDelimitor}.
     *
     * @param notWordDelimitor characters that must not act as delimiters; may be
     *                         {@code null} or empty to keep all defaults
     */
    private String getWordDelimitor(String notWordDelimitor) {
        String delimitors = " \r\n\t_-.;,?/\"'`:(){}!+[]><=%$#*@&^~|\\";
        // Must be "||": with "&&" a null argument caused a NullPointerException
        // and the shortcut was never taken.
        if (notWordDelimitor == null || notWordDelimitor.length() == 0) {
            return delimitors;
        }
        StringBuffer sb = new StringBuffer();
        for (int i = 0; i < delimitors.length(); ++i) {
            char candidate = delimitors.charAt(i);
            if (notWordDelimitor.indexOf(candidate) < 0) {
                sb.append(candidate);
            }
        }
        return sb.toString();
    }
}

