/*
 * Decompiled with CFR 0.152.
 */
package dragon.ir.query;

import dragon.ir.kngbase.KnowledgeBase;
import dragon.ir.query.AbstractQueryGenerator;
import dragon.ir.query.IRQuery;
import dragon.ir.query.RelSimpleQuery;
import dragon.matrix.DoubleSparseMatrix;
import dragon.nlp.Document;
import dragon.nlp.DocumentParser;
import dragon.nlp.Phrase;
import dragon.nlp.SimpleElementList;
import dragon.nlp.Token;
import dragon.nlp.compare.IndexComparator;
import dragon.nlp.compare.WeightComparator;
import dragon.nlp.extract.PhraseExtractor;
import dragon.nlp.extract.TokenExtractor;
import dragon.onlinedb.Article;
import dragon.util.FormatUtil;
import dragon.util.SortedArray;
import java.text.DecimalFormat;
import java.util.ArrayList;

/**
 * Query generator that expands the terms extracted from a topic with terms
 * obtained from a translation matrix.
 *
 * <p>The topic text is tokenised, each distinct token is weighted by its
 * relative frequency, and then either the multi-word phrases of the topic
 * (when a {@link PhraseExtractor} was supplied) or the tokens themselves
 * (otherwise) are looked up as rows of the knowledge matrix. The non-zero
 * columns of the matching rows become the expansion terms. Original and
 * expansion weights are interpolated with {@code transCoefficient}, and the
 * result is rendered as a {@link RelSimpleQuery}.
 */
public class PhraseQEGenerator
extends AbstractQueryGenerator {
    /** Row-to-column scores taken from the knowledge base. */
    private DoubleSparseMatrix translationMatrix;
    /** Names of the matrix rows (the terms that can be translated). */
    private SimpleElementList phraseKeyList;
    /** Names of the matrix columns (the terms produced by translation). */
    private SimpleElementList tokenKeyList;
    /** Optional phrase extractor; {@code null} selects token-based expansion. */
    private PhraseExtractor phraseExtractor;
    private TokenExtractor tokenExtractor;
    /** Parser used to turn the selected article sections into paragraphs. */
    private DocumentParser parser;
    /** Share of the final weight given to translated terms; originals get {@code 1 - transCoefficient}. */
    private double transCoefficient;
    /** Number of terms the final query may contain beyond the distinct original tokens. */
    private int expandTermNum;
    private boolean useTitle;
    private boolean useAbt;
    private boolean useBody;
    private boolean useMeta;

    /**
     * Creates a generator that expands the topic's tokens directly (no phrase extraction).
     *
     * @param tokenKngBase knowledge base whose rows are looked up by token name
     * @param tokenExtractor extractor for the topic tokens; also supplies the document parser
     * @param transCoefficient interpolation weight of the translated terms
     * @param expandTermNum number of terms allowed in addition to the distinct topic tokens
     */
    public PhraseQEGenerator(KnowledgeBase tokenKngBase, TokenExtractor tokenExtractor, double transCoefficient, int expandTermNum) {
        this(tokenKngBase, null, tokenExtractor, transCoefficient, expandTermNum);
    }

    /**
     * Creates a generator that expands the topic's phrases, or its tokens when
     * {@code phraseExtractor} is {@code null}. Only the title is used as query
     * text until {@link #initialize(boolean, boolean, boolean, boolean)} is called.
     *
     * @param phraseKngBase knowledge base providing the translation matrix and its row/column key lists
     * @param phraseExtractor phrase extractor, or {@code null} for token-based expansion; when given,
     *        its sub-concept and single adjective/noun/verb options are switched off
     * @param tokenExtractor extractor for the topic tokens
     * @param transCoefficient interpolation weight of the translated terms
     * @param expandTermNum number of terms allowed in addition to the distinct topic tokens
     * @throws NullPointerException if {@code phraseKngBase} or {@code tokenExtractor} is {@code null}
     */
    public PhraseQEGenerator(KnowledgeBase phraseKngBase, PhraseExtractor phraseExtractor, TokenExtractor tokenExtractor, double transCoefficient, int expandTermNum) {
        // Fail fast with a message; without these checks the same NPE surfaces
        // here without explanation or, for tokenExtractor, only at query time.
        if (phraseKngBase == null) {
            throw new NullPointerException("phraseKngBase must not be null");
        }
        if (tokenExtractor == null) {
            throw new NullPointerException("tokenExtractor must not be null");
        }
        this.translationMatrix = phraseKngBase.getKnowledgeMatrix();
        this.phraseKeyList = phraseKngBase.getRowKeyList();
        this.tokenKeyList = phraseKngBase.getColumnKeyList();
        this.phraseExtractor = phraseExtractor;
        if (phraseExtractor != null) {
            phraseExtractor.setSubConceptOption(false);
            phraseExtractor.setSingleAdjectiveOption(false);
            phraseExtractor.setSingleNounOption(false);
            phraseExtractor.setSingleVerbOption(false);
            this.parser = phraseExtractor.getDocumentParser();
        } else {
            this.parser = tokenExtractor.getDocumentParser();
        }
        this.tokenExtractor = tokenExtractor;
        this.transCoefficient = transCoefficient;
        this.expandTermNum = expandTermNum;
        this.useTitle = true;
        this.useAbt = false;
        this.useBody = false;
        this.useMeta = false;
    }

    /**
     * Selects which sections of a topic article contribute text to the query.
     *
     * @param useTitle include the article title
     * @param useAbt include the article abstract
     * @param useBody include the article body
     * @param useMeta include the article meta section
     */
    public void initialize(boolean useTitle, boolean useAbt, boolean useBody, boolean useMeta) {
        this.useTitle = useTitle;
        this.useAbt = useAbt;
        this.useBody = useBody;
        this.useMeta = useMeta;
    }

    /**
     * Builds the expanded, weighted query for the given topic.
     *
     * @param topic article describing the information need
     * @return a {@link RelSimpleQuery} over the weighted original and expansion terms
     */
    @Override
    public IRQuery generate(Article topic) {
        return new RelSimpleQuery(this.genQueryString(this.genQuery(topic)));
    }

    /**
     * Renders the terms as space-separated {@code T(weight,TERM=name)} clauses.
     *
     * @param queryTerms list of {@link Token}s carrying a name and a weight
     * @return the query string, without leading or trailing whitespace
     */
    private String genQueryString(ArrayList queryTerms) {
        // The buffer never leaves this method, so the unsynchronised builder suffices.
        StringBuilder buf = new StringBuilder();
        DecimalFormat df = FormatUtil.getNumericFormat(1, 3);
        for (int i = 0; i < queryTerms.size(); ++i) {
            Token curToken = (Token)queryTerms.get(i);
            buf.append("T(");
            buf.append(df.format(curToken.getWeight()));
            buf.append(",TERM=");
            buf.append(curToken.getName());
            buf.append(") ");
        }
        return buf.toString().trim();
    }

    /**
     * Extracts the topic tokens, weights them by relative frequency and mixes in
     * the translated expansion terms.
     *
     * @param article topic article
     * @return the final term list; only the original tokens when nothing could be translated
     */
    private ArrayList genQuery(Article article) {
        SortedArray tokenList = new SortedArray();
        Document doc = this.getDocument(article);
        ArrayList extracted = this.tokenExtractor.extractFromDoc(doc);
        int total = 0;
        for (int i = 0; i < extracted.size(); ++i) {
            Token curToken = (Token)extracted.get(i);
            total += curToken.getFrequency();
            PhraseQEGenerator.addOrMergeFrequency(tokenList, curToken);
        }
        // Relative frequency; total > 0 whenever the loop body runs with counted tokens.
        for (int i = 0; i < tokenList.size(); ++i) {
            Token curToken = (Token)tokenList.get(i);
            curToken.setWeight((double)curToken.getFrequency() / (double)total);
        }
        // Size limit is fixed before expansion terms are merged in.
        int termNum = tokenList.size() + this.expandTermNum;

        ArrayList expansion;
        if (this.phraseExtractor != null) {
            expansion = this.translation(this.collectPhrases(doc));
        } else {
            expansion = this.translation(tokenList);
        }
        if (expansion == null) {
            // Nothing in the topic matched a row of the knowledge matrix.
            return tokenList;
        }

        PhraseQEGenerator.scaleWeights(tokenList, 1.0 - this.transCoefficient);
        for (int i = 0; i < expansion.size(); ++i) {
            Token newToken = (Token)expansion.get(i);
            newToken.setWeight(newToken.getWeight() * this.transCoefficient);
            if (tokenList.add(newToken)) {
                continue;
            }
            // Term already present: accumulate the translated weight on it.
            Token oldToken = (Token)tokenList.get(tokenList.insertedPos());
            oldToken.setWeight(oldToken.getWeight() + newToken.getWeight());
        }

        if (tokenList.size() > termNum) {
            // NOTE(review): presumably WeightComparator(true) orders by descending
            // weight, so trimming from the end drops the lightest terms - confirm.
            tokenList.setComparator(new WeightComparator(true));
            while (tokenList.size() > termNum) {
                tokenList.remove(tokenList.size() - 1);
            }
            double sum = 0.0;
            for (int i = 0; i < tokenList.size(); ++i) {
                sum += ((Token)tokenList.get(i)).getWeight();
            }
            // Renormalise the surviving weights. A zero sum would turn every
            // weight into NaN and corrupt the query string, so leave them as is.
            if (sum != 0.0) {
                PhraseQEGenerator.scaleWeights(tokenList, 1.0 / sum, sum);
            }
        }
        return tokenList;
    }

    /**
     * Collects the phrases of the document as frequency-carrying tokens, merging
     * duplicates. Phrases whose starting and ending word are equal are skipped
     * (presumably single-word phrases - confirm against {@link Phrase}).
     */
    private SortedArray collectPhrases(Document doc) {
        SortedArray phraseList = new SortedArray();
        ArrayList phrases = this.phraseExtractor.extractFromDoc(doc);
        for (int i = 0; i < phrases.size(); ++i) {
            Phrase curPhrase = (Phrase)phrases.get(i);
            if (curPhrase.getStartingWord().equals(curPhrase.getEndingWord())) {
                continue;
            }
            Token phraseToken = new Token(curPhrase.getName(), -1, curPhrase.getFrequency());
            PhraseQEGenerator.addOrMergeFrequency(phraseList, phraseToken);
        }
        return phraseList;
    }

    /**
     * Adds the token to the sorted list, or, when the list rejects it because an
     * equal entry exists, adds its frequency to that existing entry.
     */
    private static void addOrMergeFrequency(SortedArray target, Token token) {
        if (target.add(token)) {
            return;
        }
        Token existing = (Token)target.get(target.insertedPos());
        existing.addFrequency(token.getFrequency());
    }

    /** Multiplies the weight of every token in the list by {@code factor}. */
    private static void scaleWeights(ArrayList tokens, double factor) {
        for (int i = 0; i < tokens.size(); ++i) {
            Token token = (Token)tokens.get(i);
            token.setWeight(token.getWeight() * factor);
        }
    }

    /**
     * Divides the weight of every token in the list by {@code divisor}. The
     * {@code factor} argument is unused; the division is kept (rather than a
     * multiplication by the reciprocal) so results stay bit-identical.
     */
    private static void scaleWeights(ArrayList tokens, double factor, double divisor) {
        for (int i = 0; i < tokens.size(); ++i) {
            Token token = (Token)tokens.get(i);
            token.setWeight(token.getWeight() / divisor);
        }
    }

    /**
     * Parses the enabled sections of the article (title, abstract, body, meta,
     * in that order) into one document, one paragraph per section.
     */
    private Document getDocument(Article article) {
        Document doc = new Document();
        if (this.useTitle) {
            doc.addParagraph(this.parser.parseParagraph(article.getTitle()));
        }
        if (this.useAbt) {
            doc.addParagraph(this.parser.parseParagraph(article.getAbstract()));
        }
        if (this.useBody) {
            doc.addParagraph(this.parser.parseParagraph(article.getBody()));
        }
        if (this.useMeta) {
            doc.addParagraph(this.parser.parseParagraph(article.getMeta()));
        }
        return doc;
    }

    /**
     * Translates the given terms through the knowledge matrix.
     *
     * <p>Terms that are not a matrix row, or whose row has no non-zero entry, are
     * ignored. Each remaining term spreads its share of the matched frequency
     * mass over the non-zero columns of its row, scaled by the matrix score;
     * contributions to the same column are summed. As a side effect the index of
     * every matched input token is set to its row index.
     *
     * @param phraseList tokens (phrases or words) carrying a name and a frequency
     * @return the translated terms with their weights, or {@code null} when no input term matched
     */
    private ArrayList translation(ArrayList phraseList) {
        int total = 0;
        ArrayList<Token> matched = new ArrayList<Token>();
        for (int i = 0; i < phraseList.size(); ++i) {
            Token curToken = (Token)phraseList.get(i);
            int index = this.phraseKeyList.search(curToken.getName());
            if (index < 0 || this.translationMatrix.getNonZeroNumInRow(index) <= 0) {
                continue;
            }
            curToken.setIndex(index);
            total += curToken.getFrequency();
            matched.add(curToken);
        }
        if (matched.isEmpty()) {
            return null;
        }
        SortedArray tokenList = new SortedArray(new IndexComparator());
        for (int i = 0; i < matched.size(); ++i) {
            Token curToken = matched.get(i);
            int[] arrIndex = this.translationMatrix.getNonZeroColumnsInRow(curToken.getIndex());
            double[] arrProb = this.translationMatrix.getNonZeroDoubleScoresInRow(curToken.getIndex());
            for (int j = 0; j < arrIndex.length; ++j) {
                Token newToken = new Token(this.tokenKeyList.search(arrIndex[j]), arrIndex[j], 0);
                newToken.setWeight((double)curToken.getFrequency() / (double)total * arrProb[j]);
                if (tokenList.add(newToken)) {
                    continue;
                }
                // Column already produced by another source term: sum the weights.
                Token oldToken = (Token)tokenList.get(tokenList.insertedPos());
                oldToken.setWeight(oldToken.getWeight() + newToken.getWeight());
            }
        }
        return tokenList;
    }
}

