package dragon.nlp.tool.xtract;

import dragon.matrix.IntSuperSparseMatrix;
import dragon.nlp.Document;
import dragon.nlp.DocumentParser;
import dragon.nlp.Paragraph;
import dragon.nlp.Sentence;
import dragon.nlp.SimpleElementList;
import dragon.nlp.SimplePairList;
import dragon.nlp.Word;
import dragon.nlp.extract.EngDocumentParser;
import dragon.nlp.tool.Lemmatiser;
import dragon.nlp.tool.Tagger;
import dragon.onlinedb.Article;
import dragon.onlinedb.CollectionReader;
import java.io.File;
import java.util.Date;
import java.util.Locale;

/* loaded from: input_file:dragon/nlp/tool/xtract/EngWordPairIndexer.class */
public class EngWordPairIndexer implements WordPairIndexer {
    protected int maxSpan;
    protected Tagger tagger;
    protected Lemmatiser lemmatiser;
    protected WordPairGenerator pairGenerator;
    protected SentenceBase sentenceBase;
    protected IntSuperSparseMatrix[] arrPairSentLeftMatrix;
    protected IntSuperSparseMatrix[] arrPairSentRightMatrix;
    protected SimpleElementList docKeyList;
    protected SimpleElementList wordKeyList;
    protected SimplePairList pairKeyList;
    protected WordPairStatList wordpairStatList;
    protected DocumentParser parser;
    protected int flushInterval;
    protected int indexedNum;

    public EngWordPairIndexer(String str, int i, Tagger tagger, Lemmatiser lemmatiser) {
        this(str, i, tagger, lemmatiser, new EngWordPairGenerator(i));
    }

    public EngWordPairIndexer(String str, int i, Tagger tagger, Lemmatiser lemmatiser, WordPairGenerator wordPairGenerator) {
        this.maxSpan = i;
        this.tagger = tagger;
        this.lemmatiser = lemmatiser;
        this.pairGenerator = wordPairGenerator;
        this.flushInterval = 10000;
        new File(str).mkdirs();
        this.parser = new EngDocumentParser();
        this.sentenceBase = new SentenceBase(new StringBuffer().append(str).append("/sentencebase.index").toString(), new StringBuffer().append(str).append("/sentencebase.matrix").toString());
        this.docKeyList = new SimpleElementList(new StringBuffer().append(str).append("/dockey.list").toString(), true);
        this.wordKeyList = new SimpleElementList(new StringBuffer().append(str).append("/wordkey.list").toString(), true);
        this.pairKeyList = new SimplePairList(new StringBuffer().append(str).append("/pairkey.list").toString(), true);
        this.wordpairStatList = new WordPairStatList(new StringBuffer().append(str).append("/pairstat.list").toString(), i, true);
        this.arrPairSentRightMatrix = new IntSuperSparseMatrix[i];
        for (int i2 = 1; i2 <= i; i2++) {
            this.arrPairSentRightMatrix[i2 - 1] = new IntSuperSparseMatrix(new StringBuffer().append(str).append("/pairsentr").append(i2).append(".index").toString(), new StringBuffer().append(str).append("/pairsentr").append(i2).append(".matrix").toString(), false, false);
            this.arrPairSentRightMatrix[i2 - 1].setFlushInterval(Integer.MAX_VALUE);
        }
        this.arrPairSentLeftMatrix = new IntSuperSparseMatrix[i];
        for (int i3 = 1; i3 <= i; i3++) {
            this.arrPairSentLeftMatrix[i3 - 1] = new IntSuperSparseMatrix(new StringBuffer().append(str).append("/pairsentl").append(i3).append(".index").toString(), new StringBuffer().append(str).append("/pairsentl").append(i3).append(".matrix").toString(), false, false);
            this.arrPairSentLeftMatrix[i3 - 1].setFlushInterval(Integer.MAX_VALUE);
        }
    }

    @Override // dragon.nlp.tool.xtract.WordPairIndexer
    public DocumentParser getDocumentParser() {
        return this.parser;
    }

    @Override // dragon.nlp.tool.xtract.WordPairIndexer
    public void setDocumentParser(DocumentParser documentParser) {
        this.parser = documentParser;
    }

    @Override // dragon.nlp.tool.xtract.WordPairIndexer
    public void close() {
        this.sentenceBase.close();
        this.docKeyList.close();
        this.wordKeyList.close();
        this.wordpairStatList.close();
        this.pairKeyList.close();
        for (int i = 0; i < this.maxSpan; i++) {
            this.arrPairSentRightMatrix[i].finalizeData();
            this.arrPairSentRightMatrix[i].close();
        }
        for (int i2 = 0; i2 < this.maxSpan; i2++) {
            this.arrPairSentLeftMatrix[i2].finalizeData();
            this.arrPairSentLeftMatrix[i2].close();
        }
    }

    @Override // dragon.nlp.tool.xtract.WordPairIndexer
    public void flush() {
        for (int i = 0; i < this.maxSpan; i++) {
            this.arrPairSentRightMatrix[i].flush();
        }
        for (int i2 = 0; i2 < this.maxSpan; i2++) {
            this.arrPairSentLeftMatrix[i2].flush();
        }
    }

    @Override // dragon.nlp.tool.xtract.WordPairIndexer
    public void index(CollectionReader collectionReader) {
        try {
            this.indexedNum = 0;
            Article nextArticle = collectionReader.getNextArticle();
            while (nextArticle != null) {
                if (this.indexedNum > 0 && this.indexedNum % this.flushInterval == 0) {
                    flush();
                }
                indexArticle(nextArticle);
                this.indexedNum++;
                nextArticle = collectionReader.getNextArticle();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    @Override // dragon.nlp.tool.xtract.WordPairIndexer
    public boolean indexArticle(Article article) {
        try {
            if (this.docKeyList.contains(article.getKey())) {
                return true;
            }
            System.out.println(new StringBuffer().append(new Date().toString()).append(" ").append(article.getKey()).toString());
            this.docKeyList.add(article.getKey());
            Document document = new Document();
            document.addParagraph(this.parser.parseParagraph(article.getTitle()));
            document.addParagraph(this.parser.parseParagraph(article.getAbstract()));
            document.addParagraph(this.parser.parseParagraph(article.getBody()));
            for (Paragraph firstParagraph = document.getFirstParagraph(); firstParagraph != null; firstParagraph = firstParagraph.next) {
                for (Sentence firstSentence = firstParagraph.getFirstSentence(); firstSentence != null; firstSentence = firstSentence.next) {
                    indexSentence(firstSentence);
                }
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    private boolean indexSentence(Sentence sentence) {
        try {
            if (sentence.getWordNum() < 2) {
                return true;
            }
            preprocessSentence(sentence);
            int generate = this.pairGenerator.generate(sentence);
            if (generate <= 0) {
                return true;
            }
            int addSentence = this.sentenceBase.addSentence(sentence);
            for (int i = 0; i < generate; i++) {
                WordPairStat wordPairs = this.pairGenerator.getWordPairs(i);
                wordPairs.setIndex(this.pairKeyList.add(wordPairs.getFirstWord(), wordPairs.getSecondWord()));
                this.wordpairStatList.add(wordPairs);
                for (int i2 = 1; i2 <= this.maxSpan; i2++) {
                    if (wordPairs.getFrequency(i2) > 0) {
                        this.arrPairSentRightMatrix[i2 - 1].add(wordPairs.getIndex(), addSentence, wordPairs.getFrequency(i2));
                    }
                }
                for (int i3 = 1; i3 <= this.maxSpan; i3++) {
                    if (wordPairs.getFrequency(-i3) > 0) {
                        this.arrPairSentLeftMatrix[i3 - 1].add(wordPairs.getIndex(), addSentence, wordPairs.getFrequency(-i3));
                    }
                }
            }
            return true;
        } catch (Exception e) {
            e.printStackTrace();
            return false;
        }
    }

    protected void preprocessSentence(Sentence sentence) {
        if (this.tagger != null) {
            this.tagger.tag(sentence);
        }
        Word firstWord = sentence.getFirstWord();
        while (true) {
            Word word = firstWord;
            if (word == null) {
                return;
            }
            if (word.getPOSIndex() != 1) {
                word.setLemma(word.getContent().toLowerCase());
            } else if (this.lemmatiser != null) {
                word.setLemma(this.lemmatiser.lemmatize(word.getContent(), 1));
            } else {
                word.setLemma(word.getContent().toLowerCase());
            }
            word.setIndex(this.wordKeyList.add(word.getLemma()));
            firstWord = word.next;
        }
    }
}
