/*
 * Decompiled with CFR 0.152.
 */
package dragon.nlp.extract;

import dragon.nlp.Document;
import dragon.nlp.DocumentParser;
import dragon.nlp.Paragraph;
import dragon.nlp.Sentence;
import dragon.nlp.Word;
import java.util.ArrayList;

/**
 * Rule-based parser that splits English text into paragraphs, sentences and words.
 *
 * <p>Paragraphs are separated by {@link #paraDelimitor}, sentences end at one of the
 * characters in {@link #sentDelimitor}, and words are separated by the characters in
 * {@link #wordDelimitor}. Periods and apostrophes receive special treatment through the
 * heuristic predicates defined at the bottom of this class.
 *
 * <p>Note that {@link #parseParagraph(String)} may modify {@link #sentDelimitor}, so an
 * instance carries mutable state.
 */
public class EngDocumentParser
implements DocumentParser {
    /** Default paragraph delimiter: two consecutive line feeds. */
    public static final String defParaDelimitor = "\n\n";
    /** Default set of characters that can terminate a sentence. */
    public static final String defSentDelimitor = ".;?!";
    /** Default set of characters that separate words (includes the space character). */
    public static final String defWordDelimitor = " \r\n\t_-.;,?/\"'`:(){}!+[]><=%$#*@&^~|\\";
    /** Characters for which a single-character word is given type code 4 by {@link #parseWord(String)}. */
    public static final String punctuations = "\r\n\t_-.;,?/\"'`:(){}!+[]><=%$#*@&^~|\\";

    /** Characters that separate words; set once by the constructor. */
    protected String wordDelimitor;
    /** String that separates paragraphs inside a document. */
    protected String paraDelimitor = defParaDelimitor;
    /** Characters that can end a sentence; {@link #parseParagraph(String)} prepends '.' if it is missing. */
    protected String sentDelimitor = defSentDelimitor;

    /** Creates a parser that uses {@link #defWordDelimitor}. */
    public EngDocumentParser() {
        this(null);
    }

    /**
     * Creates a parser with a custom set of word delimiters.
     *
     * @param wordDelimitor characters that separate words; {@code null} selects
     *                      {@link #defWordDelimitor}
     */
    public EngDocumentParser(String wordDelimitor) {
        this.wordDelimitor = wordDelimitor == null ? defWordDelimitor : wordDelimitor;
    }

    /**
     * Splits a document into paragraphs and parses each one.
     *
     * <p>Line endings are normalised to {@code '\n'} before splitting. Empty segments between
     * consecutive delimiters are skipped, as are segments for which
     * {@link #parseParagraph(String)} returns {@code null}.
     *
     * @param doc raw document text
     * @return the parsed document, or {@code null} if {@code doc} is null or empty or if an
     *         exception is raised while parsing (the stack trace is printed in that case)
     */
    @Override
    public Document parse(String doc) {
        if (doc == null || doc.length() == 0) {
            return null;
        }
        Document newDoc = new Document();
        // Literal replacement; no regular expression is needed for a fixed string.
        doc = doc.replace("\r\n", "\n");
        doc = doc.replace('\r', '\n');
        try {
            String delimiter = this.paraDelimitor;
            // An empty delimiter would match at every position and never advance.
            boolean splittable = delimiter != null && delimiter.length() > 0;
            int docLen = doc.length();
            int start = 0;
            // Run up to docLen (not docLen - 1) so a trailing one-character paragraph is kept.
            while (start < docLen) {
                int end = splittable ? doc.indexOf(delimiter, start) : -1;
                if (end < 0) {
                    // No further delimiter: the remainder is the last paragraph.
                    this.addParsedParagraph(newDoc, doc.substring(start));
                    break;
                }
                // end == start means two adjacent delimiters (or a leading one): nothing to add.
                if (end > start) {
                    this.addParsedParagraph(newDoc, doc.substring(start, end));
                }
                start = end + delimiter.length();
            }
            return newDoc;
        }
        catch (Exception e) {
            // Best-effort contract kept from the original: report and signal failure with null.
            e.printStackTrace();
            return null;
        }
    }

    /** Parses {@code text} as a paragraph and appends it to {@code target} unless the result is null. */
    private void addParsedParagraph(Document target, String text) {
        Paragraph parsed = this.parseParagraph(text);
        if (parsed != null) {
            target.addParagraph(parsed);
        }
    }

    /**
     * Splits a paragraph into sentences and parses each one.
     *
     * <p>Line feeds are replaced by spaces and the text is trimmed. If the text does not end
     * with a sentence delimiter a period is appended. Each sentence runs up to and including
     * the earliest boundary, where a boundary is either a period accepted by
     * {@link #isSentencePeriod(int, String)} or any other character of {@link #sentDelimitor}.
     *
     * <p>Side effect: if {@link #sentDelimitor} does not contain '.', the field is updated to
     * include it. {@link #parseSentence(String)} reads the same field, so this is kept.
     *
     * @param paragraph raw paragraph text
     * @return the parsed paragraph, or {@code null} if the text is null, empty or blank
     */
    @Override
    public Paragraph parseParagraph(String paragraph) {
        if (paragraph == null || paragraph.length() == 0) {
            return null;
        }
        Paragraph newPara = new Paragraph();
        if (this.sentDelimitor.indexOf('.') < 0) {
            this.sentDelimitor = "." + this.sentDelimitor;
        }
        paragraph = paragraph.replace('\n', ' ').trim();
        int len = paragraph.length();
        if (len == 0) {
            return null;
        }
        if (this.sentDelimitor.indexOf(paragraph.charAt(len - 1)) < 0) {
            paragraph = paragraph + ".";
            ++len;
        }
        int start = 0;
        while (start < len) {
            int boundary = this.findSentencePeriod(paragraph, start);
            // Any non-period delimiter that occurs earlier than the period wins.
            for (int i = 0; i < this.sentDelimitor.length(); ++i) {
                char delimiter = this.sentDelimitor.charAt(i);
                if (delimiter == '.') {
                    continue;
                }
                int pos = paragraph.indexOf(delimiter, start);
                if (pos < 0 || (boundary >= 0 && pos >= boundary)) {
                    continue;
                }
                boundary = pos;
            }
            // -1 is the "not found" sentinel, so index 0 is a valid boundary.
            if (boundary >= 0) {
                newPara.addSentence(this.parseSentence(paragraph.substring(start, boundary + 1)));
                start = boundary + 1;
            } else {
                // No boundary left (for example the text ends in an abbreviation period):
                // take the remainder and append an explicit terminator.
                newPara.addSentence(this.parseSentence(paragraph.substring(start) + "."));
                start = len;
            }
        }
        return newPara;
    }

    /** Returns the index of the first period at or after {@code from} that ends a sentence, or -1. */
    private int findSentencePeriod(String paragraph, int from) {
        int pos = paragraph.indexOf('.', from);
        while (pos >= 0) {
            if (this.isSentencePeriod(pos, paragraph)) {
                return pos;
            }
            pos = paragraph.indexOf('.', pos + 1);
        }
        return -1;
    }

    /**
     * Splits a sentence into words.
     *
     * <p>The text is trimmed. If its last character is in {@link #sentDelimitor} that character
     * becomes the sentence punctuation; otherwise a period is appended and used. The final
     * character is never emitted as a word. Spaces separate words without producing one; every
     * other character of {@link #wordDelimitor} is emitted as a word of its own. When '.' or
     * '\'' is not a word delimiter, {@link #isPeriodAsWord(int, int, String)} and
     * {@link #isApostrophesAsWord(int, int, String)} decide whether the character stays inside
     * the current word.
     *
     * @param sentence raw sentence text
     * @return the parsed sentence, or {@code null} if the text is null, empty or blank
     */
    @Override
    public Sentence parseSentence(String sentence) {
        if (sentence == null || sentence.length() == 0) {
            return null;
        }
        Sentence newSent = new Sentence();
        boolean checkPeriod = this.wordDelimitor.indexOf('.') < 0;
        boolean checkApostrophes = this.wordDelimitor.indexOf('\'') < 0;
        sentence = sentence.trim();
        int len = sentence.length();
        if (len <= 0) {
            return null;
        }
        char last = sentence.charAt(len - 1);
        if (this.sentDelimitor.indexOf(last) >= 0) {
            newSent.setPunctuation(last);
        } else {
            sentence = sentence + ".";
            newSent.setPunctuation('.');
            ++len;
        }
        // state: 0 = nothing seen yet, 1 = after a space,
        //        2 = a delimiter character is pending at 'start', 3 = inside a word.
        int state = 0;
        int start = 0;
        for (int i = 0; i < len - 1; ++i) {
            char ch = sentence.charAt(i);
            // Does a period/apostrophe that is not a configured delimiter stand alone as a word?
            boolean standalone;
            if (checkPeriod && ch == '.') {
                standalone = !this.isPeriodAsWord(i, start, sentence);
            } else if (checkApostrophes && ch == '\'') {
                standalone = this.isApostrophesAsWord(i, start, sentence) < 2;
            } else {
                standalone = false;
            }
            if (ch == ' ') {
                if (state >= 2) {
                    newSent.addWord(this.parseWord(sentence.substring(start, i)));
                }
                state = 1;
                continue;
            }
            if (standalone || this.wordDelimitor.indexOf(ch) >= 0) {
                if (state >= 2) {
                    newSent.addWord(this.parseWord(sentence.substring(start, i)));
                }
                start = i;
                state = 2;
                continue;
            }
            // Ordinary word character.
            if (state == 2) {
                // Emit the pending delimiter before the new word starts.
                newSent.addWord(this.parseWord(sentence.substring(start, i)));
                start = i;
            } else if (state == 1 || state == 0) {
                start = i;
            }
            state = 3;
        }
        if (state >= 2 && len - 1 > start) {
            newSent.addWord(this.parseWord(sentence.substring(start, len - 1)));
        }
        return newSent;
    }

    /**
     * Wraps text in a {@link Word} and assigns a type code where one applies.
     *
     * <p>Type code 2 is set when {@link #isNumber(String)} accepts the text; type code 4 is set
     * when the text is a single character contained in {@link #punctuations}. Otherwise the
     * type is left as created by the {@code Word} constructor.
     *
     * @param content text of the word
     * @return the new word
     */
    protected Word parseWord(String content) {
        Word cur = new Word(content);
        if (this.isNumber(content)) {
            cur.setType(2);
        } else if (content.length() == 1 && punctuations.indexOf(content) >= 0) {
            cur.setType(4);
        }
        return cur;
    }

    /**
     * Splits text into plain string tokens, dropping all delimiter characters.
     *
     * <p>Unlike {@link #parseSentence(String)} no delimiter is emitted as a token. When '.' or
     * '\'' is not a word delimiter, {@link #isPeriodAsToken(int, int, String)} and
     * {@link #isApostrophesAsWord(int, int, String)} decide whether the character stays inside
     * the current token.
     *
     * @param content text to tokenize
     * @return list of {@code String} tokens, or {@code null} if the text is null or blank
     */
    @Override
    public ArrayList parseTokens(String content) {
        if (content == null) {
            return null;
        }
        content = content.trim();
        int len = content.length();
        if (len == 0) {
            return null;
        }
        ArrayList<String> tokenList = new ArrayList<String>();
        boolean checkPeriod = this.wordDelimitor.indexOf('.') < 0;
        boolean checkApostrophes = this.wordDelimitor.indexOf('\'') < 0;
        boolean inToken = false;
        int start = 0;
        for (int i = 0; i < len; ++i) {
            char ch = content.charAt(i);
            boolean separator;
            if (checkPeriod && ch == '.') {
                separator = !this.isPeriodAsToken(i, start, content);
            } else if (checkApostrophes && ch == '\'') {
                separator = this.isApostrophesAsWord(i, start, content) < 2;
            } else {
                separator = false;
            }
            if (separator || this.wordDelimitor.indexOf(ch) >= 0) {
                if (inToken) {
                    tokenList.add(content.substring(start, i));
                }
                inToken = false;
                continue;
            }
            if (!inToken) {
                start = i;
                inToken = true;
            }
        }
        if (inToken) {
            tokenList.add(content.substring(start, len));
        }
        return tokenList;
    }

    /**
     * Decides whether a period belongs to the word that starts at {@code startPos}.
     *
     * <p>Rules, in order: the period must directly follow a letter with code point at most
     * U+00FF and must not be at {@code startPos}; it is kept if the character two positions
     * back (at an index greater than 0) is also a period, or if exactly one character precedes
     * it in the word; it is rejected if four or more characters precede it in the word;
     * otherwise it is kept unless it is followed by CR/LF or is the last character.
     *
     * @param periodPos index of the period in {@code context}
     * @param startPos  index where the current word starts
     * @param context   text being scanned
     * @return {@code true} if the period stays inside the word
     */
    protected boolean isPeriodAsWord(int periodPos, int startPos, String context) {
        if (periodPos == startPos) {
            return false;
        }
        char prev = context.charAt(periodPos - 1);
        if (!Character.isLetter(prev) || prev > '\u00ff') {
            return false;
        }
        if (periodPos - 2 > 0 && context.charAt(periodPos - 2) == '.') {
            return true;
        }
        if (periodPos == startPos + 1) {
            return true;
        }
        if (periodPos - startPos >= 4) {
            return false;
        }
        int len = context.length();
        if (periodPos < len - 2) {
            char next = context.charAt(periodPos + 1);
            return next != '\r' && next != '\n';
        }
        return periodPos != len - 1;
    }

    /**
     * Decides whether a period belongs to the token that starts at {@code startPos}.
     *
     * <p>Rules, in order: the period must directly follow a letter with code point at most
     * U+00FF and must not be at {@code startPos}; it is kept if the character two positions
     * back (at an index greater than 0) is also a period; it is rejected if four or more
     * characters precede it in the token; otherwise it is kept only when a letter follows.
     *
     * @param periodPos index of the period in {@code context}
     * @param startPos  index where the current token starts
     * @param context   text being scanned
     * @return {@code true} if the period stays inside the token
     */
    protected boolean isPeriodAsToken(int periodPos, int startPos, String context) {
        if (periodPos == startPos) {
            return false;
        }
        char prev = context.charAt(periodPos - 1);
        if (!Character.isLetter(prev) || prev > '\u00ff') {
            return false;
        }
        if (periodPos > 2 && context.charAt(periodPos - 2) == '.') {
            return true;
        }
        if (periodPos - startPos >= 4) {
            return false;
        }
        if (periodPos < context.length() - 1) {
            return Character.isLetter(context.charAt(periodPos + 1));
        }
        return false;
    }

    /**
     * Classifies an apostrophe by its neighbours.
     *
     * @param apoPos   index of the apostrophe in {@code context}
     * @param startPos index where the current word starts (not used by this implementation)
     * @param context  text being scanned
     * @return 0 if the apostrophe is first or last in the text, follows a space, or is not
     *         followed by a letter; 1 if it is followed by {@code 's'} and then a space;
     *         2 otherwise. Callers in this class keep the apostrophe inside the word only
     *         for 2.
     */
    protected int isApostrophesAsWord(int apoPos, int startPos, String context) {
        int lastIndex = context.length() - 1;
        if (apoPos == 0 || context.charAt(apoPos - 1) == ' ' || apoPos == lastIndex) {
            return 0;
        }
        char next = context.charAt(apoPos + 1);
        if (next == ' ' || !Character.isLetter(next)) {
            return 0;
        }
        if (next == 's' && apoPos + 2 < context.length() && context.charAt(apoPos + 2) == ' ') {
            return 1;
        }
        return 2;
    }

    /**
     * Decides whether the period at {@code pos} ends a sentence.
     *
     * <p>Rules, in order:
     * <ol>
     *   <li>a period at index 0 never ends a sentence;</li>
     *   <li>a period after a character above U+00FF always does;</li>
     *   <li>a period that is not last and is not followed by a space does not;</li>
     *   <li>a period after a single upper-case letter (at the start of the text or preceded by
     *       whitespace) does not;</li>
     *   <li>a period whose character two positions back (at an index greater than 0) is also a
     *       period does not;</li>
     *   <li>otherwise it does if it is within the last two characters, if the character after
     *       the following space is not a lower-case ASCII letter, if no space precedes it, if
     *       the preceding space is more than five positions back, or if the word before it
     *       starts with a lower-case ASCII letter.</li>
     * </ol>
     *
     * @param pos     index of the period in {@code context}
     * @param context text being scanned
     * @return {@code true} if the period is treated as a sentence boundary
     */
    protected boolean isSentencePeriod(int pos, String context) {
        if (pos == 0) {
            return false;
        }
        char prev = context.charAt(pos - 1);
        if (prev > '\u00ff') {
            return true;
        }
        int len = context.length();
        if (pos < len - 1 && context.charAt(pos + 1) != ' ') {
            return false;
        }
        if (Character.isUpperCase(prev) && (pos == 1 || Character.isWhitespace(context.charAt(pos - 2)))) {
            return false;
        }
        if (pos - 2 > 0 && context.charAt(pos - 2) == '.') {
            return false;
        }
        if (pos >= len - 2) {
            return true;
        }
        if (this.isUpper(context.charAt(pos + 2))) {
            return true;
        }
        int wordStart = context.lastIndexOf(' ', pos);
        if (wordStart < 0) {
            return true;
        }
        if (pos - wordStart > 5) {
            return true;
        }
        // A short word that starts with something other than a lower-case ASCII letter and is
        // followed by a lower-case word is not treated as a sentence end.
        return !this.isUpper(context.charAt(wordStart + 1));
    }

    /** Returns {@code true} for every character that is not a lower-case ASCII letter 'a'..'z'. */
    private boolean isUpper(char ch) {
        return ch < 'a' || ch > 'z';
    }

    /**
     * Tests whether text is accepted by {@link Double#parseDouble(String)}.
     *
     * <p>This follows {@code parseDouble} exactly, so besides plain decimals it also accepts
     * whatever other forms that method accepts.
     *
     * @param str text to test
     * @return {@code true} if the text parses as a double; {@code false} otherwise, including
     *         for {@code null}
     */
    protected boolean isNumber(String str) {
        if (str == null) {
            return false;
        }
        try {
            Double.parseDouble(str);
            return true;
        }
        catch (NumberFormatException e) {
            return false;
        }
    }
}

