/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner;

import com.megginson.sax.XMLWriter;
import edu.northwestern.at.morphadorner.MorphAdornerSettings;
import edu.northwestern.at.morphadorner.PendingElement;
import edu.northwestern.at.morphadorner.SentenceAndWordNumber;
import edu.northwestern.at.morphadorner.WordAttributeNames;
import edu.northwestern.at.morphadorner.XMLWriterState;
import edu.northwestern.at.utils.FileNameUtils;
import edu.northwestern.at.utils.MapFactory;
import edu.northwestern.at.utils.QueueStack;
import edu.northwestern.at.utils.SetFactory;
import edu.northwestern.at.utils.SortedArrayList;
import edu.northwestern.at.utils.StringUtils;
import edu.northwestern.at.utils.corpuslinguistics.partsofspeech.PartOfSpeechTags;
import edu.northwestern.at.utils.corpuslinguistics.sentencemelder.XMLSentenceMelder;
import edu.northwestern.at.utils.math.ArithUtils;
import edu.northwestern.at.utils.xml.ExtendedXMLFilterImpl;
import java.text.NumberFormat;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;

/*
 * This class specifies class file version 49.0 but uses Java 6 signatures.  Assumed Java 6.
 */
public class IDFixerFilter
extends ExtendedXMLFilterImpl {
    // NOTE: the three formatters are static (shared by every instance) but are
    // reconfigured by each call to the instance method setIDFormat.

    /** Formats the numeric part of word IDs for the READING_CONTEXT_ORDER ID type. */
    protected static final NumberFormat ID_FORMATTER = NumberFormat.getInstance();
    /** Formats the page number part of word IDs for the WORD_WITHIN_PAGE_BLOCK ID type. */
    protected static final NumberFormat PAGE_FORMATTER = NumberFormat.getInstance();
    /** Formats the word-within-page part of word IDs for the WORD_WITHIN_PAGE_BLOCK ID type. */
    protected static final NumberFormat WORD_FORMATTER = NumberFormat.getInstance();
    /** Running ordinal, incremented each time a "w" element with a new ID value is started. */
    protected int wordOrdinal = 0;
    /** Raw ID attribute value of the previously started "w" element. */
    protected String lastID = "";
    /** Raw ID attribute value of the "w" element currently being started. */
    protected String id = "";
    /** Name of the attribute holding the word ID; replaced in the constructor from the settings. */
    protected String idAttrName = WordAttributeNames.id;
    /** Output file name stripped of path and extension (see setIDFormat); prefixes IDs and paths. */
    protected String baseFileName;
    /** Part of speech tag set used to classify tags and to obtain foreign word tags. */
    protected PartOfSpeechTags posTags;
    /** URI of the first element seen by startElement; null until then. */
    protected String elementURI = null;
    /** Copied from MorphAdornerSettings.outputWhitespaceElements in the constructor. */
    protected boolean outputWhitespace = true;
    /** When true, word attributes that merely repeat a default value are removed before output. */
    protected boolean outputNonredundantAttributesOnly = false;
    /** When true, the token attribute is removed when it equals the word text. */
    protected boolean outputNonredundantTokenAttribute = false;
    /** Copied from MorphAdornerSettings in the constructor; not otherwise read in this class. */
    protected boolean outputSentenceBoundaryMilestones = false;
    /** When true, pseudo-page milestone elements are emitted around runs of words. */
    protected boolean outputPseudoPageBoundaryMilestones = false;
    /** Number of emitted words after which a pseudo-page end milestone is written. */
    protected int pseudoPageSize = 500;
    /** Number of pseudo-page start milestones created so far. */
    protected int pseudoPageCount = 0;
    /** Words emitted since the last pseudo-page milestone was created. */
    protected int pseudoPageWordCount = 0;
    /** True after a start milestone is created, false after an end milestone is created. */
    protected boolean pseudoPageStarted = false;
    /** Total number of word elements written by emitWordElement. */
    protected int emittedWordCount = 0;
    /** Sentence melder used for whitespace output; created in setWriter. */
    protected XMLSentenceMelder sentenceMelder;
    /** Set after a word whose eos value is "1" and at non-soft element boundaries. */
    protected boolean isFirstWord = false;
    /** The "w" element being buffered until its text has been collected; null when none. */
    protected PendingElement pendingWordElement = null;
    /** Word ID to part count for split words, as supplied to the constructor. */
    protected Map<Integer, Integer> splitWords;
    /** Working copy of splitWords whose counts are decremented as word parts are seen. */
    protected Map<Integer, Integer> splitWordsCopy;
    /** One entry per open element: its foreign language tag, or an empty string. */
    protected QueueStack<String> foreignStack = new QueueStack();
    /** Writer state saved when a jump tag opens and restored when it closes. */
    protected QueueStack<XMLWriterState> jumpStack = new QueueStack();
    /** Lower-cased type of each open div element ("*div" when the type is missing). */
    protected QueueStack<String> divStack = new QueueStack();
    /** Lower-cased div types taken from MorphAdornerSettings.pseudoPageContainerDivTypes. */
    protected Set<String> pseudoPageContainerDivTypes = SetFactory.createNewSet();
    /** Receives one SentenceAndWordNumber entry for every emitted word element. */
    protected SortedArrayList<SentenceAndWordNumber> sortedWords;
    /** XML writer supplied through setWriter. */
    protected XMLWriter writer;
    /** Total word count supplied to the constructor; compared against emittedWordCount. */
    protected int totalWordsToEmit = 0;
    /** Number of "pb" elements seen so far. */
    protected int pageNumber = 0;
    /** Number of "w" elements started since the last "pb" element. */
    protected int wordNumberWithinPage = 0;
    /** Multiplier applied to the numeric part of generated word IDs. */
    protected int idSpacing = 10;
    /** Selects how the numeric part of a word ID is built (see startElement). */
    protected MorphAdornerSettings.XMLIDType idType = MorphAdornerSettings.XMLIDType.READING_CONTEXT_ORDER;
    /** When true, the word ordinal attribute is written on each word element. */
    protected boolean outputWordOrdinal = true;
    /** Maps language codes to language names; filled in the static initializer. */
    protected static Map<String, String> languageTags;

    /**
     * Creates a filter that rewrites word IDs and word attributes.
     *
     * <p>Output options are copied from the static {@code MorphAdornerSettings}
     * fields at construction time.</p>
     *
     * @param reader          the parent XML reader passed to the superclass
     * @param posTags         the part of speech tag set
     * @param outFile         output file name; its base name prefixes generated IDs
     * @param maxID           largest numeric word ID, used to size the ID format
     * @param sortedWords     list that receives an entry for each emitted word;
     *                        when {@code null} a private list is used instead
     * @param splitWords      map from word ID to number of parts for split words
     * @param totalWords      total number of words expected to be emitted
     * @param totalPageBreaks number of page breaks, used to size the page format
     */
    public IDFixerFilter(XMLReader reader, PartOfSpeechTags posTags, String outFile, int maxID, SortedArrayList<SentenceAndWordNumber> sortedWords, Map<Integer, Integer> splitWords, int totalWords, int totalPageBreaks) {
        super(reader);
        this.idAttrName = MorphAdornerSettings.xgOptions.getIdArgumentName();
        this.outputNonredundantAttributesOnly = MorphAdornerSettings.outputNonredundantAttributesOnly;
        this.outputNonredundantTokenAttribute = MorphAdornerSettings.outputNonredundantTokenAttribute;
        this.outputSentenceBoundaryMilestones = MorphAdornerSettings.outputSentenceBoundaryMilestones;
        this.outputWordOrdinal = MorphAdornerSettings.outputWordOrdinal;
        this.outputPseudoPageBoundaryMilestones = MorphAdornerSettings.outputPseudoPageBoundaryMilestones;
        this.pseudoPageSize = MorphAdornerSettings.pseudoPageSize;
        // Div types are stored lower-cased; startElement lower-cases the
        // types it pushes on the div stack the same way.
        for (String divType : StringUtils.makeTokenArray(MorphAdornerSettings.pseudoPageContainerDivTypes)) {
            this.pseudoPageContainerDivTypes.add(divType.toLowerCase());
        }
        this.outputWhitespace = MorphAdornerSettings.outputWhitespaceElements;
        // Keep the caller's list so the caller can read the collected words.
        // Fall back to a private list so emitting a word never hits a null field.
        this.sortedWords = sortedWords != null ? sortedWords : new SortedArrayList<SentenceAndWordNumber>();
        this.setSplitWords(splitWords);
        this.setPosTags(posTags);
        this.setIDFormat(outFile, maxID, totalPageBreaks);
        this.totalWordsToEmit = totalWords;
        this.elementURI = null;
    }

    /**
     * Handles the start of an element.
     *
     * <p>Every element pushes its foreign language tag on the foreign stack,
     * has its path attribute prefixed with a backslash and the base file name,
     * and loses any "TEIform" attribute. A "pb" element advances the page
     * number and restarts the word-within-page count.</p>
     *
     * <p>A "w" element is not written immediately. Its ID, ordinal, part and
     * the other word attributes are computed and the element is buffered in
     * {@code pendingWordElement} so its text can be collected first. Any other
     * element flushes a buffered word, maintains the div and jump stacks,
     * and is passed on unless its name starts with "zzzz".</p>
     *
     * @param uri       the element namespace URI
     * @param localName the element local name
     * @param qName     the element qualified name
     * @param atts      the element attributes
     * @throws SAXException if a downstream handler fails
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
        AttributesImpl newAtts = new AttributesImpl(atts);
        String path = atts.getValue(WordAttributeNames.p);
        this.foreignStack.push(this.getForeignLanguageTag(qName, atts));
        if (path != null && path.length() > 0) {
            this.setAttributeValue(newAtts, WordAttributeNames.p, "\\" + this.baseFileName + path);
        }
        String teiform = atts.getValue("TEIform");
        if (teiform != null && teiform.length() > 0) {
            this.removeAttribute(newAtts, "TEIform");
        }
        if (qName.equals("pb")) {
            ++this.pageNumber;
            this.wordNumberWithinPage = 0;
        }
        boolean isWord = qName.equals("w");
        if (isWord) {
            this.id = atts.getValue(this.idAttrName);
            String tok = atts.getValue(WordAttributeNames.tok);
            String spe = atts.getValue(WordAttributeNames.spe);
            String pos = atts.getValue(WordAttributeNames.pos);
            String eos = atts.getValue(WordAttributeNames.eos);
            String lem = atts.getValue(WordAttributeNames.lem);
            String reg = atts.getValue(WordAttributeNames.reg);

            // U+E501 is written out as a hyphen; U+E503 is dropped from the
            // token, spelling and lemma whenever the token contains it.
            tok = StringUtils.replaceAll(tok, "\ue501", "-");
            if (tok.indexOf("\ue503") >= 0) {
                tok = StringUtils.replaceAll(tok, "\ue503", "");
                spe = StringUtils.replaceAll(spe, "\ue503", "");
                lem = StringUtils.replaceAll(lem, "\ue503", "");
            }

            int thisID = Integer.parseInt(this.id);
            boolean idChanged = !this.id.equals(this.lastID);

            // A new ID means the buffered word is complete, so write it out.
            if (idChanged && this.pendingWordElement != null) {
                PendingElement pending = this.pendingWordElement;
                this.emitWordElement(pending.getURI(), pending.getLocalName(), pending.getQName(), pending.getAttributes(), pending.getText(), true, false);
                this.pendingWordElement = null;
            }

            // Part flag: "N" for an unsplit word, otherwise "I", "M" or "F"
            // depending on how many parts of this word are still to come.
            String part;
            if (this.splitWords.containsKey(thisID)) {
                int partsLeft = this.splitWordsCopy.get(thisID);
                if (partsLeft == this.splitWords.get(thisID)) {
                    part = "I";
                } else if (partsLeft <= 1) {
                    part = "F";
                } else {
                    part = "M";
                }
                this.splitWordsCopy.put(thisID, partsLeft - 1);
            } else {
                part = "N";
            }

            ++this.wordNumberWithinPage;
            StringBuilder idBuilder = new StringBuilder();
            idBuilder.append(this.baseFileName).append("-");
            switch (this.idType) {
                case READING_CONTEXT_ORDER:
                    idBuilder.append(ID_FORMATTER.format(thisID * this.idSpacing));
                    break;
                case WORD_WITHIN_PAGE_BLOCK:
                    idBuilder.append(PAGE_FORMATTER.format(this.pageNumber));
                    idBuilder.append("-");
                    idBuilder.append(WORD_FORMATTER.format(this.wordNumberWithinPage * this.idSpacing));
                    break;
                default:
                    break;
            }
            if (!part.equals("N")) {
                // Parts of a split word share the base ID and add ".<part number>".
                int partNumber = this.splitWords.get(thisID) - this.splitWordsCopy.get(thisID);
                idBuilder.append(".").append(partNumber);
            }
            this.setAttributeValue(newAtts, this.idAttrName, idBuilder.toString());

            if (idChanged) {
                ++this.wordOrdinal;
            }
            if (this.outputWordOrdinal) {
                this.setAttributeValue(newAtts, WordAttributeNames.ord, String.valueOf(this.wordOrdinal));
            }
            this.lastID = this.id;

            // Inside a foreign language context, words other than numbers,
            // symbols and punctuation take the foreign tag as part of speech
            // and their spelling as lemma.
            boolean useForeignTag = !this.foreignStack.isEmpty()
                && this.foreignStack.peek().length() > 0
                && !this.posTags.isNumberTag(pos)
                && !this.posTags.isSymbolTag(pos)
                && !this.posTags.isPunctuationTag(pos);
            if (useForeignTag) {
                pos = this.foreignStack.peek();
                lem = spe;
            }

            // Fill in defaults for missing attribute values.
            if (spe == null) {
                spe = tok;
            }
            if (pos == null) {
                pos = spe;
            }
            if (lem == null) {
                lem = spe;
            }
            if (eos == null) {
                eos = "0";
            }
            if (reg == null) {
                reg = spe;
            }
            this.setAttributeValue(newAtts, WordAttributeNames.eos, eos);
            this.setAttributeValue(newAtts, WordAttributeNames.lem, lem);
            this.setAttributeValue(newAtts, WordAttributeNames.pos, pos);
            this.setAttributeValue(newAtts, WordAttributeNames.reg, reg);
            this.setAttributeValue(newAtts, WordAttributeNames.spe, spe);
            this.setAttributeValue(newAtts, WordAttributeNames.tok, tok);
            this.setAttributeValue(newAtts, WordAttributeNames.part, part);

            if (this.outputNonredundantAttributesOnly) {
                // Drop attributes that only repeat their default value.
                if (eos.equals("0")) {
                    this.removeAttribute(newAtts, WordAttributeNames.eos);
                }
                if (spe.equals(tok)) {
                    this.removeAttribute(newAtts, WordAttributeNames.spe);
                }
                if (lem.equals(spe)) {
                    this.removeAttribute(newAtts, WordAttributeNames.lem);
                }
                if (pos.equals(spe)) {
                    this.removeAttribute(newAtts, WordAttributeNames.pos);
                }
                if (reg.equals(spe)) {
                    this.removeAttribute(newAtts, WordAttributeNames.reg);
                }
            }

            if (idChanged) {
                if (this.outputWhitespace) {
                    if (this.sentenceMelder.shouldOutputBlank(spe, this.isFirstWord)) {
                        this.sentenceMelder.outputBlank();
                    }
                    this.sentenceMelder.processWord(spe);
                }
                this.isFirstWord = eos.equals("1");
            }
            this.pendingWordElement = new PendingElement(uri, localName, qName, newAtts);
        } else {
            // A non-word element ends any buffered word.
            if (this.pendingWordElement != null) {
                PendingElement pending = this.pendingWordElement;
                this.emitWordElement(pending.getURI(), pending.getLocalName(), pending.getQName(), pending.getAttributes(), pending.getText(), true, false);
                this.pendingWordElement = null;
            }
            if (qName.equalsIgnoreCase("div")) {
                String divType = atts.getValue("type");
                if (divType == null || divType.length() == 0) {
                    divType = "*div";
                }
                this.divStack.push(divType.toLowerCase());
            } else if (!qName.equalsIgnoreCase("foreign") && !MorphAdornerSettings.xgOptions.isSoftTag(qName)) {
                // Jump tags save the current state so endElement can restore it.
                if (MorphAdornerSettings.xgOptions.isJumpTag(qName)) {
                    this.jumpStack.push(new XMLWriterState(this.isFirstWord, this.sentenceMelder));
                }
                this.sentenceMelder.reset();
                this.isFirstWord = true;
            }
        }

        // Remember the URI of the first element seen.
        if (this.elementURI == null) {
            this.elementURI = uri;
            if (this.outputWhitespace) {
                this.sentenceMelder.setURI(this.elementURI);
            }
        }

        if (!isWord && !qName.startsWith("zzzz")) {
            super.startElement(uri, localName, qName, newAtts);
        }
    }

    /**
     * Handles character data.
     *
     * <p>While a word element is buffered the text is appended to it;
     * otherwise the text is passed straight through.</p>
     *
     * @param ch     the character buffer
     * @param start  offset of the first character to use
     * @param length number of characters to use
     * @throws SAXException if a downstream handler fails
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (this.pendingWordElement == null) {
            super.characters(ch, start, length);
            return;
        }
        this.pendingWordElement.appendText(ch, start, length);
    }

    /**
     * Writes a complete word element (start tag, text, end tag) downstream
     * and records it in {@code sortedWords}.
     *
     * <p>When pseudo-page milestones are enabled, a start milestone is written
     * before the first word of a pseudo page and an end milestone after the
     * word that fills the page or is the last word to emit. Milestones are
     * only written around a word that is unsplit or the final part of a
     * split word.</p>
     *
     * @param uri                   the element namespace URI
     * @param localName             the element local name
     * @param qName                 the element qualified name
     * @param atts                  the word attributes; modified in place
     * @param wordText              the word text; {@code null} is written as empty text
     * @param allowOutputWhitespace whether a blank may be written after the word
     * @param forceEOS              when true the end-of-sentence attribute is set to "1"
     * @throws SAXException if a downstream handler fails
     */
    public void emitWordElement(String uri, String localName, String qName, AttributesImpl atts, String wordText, boolean allowOutputWhitespace, boolean forceEOS) throws SAXException {
        String p = atts.getValue(WordAttributeNames.p);
        String part = atts.getValue(WordAttributeNames.part);
        boolean isLastWordPart = part == null || part.equals("N") || part.equals("F");

        if (isLastWordPart && this.outputPseudoPageBoundaryMilestones && this.pseudoPageWordCount == 0 && !this.pseudoPageStarted) {
            // The path must be built before createPseudoPageElement bumps the page count.
            p = this.pseudoPageMilestonePath(p);
            this.emitPseudoPageElement(this.createPseudoPageElement(uri, false, true, p));
        }
        ++this.pseudoPageWordCount;

        if (forceEOS) {
            this.setAttributeValue(atts, WordAttributeNames.eos, "1");
        }
        String eos = atts.getValue(WordAttributeNames.eos);
        boolean emitSentenceBoundary = eos != null && eos.equals("1");

        if (this.outputNonredundantAttributesOnly || this.outputNonredundantTokenAttribute) {
            // The token attribute may be absent; only a present token that
            // repeats the word text is redundant.
            String tok = atts.getValue(WordAttributeNames.tok);
            if (tok != null && tok.equals(wordText)) {
                this.removeAttribute(atts, WordAttributeNames.tok);
            }
            if (part != null && part.equals("N")) {
                this.removeAttribute(atts, WordAttributeNames.part);
            }
        }
        this.removeAttribute(atts, WordAttributeNames.sn);
        this.removeAttribute(atts, WordAttributeNames.wn);

        super.startElement(uri, localName, qName, atts);
        // U+E501 is written as a hyphen and U+E503 is dropped from the text.
        String text = wordText == null ? "" : wordText;
        text = StringUtils.replaceAll(text, "\ue501", "-");
        text = StringUtils.replaceAll(text, "\ue503", "");
        super.characters(text.toCharArray(), 0, text.length());
        super.endElement(uri, localName, qName);

        String wordID = atts.getValue(this.idAttrName);
        String ord = atts.getValue(WordAttributeNames.ord);
        if (ord == null) {
            ord = "0";
        }
        this.sortedWords.add(new SentenceAndWordNumber(wordID, Integer.parseInt(ord), part, emitSentenceBoundary));
        ++this.emittedWordCount;

        if (this.outputWhitespace && allowOutputWhitespace && this.isFirstWord && isLastWordPart) {
            this.sentenceMelder.outputBlank();
        }

        boolean pageFull = this.pseudoPageWordCount >= this.pseudoPageSize;
        boolean allWordsEmitted = this.emittedWordCount >= this.totalWordsToEmit;
        if (isLastWordPart && this.outputPseudoPageBoundaryMilestones && (pageFull || allWordsEmitted)) {
            p = this.pseudoPageMilestonePath(p);
            this.emitPseudoPageElement(this.createPseudoPageElement(uri, false, false, p));
        }
    }

    /**
     * Builds the path attribute value for a pseudo-page milestone.
     *
     * <p>Everything after the last backslash of the given path is replaced
     * by a milestone step numbered one past the current pseudo-page count.
     * A null or empty path is returned unchanged.</p>
     *
     * @param path the path to derive the milestone path from; may be null
     * @return the milestone path, or {@code path} itself when null or empty
     */
    private String pseudoPageMilestonePath(String path) {
        if (path == null || path.length() == 0) {
            return path;
        }
        int bsPos = path.lastIndexOf("\\");
        String parent = bsPos > 0 ? path.substring(0, bsPos) : path;
        return parent + "\\milestone[" + (this.pseudoPageCount + 1) + "]";
    }

    /**
     * Handles the end of an element.
     *
     * <p>Pops the foreign stack entry pushed by the matching start tag and,
     * for a div, the div stack entry as well. A buffered word is written
     * before any non-word end tag. The sentence melder state is restored
     * after a jump tag and reset after any other non-soft tag. When
     * pseudo-page milestones are enabled and the closed div has one of the
     * configured container types, the current pseudo page is ended and a new
     * one started, provided words remain to be emitted.</p>
     *
     * @param uri       the element namespace URI
     * @param localName the element local name
     * @param qName     the element qualified name
     * @throws SAXException if a downstream handler fails
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        // startElement pushes one foreign stack entry for every element, so
        // this pop happens on every end tag. The div stack must therefore be
        // handled independently of it, not as an alternative to it.
        if (!this.foreignStack.isEmpty()) {
            this.foreignStack.pop();
        }
        boolean removedDiv = false;
        String removedDivType = "";
        // Same case-insensitive match startElement uses when pushing.
        if (qName.equalsIgnoreCase("div") && !this.divStack.isEmpty()) {
            removedDivType = this.divStack.pop();
            removedDiv = true;
        }

        boolean isJumpTag = MorphAdornerSettings.xgOptions.isJumpTag(qName);
        boolean isSoftTag = MorphAdornerSettings.xgOptions.isSoftTag(qName);
        boolean isHardTag = !isJumpTag && !isSoftTag;
        boolean isWordTag = qName.equals("w");

        if (this.pendingWordElement != null && !isWordTag) {
            boolean forceEOS = isHardTag && MorphAdornerSettings.closeSentenceAtEndOfHardTag || isJumpTag && MorphAdornerSettings.closeSentenceAtEndOfJumpTag;
            PendingElement pending = this.pendingWordElement;
            // isWordTag is false here, so whitespace is allowed only after soft tags.
            this.emitWordElement(pending.getURI(), pending.getLocalName(), pending.getQName(), pending.getAttributes(), pending.getText(), isSoftTag, forceEOS);
            this.pendingWordElement = null;
        }

        // Word end tags are written by emitWordElement; "zzzz" elements are dropped.
        if (!isWordTag && !qName.startsWith("zzzz")) {
            super.endElement(uri, localName, qName);
        }

        if (isJumpTag) {
            if (!this.jumpStack.isEmpty()) {
                XMLWriterState state = this.jumpStack.pop();
                this.isFirstWord = state.getIsFirstWord();
                this.sentenceMelder.setState(state.getSentenceMelderState());
            }
        } else if (!isSoftTag) {
            this.sentenceMelder.reset();
            this.isFirstWord = true;
        }

        boolean closedContainerDiv = removedDiv && this.pseudoPageContainerDivTypes.contains(removedDivType);
        if (this.outputPseudoPageBoundaryMilestones && closedContainerDiv && this.emittedWordCount < this.totalWordsToEmit) {
            // No path is available at a div boundary, so the milestones carry none.
            this.emitPseudoPageElement(this.createPseudoPageElement(uri, false, false, null));
            this.emitPseudoPageElement(this.createPseudoPageElement(uri, false, true, null));
        }
    }

    /**
     * Creates a pseudo-page milestone element and updates the pseudo-page state.
     *
     * <p>A start milestone increments the pseudo-page count. Either kind
     * resets the count of words in the current pseudo page.</p>
     *
     * @param uri        the namespace URI for the milestone element
     * @param forcedEmit not used by this method
     * @param start      true for a start milestone, false for an end milestone
     * @param path       value for the path attribute; omitted when null or empty
     * @return the milestone element, ready to be emitted
     */
    public PendingElement createPseudoPageElement(String uri, boolean forcedEmit, boolean start, String path) {
        this.pseudoPageStarted = start;
        if (start) {
            ++this.pseudoPageCount;
        }
        this.pseudoPageWordCount = 0;

        String position;
        if (start) {
            position = "start";
        } else {
            position = "end";
        }

        AttributesImpl milestoneAtts = new AttributesImpl();
        this.setAttributeValue(milestoneAtts, "unit", "pseudopage");
        this.setAttributeValue(milestoneAtts, "n", String.valueOf(this.pseudoPageCount));
        this.setAttributeValue(milestoneAtts, "position", position);
        boolean hasPath = !(path == null || path.length() == 0);
        if (hasPath) {
            this.setAttributeValue(milestoneAtts, WordAttributeNames.p, path);
        }
        return new PendingElement(uri, "milestone", "milestone", milestoneAtts);
    }

    /**
     * Writes a pseudo-page milestone as an empty element.
     *
     * <p>Does nothing for a null element. Any exception raised while writing
     * is discarded, so a failed milestone does not interrupt processing.</p>
     *
     * @param pseudoPageElement the milestone element to write; may be null
     */
    public void emitPseudoPageElement(PendingElement pseudoPageElement) {
        if (pseudoPageElement == null) {
            return;
        }
        try {
            String milestoneUri = pseudoPageElement.getURI();
            String milestoneLocalName = pseudoPageElement.getLocalName();
            String milestoneQName = pseudoPageElement.getQName();
            super.startElement(milestoneUri, milestoneLocalName, milestoneQName, pseudoPageElement.getAttributes());
            super.endElement(milestoneUri, milestoneLocalName, milestoneQName);
        }
        catch (Exception ignored) {
            // Best effort: a milestone that cannot be written is skipped.
        }
    }

    /**
     * Sets the part of speech tag set used by this filter.
     *
     * @param posTags the part of speech tags
     */
    public void setPosTags(PartOfSpeechTags posTags) {
        this.posTags = posTags;
    }

    /**
     * Stores the split words map and creates the working copy whose counts
     * are decremented as word parts are processed.
     *
     * @param splitWords map from word ID to number of parts; {@code null} is
     *                   treated as "no split words"
     */
    protected void setSplitWords(Map<Integer, Integer> splitWords) {
        Map<Integer, Integer> workingCopy = MapFactory.createNewMap();
        if (splitWords == null) {
            // Use an empty map so later lookups do not need null checks.
            this.splitWords = MapFactory.createNewMap();
        } else {
            this.splitWords = splitWords;
            workingCopy.putAll(splitWords);
        }
        this.splitWordsCopy = workingCopy;
    }

    /**
     * Derives the ID prefix from the output file name and sizes the ID,
     * page and word number formats.
     *
     * <p>The prefix is the output file name without path or extension, with
     * periods replaced by underscores. The ID type and spacing are read from
     * {@code MorphAdornerSettings}. Note that the formatters being sized are
     * static and therefore shared by all instances.</p>
     *
     * @param outFile       the output file name
     * @param maxID         the largest numeric word ID
     * @param maxPageBreaks the number of page breaks; when not positive the
     *                      word format is sized like the ID format
     */
    protected void setIDFormat(String outFile, int maxID, int maxPageBreaks) {
        this.baseFileName = FileNameUtils.stripPathName(outFile);
        this.baseFileName = FileNameUtils.changeFileExtension(this.baseFileName, "");
        this.baseFileName = StringUtils.replaceAll(this.baseFileName, ".", "_");
        this.idType = MorphAdornerSettings.xmlIDType;
        this.idSpacing = MorphAdornerSettings.xmlIDSpacing;

        // Widths are computed in long arithmetic so a large ID times the
        // spacing cannot overflow int, and by counting digits exactly so
        // powers of ten are not subject to floating-point rounding.
        int numIDDigits = countDecimalDigits((long) maxID * this.idSpacing);
        ID_FORMATTER.setMinimumIntegerDigits(numIDDigits);

        int numPageDigits = countDecimalDigits(maxPageBreaks);
        PAGE_FORMATTER.setMinimumIntegerDigits(numPageDigits);

        int numWordDigits = maxPageBreaks > 0 ? countDecimalDigits(999L * this.idSpacing) : numIDDigits;
        WORD_FORMATTER.setMinimumIntegerDigits(numWordDigits);
    }

    /**
     * Counts the decimal digits of a positive value.
     *
     * @param value the value to measure
     * @return the number of decimal digits, or 1 when the value is not positive
     */
    private static int countDecimalDigits(long value) {
        return value > 0L ? Long.toString(value).length() : 1;
    }

    /**
     * Sets the XML writer and creates a new sentence melder that uses it.
     *
     * @param writer the XML writer
     */
    public void setWriter(XMLWriter writer) {
        this.writer = writer;
        XMLSentenceMelder melder = new XMLSentenceMelder(writer);
        this.sentenceMelder = melder;
    }

    /**
     * Determines the foreign language part of speech tag for an element.
     *
     * <p>The language is read from the "xml:lang" attribute, falling back to
     * "lang". Only the part before the first hyphen is used, and it is
     * matched without regard to case because language tags are
     * case-insensitive. Codes not in the language table map to "other".</p>
     *
     * <p>When the element declares no language, the tag of the enclosing
     * element is inherited from the foreign stack. If the stack is empty, a
     * "foreign" element gets the tag for "unknown" and any other element
     * gets an empty string.</p>
     *
     * @param qName the element qualified name
     * @param atts  the element attributes
     * @return the foreign language tag, or an empty string when none applies
     */
    public String getForeignLanguageTag(String qName, Attributes atts) {
        String language = atts.getValue("xml:lang");
        if (language == null) {
            language = atts.getValue("lang");
        }
        if (language == null) {
            if (!this.foreignStack.isEmpty()) {
                return this.foreignStack.peek();
            }
            if (qName.equals("foreign")) {
                return this.posTags.getForeignWordTag("unknown");
            }
            return "";
        }
        int hyphenPos = language.indexOf("-");
        String primarySubtag = hyphenPos >= 0 ? language.substring(0, hyphenPos) : language;
        // A fixed locale keeps the lower-casing independent of the default locale.
        String languageName = languageTags.get(primarySubtag.toLowerCase(java.util.Locale.ENGLISH));
        if (languageName == null) {
            languageName = "other";
        }
        return this.posTags.getForeignWordTag(languageName);
    }

    static {
        PAGE_FORMATTER.setMinimumIntegerDigits(4);
        WORD_FORMATTER.setMinimumIntegerDigits(3);
        ID_FORMATTER.setMinimumIntegerDigits(8);
        PAGE_FORMATTER.setGroupingUsed(false);
        WORD_FORMATTER.setGroupingUsed(false);
        ID_FORMATTER.setGroupingUsed(false);
        languageTags = new TreeMap<String, String>();
        languageTags.put("deu", "german");
        languageTags.put("de", "german");
        languageTags.put("fra", "french");
        languageTags.put("fre", "french");
        languageTags.put("fr", "french");
        languageTags.put("grc", "greek");
        languageTags.put("gre", "greek");
        languageTags.put("ell", "greek");
        languageTags.put("el", "greek");
        languageTags.put("heb", "hebrew");
        languageTags.put("he", "hebrew");
        languageTags.put("ita", "italian");
        languageTags.put("it", "italian");
        languageTags.put("lat", "latin");
        languageTags.put("la", "latin");
    }
}

