/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.reader.muc7;

import de.julielab.jcore.reader.muc7.MUC7Coreference;
import de.julielab.jcore.types.Paragraph;
import de.julielab.jcore.types.Section;
import de.julielab.jcore.types.muc7.Coref;
import de.julielab.jcore.types.muc7.ENAMEX;
import de.julielab.jcore.types.muc7.MUC7Header;
import de.julielab.jcore.types.muc7.NUMEX;
import de.julielab.jcore.types.muc7.TIMEX;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Logger;
import org.apache.uima.util.Progress;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

/**
 * UIMA collection reader for MUC-7 XML files.
 *
 * <p>All regular files of the directory named by the {@value #PARAM_INPUTDIR}
 * parameter are parsed during {@link #initialize()}; their {@code DOC}
 * elements are grouped by the text of their {@code DOCID} child. Each call to
 * {@link #getNext(CAS)} handles one such group: the document text is built
 * from the first {@code DOC} node of the group, then header, section,
 * paragraph, ENAMEX/TIMEX/NUMEX and coreference annotations are added for
 * every {@code DOC} node of the group. An annotation is only added when no
 * equal one is in the CAS yet.
 */
public class MUC7Reader
extends CollectionReader_ImplBase {
    private static final String ELEMENT_DOCS = "DOC";
    private static final String ELEMENT_DOCID = "DOCID";
    private static final String ELEMENT_STORYID = "STORYID";
    private static final String ELEMENT_SLUG = "SLUG";
    private static final String ELEMENT_DATE = "DATE";
    private static final String ELEMENT_NWORDS = "NWORDS";
    private static final String ELEMENT_PREAMBLE = "PREAMBLE";
    private static final String ELEMENT_TEXT = "TEXT";
    private static final String ELEMENT_PARAGRAPH = "p";
    private static final String ELEMENT_COREF = "COREF";
    private static final String ELEMENT_TIMEX = "TIMEX";
    private static final String ELEMENT_ENAMEX = "ENAMEX";
    private static final String ELEMENT_NUMEX = "NUMEX";
    private static final String ELEMENT_NE_MIN = "MIN";
    private static final String ELEMENT_NE_TYPE = "TYPE";
    private static final String ELEMENT_TRAILER = "TRAILER";
    private static final String ATTRIBUTE_ID = "ID";
    private static final String ATTRIBUTE_REF = "REF";
    private static final String NODE_NAME_TEXT = "#text";

    private static final String SECTION_TYPE_SLUG = "Slug";
    private static final String SECTION_TYPE_DATE = "Date";
    private static final String SECTION_TYPE_NWORDS = "Number of Words";
    private static final String SECTION_TYPE_PREAMBLE = "Preamble";
    private static final String SECTION_TYPE_TEXT = "Text";
    private static final String SECTION_TYPE_TRAILER = "Trailer";

    /** Names of the elements whose text content is concatenated, in this order, into the document text. */
    public static final String[] ELEMENT_TEXT_TO_BE_PROCESSED = new String[]{"SLUG", "DATE", "NWORDS", "PREAMBLE", "TEXT", "TRAILER"};
    /** Name of the configuration parameter holding the input directory. */
    public static final String PARAM_INPUTDIR = "InputDirectory";

    // Compiled once: normalizeString() runs for every section, paragraph and inline element.
    // Upper-case run followed by ';' -- presumably the remainder of SGML entities such as "AMP;" (TODO confirm).
    private static final Pattern UPPERCASE_RUN_WITH_SEMICOLON = Pattern.compile("[A-Z]+;");
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");
    private static final Pattern LEADING_WHITESPACE = Pattern.compile("^[\\s]+");

    private Logger logger;
    /** Offset in the document text from which the next section or paragraph is searched. */
    private int startPosition;
    /** Coreference mentions of the DOC node currently being processed, keyed by their ID attribute. */
    private Map<Integer, MUC7Coreference> corefHashMap = new HashMap<Integer, MUC7Coreference>();
    private List<File> files;
    private Map<String, List<Node>> docIDDocNodeHash;
    private Iterator<String> keyIter;
    private JCas jcas;
    private DocumentBuilder builder;

    /**
     * Parses every given file and groups all DOC elements by the text of their DOCID child.
     * Files that cannot be read or parsed are logged and skipped, as are DOC elements
     * without a non-empty DOCID.
     *
     * @param files the files to parse
     * @return map from DOCID text (not normalized) to the DOC nodes carrying it
     */
    private Map<String, List<Node>> buildDocIDDocNodeHash(List<File> files) {
        Map<String, List<Node>> nodesByDocId = new HashMap<String, List<Node>>();
        for (File file : files) {
            this.logger.log(Level.INFO, "buildDocIDDocNodeHash() -- Reading file " + file.getName());
            try {
                Document doc = this.builder.parse(file);
                NodeList docElements = doc.getElementsByTagName(ELEMENT_DOCS);
                for (int i = 0; i < docElements.getLength(); ++i) {
                    Node docNode = docElements.item(i);
                    Node docIdNode = this.findFirst(docNode, ELEMENT_DOCID);
                    if (docIdNode == null) {
                        this.logger.log(Level.WARNING, "buildDocIDDocNodeHash() -- Skipping a " + ELEMENT_DOCS + " element without " + ELEMENT_DOCID + " in file " + file.getName());
                        continue;
                    }
                    String docID = docIdNode.getTextContent();
                    List<Node> docNodes = nodesByDocId.get(docID);
                    if (docNodes == null) {
                        docNodes = new ArrayList<Node>();
                        nodesByDocId.put(docID, docNodes);
                    }
                    docNodes.add(docNode);
                }
            }
            catch (SAXException e) {
                // Best effort: one broken file must not prevent reading the others.
                this.logger.log(Level.SEVERE, "buildDocIDDocNodeHash() -- Skipping file " + file.getName() + ": it could not be parsed.", e);
            }
            catch (IOException e) {
                this.logger.log(Level.SEVERE, "buildDocIDDocNodeHash() -- Skipping file " + file.getName() + ": it could not be read.", e);
            }
        }
        return nodesByDocId;
    }

    /**
     * Concatenates the text of all elements named in {@link #ELEMENT_TEXT_TO_BE_PROCESSED},
     * normalizes it and sets it as the document text of the current CAS.
     *
     * @param docNode the DOC node providing the text
     * @return the normalized document text
     */
    private String annotateTextToBeProcessed(Node docNode) {
        StringBuilder raw = new StringBuilder();
        for (Node sectionNode : this.getChildrenNodes(docNode, ELEMENT_TEXT_TO_BE_PROCESSED, new ArrayList<Node>())) {
            raw.append(sectionNode.getTextContent());
        }
        String textToBeProcessed = this.normalizeString(raw.toString());
        this.jcas.setDocumentText(textToBeProcessed);
        return textToBeProcessed;
    }

    /**
     * Adds a {@link MUC7Header} with the normalized DOCID and STORYID unless an equal one exists.
     * Nothing is added (and a warning is logged) when either element is missing.
     */
    private void annotateHeader(Node docNode) {
        Node docIdNode = this.findFirst(docNode, ELEMENT_DOCID);
        Node storyIdNode = this.findFirst(docNode, ELEMENT_STORYID);
        if (docIdNode == null || storyIdNode == null) {
            this.logger.log(Level.WARNING, "annotateHeader() -- " + ELEMENT_DOCID + " or " + ELEMENT_STORYID + " is missing; no header annotation created.");
            return;
        }
        String docID = this.normalizeString(docIdNode.getTextContent());
        String storyID = this.normalizeString(storyIdNode.getTextContent());
        for (MUC7Header existing : this.jcas.getAnnotationIndex(MUC7Header.type)) {
            if (existing.getDocId().equals(docID) && existing.getStoryID().equals(storyID)) {
                return;
            }
        }
        MUC7Header muc7Header = new MUC7Header(this.jcas);
        muc7Header.setDocId(docID);
        muc7Header.setStoryID(storyID);
        muc7Header.addToIndexes(this.jcas);
    }

    private void annotateSlug(Node docNode, String textToBeProcessed) {
        this.annotateLeafSection(docNode, ELEMENT_SLUG, SECTION_TYPE_SLUG, textToBeProcessed);
    }

    private void annotateDate(Node docNode, String textToBeProcessed) {
        this.annotateLeafSection(docNode, ELEMENT_DATE, SECTION_TYPE_DATE, textToBeProcessed);
    }

    private void annotateNumOfWords(Node docNode, String textToBeProcessed) {
        this.annotateLeafSection(docNode, ELEMENT_NWORDS, SECTION_TYPE_NWORDS, textToBeProcessed);
    }

    private void annotatePreamble(Node docNode, String textToBeProcessed) {
        this.annotateLeafSection(docNode, ELEMENT_PREAMBLE, SECTION_TYPE_PREAMBLE, textToBeProcessed);
    }

    private void annotateTrailer(Node docNode, String textToBeProcessed) {
        this.annotateLeafSection(docNode, ELEMENT_TRAILER, SECTION_TYPE_TRAILER, textToBeProcessed);
    }

    /**
     * Annotates the TEXT section. Its inline markup is handled paragraph by paragraph, so the
     * search position is set to the section's begin (not its end) before the paragraphs are located.
     */
    private void annotateText(Node docNode, String textToBeProcessed) {
        Node textNode = this.findFirst(docNode, ELEMENT_TEXT);
        if (textNode == null) {
            this.logger.log(Level.WARNING, "annotateText() -- No " + ELEMENT_TEXT + " element found; section skipped.");
            return;
        }
        int[] beginEnd = this.getBeginEndOfSequence(this.normalizeString(textNode.getTextContent()), textToBeProcessed, this.startPosition);
        this.startPosition = beginEnd[0];
        this.annotateParagraphs(textNode, textToBeProcessed);
        this.addSectionIfAbsent(SECTION_TYPE_TEXT, beginEnd);
    }

    /**
     * Locates the first element named {@code elementName} in the document text, advances the
     * search position behind it, annotates its inline markup and adds a {@link Section} for it.
     * A missing element is logged and skipped.
     */
    private void annotateLeafSection(Node docNode, String elementName, String sectionType, String textToBeProcessed) {
        Node sectionNode = this.findFirst(docNode, elementName);
        if (sectionNode == null) {
            this.logger.log(Level.WARNING, "annotateLeafSection() -- No " + elementName + " element found; section '" + sectionType + "' skipped.");
            return;
        }
        int[] beginEnd = this.getBeginEndOfSequence(this.normalizeString(sectionNode.getTextContent()), textToBeProcessed, this.startPosition);
        this.startPosition = beginEnd[1];
        this.annotateInlineMarkup(sectionNode, beginEnd);
        this.addSectionIfAbsent(sectionType, beginEnd);
    }

    /** Collects the coreference mentions and adds the ENAMEX, TIMEX and NUMEX annotations found below {@code containerNode}. */
    private void annotateInlineMarkup(Node containerNode, int[] beginEnd) {
        this.buildCorefHashMap(containerNode, beginEnd);
        this.annotateENAMEX(containerNode, beginEnd);
        this.annotateTIMEX(containerNode, beginEnd);
        this.annotateNUMEX(containerNode, beginEnd);
    }

    /** Adds a {@link Section} of the given type and span unless one with the same span and type exists. */
    private void addSectionIfAbsent(String sectionType, int[] beginEnd) {
        for (Section existing : this.jcas.getAnnotationIndex(Section.type)) {
            if (existing.getBegin() == beginEnd[0] && existing.getEnd() == beginEnd[1] && sectionType.equals(existing.getSectionType())) {
                return;
            }
        }
        Section section = new Section(this.jcas);
        section.setSectionType(sectionType);
        section.setBegin(beginEnd[0]);
        section.setEnd(beginEnd[1]);
        section.addToIndexes(this.jcas);
    }

    /**
     * Locates every {@code p} element below {@code docNode} in the document text, annotates its
     * inline markup and adds a {@link Paragraph} for it unless one with the same span exists.
     */
    private void annotateParagraphs(Node docNode, String textToBeProcessed) {
        for (Node paragraphNode : this.findAll(docNode, ELEMENT_PARAGRAPH)) {
            int[] beginEnd = this.getBeginEndOfSequence(this.normalizeString(paragraphNode.getTextContent()), textToBeProcessed, this.startPosition);
            this.startPosition = beginEnd[1];
            this.annotateInlineMarkup(paragraphNode, beginEnd);
            boolean exist = false;
            for (Paragraph existing : this.jcas.getAnnotationIndex(Paragraph.type)) {
                if (existing.getBegin() == beginEnd[0] && existing.getEnd() == beginEnd[1]) {
                    exist = true;
                    break;
                }
            }
            if (!exist) {
                Paragraph para = new Paragraph(this.jcas);
                para.setBegin(beginEnd[0]);
                para.setEnd(beginEnd[1]);
                para.addToIndexes(this.jcas);
            }
        }
    }

    /**
     * Adds a {@link TIMEX} annotation for every TIMEX element below {@code docNode}, unless an
     * annotation with the same span, type and MIN value exists.
     *
     * @param docNode the section or paragraph node containing the elements
     * @param beginEnd span of {@code docNode} in the document text; only the begin is used
     */
    private void annotateTIMEX(Node docNode, int[] beginEnd) {
        List<Node> timexNodes = this.findAll(docNode, ELEMENT_TIMEX);
        if (timexNodes.isEmpty()) {
            return;
        }
        String containerText = this.normalizeString(docNode.getTextContent());
        for (Node timexNode : timexNodes) {
            String min = this.normalizedAttributeOrEmpty(timexNode, ELEMENT_NE_MIN);
            String type = this.normalizeString(MUC7Reader.attribute(timexNode, ELEMENT_NE_TYPE).getNodeValue());
            int[] span = this.locateInline(timexNode, docNode, containerText, beginEnd[0]);
            boolean exist = false;
            for (TIMEX existing : this.jcas.getAnnotationIndex(TIMEX.type)) {
                if (existing.getBegin() == span[0] && existing.getEnd() == span[1] && existing.getSpecificType().equals(type) && existing.getMin().equals(min)) {
                    exist = true;
                    break;
                }
            }
            if (!exist) {
                TIMEX timex = new TIMEX(this.jcas);
                timex.setBegin(span[0]);
                timex.setEnd(span[1]);
                timex.setSpecificType(type);
                timex.setMin(min);
                timex.addToIndexes(this.jcas);
            }
        }
    }

    /**
     * Adds an {@link ENAMEX} annotation for every ENAMEX element below {@code docNode}, unless an
     * annotation with the same span, type and MIN value exists.
     *
     * @param docNode the section or paragraph node containing the elements
     * @param beginEnd span of {@code docNode} in the document text; only the begin is used
     */
    private void annotateENAMEX(Node docNode, int[] beginEnd) {
        List<Node> enamexNodes = this.findAll(docNode, ELEMENT_ENAMEX);
        if (enamexNodes.isEmpty()) {
            return;
        }
        String containerText = this.normalizeString(docNode.getTextContent());
        for (Node enamexNode : enamexNodes) {
            // NOTE(review): unlike TIMEX, NUMEX and COREF the MIN value is stored without
            // normalization here. Kept as is because changing it alters the produced annotations.
            Node minAttribute = MUC7Reader.attribute(enamexNode, ELEMENT_NE_MIN);
            String min = minAttribute == null ? "" : minAttribute.getNodeValue();
            String type = this.normalizeString(MUC7Reader.attribute(enamexNode, ELEMENT_NE_TYPE).getNodeValue());
            int[] span = this.locateInline(enamexNode, docNode, containerText, beginEnd[0]);
            boolean exist = false;
            for (ENAMEX existing : this.jcas.getAnnotationIndex(ENAMEX.type)) {
                if (existing.getBegin() == span[0] && existing.getEnd() == span[1] && existing.getSpecificType().equals(type) && existing.getMin().equals(min)) {
                    exist = true;
                    break;
                }
            }
            if (!exist) {
                ENAMEX enamex = new ENAMEX(this.jcas);
                enamex.setBegin(span[0]);
                enamex.setEnd(span[1]);
                enamex.setSpecificType(type);
                enamex.setMin(min);
                enamex.addToIndexes(this.jcas);
            }
        }
    }

    /**
     * Adds a {@link NUMEX} annotation for every NUMEX element below {@code docNode}, unless an
     * annotation with the same span, type and MIN value exists.
     *
     * @param docNode the section or paragraph node containing the elements
     * @param beginEnd span of {@code docNode} in the document text; only the begin is used
     */
    private void annotateNUMEX(Node docNode, int[] beginEnd) {
        List<Node> numexNodes = this.findAll(docNode, ELEMENT_NUMEX);
        if (numexNodes.isEmpty()) {
            return;
        }
        String containerText = this.normalizeString(docNode.getTextContent());
        for (Node numexNode : numexNodes) {
            // normalizeString() already removes leading whitespace, so no extra stripping is needed.
            String min = this.normalizedAttributeOrEmpty(numexNode, ELEMENT_NE_MIN);
            String type = this.normalizeString(MUC7Reader.attribute(numexNode, ELEMENT_NE_TYPE).getNodeValue());
            int[] span = this.locateInline(numexNode, docNode, containerText, beginEnd[0]);
            boolean exist = false;
            for (NUMEX existing : this.jcas.getAnnotationIndex(NUMEX.type)) {
                if (existing.getBegin() == span[0] && existing.getEnd() == span[1] && existing.getSpecificType().equals(type) && existing.getMin().equals(min)) {
                    exist = true;
                    break;
                }
            }
            if (!exist) {
                NUMEX numex = new NUMEX(this.jcas);
                numex.setBegin(span[0]);
                numex.setEnd(span[1]);
                numex.setSpecificType(type);
                numex.setMin(min);
                numex.addToIndexes(this.jcas);
            }
        }
    }

    /**
     * Records every COREF element below {@code docNode} in {@link #corefHashMap}, keyed by its ID
     * attribute. The first mention seen for an ID wins. A missing REF attribute is stored as -1.
     *
     * @param docNode the section or paragraph node containing the elements
     * @param beginEnd span of {@code docNode} in the document text; only the begin is used
     */
    private void buildCorefHashMap(Node docNode, int[] beginEnd) {
        List<Node> corefNodes = this.findAll(docNode, ELEMENT_COREF);
        if (corefNodes.isEmpty()) {
            return;
        }
        String containerText = this.normalizeString(docNode.getTextContent());
        for (Node corefNode : corefNodes) {
            int[] span = this.locateInline(corefNode, docNode, containerText, beginEnd[0]);
            MUC7Coreference muc7Coref = new MUC7Coreference();
            muc7Coref.setBegin(span[0]);
            muc7Coref.setEnd(span[1]);
            int id = Integer.parseInt(MUC7Reader.attribute(corefNode, ATTRIBUTE_ID).getNodeValue());
            muc7Coref.setId(id);
            Node refAttribute = MUC7Reader.attribute(corefNode, ATTRIBUTE_REF);
            muc7Coref.setRefID(refAttribute == null ? -1 : Integer.parseInt(refAttribute.getNodeValue()));
            Node typeAttribute = MUC7Reader.attribute(corefNode, ELEMENT_NE_TYPE);
            if (typeAttribute != null) {
                muc7Coref.setTypeOfCoref(this.normalizeString(typeAttribute.getNodeValue()));
            }
            Node minAttribute = MUC7Reader.attribute(corefNode, ELEMENT_NE_MIN);
            if (minAttribute != null) {
                muc7Coref.setMinHead(this.normalizeString(minAttribute.getNodeValue()));
            }
            if (!this.corefHashMap.containsKey(id)) {
                this.corefHashMap.put(id, muc7Coref);
            }
        }
    }

    /** Creates the {@link Coref} annotations for all collected mentions and then links them via their REF ids. */
    private void annotateCorefs() {
        for (Integer id : this.corefHashMap.keySet()) {
            this.buildCorefFromCorefHashMap(id);
        }
        if (!this.corefHashMap.isEmpty()) {
            this.buildCorefReferences();
        }
    }

    /**
     * Returns the {@link Coref} annotation covering the span of the mention with the given id,
     * creating and indexing it when the CAS holds no Coref with that span yet.
     *
     * @param id key of the mention in {@link #corefHashMap}
     * @return the existing or newly created annotation
     */
    private Coref buildCorefFromCorefHashMap(int id) {
        MUC7Coreference muc7Coref = this.corefHashMap.get(id);
        Coref existing = this.getCorefFromCAS(muc7Coref.getBegin(), muc7Coref.getEnd());
        if (existing != null) {
            return existing;
        }
        Coref coref = new Coref(this.jcas);
        coref.setBegin(muc7Coref.getBegin());
        coref.setEnd(muc7Coref.getEnd());
        coref.setCorefType(muc7Coref.getTypeOfCoref());
        coref.setMin(muc7Coref.getMinHead());
        coref.setId(muc7Coref.getId());
        coref.addToIndexes(this.jcas);
        return coref;
    }

    /**
     * Sets the {@code ref} feature of every {@link Coref} in the CAS whose id is known to
     * {@link #corefHashMap} and whose mention refers to another known mention.
     */
    private void buildCorefReferences() {
        for (Coref c : this.jcas.getAnnotationIndex(Coref.type)) {
            MUC7Coreference source = this.corefHashMap.get(c.getId());
            if (source == null) {
                // The CAS may hold Corefs of a previously processed DOC node with the same DOCID
                // whose ids do not occur in the current node.
                continue;
            }
            int refID = source.getRefID();
            if (refID <= -1 || !this.corefHashMap.containsKey(refID)) {
                continue;
            }
            MUC7Coreference target = this.corefHashMap.get(refID);
            c.setRef(this.getCorefFromCAS(target.getBegin(), target.getEnd()));
        }
    }

    /** Returns the first {@link Coref} in the CAS with exactly the given span, or {@code null} if there is none. */
    private Coref getCorefFromCAS(int begin, int end) {
        for (Coref c : this.jcas.getAnnotationIndex(Coref.type)) {
            if (c.getBegin() == begin && c.getEnd() == end) {
                return c;
            }
        }
        return null;
    }

    /**
     * Returns the concatenated content of all non-empty text nodes of {@code sectionNode} that
     * precede the first non-empty text node of {@code centerNode}.
     */
    private String getLeftTextContext(Node centerNode, Node sectionNode) {
        Node firstCenterTextNode = this.findAll(centerNode, NODE_NAME_TEXT).get(0);
        StringBuilder leftContext = new StringBuilder();
        for (Node textNode : this.findAll(sectionNode, NODE_NAME_TEXT)) {
            if (textNode.equals(firstCenterTextNode)) {
                break;
            }
            leftContext.append(textNode.getTextContent());
        }
        return leftContext.toString();
    }

    /**
     * Computes the span of {@code inlineNode} in the document text: the normalized text left of
     * the node is located in {@code containerText}, the node's own text is located behind it, and
     * the result is shifted by {@code containerBegin}.
     *
     * @param inlineNode the element to locate
     * @param containerNode the section or paragraph node containing it
     * @param containerText normalized text content of {@code containerNode}
     * @param containerBegin begin of the container in the document text
     * @return two-element array holding begin and end
     */
    private int[] locateInline(Node inlineNode, Node containerNode, String containerText, int containerBegin) {
        String leftContext = this.normalizeString(this.getLeftTextContext(inlineNode, containerNode));
        int[] leftSpan = this.getBeginEndOfSequence(leftContext, containerText, 0);
        int[] span = this.getBeginEndOfSequence(this.normalizeString(inlineNode.getTextContent()), containerText, leftSpan[1]);
        span[0] += containerBegin;
        span[1] += containerBegin;
        return span;
    }

    /** Returns the attribute node with the given name, or {@code null} if the node does not carry it. */
    private static Node attribute(Node node, String attributeName) {
        return node.getAttributes().getNamedItem(attributeName);
    }

    /** Returns the normalized value of the named attribute, or the empty string if it is absent. */
    private String normalizedAttributeOrEmpty(Node node, String attributeName) {
        Node attributeNode = MUC7Reader.attribute(node, attributeName);
        return attributeNode == null ? "" : this.normalizeString(attributeNode.getNodeValue());
    }

    /**
     * Lists the regular files directly inside the configured input directory.
     *
     * @return the files found; empty (never {@code null}) when the parameter is not set or the
     *         directory does not exist or cannot be listed
     */
    private List<File> getFilesFromInputDirectory() {
        List<File> documentFiles = new ArrayList<File>();
        Object configured = this.getConfigParameterValue(PARAM_INPUTDIR);
        if (configured == null) {
            this.logger.log(Level.WARNING, "getFilesFromInputDirectory() Configuration parameter '" + PARAM_INPUTDIR + "' is not set.");
            return documentFiles;
        }
        File directory = new File(((String)configured).trim());
        if (!directory.isDirectory()) {
            this.logger.log(Level.WARNING, "getFilesFromInputDirectory() " + directory + " does not exist. Client has to set configuration parameter '" + PARAM_INPUTDIR + "'.");
            return documentFiles;
        }
        File[] dirFiles = directory.listFiles();
        if (dirFiles == null) {
            // listFiles() yields null when the directory cannot be read.
            this.logger.log(Level.WARNING, "getFilesFromInputDirectory() " + directory + " could not be listed.");
            return documentFiles;
        }
        for (File candidate : dirFiles) {
            if (!candidate.isDirectory()) {
                documentFiles.add(candidate);
            }
        }
        this.logger.log(Level.INFO, "MUC7 Reader found " + documentFiles.size() + " files in folder " + directory + ".");
        return documentFiles;
    }

    /** Returns all nodes named {@code nodeName} with non-empty text content in the subtree of {@code node}. */
    private List<Node> findAll(Node node, String nodeName) {
        return this.getChildrenNodes(node, nodeName, new ArrayList<Node>());
    }

    /** Returns the first node found by {@link #findAll(Node, String)}, or {@code null} if there is none. */
    private Node findFirst(Node node, String nodeName) {
        List<Node> found = this.findAll(node, nodeName);
        return found.isEmpty() ? null : found.get(0);
    }

    /**
     * Appends, depth first and in document order, {@code node} itself and every descendant that
     * is named {@code nodeName} and has non-empty text content.
     *
     * @return {@code al}, for chaining
     */
    private List<Node> getChildrenNodes(Node node, String nodeName, List<Node> al) {
        if (node.getNodeName().equals(nodeName)) {
            String content = node.getTextContent();
            if (content != null && content.length() > 0) {
                al.add(node);
            }
        }
        if (node.hasChildNodes()) {
            NodeList children = node.getChildNodes();
            for (int j = 0; j < children.getLength(); ++j) {
                this.getChildrenNodes(children.item(j), nodeName, al);
            }
        }
        return al;
    }

    /** Runs {@link #getChildrenNodes(Node, String, List)} once per name, in array order, into the same list. */
    private List<Node> getChildrenNodes(Node node, String[] nodeNames, List<Node> al) {
        for (String nodeName : nodeNames) {
            this.getChildrenNodes(node, nodeName, al);
        }
        return al;
    }

    /**
     * Finds the first occurrence of {@code tokenString} in {@code inputString} at or behind
     * {@code startOfToken}.
     *
     * @param tokenString the text to find
     * @param inputString the text to search in
     * @param startOfToken offset at which the search starts; values outside the text are clamped
     *        to its bounds
     * @return two-element array with begin and (exclusive) end of the occurrence; when the token
     *         does not occur, an empty span at the (clamped) start offset so that a following
     *         search continues from the same position
     */
    public int[] getBeginEndOfToken(String tokenString, String inputString, int startOfToken) {
        int searchFrom = Math.max(0, Math.min(startOfToken, inputString.length()));
        int begin = inputString.indexOf(tokenString, searchFrom);
        if (begin < 0) {
            return new int[]{searchFrom, searchFrom};
        }
        return new int[]{begin, begin + tokenString.length()};
    }

    /**
     * Locates a sequence of blank-separated tokens in {@code inputString}. The tokens are searched
     * one after another, each starting at the end of the previous one, so the amount of
     * whitespace between them does not have to agree.
     *
     * @param sequenceString the tokens, separated by single blanks
     * @param inputString the text to search in
     * @param startOfSequence offset at which the search for the first token starts
     * @return two-element array with the begin of the first and the end of the last token
     */
    public int[] getBeginEndOfSequence(String sequenceString, String inputString, int startOfSequence) {
        String[] tokens = sequenceString.split(" ");
        // split() yields no tokens for a string made of blanks only; search the string itself then.
        String firstToken = tokens.length > 0 ? tokens[0] : sequenceString;
        int[] current = this.getBeginEndOfToken(firstToken, inputString, startOfSequence);
        int[] beginEnd = new int[]{current[0], current[1]};
        for (int i = 1; i < tokens.length; ++i) {
            current = this.getBeginEndOfToken(tokens[i], inputString, current[1]);
            beginEnd[1] = current[1];
        }
        return beginEnd;
    }

    /**
     * Removes every run of upper-case ASCII letters followed by a semicolon, collapses each
     * whitespace run (including line breaks) into one blank and strips leading whitespace.
     * Trailing whitespace is kept.
     *
     * @param stringToBeNormalized the text to normalize
     * @return the normalized text
     */
    public String normalizeString(String stringToBeNormalized) {
        String withoutRuns = UPPERCASE_RUN_WITH_SEMICOLON.matcher(stringToBeNormalized).replaceAll("");
        // Line breaks are whitespace, so collapsing whitespace runs also replaces them.
        String collapsed = WHITESPACE_RUN.matcher(withoutRuns).replaceAll(" ");
        return LEADING_WHITESPACE.matcher(collapsed).replaceFirst("");
    }

    /**
     * Creates the XML parser, reads all files of the input directory and prepares the iteration
     * over the document ids found.
     *
     * @throws ResourceInitializationException if no XML parser can be created
     */
    public void initialize() throws ResourceInitializationException {
        this.logger = this.getUimaContext().getLogger();
        this.logger.log(Level.INFO, "initialize() - Initializing MUC7 Reader...");
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        try {
            this.builder = factory.newDocumentBuilder();
        }
        catch (ParserConfigurationException e) {
            this.logger.log(Level.SEVERE, "initialize() " + e.getMessage());
            // Without a parser no file can be read; fail here instead of later with a NullPointerException.
            throw new ResourceInitializationException(e);
        }
        this.files = this.getFilesFromInputDirectory();
        // Always set up the iteration state so that hasNext() works for an empty collection too.
        this.docIDDocNodeHash = this.buildDocIDDocNodeHash(this.files);
        this.keyIter = this.docIDDocNodeHash.keySet().iterator();
    }

    /**
     * Fills the given CAS with the next document: resets it, sets the document text from the
     * first DOC node of the next document id and adds the annotations of all its DOC nodes.
     *
     * @throws CollectionException if the JCas cannot be obtained or the reader is not initialized
     */
    public void getNext(CAS cas) throws IOException, CollectionException {
        if (this.keyIter == null) {
            throw new CollectionException((Throwable)new IllegalStateException("getNext() called before initialize()"));
        }
        String key = this.keyIter.next();
        List<Node> docNodes = this.docIDDocNodeHash.get(key);
        try {
            this.jcas = cas.getJCas();
            this.jcas.reset();
        }
        catch (CASException e) {
            throw new CollectionException((Throwable)e);
        }
        String textToBeProcessed = this.annotateTextToBeProcessed(docNodes.get(0));
        for (Node docNode : docNodes) {
            // Every DOC node is aligned against the document text from its beginning.
            this.corefHashMap = new HashMap<Integer, MUC7Coreference>();
            this.startPosition = 0;
            this.annotateHeader(docNode);
            this.annotateSlug(docNode, textToBeProcessed);
            this.annotateDate(docNode, textToBeProcessed);
            this.annotateNumOfWords(docNode, textToBeProcessed);
            this.annotatePreamble(docNode, textToBeProcessed);
            this.annotateText(docNode, textToBeProcessed);
            this.annotateTrailer(docNode, textToBeProcessed);
            this.annotateCorefs();
        }
    }

    public void close() throws IOException {
    }

    /**
     * Progress is not tracked by this reader.
     *
     * @return an empty array (never {@code null})
     */
    public Progress[] getProgress() {
        return new Progress[0];
    }

    /** Returns whether another document id is left; {@code false} when the reader was not initialized. */
    public boolean hasNext() throws IOException, CollectionException {
        return this.keyIter != null && this.keyIter.hasNext();
    }
}

