/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.cr.mmax2;

import de.julielab.jcore.types.ConceptMention;
import de.julielab.jcore.types.Gene;
import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.utility.JCoReAnnotationTools;
import de.julielab.jules.mmax.MarkableContainer;
import de.julielab.jules.mmax.Statistics;
import de.julielab.jules.mmax.WordInformation;
import java.io.BufferedReader;
import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.eml.MMAX2.annotation.markables.Markable;
import org.eml.MMAX2.discourse.MMAX2Discourse;
import org.eml.MMAX2.discourse.MMAX2DiscourseElement;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name="JCoRe MMAX2 reader", description="Collection reader for MMAX2 annotation projects.", vendor="JULIE Lab Jena, Germany")
public class MMAX2Reader
extends JCasCollectionReader_ImplBase {
    public static final String PARAM_INPUT_DIR = "InputDir";
    public static final String PARAM_ANNOTATION_LEVELS = "AnnotationLevels";
    public static final String PARAM_ORIGINAL_TEXT_FILES = "OriginalTextFiles";
    public static final String PARAM_UIMA_ANNOTATION_TYPES = "UimaAnnotationTypes";
    public static final String PARAM_REMOVE_OVERLAPPING_SHORTER_ANNOTATIONS = "RemoveOverlappingShorterAnnotations";
    private static final Logger log = LoggerFactory.getLogger(MMAX2Reader.class);
    @ConfigurationParameter(name="RemoveOverlappingShorterAnnotations", mandatory=false, defaultValue={"false"}, description="If set to true, for all overlapping annotations only the longest is kept.")
    boolean removeOverlappingShorterAnnotations;
    @ConfigurationParameter(name="InputDir", description="Should point to the directory of which the MMAX2 projects are sub directories of.")
    private String inputDir;
    @ConfigurationParameter(name="AnnotationLevels", description="The names of the MMAX2 annotation levels to create annotations for.")
    private String[] annotationLevels;
    @ConfigurationParameter(name="UimaAnnotationTypes", description="The fully qualified names of the UIMA annotation types to be used for the representation of the input annotation level. Must match the indices of AnnotationLevels, i.e. the ith level will be added to the CAS as the ith type.")
    private String[] uimaTypeNames;
    @ConfigurationParameter(name="OriginalTextFiles", mandatory=false, description="The MMAX2 base data consists of tokenized text and does not keep track of the original text. This parameter should point to a directory containing the original text files. The file names should match the MMAX2 project IDs.")
    private String originalTextFilesDir;
    private LinkedList<File> folderList;
    private String actualPath;
    private HashMap<String, String> levels2uimaNames;
    private List<Class<?>> uimaAnnotationClasses;
    private int numDocuments;

    public void initialize(UimaContext context) throws ResourceInitializationException {
        super.initialize(context);
        this.inputDir = (String)context.getConfigParameterValue(PARAM_INPUT_DIR);
        this.annotationLevels = (String[])context.getConfigParameterValue(PARAM_ANNOTATION_LEVELS);
        this.uimaTypeNames = (String[])this.getUimaContext().getConfigParameterValue(PARAM_UIMA_ANNOTATION_TYPES);
        this.originalTextFilesDir = (String)context.getConfigParameterValue(PARAM_ORIGINAL_TEXT_FILES);
        this.removeOverlappingShorterAnnotations = Optional.ofNullable((Boolean)context.getConfigParameterValue(PARAM_REMOVE_OVERLAPPING_SHORTER_ANNOTATIONS)).orElse(false);
        this.actualPath = null;
        if (this.annotationLevels.length != this.uimaTypeNames.length) {
            throw new IllegalArgumentException("The number of annotation levels and the number of UIMA type names must match. But the given annotation levels are '" + Arrays.toString(this.annotationLevels) + "' and the UIMA types names are '" + Arrays.toString(this.uimaTypeNames) + "'.");
        }
        try {
            this.uimaAnnotationClasses = Arrays.stream(this.uimaTypeNames).map(name -> {
                try {
                    return Class.forName(name);
                }
                catch (ClassNotFoundException e) {
                    throw new RuntimeException(e);
                }
            }).collect(Collectors.toList());
        }
        catch (Exception e) {
            log.error("Could not initialize UIMA annotation classes from parameter values {}", (Object)Arrays.toString(this.uimaTypeNames));
            throw new ResourceInitializationException((Throwable)e);
        }
        this.levels2uimaNames = IntStream.range(0, this.annotationLevels.length).collect(HashMap::new, (m, i) -> m.put(this.annotationLevels[i], this.uimaTypeNames[i]), (m1, m2) -> m1.putAll(m2));
        this.setUpFolderList();
    }

    /*
     * WARNING - void declaration
     */
    private void setUpFolderList() throws ResourceInitializationException {
        File rootX;
        this.folderList = new LinkedList();
        if (!this.inputDir.endsWith(File.separator)) {
            this.inputDir = this.inputDir + File.separator;
        }
        if (!(rootX = new File(this.inputDir)).exists()) {
            File dir1 = new File(".");
            try {
                rootX = new File(dir1.getCanonicalPath() + this.inputDir);
            }
            catch (IOException e) {
                e.printStackTrace();
                System.exit(1);
            }
            if (!rootX.exists()) {
                log.error("{} does not exist", (Object)this.inputDir);
                throw new ResourceInitializationException((Throwable)new IllegalArgumentException(this.inputDir + " does not exist"));
            }
        }
        for (String string : rootX.list()) {
            void var5_6;
            File root;
            if (!string.endsWith(File.separator)) {
                String string2 = string + File.separator;
            }
            if (!(root = new File(this.inputDir + (String)var5_6)).isDirectory()) continue;
            this.folderList.add(root);
        }
        this.numDocuments = this.folderList.size();
    }

    private String getPMID() throws CollectionException {
        try {
            String strLine;
            FileInputStream fstream = new FileInputStream(this.actualPath + "Basedata.uri");
            DataInputStream in = new DataInputStream(fstream);
            BufferedReader br = new BufferedReader(new InputStreamReader(in));
            int count = 0;
            String pmid = "";
            while ((strLine = br.readLine()) != null) {
                ++count;
                pmid = strLine;
            }
            if (count > 1) {
                log.error("unknown data in {}Basedata.uri", (Object)this.actualPath);
                System.exit(1);
                return null;
            }
            return pmid;
        }
        catch (IOException e) {
            log.error("Error while parsing {}Basedata.uri", (Object)this.actualPath);
            throw new CollectionException((Throwable)e);
        }
    }

    public void getNext(JCas jCas) throws CollectionException {
        ++Statistics.projects;
        this.actualPath = this.folderList.poll().getAbsolutePath() + "/";
        File style = new File(this.actualPath + "Styles/default_style.xsl");
        style.renameTo(new File(this.actualPath + "Styles/generic_nongui_style.xsl"));
        File mmaxfile = new File(this.actualPath + "project.mmax");
        MMAX2Discourse discourse = MMAX2Discourse.buildDiscourse((String)mmaxfile.getAbsolutePath());
        String documentText = discourse.getNextDocumentChunk();
        WordInformation[] words = new WordInformation[discourse.getDiscourseElementCount()];
        int textPosition = 0;
        for (MMAX2DiscourseElement elem : discourse.getDiscourseElements()) {
            WordInformation word = new WordInformation();
            word.setId(elem.getID());
            int discoursePosition = elem.getDiscoursePosition();
            word.setPosition(discoursePosition);
            StringBuilder textBuilder = new StringBuilder();
            int end = discourse.getDisplayEndPositionFromDiscoursePosition(discoursePosition);
            for (textPosition = discourse.getDisplayStartPositionFromDiscoursePosition(discoursePosition); textPosition <= end; ++textPosition) {
                textBuilder.append(documentText.charAt(textPosition));
            }
            word.setText(textBuilder.toString());
            words[discoursePosition] = word;
        }
        this.produceOutput(discourse, words, jCas);
        style = new File(this.actualPath + "Styles/generic_nongui_style.xsl");
        style.renameTo(new File(this.actualPath + "Styles/default_style.xsl"));
        ++Statistics.projects;
    }

    private void produceOutput(MMAX2Discourse discourse, WordInformation[] words, JCas jCas) throws CollectionException {
        StringBuilder out = new StringBuilder();
        StringBuilder outPlain = new StringBuilder();
        String pmid = this.getPMID();
        if (this.originalTextFilesDir != null && this.originalTextFilesDir.length() > 0) {
            this.handleOriginalTextInformation(pmid, words);
        }
        HashMap<Integer, Token> pos2offsets = new HashMap<Integer, Token>();
        for (int i = 0; i < words.length; ++i) {
            WordInformation word = words[i];
            Token token = new Token(jCas, outPlain.length(), outPlain.length() + word.getText().length());
            token.setComponentId(((Object)((Object)this)).getClass().getCanonicalName());
            token.addToIndexes();
            pos2offsets.put(word.getPosition(), token);
            outPlain.append(word.getText());
            if (!word.isFollowedBySpace()) continue;
            out.append(" ");
            outPlain.append(" ");
        }
        Set<Markable> ignoredMarkables = this.getIgnoredMarkables(discourse);
        for (int i = 0; i < this.annotationLevels.length; ++i) {
            Iterator iterator = discourse.getMarkableLevelByName(this.annotationLevels[i], false).getMarkables().stream().map(Markable.class::cast).filter(Predicate.not(ignoredMarkables::contains)).filter(Predicate.not(Markable::isDiscontinuous)).iterator();
            int id = 0;
            while (iterator.hasNext()) {
                Annotation a;
                Markable markable = (Markable)iterator.next();
                int beginPosition = markable.getLeftmostDiscoursePosition();
                int endPosition = markable.getRightmostDiscoursePosition();
                int beginOffset = ((Token)pos2offsets.get(beginPosition)).getBegin();
                int endOffset = ((Token)pos2offsets.get(endPosition)).getEnd();
                try {
                    a = JCoReAnnotationTools.getAnnotationByClassName((JCas)jCas, (String)this.uimaTypeNames[i]);
                }
                catch (Exception e) {
                    throw new CollectionException((Throwable)e);
                }
                a.setBegin(beginOffset);
                a.setEnd(endOffset);
                if (a instanceof ConceptMention) {
                    ((ConceptMention)a).setSpecificType(markable.getAttributeValue(markable.getMarkableLevelName()));
                } else if (a instanceof Sentence) {
                    ((Sentence)a).setId(String.valueOf(id));
                }
                a.addToIndexes();
                ++id;
            }
        }
        for (WordInformation word : words) {
            for (MarkableContainer mc : word.getMarkables()) {
                int beginPosition = mc.getBegin();
                if (beginPosition != word.getPosition()) continue;
                int endPosition = mc.getEnd();
                int beginOffset = ((Token)pos2offsets.get(beginPosition)).getBegin();
                int endOffset = ((Token)pos2offsets.get(endPosition)).getEnd();
                Gene gene = new Gene(jCas, beginOffset, endOffset);
                gene.addToIndexes();
            }
        }
        String textPlain = outPlain.toString();
        jCas.setDocumentText(textPlain);
        Header h = new Header(jCas);
        h.setDocId(pmid);
        h.addToIndexes();
    }

    private Set<Markable> getIgnoredMarkables(MMAX2Discourse discourse) {
        if (!this.removeOverlappingShorterAnnotations) {
            return Collections.emptySet();
        }
        HashSet<Markable> toIgnore = new HashSet<Markable>();
        for (int i = 0; i < this.annotationLevels.length; ++i) {
            HashMap markablesByPos = new HashMap();
            Iterator iterator = discourse.getMarkableLevelByName(this.annotationLevels[i], false).getMarkables().stream().map(Markable.class::cast).filter(Predicate.not(Markable::isDiscontinuous)).iterator();
            while (iterator.hasNext()) {
                Markable markable = (Markable)iterator.next();
                IntStream.rangeClosed(markable.getLeftmostDiscoursePosition(), markable.getRightmostDiscoursePosition()).forEach(j -> markablesByPos.compute(j, (k, v) -> v != null ? v : new HashSet()).add(markable));
            }
            for (Integer pos : markablesByPos.keySet()) {
                Set markables = (Set)markablesByPos.get(pos);
                if (markables.size() <= 1) continue;
                int maxSize = 0;
                Markable longestMarkable = null;
                for (Markable markable : markables) {
                    toIgnore.add(markable);
                    int markableLength = markable.getRightmostDiscoursePosition() - markable.getLeftmostDiscoursePosition() + 1;
                    if (markableLength <= maxSize) continue;
                    maxSize = markableLength;
                    longestMarkable = markable;
                }
                toIgnore.remove(longestMarkable);
            }
        }
        return toIgnore;
    }

    private void handleOriginalTextInformation(String pmid, WordInformation[] words) throws CollectionException {
        File file;
        if (this.originalTextFilesDir.length() > 0 && !this.originalTextFilesDir.endsWith("/")) {
            this.originalTextFilesDir = this.originalTextFilesDir + File.separator;
        }
        if (!(file = new File(this.originalTextFilesDir + pmid)).exists()) {
            log.warn("no original File found for {} using only mmax text.", (Object)pmid);
            return;
        }
        try {
            FileInputStream fis = new FileInputStream(file);
            InputStreamReader isr = new InputStreamReader(fis);
            int wordCounter = 0;
            try {
                int i;
                WordInformation actualWord = words[wordCounter];
                String actualText = actualWord.getText();
                actualWord.setFollowedBySpace(false);
                int wordCharCounter = 0;
                while ((i = isr.read()) >= 0) {
                    if (wordCharCounter >= actualText.length()) {
                        if (++wordCounter < words.length) {
                            actualWord = words[wordCounter];
                            actualText = actualWord.getText();
                            actualWord.setFollowedBySpace(false);
                            wordCharCounter = 0;
                        } else {
                            if (!Character.isWhitespace(i)) {
                                log.warn("original Text contains more words than mmax information");
                            }
                            return;
                        }
                    }
                    if (actualText.charAt(wordCharCounter) == i || Character.toLowerCase(actualText.charAt(wordCharCounter)) == Character.toLowerCase(i)) {
                        ++wordCharCounter;
                        continue;
                    }
                    if (!Character.isWhitespace(i)) {
                        log.warn("there is a non whitespace character different in original text at document {} critical character is '{}' near word '{}' (MMAX2 word ID {})", new Object[]{pmid, i, actualText, actualWord.getId()});
                        continue;
                    }
                    words[wordCounter - 1].setFollowedBySpace(true);
                }
                isr.close();
            }
            catch (IOException e) {
                log.error("Error attempting to read original text file ", (Throwable)e);
                throw new CollectionException((Throwable)e);
            }
        }
        catch (Exception e) {
            log.error("Error attempting to read original text file", (Throwable)e);
            if (e instanceof CollectionException) {
                throw (CollectionException)e;
            }
            throw new CollectionException((Throwable)e);
        }
    }

    public void close() {
    }

    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this.numDocuments - this.folderList.size(), this.numDocuments, "document")};
    }

    public boolean hasNext() {
        return !this.folderList.isEmpty();
    }
}

