/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.consumer.cas2iob.main;

import de.julielab.jcore.consumer.cas2iob.utils.UIMAUtils;
import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.Paragraph;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.utility.JCoReAnnotationTools;
import de.julielab.jcore.utility.index.Comparators;
import de.julielab.jcore.utility.index.IndexTermGenerator;
import de.julielab.jcore.utility.index.JCoReTreeMapAnnotationIndex;
import de.julielab.jcore.utility.index.TermGenerators;
import de.julielab.segmentationEvaluator.IOBToken;
import de.julielab.segmentationEvaluator.IOToken;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileWriter;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.lang.reflect.Method;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name="JCoRe IOB Writer", description="This component help to write CAS entity or chunk annotations into a text file in IOB format.")
public class ToIOBConsumer
extends JCasAnnotator_ImplBase {
    // Names of the UIMA configuration parameters; initialize() looks each of them up in the UimaContext.
    public static final String PARAM_LABELS = "labels";
    public static final String PARAM_OUTFOLDER = "outFolder";
    public static final String PARAM_LABEL_METHODS = "labelNameMethods";
    public static final String PARAM_IOB_LABEL_NAMES = "iobLabelNames";
    public static final String PARAM_TYPE_PATH = "typePath";
    public static final String PARAM_MODE = "mode";
    public static final String PARAM_ADD_POS = "addPos";
    public static final String PARAM_COLUMN_SEPARATOR = "columnSeparator";
    public static final String PARAM_IOB_MARK_SEPARATOR = "iobMarkSeparator";
    private static final Logger LOGGER = LoggerFactory.getLogger(ToIOBConsumer.class);
    // Text of the pseudo token marking a sentence boundary; process() writes a single line break for it.
    private final String SENTENCE_END_MARK = "SENTENCE_END_MARKER";
    // Text of the pseudo token marking a paragraph boundary; process() writes two line breaks for it.
    private final String PARAGRAPH_END_MARK = "PARAGRAPH_END_MARKER";
    @ConfigurationParameter(name="outFolder", description="Path to folder where IOB-files should be written to.")
    String outFolder = null;
    @ConfigurationParameter(name="typePath", mandatory=false, description="The path of the UIMA types, e.g. \"de.julielab.jcore.\" (with terminating \".\"!). It is prepended to the class names in labelNameMethods. This parameter may be null which is equivalent to the empty String \"\".")
    String typePath = null;
    @ConfigurationParameter(name="labels", mandatory=false, description="The labels NOT to be exported into IOB format. Label does here not refer to an UIMA type but to the specific label aquired by the labelNameMethod.")
    String[] labels = null;
    // Filled in initialize(): annotation type name (with typePath prepended) -> name of the method that
    // yields the label. A null value means the annotation's class name itself is used as the label.
    HashMap<String, String> objNameMethMap = null;
    // Filled in initialize(): label obtained from an annotation -> label name written to the output.
    // Set to an empty map when the "iobLabelNames" parameter is not given.
    Map<String, String> labelIOBMap = null;
    // Running counter; getDocumentId() uses it (and increments it) when no Header annotation is found.
    int id = 1;
    @ConfigurationParameter(name="mode", mandatory=false, description="This parameter determines whether the IOB or IO annotation schema should be used. The parameter defaults to IOB, the value is not case sensitive.", defaultValue={"IOB"})
    private String mode = null;
    @ConfigurationParameter(name="labelNameMethods", description="This is the primary parameter to define from which types IOB labels should be derived. The parameter expects pairs of UIMA-annotation-type-names and their corresponding method for extracting the annotation label. Format: &lt;annotationName&gt;[\\s=/\\\\|]&lt;method Name&gt;. The annotation name is fully qualified name of the UIMA type. For abbreviation purposes, the \"typePath\" parameter can be used to define a type prefix that will then be prepended to all UIMA type names given in this parameter. So, for example, the prefix \"de.julielab.jcore.types.\" will allow to use the \"specificType\" feature of the \"de.julielab.jcore.types.Gene\" type by providing \"Gene=getSpecificType\".  If the name of the annotation class itself is to be being used as label, only the class name is expected: &lt;annotationName&gt; (here, again, applies the use of the \"typePath\" parameter). You also may specify a mix of pairs and single class names. If you give the name extracting method for a class and have also specified its superclass as a single class name, the given method is used rather than the superclass name.")
    private String[] labelNameMethods;
    @ConfigurationParameter(name="iobLabelNames", mandatory=false, description="Pairs of label names in UIMA (aquired by the methods given in labelNameMethods) and the name the label is supposed to get in the outcoming IOB file. Format: &lt;UIMA label name&gt;[\\s=/\\\\|]&lt;IOB label name&gt;")
    private String[] iobLabelNames;
    @ConfigurationParameter(name="addPos", mandatory=false, description="If set to true and if annotations of (sub-)type de.julielab.jcore.types.POSTag are present in the CAS, the PoS tags will be added to the output file as the second column. Defaults to false.")
    private Boolean addPos;
    @ConfigurationParameter(name="columnSeparator", mandatory=false, description="The string given with this parameter will be used to separate the columns in the output file. Defaults to a single tab character.", defaultValue={"\\t"})
    private String separator;
    @ConfigurationParameter(name="iobMarkSeparator", mandatory=false, description="This string will be used to separate the IO(B) mark - i. e. I or B - from the entity or chunk label in the output file. Defaults to an underscore character.")
    private String iobMarkSeparator;

    /**
     * Reads the configuration parameters from the UIMA context and builds the two lookup tables
     * used during processing: annotation type name to label getter method, and UIMA label to
     * output label.
     *
     * <p>A missing "mode" value is treated as "IOB"; the mode is matched without regard to case.
     *
     * @param aContext the UIMA context that provides the configuration parameter values
     * @throws ResourceInitializationException if the mode is neither "IOB" nor "IO", or if an
     *         entry of "iobLabelNames" does not consist of two parts
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        LOGGER.info("Initializing...");
        // Characters that may separate the two halves of a configured pair.
        String regexp = "[\\s=/\\|]";
        this.labels = Optional.ofNullable((String[])aContext.getConfigParameterValue(PARAM_LABELS)).orElse(new String[0]);
        this.outFolder = (String)aContext.getConfigParameterValue(PARAM_OUTFOLDER);
        this.labelNameMethods = (String[])aContext.getConfigParameterValue(PARAM_LABEL_METHODS);
        this.iobLabelNames = (String[])aContext.getConfigParameterValue(PARAM_IOB_LABEL_NAMES);
        this.typePath = Optional.ofNullable((String)aContext.getConfigParameterValue(PARAM_TYPE_PATH)).orElse("");
        this.addPos = Optional.ofNullable((Boolean)aContext.getConfigParameterValue(PARAM_ADD_POS)).orElse(false);
        // The descriptor cannot easily carry a real tab character, so a literal "\t" is translated.
        this.separator = Optional.ofNullable((String)aContext.getConfigParameterValue(PARAM_COLUMN_SEPARATOR)).orElse("\t").replace("\\t", "\t");
        this.iobMarkSeparator = Optional.ofNullable((String)aContext.getConfigParameterValue(PARAM_IOB_MARK_SEPARATOR)).orElse("_");

        String configuredMode = (String)aContext.getConfigParameterValue(PARAM_MODE);
        if (configuredMode == null || configuredMode.trim().equalsIgnoreCase("IOB")) {
            this.mode = "IOB";
        } else if (configuredMode.trim().equalsIgnoreCase("IO")) {
            this.mode = "IO";
        } else {
            throw new ResourceInitializationException(new IllegalArgumentException("Unsupported value \"" + configuredMode + "\" for parameter \"" + PARAM_MODE + "\"; expected \"IOB\" or \"IO\"."));
        }

        // Always create the map so that later processing never sees null.
        this.objNameMethMap = new HashMap<>();
        if (this.labelNameMethods != null) {
            for (String entry : this.labelNameMethods) {
                String[] parts = entry.split(regexp);
                // A single class name means: use the class name itself as the label (null method).
                String methodName = parts.length == 1 ? null : parts[1];
                this.objNameMethMap.put(this.typePath + parts[0], methodName);
            }
        }

        if (this.iobLabelNames != null) {
            this.labelIOBMap = new HashMap<String, String>();
            for (String entry : this.iobLabelNames) {
                String[] parts = entry.split(regexp);
                if (parts.length < 2) {
                    throw new ResourceInitializationException(new IllegalArgumentException("Invalid entry \"" + entry + "\" for parameter \"" + PARAM_IOB_LABEL_NAMES + "\"; expected <UIMA label name><separator><IOB label name>."));
                }
                this.labelIOBMap.put(parts[0], parts[1]);
            }
        } else {
            this.labelIOBMap = Collections.emptyMap();
        }
    }

    /**
     * Converts the CAS into IO(B) tokens and writes them to
     * {@code <outFolder>/<document id>.iob}, one token per line with the columns joined by the
     * configured separator. A sentence marker token becomes one empty line, a paragraph marker
     * token becomes two empty lines.
     *
     * <p>The file is written in UTF-8. I/O errors are logged and do not abort processing.
     *
     * @param jCas the CAS to export
     */
    @Override
    public void process(JCas jCas) {
        LOGGER.trace("Converting CAS to IO(B)Tokens...");
        IOToken[] ioTokens = this.convertToIOB(jCas);
        LOGGER.trace("Writing IO(B) file...");
        String outPathName = Paths.get(this.outFolder, this.getDocumentId(jCas)).toString() + ".iob";
        try {
            // Only create the folder when it is missing so that an existing folder is left untouched.
            if (Files.notExists(Paths.get(this.outFolder))) {
                Files.createDirectories(Paths.get(this.outFolder));
            }
            // try-with-resources closes the writer even when a write fails.
            try (BufferedWriter bw = Files.newBufferedWriter(Paths.get(outPathName))) {
                for (IOToken token : ioTokens) {
                    if (token.getText().equals("") || token.getText().equals(this.SENTENCE_END_MARK)) {
                        bw.newLine();
                        continue;
                    }
                    if (token.getText().equals(this.PARAGRAPH_END_MARK)) {
                        bw.newLine();
                        bw.newLine();
                        continue;
                    }
                    Object tag = token.getIobMark().equals("O") ? token.getIobMark() : token.getIobMark() + this.iobMarkSeparator + token.getLabel();
                    // Columns without a value (e.g. no PoS tag) are left out of the line.
                    String line = Stream.<Object>of(token.getText(), token.getPos(), tag)
                            .filter(Objects::nonNull)
                            .map(Object::toString)
                            .collect(Collectors.joining(this.separator));
                    bw.write(line);
                    bw.newLine();
                }
            }
            LOGGER.trace("The IO(B) file was written to " + outPathName);
        }
        catch (IOException e) {
            LOGGER.error("Could not write the IO(B) file " + outPathName, e);
        }
    }

    /**
     * Builds the sequence of IO(B) tokens for the given CAS.
     *
     * <p>Tokens covered by one of the configured annotation types are labeled first (see
     * {@code tokenLabeling}); all remaining tokens get the "O" label. Before the first token of
     * every sentence except the first one, a marker token is inserted: a paragraph marker when the
     * sentence lies in a different paragraph than the previous sentence, a sentence marker
     * otherwise. If the CAS has no paragraph annotations, one paragraph spanning the whole
     * document is created and added to the CAS indexes.
     *
     * @param jcas the CAS to convert
     * @return the tokens ordered by their begin offset; in "IO" mode each token is converted via
     *         {@code toXIoToken()}
     * @throws IllegalArgumentException if a configured annotation type is unknown to the type system
     * @throws IllegalStateException if PoS output is enabled and a token has no PoS list
     */
    public IOToken[] convertToIOB(JCas jcas) {
        // One iterator per configured annotation type.
        Iterator[] annotationIters = new Iterator[this.objNameMethMap.size()];
        int typeIdx = 0;
        for (String objName : this.objNameMethMap.keySet()) {
            Type type = jcas.getTypeSystem().getType(objName);
            if (type == null) {
                throw new IllegalArgumentException("The type \"" + objName + "\" configured via \"" + PARAM_LABEL_METHODS + "\" is not part of the type system.");
            }
            annotationIters[typeIdx++] = jcas.getAnnotationIndex(type).iterator();
        }

        // Keyed by begin offset so that iterating the map yields the tokens in document order.
        TreeMap<Integer, IOToken> ioTokenMap = new TreeMap<Integer, IOToken>();
        this.tokenLabeling(ioTokenMap, annotationIters, jcas);

        ArrayList<Paragraph> paragraphs = new ArrayList<Paragraph>();
        FSIterator<?> paragraphIter = jcas.getAnnotationIndex(Paragraph.type).iterator();
        while (paragraphIter.hasNext()) {
            paragraphs.add((Paragraph)paragraphIter.next());
        }
        if (paragraphs.isEmpty()) {
            // No paragraphs in the CAS: treat the whole document as a single paragraph.
            try {
                Paragraph dParagraph = (Paragraph)JCoReAnnotationTools.getAnnotationByClassName((JCas)jcas, (String)Paragraph.class.getName());
                if (dParagraph != null) {
                    String documentText = jcas.getDocumentText();
                    dParagraph.setBegin(0);
                    dParagraph.setEnd(documentText == null ? 0 : documentText.length());
                    dParagraph.addToIndexes(jcas);
                    paragraphs.add(dParagraph);
                }
            }
            catch (ClassNotFoundException | IllegalAccessException | IllegalArgumentException | InstantiationException | NoSuchMethodException | SecurityException | InvocationTargetException e) {
                // Without the default paragraph only sentence markers are produced below.
                LOGGER.error("Could not create the default paragraph annotation; paragraph boundaries will not be marked.", e);
            }
        }

        Paragraph lastPara = null;
        boolean firstSentence = true;
        for (Sentence sentence : jcas.getAnnotationIndex(Sentence.type)) {
            // Find the paragraph containing this sentence; the last match wins. A sentence outside
            // of every paragraph is treated as belonging to the previous sentence's paragraph.
            Paragraph currentParagraph = lastPara;
            for (Paragraph para : paragraphs) {
                if (para.getBegin() <= sentence.getBegin() && para.getEnd() >= sentence.getEnd()) {
                    currentParagraph = para;
                }
            }
            ArrayList<?> tokenList = (ArrayList<?>)UIMAUtils.getAnnotations(jcas, (Annotation)sentence, Token.class);
            if (tokenList.isEmpty()) {
                // Nothing to write; keep lastPara so a paragraph change is still seen by the next sentence.
                continue;
            }
            for (int t = 0; t < tokenList.size(); ++t) {
                Token token = (Token)tokenList.get(t);
                if (t == 0 && !firstSentence) {
                    String marker = currentParagraph != lastPara ? this.PARAGRAPH_END_MARK : this.SENTENCE_END_MARK;
                    // NOTE(review): the marker is stored one offset before the sentence start; a token
                    // beginning exactly there would be replaced - confirm this cannot happen.
                    ioTokenMap.put(token.getBegin() - 1, (IOToken)new IOBToken(marker, marker));
                }
                if (ioTokenMap.containsKey(token.getBegin())) {
                    // Already labeled by tokenLabeling.
                    continue;
                }
                String pos = null;
                if (this.addPos.booleanValue()) {
                    if (token.getPosTag() == null) {
                        throw new IllegalStateException("The IOB consumer is configured to add the part of speech tag to each token but the token \"" + token.getCoveredText() + "\", " + token + " doesn't have any (the PoS list is null).");
                    }
                    if (token.getPosTag().size() > 0) {
                        pos = token.getPosTag(0).getValue();
                    }
                }
                ioTokenMap.put(token.getBegin(), (IOToken)new IOBToken(token.getCoveredText(), "O", pos));
            }
            firstSentence = false;
            lastPara = currentParagraph;
        }

        IOToken[] ret = new IOToken[ioTokenMap.size()];
        boolean iobMode = this.mode.equals("IOB");
        int outIdx = 0;
        for (IOToken ioToken : ioTokenMap.values()) {
            if (iobMode) {
                ret[outIdx] = ioToken;
            } else {
                ret[outIdx] = ((IOBToken)ioToken).toXIoToken();
            }
            ++outIdx;
        }
        return ret;
    }

    /**
     * Creates "B_"/"I_" labeled tokens for all tokens covered by the given annotations and stores
     * them in {@code ioTokenMap} under their begin offset.
     *
     * <p>Annotations for which {@code getAnnotationLabel} returns null (excluded label or label
     * could not be obtained) are skipped. If the first token of an annotation is already present
     * in the map, the conflict is resolved by {@code handleCompetingAnnotations}.
     *
     * @param ioTokenMap the map from token begin offset to token that is filled by this method
     * @param annotationIters one iterator per annotation type to derive labels from
     * @param jcas the CAS the annotations and tokens belong to
     * @throws IllegalStateException if PoS output is enabled and a token has no PoS list
     */
    private void tokenLabeling(TreeMap<Integer, IOToken> ioTokenMap, Iterator[] annotationIters, JCas jcas) {
        if (annotationIters.length == 0) {
            return;
        }
        // The token index only depends on the CAS, so it is built once for all annotation types.
        JCoReTreeMapAnnotationIndex tokenByAnnotation = new JCoReTreeMapAnnotationIndex(Comparators.longOverlapComparator(), (IndexTermGenerator)TermGenerators.longOffsetTermGenerator(), (IndexTermGenerator)TermGenerators.longOffsetTermGenerator(), jcas, Token.type);
        for (Iterator annoIter : annotationIters) {
            while (annoIter.hasNext()) {
                Annotation ann = (Annotation)annoIter.next();
                String label = this.getAnnotationLabel(ann);
                if (label == null) {
                    // Excluded or unavailable label: leave the tokens to be written as "O".
                    continue;
                }
                Iterator subtokenIterator = tokenByAnnotation.searchFuzzy(ann).iterator();
                if (!subtokenIterator.hasNext()) {
                    LOGGER.warn("no token annotation in label annotation: " + ann.getCoveredText() + ", " + ann);
                    continue;
                }
                Token token = (Token)subtokenIterator.next();
                String pos = this.posTagForLabeledToken(token);
                Integer begin = token.getBegin();
                if (ioTokenMap.containsKey(begin)) {
                    this.handleCompetingAnnotations(ioTokenMap, label, subtokenIterator, token, begin, pos);
                    continue;
                }
                ioTokenMap.put(begin, (IOToken)new IOBToken(token.getCoveredText(), "B_" + label, pos));
                while (subtokenIterator.hasNext()) {
                    token = (Token)subtokenIterator.next();
                    // Every token carries its own PoS tag, not the one of the annotation's first token.
                    ioTokenMap.put(token.getBegin(), (IOToken)new IOBToken(token.getCoveredText(), "I_" + label, this.posTagForLabeledToken(token)));
                }
            }
        }
    }

    /**
     * Returns the value of the first PoS tag of the token, or null if PoS output is disabled or
     * the token's PoS list is empty.
     *
     * @param token the token to read the PoS tag from
     * @return the PoS tag value or null
     * @throws IllegalStateException if PoS output is enabled and the token's PoS list is null
     */
    private String posTagForLabeledToken(Token token) {
        if (!this.addPos.booleanValue()) {
            return null;
        }
        if (token.getPosTag() == null) {
            throw new IllegalStateException("The IOB consumer is configured to add the part of speech tag to each token but the token \"" + token.getCoveredText() + "\", " + token + " doesn't have any (the PoS list is null).");
        }
        return token.getPosTag().size() > 0 ? token.getPosTag(0).getValue() : null;
    }

    /**
     * Resolves the case that an annotation starts at a token that already has an entry in
     * {@code ioTokenMap}. The new token sequence replaces the existing entries only if it is
     * longer than the existing run of tokens that starts at {@code begin} and carries the same
     * label.
     *
     * @param ioTokenMap the map from token begin offset to token
     * @param label the label of the new annotation
     * @param subtokenIterator iterator over the remaining tokens of the new annotation
     * @param token the first token of the new annotation
     * @param begin the begin offset of {@code token}
     * @param pos the PoS tag of {@code token}, may be null
     */
    private void handleCompetingAnnotations(TreeMap<Integer, IOToken> ioTokenMap, String label, Iterator subtokenIterator, Token token, Integer begin, String pos) {
        // Length of the existing run: the entry at 'begin' followed by "I" entries, all with the same label.
        int oldLength = 0;
        for (IOToken actToken : ioTokenMap.tailMap(begin, true).values()) {
            if (!actToken.getLabel().equals(label) || !actToken.getIobMark().equals("I") && oldLength > 0) {
                break;
            }
            ++oldLength;
        }
        // Keyed by begin offset so that the size does not depend on how IOBToken defines equality.
        TreeMap<Integer, IOToken> newTokenSeq = new TreeMap<Integer, IOToken>();
        newTokenSeq.put(begin, (IOToken)new IOBToken(token.getCoveredText(), "B_" + label, pos));
        while (subtokenIterator.hasNext()) {
            Token next = (Token)subtokenIterator.next();
            newTokenSeq.put(next.getBegin(), (IOToken)new IOBToken(next.getCoveredText(), "I_" + label, this.posTagForLabeledToken(next)));
        }
        if (newTokenSeq.size() > oldLength) {
            ioTokenMap.putAll(newTokenSeq);
        }
    }

    /**
     * Determines the output label for an annotation.
     *
     * <p>If a method name is registered for the annotation's class, that method is invoked on the
     * annotation to obtain the label; otherwise the class name is the label. Labels listed in the
     * "labels" parameter are suppressed, and the result is finally translated through the
     * configured label-to-IOB-name map if it contains an entry.
     *
     * @param ann the annotation to get the label for
     * @return the label to write, or null if the label is excluded or could not be obtained
     */
    private String getAnnotationLabel(Annotation ann) {
        Class<?> annClass = ann.getClass();
        String methodName = this.objNameMethMap.get(annClass.getName());
        String ret = null;
        if (methodName == null) {
            ret = annClass.getName();
        } else {
            try {
                Method getLabelMethod = annClass.getMethod(methodName);
                ret = (String)getLabelMethod.invoke(ann);
            }
            catch (NoSuchMethodException e) {
                LOGGER.error("The class \"" + annClass.getName() + "\" does not have a method \"" + methodName + "\".", e);
            }
            catch (ReflectiveOperationException | RuntimeException e) {
                LOGGER.error("Could not obtain the label of " + ann + " via method \"" + methodName + "\".", e);
            }
        }
        if (ret == null) {
            return null;
        }
        for (String excluded : this.labels) {
            if (ret.equals(excluded)) {
                return null;
            }
        }
        String mapped = this.labelIOBMap.get(ret);
        return mapped != null ? mapped : ret;
    }

    /**
     * Returns the identifier used as the output file name for the given CAS: the document ID of
     * the first Header annotation if there is one with a non-empty ID, otherwise the current
     * value of the running counter (which is then incremented).
     *
     * @param cas the CAS to get the document ID for
     * @return the document ID or the counter value as a string
     */
    private String getDocumentId(JCas cas) {
        FSIterator<?> headers = cas.getAnnotationIndex(Header.type).iterator();
        if (headers.hasNext()) {
            String docId = ((Header)headers.next()).getDocId();
            if (docId != null && !docId.isEmpty()) {
                return docId;
            }
            // A header without an ID cannot name the file; fall back to the counter.
            LOGGER.trace("The {} annotation of the current CAS has no document ID", (Object)Header.class.getCanonicalName());
        } else {
            LOGGER.trace("No annotation of type {} found in current CAS", (Object)Header.class.getCanonicalName());
        }
        return String.valueOf(this.id++);
    }
}

