/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.consumer.entityevaluator;

import de.julielab.java.utilities.FileUtilities;
import de.julielab.jcore.consumer.entityevaluator.Column;
import de.julielab.jcore.consumer.entityevaluator.DocumentIdColumn;
import de.julielab.jcore.consumer.entityevaluator.FeatureValueFilter;
import de.julielab.jcore.consumer.entityevaluator.OffsetsColumn;
import de.julielab.jcore.consumer.entityevaluator.SentenceIdColumn;
import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.utility.JCoReAnnotationIndexMerger;
import de.julielab.jcore.utility.index.Comparators;
import de.julielab.jcore.utility.index.IndexTermGenerator;
import de.julielab.jcore.utility.index.JCoReTreeMapAnnotationIndex;
import de.julielab.jcore.utility.index.TermGenerators;
import java.io.BufferedWriter;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.NavigableMap;
import java.util.Optional;
import java.util.Set;
import java.util.TreeMap;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.TypeSystem;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.jcas.tcas.Annotation;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * UIMA consumer that writes one tab-separated record per entity annotation to an output file.
 * <p>
 * Which values make up a record is configured through {@link #PARAM_OUTPUT_COLUMNS} and
 * {@link #PARAM_COLUMN_DEFINITIONS}. Records are buffered in memory while documents are processed and
 * are written out in {@link #batchProcessComplete()} and {@link #collectionProcessComplete()}.
 */
public class EntityEvaluatorConsumer
extends JCasAnnotator_ImplBase {
    // Names of the predefined columns.
    public static final String DOCUMENT_ID_COLUMN = "DocumentId";
    public static final String SENTENCE_ID_COLUMN = "SentenceId";
    public static final String OFFSETS_COLUMN = "Offsets";
    // Names of the configuration parameters.
    public static final String PARAM_OUTPUT_COLUMNS = "OutputColumns";
    public static final String PARAM_COLUMN_DEFINITIONS = "ColumnDefinitions";
    public static final String PARAM_TYPE_PREFIX = "TypePrefix";
    public static final String PARAM_ENTITY_TYPES = "EntityTypes";
    public static final String PARAM_FEATURE_FILTERS = "FeatureFilters";
    public static final String PARAM_OFFSET_MODE = "OffsetMode";
    public static final String PARAM_OFFSET_SCOPE = "OffsetScope";
    public static final String PARAM_OUTPUT_FILE = "OutputFile";
    private static final Logger log = LoggerFactory.getLogger(EntityEvaluatorConsumer.class);
    @ConfigurationParameter(name=PARAM_OUTPUT_COLUMNS, description="A list of column names that are either defined with the parameter ColumnDefinitions or one of 'DocumentId', 'SentenceId' or 'Offsets'. This list determines the set and the order of columns that are written into the output file in a tab-separated manner.")
    private String[] outputColumnNamesArray;
    @ConfigurationParameter(name=PARAM_COLUMN_DEFINITIONS, description="Custom definitions of output columns. Predefined columns are 'DocumentId', 'SentenceId' and 'Offsets'. The first two may be overwritten by a custom definition using their exact name. A column definition consists of the name of the column, the type of the annotation from which the values for this column should be derived, and a feature path pointing to the value. A single column definition may refer to multiple, different annotation types with their own feature path. Annotation types that should use the same feature path are separated by a comma. The sets of annotation types where each set shared one feature path are separated by a semicolon. Example: 'entityid:Chemical,Gene=/registryNumber;Disease=/specificType'. In this example, the column named 'entityid' will list the IDs of annotations of types 'Chemical', 'Gene' and 'Disease'. For the first two, the feature 'registryNumber' will be employed, for the latter the feature 'specificType'. The annotation type names will be resolved against the 'TypePrefix' parameter, if specified. The built-in feature path functions 'coveredText()' and 'typeName()' are available. For example, 'type:Gene=/:typeName()' (note the colon preceding the built-in function) will output the fully qualified name of the Gene type.")
    private String[] columnDefinitionDescriptions;
    @ConfigurationParameter(name=PARAM_ENTITY_TYPES, mandatory=false, description="Optional. A list of entity types for which an output should be created. If all desired types are already mentioned in the 'ColumnDefinitions' parameter, this parameter can be left empty.")
    private String[] entityTypeStrings;
    @ConfigurationParameter(name=PARAM_OFFSET_MODE, mandatory=false, description="Optional. Determines the kind of offset printed out by the component for each entity. Supported are CharacterSpan and NonWsCharacters. The first uses the common UIMA character span offsets. The second counts only the non-whitespace characters for the offsets. This last format is used, for example, by the BioCreative 2 Gene Mention task data. Default is CharacterSpan.")
    private OffsetMode offsetMode;
    @ConfigurationParameter(name=PARAM_OFFSET_SCOPE, mandatory=false, description="Optional. 'Document' or 'Sentence'. Defaults to Document.")
    private OffsetScope offsetScope;
    @ConfigurationParameter(name=PARAM_TYPE_PREFIX, mandatory=false, description="Optional. If an annotation type name given in one of the 'ColumnDefinitions' or 'EntityTypes' can not be found, it is searched with this prefix. Thus, for JCoRe the prefix 'de.julielab.jcore.types' will cover all annotation types and make the other parameter values briefer.")
    private String typePrefix;
    @ConfigurationParameter(name=PARAM_FEATURE_FILTERS, mandatory=false, description="Optional. Only lets those entities contribute to the output file that fulfill the given feature value. The syntax is <type>:<feature path>=<value>")
    private String[] featureFilterDefinitions;
    @ConfigurationParameter(name=PARAM_OUTPUT_FILE, description="Output file to which all entity information is written in the format\ndocId EGID begin end confidence\nWhere the fields are separated by tab stops. If the file name ends with .gz, the output file will automatically be gzipped.")
    private String outputFilePath;
    /** Names of the built-in columns; filled in {@link #initialize(UimaContext)}. */
    private final Set<String> predefinedColumnNames = new HashSet<>();
    /** The configured output column names in output order. */
    private LinkedHashSet<String> outputColumnNames;
    /** All known columns by name; {@code null} until the first document has been seen. */
    private LinkedHashMap<String, Column> columns;
    /** The annotation types for which records are created. The elements are {@link Type} instances. */
    private LinkedHashSet<Object> entityTypes = new LinkedHashSet<>();
    private List<FeatureValueFilter> featureFilters;
    private File outputFile;
    /** Records collected since the last call to {@link #appendEntityRecordsToFile()}. */
    private final List<String[]> entityRecords = new ArrayList<>();
    private BufferedWriter bw;

    /**
     * Creates a map from character positions to the number of whitespace characters that occur
     * before that position.
     * <p>
     * The map always contains the entry {@code 0 -> 0}. Apart from that, it only contains an entry
     * for each position that directly follows a whitespace character.
     *
     * @param input the text to analyze; {@code null} is treated like the empty string
     * @return the position-to-whitespace-count map, never {@code null}
     */
    public static NavigableMap<Integer, Integer> createNumWsMap(String input) {
        TreeMap<Integer, Integer> map = new TreeMap<>();
        map.put(0, 0);
        if (input == null) {
            // A CAS without document text has no whitespace to account for.
            return map;
        }
        int numWs = 0;
        boolean lastCharWasWs = false;
        for (int i = 0; i < input.length(); ++i) {
            if (lastCharWasWs) {
                map.put(i, numWs);
            }
            lastCharWasWs = Character.isWhitespace(input.charAt(i));
            if (lastCharWasWs) {
                ++numWs;
            }
        }
        return map;
    }

    /**
     * Resolves a possibly abbreviated type name against the type system.
     * <p>
     * Names without a dot are first looked up with the prefix prepended and then as given. Names
     * containing a dot are first looked up as given and then with the prefix prepended. A
     * {@code null} or empty prefix is ignored.
     *
     * @param typeName   the type name as given in the configuration
     * @param typePrefix the optional type name prefix, may be {@code null}
     * @param ts         the type system to search
     * @return the resolved type, never {@code null}
     * @throws IllegalArgumentException if none of the candidate names denotes a type
     */
    public static Type findType(String typeName, String typePrefix, TypeSystem ts) {
        boolean hasPrefix = typePrefix != null && !typePrefix.isEmpty();
        boolean qualified = typeName.contains(".");
        List<String> candidates = new ArrayList<>(2);
        if (hasPrefix && !qualified) {
            candidates.add(typePrefix + "." + typeName);
        }
        candidates.add(typeName);
        if (hasPrefix && qualified) {
            candidates.add(typePrefix + "." + typeName);
        }
        for (String candidate : candidates) {
            Type type = ts.getType(candidate);
            if (type != null) {
                return type;
            }
        }
        throw new IllegalArgumentException("The annotation type " + typeName + " was not found in the type system. The following names have been tried without success: " + candidates);
    }

    /**
     * Registers the offsets column for the current document if offsets are part of the output.
     *
     * @param aJCas the current document
     * @throws IllegalArgumentException if the offset scope is unsupported or if sentence-relative
     *                                  offsets are requested without a sentence ID column
     */
    private void addOffsetsColumn(JCas aJCas) {
        if (!this.outputColumnNames.contains(OFFSETS_COLUMN)) {
            // Columns are only ever read through the output column names, nothing to prepare.
            return;
        }
        OffsetsColumn offsetColumn;
        if (this.offsetScope == OffsetScope.Document) {
            if (this.offsetMode == OffsetMode.NonWsCharacters) {
                NavigableMap<Integer, Integer> numWsMap = EntityEvaluatorConsumer.createNumWsMap(aJCas.getDocumentText());
                offsetColumn = new OffsetsColumn(numWsMap, this.offsetMode);
            } else {
                offsetColumn = new OffsetsColumn(this.offsetMode);
            }
        } else if (this.offsetScope == OffsetScope.Sentence) {
            Column sentenceColumn = this.columns.get(SENTENCE_ID_COLUMN);
            if (!(sentenceColumn instanceof SentenceIdColumn)) {
                // The sentence index is only built when the sentence ID column is an output column.
                throw new IllegalArgumentException("The offset scope " + this.offsetScope + " requires the column \"" + SENTENCE_ID_COLUMN + "\" to be one of the output columns.");
            }
            offsetColumn = new OffsetsColumn(((SentenceIdColumn)sentenceColumn).getSentenceIndex(), this.offsetMode);
        } else {
            throw new IllegalArgumentException("Unsupported offset scope " + this.offsetScope);
        }
        this.columns.put(OFFSETS_COLUMN, offsetColumn);
    }

    /**
     * Registers the document ID column if it is part of the output. A custom definition of the
     * column is used if present, otherwise the {@code docId} feature of {@link Header} is used.
     *
     * @param aJCas used to obtain the type system
     * @throws CASException propagated from the column creation
     */
    private void addDocumentIdColumn(JCas aJCas) throws CASException {
        if (!this.outputColumnNames.contains(DOCUMENT_ID_COLUMN)) {
            return;
        }
        Column c = this.columns.get(DOCUMENT_ID_COLUMN);
        if (c == null) {
            c = new Column("DocumentId:" + Header.class.getCanonicalName() + "=/docId", null, aJCas.getTypeSystem());
        }
        this.columns.put(DOCUMENT_ID_COLUMN, new DocumentIdColumn(c));
    }

    /**
     * Registers the sentence ID column for the current document if it is part of the output. This
     * indexes the sentence annotations of the document and is thus done for each document.
     *
     * @param aJCas the current document
     * @throws CASException propagated from the column creation
     */
    private void addSentenceIdColumn(JCas aJCas) throws CASException {
        if (!this.outputColumnNames.contains(SENTENCE_ID_COLUMN)) {
            return;
        }
        Column c = this.columns.get(SENTENCE_ID_COLUMN);
        if (c == null) {
            c = new Column("SentenceId:" + Sentence.class.getCanonicalName() + "=/id", null, aJCas.getTypeSystem());
        }
        Column docIdColumn = this.columns.get(DOCUMENT_ID_COLUMN);
        String documentId = null;
        if (docIdColumn != null) {
            documentId = docIdColumn.getValue(aJCas.getDocumentAnnotationFs());
        }
        Type sentenceType = c.getSingleType();
        JCoReTreeMapAnnotationIndex sentenceIndex = new JCoReTreeMapAnnotationIndex(Comparators.longOverlapComparator(), (IndexTermGenerator)TermGenerators.longOffsetTermGenerator(), (IndexTermGenerator)TermGenerators.longOffsetTermGenerator());
        sentenceIndex.index(aJCas, sentenceType);
        c = new SentenceIdColumn(documentId, c, (JCoReTreeMapAnnotationIndex<Long, ? extends Annotation>)sentenceIndex);
        this.columns.put(SENTENCE_ID_COLUMN, c);
    }

    /**
     * Writes all buffered records to the output file, one tab-separated line per record, and
     * clears the buffer afterwards.
     * <p>
     * Writing is best effort: a record that cannot be written is logged and skipped.
     */
    protected void appendEntityRecordsToFile() {
        for (String[] entityRecord : this.entityRecords) {
            try {
                this.bw.write(String.join("\t", entityRecord) + "\n");
            }
            catch (IOException e) {
                log.error("Could not write an entity record to file {}", this.outputFile, e);
            }
        }
        this.entityRecords.clear();
    }

    /**
     * Checks that a column exists for the given name.
     *
     * @param columnName the name of the column to check
     * @throws IllegalArgumentException if no column with that name is known
     */
    private void assertColumnDefined(String columnName) {
        if (this.columns.get(columnName) == null) {
            throw new IllegalArgumentException("The column \"" + columnName + "\" was set for output but was not defined.");
        }
    }

    /**
     * Checks that a mandatory configuration parameter has a value.
     *
     * @param parameterName the name of the parameter, used for the error message
     * @param value         the value read from the UIMA context
     * @throws ResourceInitializationException if the value is {@code null}
     */
    private static void assertParameterSet(String parameterName, Object value) throws ResourceInitializationException {
        if (value == null) {
            throw new ResourceInitializationException(new IllegalArgumentException("The mandatory parameter \"" + parameterName + "\" is not set."));
        }
    }

    /** Writes the records buffered so far to the output file. */
    @Override
    public void batchProcessComplete() throws AnalysisEngineProcessException {
        super.batchProcessComplete();
        log.debug("Batch completed. Writing {} entity records to file {}.", this.entityRecords.size(), this.outputFile.getName());
        this.appendEntityRecordsToFile();
    }

    /**
     * Writes the remaining buffered records and closes the output file.
     *
     * @throws AnalysisEngineProcessException if the output file cannot be closed
     */
    @Override
    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        super.collectionProcessComplete();
        log.info("Collection completed. Writing {} entity records to file {}.", this.entityRecords.size(), this.outputFile.getName());
        this.appendEntityRecordsToFile();
        try {
            this.bw.close();
        }
        catch (IOException e) {
            throw new AnalysisEngineProcessException(e);
        }
    }

    /**
     * Reads the configuration parameters and opens the output file. An already existing output
     * file is deleted, missing parent directories are created.
     *
     * @throws ResourceInitializationException if a mandatory parameter is missing or the output
     *                                         file cannot be opened
     */
    @Override
    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        super.initialize(aContext);
        this.outputColumnNamesArray = (String[])aContext.getConfigParameterValue(PARAM_OUTPUT_COLUMNS);
        this.columnDefinitionDescriptions = (String[])aContext.getConfigParameterValue(PARAM_COLUMN_DEFINITIONS);
        this.typePrefix = (String)aContext.getConfigParameterValue(PARAM_TYPE_PREFIX);
        this.featureFilterDefinitions = (String[])Optional.ofNullable(aContext.getConfigParameterValue(PARAM_FEATURE_FILTERS)).orElse(new String[0]);
        this.outputFilePath = (String)aContext.getConfigParameterValue(PARAM_OUTPUT_FILE);
        this.entityTypeStrings = (String[])aContext.getConfigParameterValue(PARAM_ENTITY_TYPES);
        String offsetModeStr = (String)aContext.getConfigParameterValue(PARAM_OFFSET_MODE);
        String offsetScopeStr = (String)aContext.getConfigParameterValue(PARAM_OFFSET_SCOPE);

        // Fail with a clear message instead of a NullPointerException further down.
        assertParameterSet(PARAM_OUTPUT_COLUMNS, this.outputColumnNamesArray);
        assertParameterSet(PARAM_COLUMN_DEFINITIONS, this.columnDefinitionDescriptions);
        assertParameterSet(PARAM_OUTPUT_FILE, this.outputFilePath);

        this.outputColumnNames = Stream.of(this.outputColumnNamesArray).collect(Collectors.toCollection(LinkedHashSet::new));
        this.offsetMode = offsetModeStr == null ? OffsetMode.CharacterSpan : OffsetMode.valueOf(offsetModeStr);
        if (offsetScopeStr != null) {
            this.offsetScope = OffsetScope.valueOf(offsetScopeStr);
        } else {
            // Without explicit configuration the scope follows the presence of the sentence ID column.
            this.offsetScope = this.outputColumnNames.contains(SENTENCE_ID_COLUMN) ? OffsetScope.Sentence : OffsetScope.Document;
        }

        this.outputFile = new File(this.outputFilePath);
        if (this.outputFile.exists()) {
            log.warn("File \"{}\" is overridden.", this.outputFile.getAbsolutePath());
            if (!this.outputFile.delete()) {
                log.warn("File \"{}\" could not be deleted.", this.outputFile.getAbsolutePath());
            }
        }
        try {
            File parentDirectory = this.outputFile.getParentFile();
            if (parentDirectory != null && !parentDirectory.exists()) {
                parentDirectory.mkdirs();
            }
            this.bw = FileUtilities.getWriterToFile(this.outputFile);
        }
        catch (IOException e) {
            throw new ResourceInitializationException(e);
        }

        this.predefinedColumnNames.add(DOCUMENT_ID_COLUMN);
        this.predefinedColumnNames.add(SENTENCE_ID_COLUMN);
        this.predefinedColumnNames.add(OFFSETS_COLUMN);

        log.info("{}: {}", PARAM_OUTPUT_COLUMNS, this.outputColumnNames);
        log.info("{}: {}", PARAM_COLUMN_DEFINITIONS, (Object)this.columnDefinitionDescriptions);
        log.info("{}: {}", PARAM_FEATURE_FILTERS, (Object)this.featureFilterDefinitions);
        log.info("{}: {}", PARAM_ENTITY_TYPES, (Object)this.entityTypeStrings);
        log.info("{}: {}", PARAM_TYPE_PREFIX, this.typePrefix);
        log.info("{}: {}", PARAM_OUTPUT_FILE, this.outputFilePath);
        log.info("{}: {}", PARAM_OFFSET_MODE, this.offsetMode);
        log.info("{}: {}", PARAM_OFFSET_SCOPE, this.offsetScope);
    }

    /**
     * Creates one record for each annotation of the configured entity types that passes the feature
     * filters and buffers it for output.
     * <p>
     * An annotation is skipped only if feature filters are configured and all of them are
     * contradicted by the annotation. Columns, entity types and feature filters are set up when
     * the first document is processed because they require the type system.
     *
     * @param aJCas the document to create records for
     * @throws AnalysisEngineProcessException if the set up or the annotation iteration fails
     */
    @Override
    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        try {
            if (this.columns == null) {
                this.initializeColumns(aJCas);
            }
            this.addSentenceIdColumn(aJCas);
            this.addOffsetsColumn(aJCas);
            JCoReAnnotationIndexMerger indexMerger = new JCoReAnnotationIndexMerger(this.entityTypes, true, null, aJCas);
            while (indexMerger.incrementAnnotation()) {
                TOP a = indexMerger.getAnnotation();
                int contradictions = 0;
                for (FeatureValueFilter filter : this.featureFilters) {
                    if (filter.contradictsFeatureFilter(a)) {
                        ++contradictions;
                    }
                }
                if (!this.featureFilters.isEmpty() && contradictions == this.featureFilters.size()) {
                    continue;
                }
                int colIndex = 0;
                String[] record = new String[this.outputColumnNames.size()];
                for (String outputColumnName : this.outputColumnNames) {
                    this.assertColumnDefined(outputColumnName);
                    Column column = this.columns.get(outputColumnName);
                    record[colIndex++] = this.removeLineBreak(column.getValue(a));
                }
                this.entityRecords.add(record);
            }
        }
        catch (ClassNotFoundException | CASException e) {
            // Report the failure to the framework instead of silently producing partial output.
            throw new AnalysisEngineProcessException(e);
        }
        finally {
            // The columns are null if the one-time set up failed.
            if (this.columns != null) {
                for (Column column : this.columns.values()) {
                    column.reset();
                }
            }
        }
    }

    /**
     * One-time set up of the configured columns, the entity types and the feature filters.
     * <p>
     * If the set up fails, {@link #columns} is reset to {@code null} so that a later call does not
     * work on partially initialized state.
     *
     * @param aJCas used to obtain the type system
     * @throws CASException           propagated from the column creation
     * @throws ClassNotFoundException declared so that callers handle both set up failure kinds uniformly
     */
    private void initializeColumns(JCas aJCas) throws CASException, ClassNotFoundException {
        TypeSystem ts = aJCas.getTypeSystem();
        boolean success = false;
        this.columns = new LinkedHashMap<>();
        try {
            for (String definition : this.columnDefinitionDescriptions) {
                Column column = new Column(definition, this.typePrefix, ts);
                this.columns.put(column.getName(), column);
            }
            // Entity types are all types of the custom columns plus the explicitly configured ones.
            LinkedHashSet<Object> types = new LinkedHashSet<>();
            this.columns.values().stream()
                    .filter(c -> !this.predefinedColumnNames.contains(c.getName()))
                    .flatMap(c -> c.getTypes().stream())
                    .forEach(types::add);
            if (this.entityTypeStrings != null) {
                for (String name : this.entityTypeStrings) {
                    types.add(EntityEvaluatorConsumer.findType(name, this.typePrefix, ts));
                }
            }
            this.removeSubsumedTypes(types, ts);
            this.entityTypes = types;
            this.featureFilters = Stream.of(this.featureFilterDefinitions).map(d -> new FeatureValueFilter(d, this.typePrefix, ts)).collect(Collectors.toList());
            this.addDocumentIdColumn(aJCas);
            success = true;
        }
        finally {
            if (!success) {
                this.columns = null;
            }
        }
    }

    /**
     * Replaces each newline character by a single space so that a value does not break the
     * one-record-per-line output format.
     *
     * @param text the value to clean, may be {@code null}
     * @return the cleaned value or {@code null} if the input was {@code null}
     */
    private String removeLineBreak(String text) {
        if (text == null) {
            return null;
        }
        return text.replace('\n', ' ');
    }

    /**
     * Removes each type from the given set that is subsumed by another type of the set.
     *
     * @param entityTypes the set to reduce in place; all elements must be {@link Type} instances
     * @param ts          the type system used for the subsumption check
     */
    private void removeSubsumedTypes(LinkedHashSet<Object> entityTypes, TypeSystem ts) {
        // Iterate over a snapshot because the set is modified while the reference types are visited.
        List<Type> referenceTypes = entityTypes.stream().map(Type.class::cast).collect(Collectors.toList());
        for (Type refType : referenceTypes) {
            entityTypes.removeIf(candidate -> !refType.equals(candidate) && ts.subsumes(refType, (Type)candidate));
        }
    }

    /** The text unit that offsets are relative to. */
    public static enum OffsetScope {
        Document,
        Sentence;

    }

    /** The way offsets are counted, see the description of the OffsetMode parameter. */
    public static enum OffsetMode {
        CharacterSpan,
        NonWsCharacters;

    }
}

