/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.ae.topicindexing;

import cc.mallet.topics.TopicAssignment;
import cc.mallet.types.LabelSequence;
import de.julielab.jcore.ae.topicindexing.ITopicModelProvider;
import de.julielab.jcore.types.AutoDescriptor;
import de.julielab.jcore.types.DocumentTopics;
import de.julielab.jcore.utility.JCoReTools;
import de.julielab.topicmodeling.businessobjects.Model;
import de.julielab.topicmodeling.businessobjects.Topic;
import de.julielab.topicmodeling.services.MalletTopicModeling;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import org.apache.commons.configuration2.XMLConfiguration;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.FeatureStructure;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ExternalResource;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.fit.descriptor.TypeCapability;
import org.apache.uima.fit.util.JCasUtil;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.cas.DoubleArray;
import org.apache.uima.jcas.cas.FSArray;
import org.apache.uima.jcas.cas.IntegerArray;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.resource.ResourceInitializationException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ResourceMetaData(name="JCoRe Topic Indexer", description="This component assigns topics relative to a given topic model to the encoutered documents. The topic model is one trained by the julielab-topic-modeling project.")
@TypeCapability(inputs={"de.julielab.jcore.types.Sentence", "de.julielab.jcore.types.Token", "de.julielab.jcore.types.PennBioIEPOSTag", "de.julielab.jcore.types.Lemma"}, outputs={"de.julielab.jcore.types.ManualDescriptor", "de.julielab.jcore.types.DocumentTopics"})
public class TopicIndexer
extends JCasAnnotator_ImplBase {
    public static final String PARAM_TOPIC_MODEL_CONFIG = "TopicModelConfig";
    public static final String RESOURCE_KEY_MODEL_FILE_NAME = "TopicModelFile";
    public static final String PARAM_NUM_DISPLAYED_TOPIC_WORDS = "DisplayedTopicWords";
    public static final String PARAM_STORE_IN_MODEL_INDEX = "StoreInModelIndex";
    private static final Logger log = LoggerFactory.getLogger(TopicIndexer.class);
    MalletTopicModeling tm;
    Model savedModel;
    XMLConfiguration xmlConfig;
    @ConfigurationParameter(name="TopicModelConfig", description="The julielab-topic-modeling XML configuration that specifies the 'infer' element.")
    private String model_config;
    @ConfigurationParameter(name="DisplayedTopicWords", description="The number of words per inferred topic to be written into the CAS. This is not an exhaustive list of topic words but just the selection of the most important words for each topic.")
    private int displayedTopicWords;
    @ConfigurationParameter(name="StoreInModelIndex", description="Whether or not to store the inferred labels back into the model. This will cause copies of the model to be written next to the original model file. For each run pipeline there will be one such copy that contains the inferred topic for all documents that this pipeline has processed. In case of a single pipeline, all documents will end up in one model copy.")
    private boolean toModelIndex;
    @ExternalResource(key="TopicModelFile", description="The topic model pretrained by the julielab-topic-modeling software.")
    private ITopicModelProvider topicModelProvider;
    private Object[][] topWords;

    public void initialize(UimaContext aContext) throws ResourceInitializationException {
        try {
            this.model_config = (String)aContext.getConfigParameterValue(PARAM_TOPIC_MODEL_CONFIG);
            this.toModelIndex = (Boolean)aContext.getConfigParameterValue(PARAM_STORE_IN_MODEL_INDEX);
            this.displayedTopicWords = (Integer)aContext.getConfigParameterValue(PARAM_NUM_DISPLAYED_TOPIC_WORDS);
            this.tm = new MalletTopicModeling();
            this.xmlConfig = this.tm.loadConfig(this.model_config);
            this.topicModelProvider = (ITopicModelProvider)aContext.getResourceObject(RESOURCE_KEY_MODEL_FILE_NAME);
            this.savedModel = this.topicModelProvider.getModel();
            if (this.displayedTopicWords > 0) {
                this.topWords = this.topicModelProvider.getTopWords(this.displayedTopicWords);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    public void process(JCas aJCas) throws AnalysisEngineProcessException {
        try {
            Map topicMap;
            String modelID = this.savedModel.modelId;
            String modelVersion = this.savedModel.modelVersion;
            String docId = JCoReTools.getDocId((JCas)aJCas);
            if (!this.savedModel.pubmedIdModelId.containsKey(docId)) {
                topicMap = this.tm.inferLabel(aJCas, this.savedModel, this.xmlConfig);
            } else {
                LabelSequence ts = ((TopicAssignment)this.savedModel.malletModel.data.get((int)0)).topicSequence;
                double[] prbs = this.savedModel.malletModel.getTopicProbabilities(ts);
                ArrayList<Topic> topicList = new ArrayList<Topic>(prbs.length);
                for (int i = 0; i < prbs.length; ++i) {
                    Topic topic = new Topic();
                    topic.id = i;
                    topic.probability = prbs[i];
                    topic.modelId = this.savedModel.modelId;
                    topic.modelVersion = this.savedModel.modelVersion;
                    topicList.add(topic);
                }
                topicMap = Collections.singletonMap(docId, topicList);
            }
            List docTopics = (List)topicMap.get(docId);
            DoubleArray topicWeights = new DoubleArray(aJCas, docTopics.size());
            IntegerArray topicIds = new IntegerArray(aJCas, docTopics.size());
            StringArray topicWords = new StringArray(aJCas, this.displayedTopicWords);
            for (int i = 0; i < docTopics.size(); ++i) {
                double topicWeight = ((Topic)docTopics.get((int)i)).probability;
                int topicId = ((Topic)docTopics.get((int)i)).id;
                for (int k = 0; this.displayedTopicWords > 0 && k < Math.min(this.displayedTopicWords, this.topWords[topicId].length); ++k) {
                    String topicWord = (String)this.topWords[topicId][k];
                    topicWords.set(k, topicWord);
                }
                topicWeights.set(i, topicWeight);
                topicIds.set(i, topicId);
            }
            DocumentTopics documentTopics = new DocumentTopics(aJCas);
            documentTopics.setIDs(topicIds);
            documentTopics.setWeights(topicWeights);
            documentTopics.setModelID(modelID);
            if (modelVersion != "") {
                documentTopics.setModelVersion(modelVersion);
            }
            documentTopics.setTopicWords(topicWords);
            aJCas.addFsToIndexes((FeatureStructure)documentTopics);
            log.trace("Labeled document " + docId);
            if (this.toModelIndex) {
                AutoDescriptor autoDesc;
                ArrayList<Topic> topics = new ArrayList<Topic>();
                for (int i = 0; i < topicWeights.size(); ++i) {
                    Topic topic = new Topic();
                    topic.probability = topicWeights.get(i);
                    topic.id = topicIds.get(i);
                    topic.modelId = modelID;
                    topic.modelVersion = modelVersion;
                    topics.add(topic);
                }
                this.topicModelProvider.addToIndex(docId, topics);
                log.trace("Indexed document: " + docId);
                Collection autoDescs = JCasUtil.select((JCas)aJCas, AutoDescriptor.class);
                if (!autoDescs.isEmpty()) {
                    autoDesc = (AutoDescriptor)autoDescs.iterator().next();
                } else {
                    autoDesc = new AutoDescriptor(aJCas);
                    autoDesc.addToIndexes();
                }
                FSArray dt = autoDesc.getDocumentTopics();
                dt = JCoReTools.addToFSArray((FSArray)dt, (FeatureStructure)documentTopics);
                autoDesc.setDocumentTopics(dt);
            }
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }

    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        super.collectionProcessComplete();
        try {
            this.topicModelProvider.saveModel();
        }
        catch (IOException e) {
            throw new AnalysisEngineProcessException((Throwable)e);
        }
    }
}

