/*
 * Decompiled with CFR 0.152.
 */
package de.datexis.nel;

import com.google.common.collect.Lists;
import de.datexis.annotator.Annotator;
import de.datexis.common.Timer;
import de.datexis.encoder.Encoder;
import de.datexis.index.ArticleIndex;
import de.datexis.index.ArticleRef;
import de.datexis.index.impl.VectorArticleIndex;
import de.datexis.model.Annotation;
import de.datexis.model.Document;
import de.datexis.model.Sentence;
import de.datexis.nel.NamedEntityAnnotation;
import de.datexis.ner.MentionAnnotation;
import de.datexis.ner.MentionAnnotator;
import de.datexis.preprocess.DocumentFactory;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.TreeMap;
import java.util.stream.Collectors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Annotator that links entity mentions in documents to articles of a knowledge base.
 *
 * <p>Annotation runs in two steps: the wrapped {@link MentionAnnotator} is invoked on the
 * documents first, then every {@link MentionAnnotation} found in a document is looked up in
 * the {@link ArticleIndex} and a {@link NamedEntityAnnotation} carrying the best candidate
 * (if any) is attached to the document.
 */
public class NamedEntityAnnotator extends Annotator {
    protected static final Logger log = LoggerFactory.getLogger(NamedEntityAnnotator.class);

    /** Annotator invoked on the documents before disambiguation. */
    protected final MentionAnnotator ner;

    /** Knowledge base that is queried for candidate articles. */
    protected final ArticleIndex index;

    /** Optional encoder; stored for subclasses, not used by this class itself. May be {@code null}. */
    protected final Encoder encoder;

    /**
     * Creates an annotator with an explicit encoder.
     *
     * @param recognize    annotator used to recognize mentions
     * @param search       index that is queried for candidate articles
     * @param disambiguate encoder stored in {@link #encoder}; may be {@code null}
     */
    public NamedEntityAnnotator(MentionAnnotator recognize, ArticleIndex search, Encoder disambiguate) {
        this.ner = recognize;
        this.index = search;
        this.encoder = disambiguate;
    }

    /**
     * Creates an annotator without an encoder ({@link #encoder} is {@code null}).
     *
     * @param recognize annotator used to recognize mentions
     * @param search    index that is queried for candidate articles
     */
    public NamedEntityAnnotator(MentionAnnotator recognize, ArticleIndex search) {
        this(recognize, search, null);
    }

    /**
     * Builds a document from raw text and annotates it.
     *
     * @param text the text to annotate
     * @return the created document; returned without annotation if it contains no tokens
     */
    public Document annotate(String text) {
        log.trace("Annotating document: {}", text);
        Document doc = DocumentFactory.fromText(text);
        if (doc.countTokens() == 0) {
            // Nothing to recognize or link in an empty document.
            return doc;
        }
        this.annotate(doc);
        return doc;
    }

    /**
     * Annotates a single document by delegating to {@link #annotate(Collection)}.
     *
     * @param doc the document to annotate
     * @return the same document instance that was passed in
     */
    public Document annotate(Document doc) {
        this.annotate(Lists.newArrayList(doc));
        return doc;
    }

    /**
     * Annotates all given documents: runs the mention annotator on the whole collection, then
     * calls {@link #createSignature(Document)} and
     * {@link #disambiguateMentions(Document, Annotation.Source)} (with
     * {@link Annotation.Source#PRED}) for each document. Split timings for both phases are
     * logged at debug level.
     *
     * @param docs the documents to annotate
     */
    public void annotate(Collection<Document> docs) {
        Timer timer = new Timer();
        timer.start();
        timer.resetSplit();
        this.ner.annotate(docs);
        timer.setSplit("NER");
        for (Document doc : docs) {
            this.createSignature(doc);
            this.disambiguateMentions(doc, Annotation.Source.PRED);
        }
        timer.setSplit("NED");
        timer.stop();
        log.debug("Annotated {} documends [{} NER, {} NED, {} total]",
                docs.size(), timer.get("NER"), timer.get("NED"), timer.get());
    }

    /**
     * @return the article index this annotator queries for candidates
     */
    public ArticleIndex getKnowlegeBase() {
        return this.index;
    }

    /**
     * Runs the mention annotator on a single document.
     *
     * @param doc the document to process
     */
    protected void recognizeMentions(Document doc) {
        this.ner.annotate(doc);
    }

    /**
     * Hook called once per document before disambiguation. The implementation in this class
     * does nothing; subclasses may override it.
     *
     * @param doc the document about to be disambiguated
     */
    protected void createSignature(Document doc) {
    }

    /**
     * Attaches a {@link NamedEntityAnnotation} to the document for every
     * {@link MentionAnnotation} of the given source.
     *
     * <p>For each mention the index is asked for a single best candidate. A
     * {@link VectorArticleIndex} is queried with the mention text plus the tokenized sentence
     * at the mention's begin position; any other index is queried by mention text only. When
     * a candidate is returned, its title, id and URL are copied to the new annotation;
     * otherwise the annotation is added without reference data. The created annotations are
     * always marked as {@link Annotation.Source#PRED}, regardless of {@code source}.
     *
     * @param doc    the document whose mentions are linked; receives the new annotations
     * @param source the source of the mention annotations to read
     */
    public void disambiguateMentions(Document doc, Annotation.Source source) {
        // Materialize the mentions first; annotations are added to the same document inside
        // the loop (presumably unsafe while the stream is still being consumed).
        List<MentionAnnotation> mentions = doc.streamAnnotations(source, MentionAnnotation.class)
                .collect(Collectors.toList());
        for (MentionAnnotation mention : mentions) {
            NamedEntityAnnotation entity = new NamedEntityAnnotation(mention, new ArrayList<ArticleRef>());
            String entityMention = mention.getText();
            // If no sentence covers the mention start, fall back to the mention text as
            // context instead of failing on an empty Optional.
            String entityContext = doc.getSentenceAtPosition(mention.getBegin())
                    .map(Sentence::toTokenizedString)
                    .orElse(entityMention);
            List<ArticleRef> candidates;
            if (this.index instanceof VectorArticleIndex) {
                candidates = ((VectorArticleIndex) this.index).querySimilarArticles(entityMention, entityContext, 1);
            } else {
                candidates = this.index.queryNames(entityMention, 1);
            }
            if (!candidates.isEmpty()) {
                ArticleRef best = candidates.get(0);
                entity.setRefName(best.getTitle());
                entity.setRefId(best.getId());
                entity.setRefUrl(best.getUrl());
            }
            entity.setSource(Annotation.Source.PRED);
            doc.addAnnotation(entity);
        }
    }
}

