package org.apache.stanbol.enhancer.engine.topic;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.Dictionary;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import org.apache.clerezza.rdf.core.Graph;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.clerezza.rdf.utils.GraphNode;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.SolrServerException;
import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
import org.apache.solr.client.solrj.request.UpdateRequest;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.client.solrj.util.ClientUtils;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.common.SolrDocumentList;
import org.apache.solr.common.SolrInputDocument;
import org.apache.stanbol.commons.solr.managed.ManagedSolrServer;
import org.apache.stanbol.commons.solr.utils.StreamQueryRequest;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.Chain;
import org.apache.stanbol.enhancer.servicesapi.ChainException;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.rdf.NamespaceEnum;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
import org.apache.stanbol.enhancer.topic.ConfiguredSolrCoreTracker;
import org.apache.stanbol.enhancer.topic.EmbeddedSolrHelper;
import org.apache.stanbol.enhancer.topic.UTCTimeStamper;
import org.apache.stanbol.enhancer.topic.api.Batch;
import org.apache.stanbol.enhancer.topic.api.BatchProcessor;
import org.apache.stanbol.enhancer.topic.api.ClassificationReport;
import org.apache.stanbol.enhancer.topic.api.ClassifierException;
import org.apache.stanbol.enhancer.topic.api.TopicClassifier;
import org.apache.stanbol.enhancer.topic.api.TopicSuggestion;
import org.apache.stanbol.enhancer.topic.api.training.Example;
import org.apache.stanbol.enhancer.topic.api.training.TrainingSet;
import org.apache.stanbol.enhancer.topic.api.training.TrainingSetException;
import org.apache.stanbol.enhancer.topic.training.SolrTrainingSet;
import org.apache.stanbol.entityhub.servicesapi.Entityhub;
import org.apache.stanbol.entityhub.servicesapi.EntityhubException;
import org.apache.stanbol.entityhub.servicesapi.model.Entity;
import org.apache.stanbol.entityhub.servicesapi.model.Representation;
import org.apache.stanbol.entityhub.servicesapi.model.Text;
import org.apache.stanbol.entityhub.servicesapi.site.SiteManager;
import org.osgi.framework.BundleContext;
import org.osgi.framework.InvalidSyntaxException;
import org.osgi.framework.ServiceReference;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.osgi.util.tracker.ServiceTracker;
import org.osgi.util.tracker.ServiceTrackerCustomizer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/stanbol/enhancer/engine/topic/TopicClassificationEngine.class */
public class TopicClassificationEngine extends ConfiguredSolrCoreTracker implements EnhancementEngine, ServiceProperties, TopicClassifier {
    // NOTE(review): not referenced in the visible part of this class; presumably the default
    // Solr core configuration archive - confirm against the rest of the file.
    public static final String DEFAULT_SOLR_CORE_CONFIG = "default-topic-model.solrindex.zip";
    // Values stored in the entry type field: "model" entries are the documents matched by the
    // MoreLikeThis query, "metadata" entries carry the concept hierarchy information.
    public static final String MODEL_ENTRY = "model";
    public static final String METADATA_ENTRY = "metadata";
    // Configuration property keys for the Solr core, accepted languages and engine order.
    public static final String SOLR_CORE = "org.apache.stanbol.enhancer.engine.topic.solrCore";
    public static final String SOLR_CORE_CONFIG = "org.apache.stanbol.enhancer.engine.topic.solrCoreConfig";
    public static final String LANGUAGES = "org.apache.stanbol.enhancer.engine.topic.languages";
    public static final String ORDER = "org.apache.stanbol.enhancer.engine.topic.order";
    // Each *_FIELD constant below is a configuration property key naming a Solr field; the
    // matching DEFAULT_* constant is the field name used by configure() when the key is absent.
    public static final String ENTRY_ID_FIELD = "org.apache.stanbol.enhancer.engine.topic.entryIdField";
    public static final String DEFAULT_ENTRY_ID_FIELD = "entry_id";
    public static final String ENTRY_TYPE_FIELD = "org.apache.stanbol.enhancer.engine.topic.entryTypeField";
    public static final String DEFAULT_ENTRY_TYPE_FIELD = "entry_type";
    // The "SIMILARTITY" spelling is part of the public constant name and is kept as-is.
    public static final String SIMILARTITY_FIELD = "org.apache.stanbol.enhancer.engine.topic.similarityField";
    public static final String DEFAULT_SIMILARTITY_FIELD = "classifier_features";
    public static final String CONCEPT_URI_FIELD = "org.apache.stanbol.enhancer.engine.topic.conceptUriField";
    public static final String DEFAULT_CONCEPT_URI_FIELD = "concept";
    public static final String BROADER_FIELD = "org.apache.stanbol.enhancer.engine.topic.broaderField";
    public static final String DEFAULT_BROADER_FIELD = "broader";
    public static final String PRIMARY_TOPIC_URI_FIELD = "org.apache.stanbol.enhancer.engine.topic.primaryTopicField";
    public static final String DEFAULT_PRIMARY_TOPIC_URI_FIELD = "primary_topic";
    public static final String MODEL_UPDATE_DATE_FIELD = "org.apache.stanbol.enhancer.engine.topic.modelUpdateDateField";
    public static final String DEFAULT_MODEL_UPDATE_DATE_FIELD = "last_update_dt";
    public static final String MODEL_EVALUATION_DATE_FIELD = "org.apache.stanbol.enhancer.engine.topic.modelEvaluationDateField";
    public static final String DEFAULT_MODEL_EVALUATION_DATE_FIELD = "last_evaluation_dt";
    public static final String MODEL_ENTRY_ID_FIELD = "org.apache.stanbol.enhancer.engine.topic.modelEntryIdField";
    public static final String DEFAULT_MODEL_ENTRY_ID_FIELD = "model_entry_id";
    public static final String PRECISION_FIELD = "org.apache.stanbol.enhancer.engine.topic.precisionField";
    public static final String DEFAULT_PRECISION_FIELD = "precision";
    public static final String RECALL_FIELD = "org.apache.stanbol.enhancer.engine.topic.recallField";
    public static final String DEFAULT_RECALL_FIELD = "recall";
    public static final String FALSE_POSITIVES_FIELD = "org.apache.stanbol.enhancer.engine.topic.falsePositivesField";
    public static final String DEFAULT_FALSE_POSITIVES_FIELD = "false_positives";
    public static final String FALSE_NEGATIVES_FIELD = "org.apache.stanbol.enhancer.engine.topic.falseNegativesField";
    public static final String DEFAULT_FALSE_NEGATIVES_FIELD = "false_negatives";
    public static final String POSITIVE_SUPPORT_FIELD = "org.apache.stanbol.enhancer.engine.topic.positiveSupportField";
    public static final String DEFAULT_POSITIVE_SUPPORT_FIELD = "positive_support";
    public static final String NEGATIVE_SUPPORT_FIELD = "org.apache.stanbol.enhancer.engine.topic.negativeSupportField";
    public static final String DEFAULT_NEGATIVE_SUPPORT_FIELD = "negative_support";
    // Configuration property key of the optional training set id; when set, activate() opens a
    // service tracker filtered on that id.
    public static final String TRAINING_SET_ID = "org.apache.stanbol.enhancer.engine.topic.trainingSetId";
    // Solr range expression; getRootConcepts() negates it on the broader field to select
    // entries that have no broader concept.
    public static final String SOLR_NON_EMPTY_FIELD = "[\"\" TO *]";
    // Entity sources queried by computeEnhancements() to find a label for a suggested concept:
    // the entityhub is asked first, the referenced site manager is the fallback.
    protected Entityhub entityhub;
    protected SiteManager referencedSiteManager;
    // Engine name read from the "stanbol.enhancer.engine.name" configuration property.
    protected String engineName;
    // Languages accepted by this engine as configured; acceptedLanguageSet is a HashSet copy
    // built in configure() and used for the membership checks.
    protected List<String> acceptedLanguages;
    private Set<String> acceptedLanguageSet;
    // Names of the Solr fields used by this engine, all assigned in configure().
    protected String similarityField;
    protected String conceptUriField;
    protected String broaderField;
    protected String primaryTopicUriField;
    protected String modelUpdateDateField;
    protected String modelEvaluationDateField;
    protected String precisionField;
    protected String recallField;
    protected TrainingSet trainingSet;
    // Opened in activate() when a training set id is configured, closed in deactivate().
    protected ServiceTracker trainingSetTracker;
    protected String trainingSetId;
    protected String entryIdField;
    protected String entryTypeField;
    protected String modelEntryIdField;
    protected String positiveSupportField;
    protected String negativeSupportField;
    protected String falsePositivesField;
    protected String falseNegativesField;
    protected ManagedSolrServer managedSolrServerDummy;
    // Set through configureEmbeddedSolrServerDir(File).
    private File embeddedSolrServerDir;
    // Evaluation server and its directory; deactivate() shuts the server's core container down
    // and deletes the directory.
    private EmbeddedSolrServer __evaluationServer;
    private File __evaluationServerDir;
    // Engine order applied by configure() when no order is present in the configuration.
    public static final Integer DEFAULT_ENGINE_ORDER = ServiceProperties.ORDERING_CONTENT_EXTRACTION;
    private static final Logger log = LoggerFactory.getLogger(TopicClassificationEngine.class);
    // Only plain text content parts are processed by this engine.
    public static final String PLAIN_TEXT_MIMETYPE = "text/plain";
    public static final Set<String> SUPPORTED_MIMETYPES = Collections.singleton(PLAIN_TEXT_MIMETYPE);
    // NOTE(review): the following tuning values are instance fields despite their constant-style
    // names; their use sites are mostly outside the visible part of the file.
    private int MAX_COLLECTED_EXAMPLES = 1000;
    public int MAX_EVALUATION_SAMPLES = 500;
    public int MIN_EVALUATION_SAMPLES = 10;
    public int MAX_CHARS_PER_TOPIC = 100000;
    // Row limit of the root concept query in getRootConcepts().
    public Integer MAX_ROOTS = 1000;
    // Upper bound on the number of suggestions returned by suggestTopics(String); three times
    // this many candidates are requested from Solr.
    public int MAX_SUGGESTIONS = 5;
    // Value published under the engine ordering key by getServiceProperties().
    protected Integer order = ORDERING_EXTRACTION_ENHANCEMENT;
    protected int cvFoldIndex = 0;
    protected int cvFoldCount = 0;
    protected boolean evaluationRunning = false;

    /**
     * Stores the given directory in {@code embeddedSolrServerDir}.
     *
     * @param file the directory to remember; stored as-is, without any validation
     */
    void configureEmbeddedSolrServerDir(File file) {
        this.embeddedSolrServerDir = file;
    }

    /**
     * Activates the engine using the properties carried by the component context.
     *
     * @param componentContext the component context whose properties are used as configuration
     * @throws ConfigurationException if the configuration is rejected
     * @throws InvalidSyntaxException if the training set service filter cannot be created
     */
    protected void activate(ComponentContext componentContext) throws ConfigurationException, InvalidSyntaxException {
        activate(componentContext, componentContext.getProperties());
    }

    /**
     * Activates the engine with an explicit configuration: remembers the component context,
     * applies the configuration and, when a training set id is configured, opens a service
     * tracker for {@link TrainingSet} services registered under that id.
     *
     * @param componentContext the component context of this engine
     * @param dictionary the configuration to apply
     * @throws ConfigurationException if the configuration is rejected by {@code configure}
     * @throws InvalidSyntaxException if the training set service filter cannot be created
     */
    protected void activate(ComponentContext componentContext, Dictionary<String, Object> dictionary) throws ConfigurationException, InvalidSyntaxException {
        this.context = componentContext;
        configure(dictionary);
        if (this.trainingSetId == null) {
            // No training set configured: nothing to track.
            return;
        }
        BundleContext bundleContext = componentContext.getBundleContext();
        String trainingSetFilter = String.format("(&(%s=%s)(%s=%s))", "objectClass", TrainingSet.class.getName(), SolrTrainingSet.TRAINING_SET_NAME, this.trainingSetId);
        this.trainingSetTracker = new ServiceTracker(bundleContext, bundleContext.createFilter(trainingSetFilter), (ServiceTrackerCustomizer) null);
        this.trainingSetTracker.open();
    }

    /**
     * Releases the resources held by this engine: closes the index tracker and the training set
     * tracker when present, then shuts down the evaluation server and deletes its directory.
     *
     * <p>A failure to shut down the evaluation server is logged and otherwise ignored so that
     * deactivation stays best-effort; the evaluation directory is deleted in every case.
     *
     * @param componentContext the component context of this engine (not used)
     */
    public void deactivate(ComponentContext componentContext) {
        if (this.indexTracker != null) {
            this.indexTracker.close();
        }
        if (this.trainingSetTracker != null) {
            this.trainingSetTracker.close();
        }
        if (this.__evaluationServer != null) {
            try {
                this.__evaluationServer.getCoreContainer().shutdown();
            } catch (Exception e) {
                // Best-effort shutdown: report the problem instead of silently dropping it.
                log.warn(String.format("Error while shutting down the evaluation server of topic engine '%s'.", this.engineName), e);
            } finally {
                // Runs on success, on a logged exception and on a propagating Error alike.
                FileUtils.deleteQuietly(this.__evaluationServerDir);
            }
        }
    }

    /**
     * Applies the given configuration: engine name, Solr field names (falling back to the
     * {@code DEFAULT_*} names), accepted languages, Solr core, optional training set id and the
     * engine order.
     *
     * <p>The order may be given as a {@link Number} or as any object whose string form parses as
     * an integer; when absent, {@link #DEFAULT_ENGINE_ORDER} is used.
     *
     * @param dictionary the configuration properties
     * @throws ConfigurationException if a required parameter is rejected by the helper methods or
     *         the configured order is not an integer value
     */
    @Override // org.apache.stanbol.enhancer.topic.ConfiguredSolrCoreTracker
    public void configure(Dictionary<String, Object> dictionary) throws ConfigurationException {
        this.engineName = getRequiredStringParam(dictionary, "stanbol.enhancer.engine.name");
        this.entryIdField = getRequiredStringParam(dictionary, ENTRY_ID_FIELD, DEFAULT_ENTRY_ID_FIELD);
        this.modelEntryIdField = getRequiredStringParam(dictionary, MODEL_ENTRY_ID_FIELD, DEFAULT_MODEL_ENTRY_ID_FIELD);
        this.conceptUriField = getRequiredStringParam(dictionary, CONCEPT_URI_FIELD, DEFAULT_CONCEPT_URI_FIELD);
        this.entryTypeField = getRequiredStringParam(dictionary, ENTRY_TYPE_FIELD, DEFAULT_ENTRY_TYPE_FIELD);
        this.similarityField = getRequiredStringParam(dictionary, SIMILARTITY_FIELD, DEFAULT_SIMILARTITY_FIELD);
        this.acceptedLanguages = getStringListParan(dictionary, LANGUAGES);
        this.acceptedLanguageSet = new HashSet<String>(this.acceptedLanguages);
        this.precisionField = getRequiredStringParam(dictionary, PRECISION_FIELD, DEFAULT_PRECISION_FIELD);
        this.recallField = getRequiredStringParam(dictionary, RECALL_FIELD, DEFAULT_RECALL_FIELD);
        this.modelUpdateDateField = getRequiredStringParam(dictionary, MODEL_UPDATE_DATE_FIELD, DEFAULT_MODEL_UPDATE_DATE_FIELD);
        this.modelEvaluationDateField = getRequiredStringParam(dictionary, MODEL_EVALUATION_DATE_FIELD, DEFAULT_MODEL_EVALUATION_DATE_FIELD);
        this.falsePositivesField = getRequiredStringParam(dictionary, FALSE_POSITIVES_FIELD, DEFAULT_FALSE_POSITIVES_FIELD);
        this.falseNegativesField = getRequiredStringParam(dictionary, FALSE_NEGATIVES_FIELD, DEFAULT_FALSE_NEGATIVES_FIELD);
        this.positiveSupportField = getRequiredStringParam(dictionary, POSITIVE_SUPPORT_FIELD, DEFAULT_POSITIVE_SUPPORT_FIELD);
        this.negativeSupportField = getRequiredStringParam(dictionary, NEGATIVE_SUPPORT_FIELD, DEFAULT_NEGATIVE_SUPPORT_FIELD);
        // The default core name is derived from the engine name.
        configureSolrCore(dictionary, SOLR_CORE, this.engineName + "-model", SOLR_CORE_CONFIG);
        this.broaderField = getRequiredStringParam(dictionary, BROADER_FIELD, DEFAULT_BROADER_FIELD);
        this.primaryTopicUriField = getRequiredStringParam(dictionary, PRIMARY_TOPIC_URI_FIELD, DEFAULT_PRIMARY_TOPIC_URI_FIELD);
        this.trainingSetId = (String) dictionary.get(TRAINING_SET_ID);

        Object configuredOrder = dictionary.get(ORDER);
        if (configuredOrder instanceof Number) {
            this.order = Integer.valueOf(((Number) configuredOrder).intValue());
        } else if (configuredOrder == null) {
            this.order = DEFAULT_ENGINE_ORDER;
        } else {
            try {
                // Keep the parsed value: a string-configured order must take effect too.
                this.order = Integer.valueOf(configuredOrder.toString());
            } catch (NumberFormatException e) {
                throw new ConfigurationException(ORDER, "The configured EnhancementEngine order MUST BE an Intever value!", e);
            }
        }
    }

    /**
     * Tells whether this engine can process the given content item.
     *
     * @param contentItem the content item to check
     * @return {@code 1} when the item has a content part with a supported mime type, a Solr
     *         server is active and the item's language is accepted (an empty accepted set or an
     *         accepted empty-string entry accepts every language); {@code 0} otherwise
     * @throws EngineException declared by the engine contract
     */
    public int canEnhance(ContentItem contentItem) throws EngineException {
        if (ContentItemHelper.getBlob(contentItem, SUPPORTED_MIMETYPES) == null) {
            return 0;
        }
        if (getActiveSolrServer() == null) {
            return 0;
        }
        if (this.acceptedLanguageSet.isEmpty()) {
            // No restriction configured: every language is accepted.
            return 1;
        }
        String language = EnhancementEngineHelper.getLanguage(contentItem);
        boolean accepted = this.acceptedLanguageSet.contains(language) || this.acceptedLanguageSet.contains("");
        return accepted ? 1 : 0;
    }

    /**
     * Classifies the plain text content of the given item and writes the suggested topics to the
     * item's metadata graph.
     *
     * <p>One text enhancement typed as SKOS concept is created for the item, plus one topic
     * annotation per suggestion carrying the concept reference and the confidence score. When the
     * performance estimates of a concept are up to date, its precision, recall and f1 are added;
     * when an entity for the concept is found in the entityhub or the referenced sites, its
     * English {@code skos:prefLabel} (or {@code rdfs:label}) is added as entity label.
     *
     * @param contentItem the content item to enhance
     * @throws IllegalStateException if the item has no supported content part or its language is
     *         not accepted by this engine (both indicate that {@code canEnhance} was not honoured)
     * @throws InvalidContentException if the text cannot be read from the content part
     * @throws EngineException if classification, performance lookup or entity lookup fails
     */
    public void computeEnhancements(ContentItem contentItem) throws EngineException {
        Map.Entry<UriRef, Blob> blob = ContentItemHelper.getBlob(contentItem, SUPPORTED_MIMETYPES);
        if (blob == null) {
            throw new IllegalStateException("No ContentPart with a supported Mime Typefound for ContentItem " + contentItem.getUri() + "(supported: '" + SUPPORTED_MIMETYPES + "') -> this indicates that canEnhance wasNOT called and indicates a bug in the used EnhancementJobManager!");
        }
        String language = EnhancementEngineHelper.getLanguage(contentItem);
        if (!this.acceptedLanguageSet.isEmpty() && !this.acceptedLanguageSet.contains(language) && !this.acceptedLanguageSet.contains("")) {
            throw new IllegalStateException("The language '" + language + "' of the ContentItem is not configured as  active for this Engine (active: " + this.acceptedLanguageSet + ").");
        }

        String text;
        try {
            text = ContentItemHelper.getText(blob.getValue());
        } catch (IOException e) {
            throw new InvalidContentException(String.format("Unable to extract  textual content from ContentPart %s of ContentItem %s!", blob.getKey(), contentItem.getUri()), e);
        }
        if (text.trim().isEmpty()) {
            log.warn("ContentPart {} of ContentItem {} does not contain any text to extract topics from", blob.getKey(), contentItem.getUri());
            return;
        }

        MGraph metadata = contentItem.getMetadata();
        List<TopicSuggestion> suggestions;
        try {
            suggestions = suggestTopics(text);
        } catch (ClassifierException e) {
            throw new EngineException(e);
        }
        if (suggestions.isEmpty()) {
            return;
        }

        UriRef precisionProperty = new UriRef(NamespaceEnum.fise + "classifier/precision");
        UriRef recallProperty = new UriRef(NamespaceEnum.fise + "classifier/recall");
        UriRef f1Property = new UriRef(NamespaceEnum.fise + "classifier/f1");
        LiteralFactory literalFactory = LiteralFactory.getInstance();

        // The metadata graph is only modified while holding the content item's write lock.
        contentItem.getLock().writeLock().lock();
        try {
            UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(contentItem, this);
            metadata.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, OntologicalClasses.SKOS_CONCEPT));
            for (TopicSuggestion topic : suggestions) {
                UriRef topicAnnotation = EnhancementEngineHelper.createEntityEnhancement(contentItem, this);
                metadata.add(new TripleImpl(topicAnnotation, Properties.RDF_TYPE, TechnicalClasses.ENHANCER_TOPICANNOTATION));
                metadata.add(new TripleImpl(topicAnnotation, Properties.DC_RELATION, textAnnotation));
                metadata.add(new TripleImpl(topicAnnotation, Properties.ENHANCER_ENTITY_REFERENCE, new UriRef(topic.conceptUri)));
                metadata.add(new TripleImpl(topicAnnotation, Properties.ENHANCER_ENTITY_TYPE, OntologicalClasses.SKOS_CONCEPT));
                metadata.add(new TripleImpl(topicAnnotation, Properties.ENHANCER_CONFIDENCE, literalFactory.createTypedLiteral(Double.valueOf(topic.score))));

                // Performance figures are only published when they reflect the current model.
                ClassificationReport estimates = getPerformanceEstimates(topic.conceptUri);
                if (estimates.uptodate) {
                    metadata.add(new TripleImpl(topicAnnotation, precisionProperty, literalFactory.createTypedLiteral(Double.valueOf(estimates.precision))));
                    metadata.add(new TripleImpl(topicAnnotation, recallProperty, literalFactory.createTypedLiteral(Double.valueOf(estimates.recall))));
                    metadata.add(new TripleImpl(topicAnnotation, f1Property, literalFactory.createTypedLiteral(Double.valueOf(estimates.f1))));
                }

                // Look the concept up locally first, then on the referenced sites.
                Entity entity = this.entityhub.getEntity(topic.conceptUri);
                if (entity == null) {
                    entity = this.referencedSiteManager.getEntity(topic.conceptUri);
                }
                if (entity != null) {
                    Representation representation = entity.getRepresentation();
                    Text label = representation.getFirst(NamespaceEnum.skos + "prefLabel", new String[]{"en", "en-US", "en-GB"});
                    if (label == null) {
                        label = representation.getFirst(NamespaceEnum.rdfs + "label", new String[]{"en", "en-US", "en-GB"});
                    }
                    if (label != null) {
                        metadata.add(new TripleImpl(topicAnnotation, Properties.ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(label.getText())));
                    }
                }
            }
        } catch (IllegalArgumentException e) {
            throw new EngineException(e);
        } catch (ClassifierException e) {
            throw new EngineException(e);
        } catch (EntityhubException e) {
            throw new EngineException(e);
        } finally {
            contentItem.getLock().writeLock().unlock();
        }
    }

    /**
     * Exposes the service properties of this engine.
     *
     * @return an unmodifiable single-entry map holding the configured engine order under the key
     *         {@code org.apache.stanbol.enhancer.engine.order}
     */
    public Map<String, Object> getServiceProperties() {
        Map<String, Object> orderingProperty = Collections.<String, Object>singletonMap("org.apache.stanbol.enhancer.engine.order", this.order);
        return Collections.unmodifiableMap(orderingProperty);
    }

    /**
     * Creates a new engine instance and applies the given configuration to it. The instance is
     * configured only; {@code activate} is not called by this factory.
     *
     * @param dictionary the configuration properties
     * @return the configured engine
     * @throws ConfigurationException if the configuration is rejected
     */
    public static TopicClassificationEngine fromParameters(Dictionary<String, Object> dictionary) throws ConfigurationException {
        TopicClassificationEngine engine = new TopicClassificationEngine();
        engine.configure(dictionary);
        return engine;
    }

    /**
     * @return the engine name set by {@code configure}
     */
    public String getName() {
        return this.engineName;
    }

    /**
     * @return the accepted languages set by {@code configure}; the internal list itself is
     *         returned, not a copy
     */
    public List<String> getAcceptedLanguages() {
        return this.acceptedLanguages;
    }

    /**
     * Suggests topics for several text fragments by joining them with a blank line and
     * classifying the resulting text.
     *
     * @param collection the fragments to classify together
     * @return the topic suggestions for the joined text
     * @throws ClassifierException if the classification fails
     */
    public List<TopicSuggestion> suggestTopics(Collection<Object> collection) throws ClassifierException {
        return suggestTopics(StringUtils.join(collection, "\n\n"));
    }

    /**
     * Suggests topics for the given text using a MoreLikeThis query against the model entries of
     * the active Solr core.
     *
     * <p>Up to {@code 3 * MAX_SUGGESTIONS} candidates are fetched and completed with the primary
     * topic and broader concepts read from the matching metadata entry. With more than one
     * candidate, the result is cut at the first candidate (after the top one) whose score is not
     * above {@code 0.25 * topScore + 0.75 * meanScore}, and never exceeds
     * {@code MAX_SUGGESTIONS} entries.
     *
     * @param str the text to classify
     * @return the retained suggestions, in the order returned by Solr
     * @throws ClassifierException if the query fails, the MoreLikeThis handler is not configured,
     *         a result lacks the concept URI field or a concept has no metadata entry
     */
    public List<TopicSuggestion> suggestTopics(String str) throws ClassifierException {
        int candidateLimit = this.MAX_SUGGESTIONS * 3;
        List<TopicSuggestion> candidates = new ArrayList<TopicSuggestion>(candidateLimit);
        SolrServer solrServer = getActiveSolrServer();
        SolrQuery query = new SolrQuery();
        query.setRequestHandler("/mlt");
        query.setFilterQueries(new String[]{this.entryTypeField + ":" + MODEL_ENTRY});
        query.set("mlt.match.include", false);
        query.set("mlt.mindf", 1);
        query.set("mlt.mintf", 1);
        query.set("mlt.maxqt", 30);
        query.set("mlt.maxntp", 10000);
        query.set("mlt.fl", new String[]{this.similarityField});
        query.set("stream.body", new String[]{str});
        query.setRows(Integer.valueOf(candidateLimit));
        query.setFields(new String[]{this.conceptUriField});
        query.setIncludeScore(true);
        try {
            StreamQueryRequest request = new StreamQueryRequest(query);
            for (SolrDocument result : request.process(solrServer).getResults()) {
                String conceptUri = (String) result.getFirstValue(this.conceptUriField);
                if (conceptUri == null) {
                    throw new ClassifierException(String.format("Solr Core '%s' is missing required field '%s'.", this.solrCoreId, this.conceptUriField));
                }
                Float score = (Float) result.getFirstValue("score");

                // Complete the suggestion with the hierarchy data of the metadata entry.
                SolrQuery metadataQuery = new SolrQuery("*:*");
                metadataQuery.addFilterQuery(new String[]{this.entryTypeField + ":" + METADATA_ENTRY});
                metadataQuery.addFilterQuery(new String[]{this.conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptUri)});
                metadataQuery.setFields(new String[]{this.conceptUriField, this.broaderField, this.primaryTopicUriField});
                SolrDocumentList metadataResults = solrServer.query(metadataQuery).getResults();
                if (metadataResults.isEmpty()) {
                    // An index holding a model entry without its metadata entry is inconsistent;
                    // report it as a classifier error rather than an IndexOutOfBoundsException.
                    throw new ClassifierException(String.format("Solr Core '%s' has no metadata entry for concept '%s'.", this.solrCoreId, conceptUri));
                }
                SolrDocument metadata = metadataResults.get(0);
                candidates.add(new TopicSuggestion(conceptUri, (String) metadata.getFirstValue(this.primaryTopicUriField), metadata.getFieldValues(this.broaderField), score.floatValue()));
            }
            if (candidates.size() <= 1) {
                return candidates;
            }

            float meanScore = 0.0f;
            for (TopicSuggestion candidate : candidates) {
                meanScore += candidate.score / candidates.size();
            }
            float threshold = (0.25f * candidates.get(0).score) + (0.75f * meanScore);

            List<TopicSuggestion> retained = new ArrayList<TopicSuggestion>();
            for (TopicSuggestion candidate : candidates) {
                if (retained.size() >= this.MAX_SUGGESTIONS) {
                    return retained;
                }
                // The top candidate is always kept; later ones must beat the threshold.
                if (!retained.isEmpty() && candidate.score <= threshold) {
                    break;
                }
                retained.add(candidate);
            }
            return retained;
        } catch (SolrServerException e) {
            // The cause may be absent, so guard before inspecting its message.
            Throwable cause = e.getCause();
            if (cause != null && "unknown handler: /mlt".equals(cause.getMessage())) {
                throw new ClassifierException(String.format("SolrServer with id '%s' for topic engine '%s' lacks configuration for the MoreLikeThisHandler", this.solrCoreId, this.engineName), e);
            }
            throw new ClassifierException(e);
        }
    }

    /**
     * Collects the URIs of the concepts whose broader field references the given concept.
     *
     * @param str the URI of the broader concept
     * @return the narrower concept URIs in ascending concept URI order; empty when no broader
     *         field is configured
     * @throws ClassifierException if the Solr query fails
     */
    public Set<String> getNarrowerConcepts(String str) throws ClassifierException {
        Set<String> narrowerConcepts = new LinkedHashSet<String>();
        if (this.broaderField == null) {
            return narrowerConcepts;
        }
        SolrServer solrServer = getActiveSolrServer();
        SolrQuery query = new SolrQuery("*:*");
        query.addFilterQuery(new String[]{this.entryTypeField + ":" + METADATA_ENTRY});
        query.addFilterQuery(new String[]{this.broaderField + ":" + ClientUtils.escapeQueryChars(str)});
        query.addField(this.conceptUriField);
        query.addSortField(this.conceptUriField, SolrQuery.ORDER.asc);
        try {
            for (SolrDocument result : solrServer.query(query).getResults()) {
                narrowerConcepts.add(result.getFirstValue(this.conceptUriField).toString());
            }
            return narrowerConcepts;
        } catch (SolrServerException e) {
            throw new ClassifierException(String.format("Error while fetching narrower topics of '%s' on Solr Core '%s'.", str, this.solrCoreId), e);
        }
    }

    /**
     * Collects the values of the broader field of every entry registered for the given concept.
     *
     * @param str the URI of the concept whose broader concepts are requested
     * @return the broader concept URIs in encounter order; empty when no broader field is
     *         configured
     * @throws ClassifierException if the Solr query fails
     */
    public Set<String> getBroaderConcepts(String str) throws ClassifierException {
        Set<String> broaderConcepts = new LinkedHashSet<String>();
        if (this.broaderField == null) {
            return broaderConcepts;
        }
        SolrServer solrServer = getActiveSolrServer();
        SolrQuery query = new SolrQuery("*:*");
        query.addFilterQuery(new String[]{this.conceptUriField + ":" + ClientUtils.escapeQueryChars(str)});
        query.addField(this.broaderField);
        try {
            for (SolrDocument result : solrServer.query(query).getResults()) {
                Collection<Object> values = result.getFieldValues(this.broaderField);
                if (values == null) {
                    // Entry without any broader concept.
                    continue;
                }
                for (Object value : values) {
                    broaderConcepts.add(value.toString());
                }
            }
            return broaderConcepts;
        } catch (SolrServerException e) {
            throw new ClassifierException(String.format("Error while fetching broader topics of '%s' on Solr Core '%s'.", str, this.solrCoreId), e);
        }
    }

    /**
     * Collects the URIs of the metadata entries that have no broader concept. When no broader
     * field is configured, every metadata entry is treated as a root.
     *
     * <p>At most {@code MAX_ROOTS} entries are fetched; a warning is logged when that limit is
     * reached.
     *
     * @return the root concept URIs in ascending concept URI order
     * @throws ClassifierException if the Solr query fails
     */
    public Set<String> getRootConcepts() throws ClassifierException {
        Set<String> rootConcepts = new LinkedHashSet<String>();
        SolrServer solrServer = getActiveSolrServer();
        SolrQuery query = new SolrQuery("*:*");
        query.setRows(this.MAX_ROOTS);
        query.setFields(new String[]{this.conceptUriField});
        query.setSortField(this.conceptUriField, SolrQuery.ORDER.asc);
        query.addFilterQuery(new String[]{this.entryTypeField + ":" + METADATA_ENTRY});
        if (this.broaderField != null) {
            // Negated range query: keep only entries with an empty broader field.
            query.addFilterQuery(new String[]{" -" + this.broaderField + ":" + SOLR_NON_EMPTY_FIELD});
        }
        try {
            QueryResponse response = solrServer.query(query);
            SolrDocumentList results = response.getResults();
            if (results.size() >= this.MAX_ROOTS.intValue()) {
                log.warn(String.format("TopicClassifier '%s' has more than %d registered topic roots. Some roots might be ignored.", this.engineName, this.MAX_ROOTS));
            }
            for (SolrDocument result : response.getResults()) {
                rootConcepts.add(result.getFirstValue(this.conceptUriField).toString());
            }
            return rootConcepts;
        } catch (SolrServerException e) {
            throw new ClassifierException(String.format("Error while fetching root topics on Solr Core '%s'.", this.solrCoreId), e);
        }
    }

    /**
     * Registers a concept, replacing any entries previously stored for the same URI.
     *
     * <p>Two Solr documents are written and committed: a metadata entry (concept URI, optional
     * primary topic and broader concepts, id of the model entry) and an initially empty model
     * entry. The model update and evaluation dates of the given broader concepts are invalidated
     * beforehand.
     *
     * @param str the URI of the concept to register
     * @param str2 the primary topic URI, or {@code null} for none
     * @param collection the URIs of the broader concepts, or {@code null} for none
     * @throws ClassifierException if the concept URI is null or empty, or a Solr operation fails
     */
    public void addConcept(String str, String str2, Collection<String> collection) throws ClassifierException {
        removeConcept(str);

        String metadataEntryId = UUID.randomUUID().toString();
        String modelEntryId = UUID.randomUUID().toString();

        SolrInputDocument metadataEntry = new SolrInputDocument();
        metadataEntry.addField(this.conceptUriField, str);
        metadataEntry.addField(this.entryIdField, metadataEntryId);
        metadataEntry.addField(this.modelEntryIdField, modelEntryId);
        metadataEntry.addField(this.entryTypeField, METADATA_ENTRY);
        if (collection != null && this.broaderField != null) {
            metadataEntry.addField(this.broaderField, collection);
        }
        if (str2 != null && this.primaryTopicUriField != null) {
            metadataEntry.addField(this.primaryTopicUriField, str2);
        }

        SolrInputDocument modelEntry = new SolrInputDocument();
        modelEntry.addField(this.entryIdField, modelEntryId);
        modelEntry.addField(this.conceptUriField, str);
        modelEntry.addField(this.entryTypeField, MODEL_ENTRY);

        if (collection != null) {
            // The broader concepts gain a narrower concept, so their dates are dropped.
            invalidateModelFields(collection, this.modelUpdateDateField, this.modelEvaluationDateField);
        }

        SolrServer solrServer = getActiveSolrServer();
        try {
            UpdateRequest update = new UpdateRequest();
            update.add(metadataEntry);
            update.add(modelEntry);
            solrServer.request(update);
            solrServer.commit();
        } catch (Exception e) {
            throw new ClassifierException(String.format("Error adding topic with id '%s' on Solr Core '%s'", str, this.solrCoreId), e);
        }
    }

    /**
     * Registers a concept without a primary topic.
     *
     * @param str the URI of the concept to register
     * @param collection the URIs of the broader concepts, or {@code null} for none
     * @throws ClassifierException if the registration fails
     */
    public void addConcept(String str, Collection<String> collection) throws ClassifierException {
        addConcept(str, null, collection);
    }

    /**
     * Removes the given fields from the metadata entries of the given concepts by re-submitting
     * each entry without those fields. The update is sent to Solr but not committed here.
     *
     * @param collection the URIs of the concepts whose metadata entries are rewritten
     * @param strArr the names of the fields to drop
     * @throws ClassifierException if a Solr query or the update request fails
     */
    protected void invalidateModelFields(Collection<String> collection, String... strArr) throws ClassifierException {
        if (collection.isEmpty() || strArr.length == 0) {
            return;
        }
        SolrServer solrServer = getActiveSolrServer();
        List<String> droppedFields = Arrays.asList(strArr);
        try {
            UpdateRequest update = new UpdateRequest();
            for (String conceptUri : collection) {
                SolrQuery query = new SolrQuery("*:*");
                query.addFilterQuery(new String[]{this.entryTypeField + ":" + METADATA_ENTRY});
                query.addFilterQuery(new String[]{this.conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptUri)});
                for (SolrDocument stored : solrServer.query(query).getResults()) {
                    // Copy every field except the invalidated ones.
                    SolrInputDocument rewritten = new SolrInputDocument();
                    for (String fieldName : stored.getFieldNames()) {
                        if (droppedFields.contains(fieldName)) {
                            continue;
                        }
                        rewritten.setField(fieldName, stored.getFieldValues(fieldName));
                    }
                    update.add(rewritten);
                }
            }
            List<SolrInputDocument> pending = update.getDocuments();
            if (pending != null && pending.size() > 0) {
                solrServer.request(update);
            }
        } catch (Exception e) {
            throw new ClassifierException(String.format("Error invalidating topics [%s] on Solr Core '%s'", StringUtils.join(collection, ", "), this.solrCoreId), e);
        }
    }

    /**
     * Deletes every document of the active Solr core and commits the deletion.
     *
     * @throws ClassifierException when the delete or the commit fails
     */
    public void removeAllConcepts() throws ClassifierException {
        final String matchEverything = "*:*";
        SolrServer solr = getActiveSolrServer();
        try {
            solr.deleteByQuery(matchEverything);
            solr.commit();
        } catch (Exception e) {
            String message = String.format("Error deleting concepts from Solr Core '%s'", this.solrCoreId);
            throw new ClassifierException(message, e);
        }
    }

    /**
     * Deletes all entries whose concept URI field matches the given id and commits.
     *
     * @param str the concept id; must be neither {@code null} nor empty
     * @throws ClassifierException when the id is missing or when the Solr delete/commit fails
     */
    public void removeConcept(String str) throws ClassifierException {
        boolean missingId = str == null || str.isEmpty();
        if (missingId) {
            throw new ClassifierException("conceptId must not be null or empty");
        }
        SolrServer solr = getActiveSolrServer();
        try {
            String deleteQuery = this.conceptUriField + ":" + ClientUtils.escapeQueryChars(str);
            solr.deleteByQuery(deleteQuery);
            solr.commit();
        } catch (Exception e) {
            String message = String.format("Error removing concept '%s' on Solr Core '%s'", str, this.solrCoreId);
            throw new ClassifierException(message, e);
        }
    }

    /**
     * Returns the training set used by this engine.
     *
     * <p>An explicitly assigned training set takes precedence. Otherwise the service tracker is
     * consulted; when it has no service yet, this method waits for one in up to five rounds of
     * one second each.
     *
     * <p>If the calling thread is interrupted while waiting, the interrupt flag is restored and
     * the wait is abandoned instead of being silently retried.
     *
     * @return the training set, or {@code null} when none is assigned, no tracker is configured,
     *         or no service appeared while waiting
     */
    public TrainingSet getTrainingSet() {
        if (this.trainingSet != null) {
            return this.trainingSet;
        }
        if (this.trainingSetTracker == null) {
            return null;
        }
        TrainingSet tracked = (TrainingSet) this.trainingSetTracker.getService();
        for (int attempt = 0; attempt < 5 && tracked == null; attempt++) {
            try {
                tracked = (TrainingSet) this.trainingSetTracker.waitForService(1000L);
            } catch (InterruptedException e) {
                // Keep the interruption visible to the caller and stop blocking.
                Thread.currentThread().interrupt();
                break;
            }
        }
        return tracked;
    }

    /**
     * Tells whether a training set is available to this engine.
     *
     * @return {@code true} when {@link #getTrainingSet()} yields a non-null training set
     */
    public boolean isUpdatable() {
        TrainingSet available = getTrainingSet();
        return available != null;
    }

    /**
     * Assigns the training set stored on this engine.
     *
     * @param trainingSet the training set to store; {@code null} clears the explicit assignment
     */
    public void setTrainingSet(TrainingSet trainingSet) {
        this.trainingSet = trainingSet;
    }

    /**
     * Feeds all metadata entries of the active Solr core to the processor, in batches ordered by
     * concept URI.
     *
     * <p>Each query asks for one document more than the batch size. The extra document is not
     * processed; its concept URI becomes the inclusive lower bound of the next query. Paging
     * stops as soon as a query returns no extra document, which also covers a final page that
     * holds exactly one full batch. A commit is issued after every batch and the index is
     * optimized once all batches are done.
     *
     * @param batchProcessor callback invoked once per batch, including a possibly empty one
     * @return the sum of the values returned by the processor
     * @throws TrainingSetException when querying, processing, committing or optimizing fails
     */
    protected int batchOverTopics(BatchProcessor<SolrDocument> batchProcessor) throws TrainingSetException {
        final int batchSize = 1000;
        int processed = 0;
        SolrServer solr = getActiveSolrServer();
        try {
            String offset = null;
            boolean done = false;
            while (!done) {
                // A fresh query per page keeps a single range filter instead of piling them up.
                SolrQuery query = new SolrQuery("*:*");
                query.addFilterQuery(this.entryTypeField + ":" + METADATA_ENTRY);
                if (offset != null) {
                    query.addFilterQuery(this.conceptUriField + ":[" + ClientUtils.escapeQueryChars(offset) + " TO *]");
                }
                query.addSortField(this.conceptUriField, SolrQuery.ORDER.asc);
                query.setRows(Integer.valueOf(batchSize + 1));

                QueryResponse response = solr.query(query);
                List<SolrDocument> batch = new ArrayList<SolrDocument>();
                String nextOffset = null;
                for (SolrDocument document : response.getResults()) {
                    if (batch.size() == batchSize) {
                        // The look-ahead document marks where the next page starts.
                        nextOffset = document.getFirstValue(this.conceptUriField).toString();
                        break;
                    }
                    batch.add(document);
                }
                processed += batchProcessor.process(batch);
                solr.commit();
                if (nextOffset == null) {
                    done = true;
                } else {
                    offset = nextOffset;
                }
            }
            solr.optimize();
        } catch (Exception e) {
            throw new TrainingSetException(String.format("Error while updating topics on Solr Core '%s'.", this.solrCoreId), e);
        }
        return processed;
    }

    /**
     * Rebuilds the statistical model of the registered topics from the training set.
     *
     * <p>In incremental mode a topic is skipped when it carries a model update date and the
     * training set reports no change since that date for the topic and its narrower concepts.
     * Incremental mode is switched off, with a warning, when no model update date field is
     * configured.
     *
     * @param z {@code true} to request an incremental update, {@code false} to update every topic
     * @return the number of topics that were updated
     * @throws TrainingSetException when no training set is available or reading it fails
     * @throws ClassifierException when updating a topic fails
     */
    public int updateModel(boolean z) throws TrainingSetException, ClassifierException {
        checkTrainingSet();
        long startedAt = System.currentTimeMillis();
        boolean incrementalRequested = z;
        if (incrementalRequested && this.modelUpdateDateField == null) {
            log.warn("org.apache.stanbol.enhancer.engine.topic.modelUpdateDateField field is not configured: switching to batch update mode.");
            incrementalRequested = false;
        }
        final boolean incremental = incrementalRequested;
        BatchProcessor<SolrDocument> topicUpdater = new BatchProcessor<SolrDocument>() {
            public int process(List<SolrDocument> list) throws ClassifierException, TrainingSetException {
                int updated = 0;
                for (SolrDocument metadata : list) {
                    String conceptUri = metadata.getFirstValue(TopicClassificationEngine.this.conceptUriField).toString();
                    List<String> conceptAndNarrower = new ArrayList<String>();
                    conceptAndNarrower.add(conceptUri);
                    conceptAndNarrower.addAll(TopicClassificationEngine.this.getNarrowerConcepts(conceptUri));
                    if (incremental) {
                        Date lastUpdate = (Date) metadata.getFirstValue(TopicClassificationEngine.this.modelUpdateDateField);
                        if (lastUpdate != null && !TopicClassificationEngine.this.getTrainingSet().hasChangedSince(conceptAndNarrower, lastUpdate)) {
                            // Model is still current for this topic.
                            continue;
                        }
                    }
                    String entryId = metadata.getFirstValue(TopicClassificationEngine.this.entryIdField).toString();
                    String modelEntryId = metadata.getFirstValue(TopicClassificationEngine.this.modelEntryIdField).toString();
                    String primaryTopicUri = null;
                    if (TopicClassificationEngine.this.primaryTopicUriField != null) {
                        primaryTopicUri = (String) metadata.getFirstValue(TopicClassificationEngine.this.primaryTopicUriField);
                    }
                    TopicClassificationEngine.this.updateTopic(conceptUri, entryId, modelEntryId, conceptAndNarrower, primaryTopicUri, metadata.getFieldValues(TopicClassificationEngine.this.broaderField));
                    updated++;
                }
                return updated;
            }
        };
        int updatedTopics = batchOverTopics(topicUpdater);
        double elapsedSeconds = (System.currentTimeMillis() - startedAt) / 1000.0d;
        log.info("Sucessfully updated {} topics in {}s", Integer.valueOf(updatedTopics), Double.valueOf(elapsedSeconds));
        return updatedTopics;
    }

    /**
     * Rebuilds the model entry of one topic from its positive training examples and rewrites the
     * matching metadata entry.
     *
     * <p>Positive examples are read batch by batch and concatenated until either no more examples
     * are available or the collected text reaches {@code MAX_CHARS_PER_TOPIC}. When a
     * cross-validation fold count is set, every example whose running index falls into the
     * configured fold is left out of the text. The update request is sent without a commit.
     *
     * @param str the concept URI of the topic
     * @param str2 the id of the metadata entry
     * @param str3 the id of the model entry
     * @param list the concept ids passed to the training set when fetching positive examples
     * @param str4 the primary topic URI, or {@code null}; only stored when non-null and the
     *        primary topic field is configured
     * @param collection values for the broader field, or {@code null}
     * @throws TrainingSetException when reading the training set fails
     * @throws ClassifierException when the Solr update fails
     */
    protected void updateTopic(String str, String str2, String str3, List<String> list, String str4, Collection<Object> collection) throws TrainingSetException, ClassifierException {
        long startedAt = System.currentTimeMillis();
        StringBuilder trainingText = new StringBuilder();
        Batch<Example> examples = Batch.emtpyBatch(Example.class);
        int exampleIndex = 0;
        do {
            examples = getTrainingSet().getPositiveExamples(list, examples.nextOffset);
            for (Example example : examples.items) {
                // The modulo is only evaluated when a fold count is set (no division by zero).
                boolean heldOut = this.cvFoldCount != 0 && exampleIndex % this.cvFoldCount == this.cvFoldIndex;
                exampleIndex++;
                if (!heldOut) {
                    trainingText.append(StringUtils.join(example.contents, "\n\n"));
                    trainingText.append("\n\n");
                }
            }
        } while (trainingText.length() < this.MAX_CHARS_PER_TOPIC && examples.hasMore);

        SolrInputDocument modelEntry = new SolrInputDocument();
        modelEntry.addField(this.entryIdField, str3);
        modelEntry.addField(this.conceptUriField, str);
        modelEntry.addField(this.entryTypeField, MODEL_ENTRY);
        if (trainingText.length() > 0) {
            // Hand Solr a plain String rather than the builder object itself.
            modelEntry.addField(this.similarityField, trainingText.toString());
        }

        SolrInputDocument metadataEntry = new SolrInputDocument();
        metadataEntry.addField(this.entryIdField, str2);
        metadataEntry.addField(this.modelEntryIdField, str3);
        metadataEntry.addField(this.entryTypeField, METADATA_ENTRY);
        metadataEntry.addField(this.conceptUriField, str);
        if (str4 != null && this.primaryTopicUriField != null) {
            metadataEntry.addField(this.primaryTopicUriField, str4);
        }
        if (collection != null && this.broaderField != null) {
            metadataEntry.addField(this.broaderField, collection);
        }
        if (this.modelUpdateDateField != null) {
            metadataEntry.addField(this.modelUpdateDateField, UTCTimeStamper.nowUtcDate());
        }

        SolrServer solr = getActiveSolrServer();
        try {
            UpdateRequest request = new UpdateRequest();
            request.add(metadataEntry);
            request.add(modelEntry);
            solr.request(request);
            log.debug("Sucessfully updated topic {} in {}s", str, Double.valueOf((System.currentTimeMillis() - startedAt) / 1000.0d));
        } catch (Exception e) {
            throw new ClassifierException(String.format("Error updating topic with id '%s' on Solr Core '%s'", str, this.solrCoreId), e);
        }
    }

    /**
     * Fails fast when no training set is available.
     *
     * @throws TrainingSetException when {@link #getTrainingSet()} returns {@code null}
     */
    protected void checkTrainingSet() throws TrainingSetException {
        TrainingSet available = getTrainingSet();
        if (available != null) {
            return;
        }
        String message = String.format("TopicClassificationEngine %s has no registered training set hence cannot be updated.", this.engineName);
        throw new TrainingSetException(message);
    }

    /**
     * Stores the cross-validation fold index and fold count on this engine.
     *
     * @param i the fold index; rejected when greater than {@code i2 - 1}
     * @param i2 the fold count
     * @throws IllegalArgumentException when the fold index exceeds {@code i2 - 1}
     */
    public void setCrossValidationInfo(int i, int i2) {
        int highestIndex = i2 - 1;
        boolean indexTooLarge = i > highestIndex;
        if (indexTooLarge) {
            String message = String.format("foldIndex=%d should be smaller than foldCount=%d - 1", Integer.valueOf(i), Integer.valueOf(i2));
            throw new IllegalArgumentException(message);
        }
        this.cvFoldCount = i2;
        this.cvFoldIndex = i;
    }

    /**
     * Builds the configuration for this engine's "-evaluation" sibling: the engine name with an
     * {@code -evaluation} suffix, the given Solr core settings, and the default name for every
     * index field.
     *
     * @param obj the value stored under the Solr core key; must not be {@code null}
     * @param obj2 the value stored under the Solr core config key; must not be {@code null}
     * @return a new, independent configuration dictionary
     */
    protected Dictionary<String, Object> getCanonicalConfiguration(Object obj, Object obj2) {
        Hashtable<String, Object> config = new Hashtable<String, Object>();

        // Identity and index layout of the entries.
        config.put("stanbol.enhancer.engine.name", this.engineName + "-evaluation");
        config.put(ENTRY_ID_FIELD, DEFAULT_ENTRY_ID_FIELD);
        config.put(ENTRY_TYPE_FIELD, DEFAULT_ENTRY_TYPE_FIELD);
        config.put(MODEL_ENTRY_ID_FIELD, DEFAULT_MODEL_ENTRY_ID_FIELD);

        // Solr core to use (Hashtable rejects null values).
        config.put("org.apache.stanbol.enhancer.engine.topic.solrCore", obj);
        config.put("org.apache.stanbol.enhancer.engine.topic.solrCoreConfig", obj2);

        // Topic description fields.
        config.put(CONCEPT_URI_FIELD, DEFAULT_CONCEPT_URI_FIELD);
        config.put(PRIMARY_TOPIC_URI_FIELD, DEFAULT_PRIMARY_TOPIC_URI_FIELD);
        config.put(SIMILARTITY_FIELD, DEFAULT_SIMILARTITY_FIELD);
        config.put(BROADER_FIELD, DEFAULT_BROADER_FIELD);

        // Bookkeeping dates.
        config.put(MODEL_UPDATE_DATE_FIELD, DEFAULT_MODEL_UPDATE_DATE_FIELD);
        config.put(MODEL_EVALUATION_DATE_FIELD, DEFAULT_MODEL_EVALUATION_DATE_FIELD);

        // Performance estimate fields.
        config.put(PRECISION_FIELD, DEFAULT_PRECISION_FIELD);
        config.put(RECALL_FIELD, DEFAULT_RECALL_FIELD);
        config.put(POSITIVE_SUPPORT_FIELD, DEFAULT_POSITIVE_SUPPORT_FIELD);
        config.put(NEGATIVE_SUPPORT_FIELD, DEFAULT_NEGATIVE_SUPPORT_FIELD);
        config.put(FALSE_POSITIVES_FIELD, DEFAULT_FALSE_POSITIVES_FIELD);
        config.put(FALSE_NEGATIVES_FIELD, DEFAULT_FALSE_NEGATIVES_FIELD);
        return config;
    }

    /**
     * Reports the current value of the evaluation-running flag.
     *
     * @return {@code true} while the flag is set
     */
    public boolean isEvaluationRunning() {
        final boolean running = this.evaluationRunning;
        return running;
    }

    /**
     * Runs the cross-validation folds and refreshes the performance estimates of the topics.
     *
     * <p>The training set is optimized first, then three folds of a 3-fold cross-validation are
     * executed and the active Solr index is optimized. Whatever the outcome, the evaluation
     * server directory is deleted and the evaluation-running flag is cleared before returning.
     *
     * @param z forwarded unchanged to every {@code performCVFold} call
     * @return the value returned by the last fold
     * @throws ClassifierException when an evaluation is already running, or when a
     *         configuration, Solr or I/O error occurs
     * @throws TrainingSetException when no training set is available or reading it fails
     */
    public synchronized int updatePerformanceEstimates(boolean z) throws ClassifierException, TrainingSetException {
        checkTrainingSet();
        if (this.evaluationRunning) {
            throw new ClassifierException("Another evaluation is already running");
        }
        final int foldCount = 3;
        final int iterationCount = 3;
        int updatedTopics = 0;
        try {
            this.evaluationRunning = true;
            getTrainingSet().optimize();
            for (int foldIndex = 0; foldIndex < iterationCount; foldIndex++) {
                updatedTopics = performCVFold(foldIndex, foldCount, iterationCount, z);
            }
            getActiveSolrServer().optimize();
            return updatedTopics;
        } catch (ConfigurationException e) {
            throw new ClassifierException(e);
        } catch (SolrServerException e) {
            throw new ClassifierException(e);
        } catch (IOException e) {
            throw new ClassifierException(e);
        } finally {
            FileUtils.deleteQuietly(this.__evaluationServerDir);
            this.evaluationRunning = false;
        }
    }

    /**
     * Runs one cross-validation fold: trains a separate evaluation engine on the examples outside
     * the fold, classifies the examples inside the fold, and records precision and recall per
     * topic on this engine's index.
     *
     * <p>Steps:
     * <ol>
     * <li>Set up the evaluation engine, either on the managed Solr server or on an embedded Solr
     * server created under {@code embeddedSolrServerDir}, and empty it.</li>
     * <li>Copy every concept of this engine, with its broader values, into the evaluation
     * engine.</li>
     * <li>Train the evaluation engine with the fold index and count set, so that the fold's
     * examples are left out.</li>
     * <li>For every topic, classify the positive and negative examples that belong to the fold
     * and store the resulting figures through {@code updatePerformanceMetadata}. Topics whose
     * first batch of positive examples is smaller than {@code MIN_EVALUATION_SAMPLES} are
     * skipped.</li>
     * </ol>
     *
     * @param i the index of the fold used as test set
     * @param i2 the number of folds
     * @param i3 the number of iterations that will be run, used for logging only; a non-positive
     *        value means one iteration per fold
     * @param z not used by this method
     * @return the number of topics whose performance metadata was updated
     * @throws ConfigurationException declared for callers; failures inside the fold are reported
     *         as {@link ClassifierException}
     * @throws TrainingSetException declared for callers; failures inside the fold are reported
     *         as {@link ClassifierException}
     * @throws ClassifierException when any step of the fold fails
     */
    protected int performCVFold(final int i, final int i2, int i3, boolean z) throws ConfigurationException, TrainingSetException, ClassifierException {
        int iterations = i3 <= 0 ? i2 : i3;
        log.info(String.format("Performing evaluation %d-fold CV iteration %d/%d on classifier %s", Integer.valueOf(i2), Integer.valueOf(i + 1), Integer.valueOf(iterations), this.engineName));
        long startedAt = System.currentTimeMillis();
        final TopicClassificationEngine evaluationEngine = new TopicClassificationEngine();
        try {
            if (this.managedSolrServer != null) {
                evaluationEngine.bindManagedSolrServer(this.managedSolrServer);
                evaluationEngine.activate(this.context, getCanonicalConfiguration(this.engineName + "-evaluation", this.solrCoreConfig));
            } else {
                if (this.__evaluationServer == null) {
                    this.__evaluationServerDir = new File(this.embeddedSolrServerDir, this.engineName + "-evaluation");
                    if (!this.__evaluationServerDir.exists()) {
                        FileUtils.forceMkdir(this.__evaluationServerDir);
                    }
                    this.__evaluationServer = EmbeddedSolrHelper.makeEmbeddedSolrServer(this.__evaluationServerDir, "evaluationclassifierserver", "default-topic-model", "default-topic-model");
                }
                evaluationEngine.configure(getCanonicalConfiguration(this.__evaluationServer, this.solrCoreConfig));
            }
            evaluationEngine.removeAllConcepts();

            // Mirror the concept hierarchy of this engine into the evaluation engine.
            batchOverTopics(new BatchProcessor<SolrDocument>() {
                public int process(List<SolrDocument> list) throws ClassifierException {
                    for (SolrDocument metadata : list) {
                        String conceptUri = metadata.getFirstValue(TopicClassificationEngine.this.conceptUriField).toString();
                        Collection<Object> broaderValues = metadata.getFieldValues(TopicClassificationEngine.this.broaderField);
                        List<String> broaderUris = null;
                        if (broaderValues != null) {
                            broaderUris = new ArrayList<String>();
                            for (Object broaderValue : broaderValues) {
                                broaderUris.add(broaderValue.toString());
                            }
                        }
                        evaluationEngine.addConcept(conceptUri, null, broaderUris);
                    }
                    return list.size();
                }
            });

            // Train the evaluation engine on everything but the test fold.
            evaluationEngine.setCrossValidationInfo(i, i2);
            evaluationEngine.setTrainingSet(getTrainingSet());
            evaluationEngine.updateModel(false);

            int evaluatedTopics = batchOverTopics(new BatchProcessor<SolrDocument>() {
                public int process(List<SolrDocument> list) throws TrainingSetException, ClassifierException {
                    int evaluated = 0;
                    for (SolrDocument metadata : list) {
                        String conceptUri = metadata.getFirstValue(TopicClassificationEngine.this.conceptUriField).toString();
                        List<String> topics = Arrays.asList(conceptUri);

                        // Positive examples: count hits and misses of the expected topic.
                        List<String> falseNegativeIds = new ArrayList<String>();
                        int truePositives = 0;
                        int falseNegatives = 0;
                        int positiveSupport = 0;
                        int positiveOffset = 0;
                        boolean skipTopic = false;
                        Batch<Example> positives = Batch.emtpyBatch(Example.class);
                        do {
                            positives = TopicClassificationEngine.this.getTrainingSet().getPositiveExamples(topics, positives.nextOffset);
                            if (positiveOffset == 0 && positives.items.size() < TopicClassificationEngine.this.MIN_EVALUATION_SAMPLES) {
                                skipTopic = true;
                                break;
                            }
                            for (Example example : positives.items) {
                                boolean inTestFold = positiveOffset % i2 == i;
                                positiveOffset++;
                                if (!inTestFold) {
                                    continue;
                                }
                                positiveSupport++;
                                boolean matched = false;
                                for (TopicSuggestion suggestion : evaluationEngine.suggestTopics(example.contents)) {
                                    if (conceptUri.equals(suggestion.conceptUri)) {
                                        matched = true;
                                        truePositives++;
                                        break;
                                    }
                                }
                                if (!matched) {
                                    falseNegatives++;
                                    if (falseNegativeIds.size() < TopicClassificationEngine.this.MAX_COLLECTED_EXAMPLES / i2) {
                                        falseNegativeIds.add(example.id);
                                    }
                                }
                            }
                        } while (positives.hasMore && positiveOffset < TopicClassificationEngine.this.MAX_EVALUATION_SAMPLES);

                        if (skipTopic) {
                            TopicClassificationEngine.log.debug("Skipping evaluation of {} because too few positive examples.", conceptUri);
                            continue;
                        }

                        // Negative examples: any suggestion of the topic is a false positive.
                        List<String> falsePositiveIds = new ArrayList<String>();
                        int falsePositives = 0;
                        int negativeSupport = 0;
                        int negativeOffset = 0;
                        Batch<Example> negatives = Batch.emtpyBatch(Example.class);
                        do {
                            negatives = TopicClassificationEngine.this.getTrainingSet().getNegativeExamples(topics, negatives.nextOffset);
                            for (Example example : negatives.items) {
                                boolean inTestFold = negativeOffset % i2 == i;
                                negativeOffset++;
                                if (!inTestFold) {
                                    continue;
                                }
                                negativeSupport++;
                                for (TopicSuggestion suggestion : evaluationEngine.suggestTopics(example.contents)) {
                                    if (conceptUri.equals(suggestion.conceptUri)) {
                                        falsePositives++;
                                        if (falsePositiveIds.size() < TopicClassificationEngine.this.MAX_COLLECTED_EXAMPLES / i2) {
                                            falsePositiveIds.add(example.id);
                                        }
                                        // One example counts at most once, as in the positive pass.
                                        break;
                                    }
                                }
                            }
                        } while (negatives.hasMore && negativeOffset < TopicClassificationEngine.this.MAX_EVALUATION_SAMPLES);

                        // Divide as floats: integer division would only ever yield 0 or 1.
                        float precision = 0.0f;
                        if (truePositives != 0 || falsePositives != 0) {
                            precision = truePositives / (float) (truePositives + falsePositives);
                        }
                        float recall = 0.0f;
                        if (truePositives != 0 || falseNegatives != 0) {
                            recall = truePositives / (float) (truePositives + falseNegatives);
                        }
                        TopicClassificationEngine.this.updatePerformanceMetadata(conceptUri, precision, recall, positiveSupport, negativeSupport, falsePositiveIds, falseNegativeIds);
                        evaluated++;
                    }
                    try {
                        TopicClassificationEngine.this.getActiveSolrServer().commit();
                        return evaluated;
                    } catch (Exception e) {
                        throw new ClassifierException(e);
                    }
                }
            });
            log.info(String.format("Finished CV iteration %d/%d on classifier %s in %fs.", Integer.valueOf(i + 1), Integer.valueOf(i2), this.engineName, Double.valueOf((System.currentTimeMillis() - startedAt) / 1000.0d)));
            if (this.context != null) {
                evaluationEngine.deactivate(this.context);
            }
            return evaluatedTopics;
        } catch (Exception e) {
            throw new ClassifierException(e);
        }
    }

    /**
     * Adds one round of evaluation figures to the metadata entries of a concept.
     *
     * <p>Every matching metadata entry is copied; precision and recall are appended to their
     * fields, the support counters are increased, the example id lists are extended and the
     * evaluation date is set to the current UTC date. The updated entry is added to the Solr
     * server without a commit.
     *
     * @param str the concept URI
     * @param f the precision to append
     * @param f2 the recall to append
     * @param i the amount added to the positive support counter
     * @param i2 the amount added to the negative support counter
     * @param list example ids appended to the false positives field
     * @param list2 example ids appended to the false negatives field
     * @throws ClassifierException when querying or updating the Solr server fails
     */
    protected void updatePerformanceMetadata(String str, float f, float f2, int i, int i2, List<String> list, List<String> list2) throws ClassifierException {
        SolrServer solr = getActiveSolrServer();
        try {
            SolrQuery lookup = new SolrQuery("*:*");
            lookup.addFilterQuery(this.entryTypeField + ":" + METADATA_ENTRY);
            lookup.addFilterQuery(this.conceptUriField + ":" + ClientUtils.escapeQueryChars(str));
            for (SolrDocument stored : solr.query(lookup).getResults()) {
                // Start from a copy of everything currently stored.
                Map<String, Collection<Object>> fields = new HashMap<String, Collection<Object>>();
                for (String fieldName : stored.getFieldNames()) {
                    fields.put(fieldName, stored.getFieldValues(fieldName));
                }
                addToList(fields, this.precisionField, Float.valueOf(f));
                addToList(fields, this.recallField, Float.valueOf(f2));
                increment(fields, this.positiveSupportField, i);
                increment(fields, this.negativeSupportField, i2);
                addToList(fields, this.falsePositivesField, list);
                addToList(fields, this.falseNegativesField, list2);

                SolrInputDocument updated = new SolrInputDocument();
                for (Map.Entry<String, Collection<Object>> field : fields.entrySet()) {
                    updated.addField(field.getKey(), field.getValue());
                }
                updated.setField(this.modelEvaluationDateField, UTCTimeStamper.nowUtcDate());
                solr.add(updated);
            }
            log.info(String.format("Performance for concept '%s': precision=%f, recall=%f, positiveSupport=%d, negativeSupport=%d", str, Float.valueOf(f), Float.valueOf(f2), Integer.valueOf(i), Integer.valueOf(i2)));
        } catch (Exception e) {
            throw new ClassifierException(String.format("Error updating performance metadata for topic '%s' on Solr Core '%s'", str, this.solrCoreId), e);
        }
    }

    /**
     * Replaces the value of a counter field by its first current value plus the given amount.
     *
     * <p>A missing or empty field counts as zero. The field always ends up holding a new
     * single-element list.
     *
     * @param map the field values to modify in place
     * @param str the name of the counter field
     * @param i the amount to add
     */
    protected void increment(Map<String, Collection<Object>> map, String str, int i) {
        Collection<Object> current = map.get(str);
        int total = i;
        boolean hasValue = current != null && !current.isEmpty();
        if (hasValue) {
            Integer previous = (Integer) current.iterator().next();
            total += previous.intValue();
        }
        List<Object> replacement = new ArrayList<Object>();
        replacement.add(Integer.valueOf(total));
        map.put(str, replacement);
    }

    /**
     * Appends a value, or every element of a collection, to a multi-valued field.
     *
     * <p>The field is replaced by a new list holding the previous values followed by the
     * additions; the previous collection itself is not modified.
     *
     * @param map the field values to modify in place
     * @param str the name of the field
     * @param obj a single value to append, or a {@link Collection} whose elements are appended
     */
    protected void addToList(Map<String, Collection<Object>> map, String str, Object obj) {
        List<Object> merged = new ArrayList<Object>();
        Collection<Object> existing = map.get(str);
        if (existing != null) {
            merged.addAll(existing);
        }
        if (!(obj instanceof Collection)) {
            merged.add(obj);
        } else {
            merged.addAll((Collection<?>) obj);
        }
        map.put(str, merged);
    }

    /**
     * Builds the performance report of a concept from its first matching metadata entry.
     *
     * <p>Precision and recall are the means of the stored values and the supports are the sums
     * of the stored counters. A concept without stored figures yields zeros and is flagged as
     * not evaluated, because it has no evaluation date.
     *
     * @param str the concept URI
     * @return the report, including the stored false positive and false negative example ids
     * @throws ClassifierException when the concept is not registered or the Solr query fails
     */
    public ClassificationReport getPerformanceEstimates(String str) throws ClassifierException {
        SolrServer solr = getActiveSolrServer();
        SolrQuery lookup = new SolrQuery("*:*");
        lookup.addFilterQuery(this.entryTypeField + ":" + METADATA_ENTRY);
        lookup.addFilterQuery(this.conceptUriField + ":" + ClientUtils.escapeQueryChars(str));
        try {
            SolrDocumentList results = solr.query(lookup).getResults();
            if (results.isEmpty()) {
                throw new ClassifierException(String.format("'%s' is not a registered topic", str));
            }
            SolrDocument metadata = results.get(0);
            Float precision = computeMeanValue(metadata, this.precisionField);
            Float recall = computeMeanValue(metadata, this.recallField);
            // The sum may be null when the counter field is absent; treat that as zero.
            Integer positiveSum = computeSumValue(metadata, this.positiveSupportField);
            Integer negativeSum = computeSumValue(metadata, this.negativeSupportField);
            int positiveSupport = positiveSum == null ? 0 : positiveSum.intValue();
            int negativeSupport = negativeSum == null ? 0 : negativeSum.intValue();
            Date evaluationDate = (Date) metadata.getFirstValue(this.modelEvaluationDateField);
            ClassificationReport report = new ClassificationReport(precision.floatValue(), recall.floatValue(), positiveSupport, negativeSupport, evaluationDate != null, evaluationDate);

            Collection<Object> falsePositives = metadata.getFieldValues(this.falsePositivesField);
            if (falsePositives != null) {
                for (Object exampleId : falsePositives) {
                    report.falsePositiveExampleIds.add(exampleId.toString());
                }
            }
            Collection<Object> falseNegatives = metadata.getFieldValues(this.falseNegativesField);
            if (falseNegatives != null) {
                for (Object exampleId : falseNegatives) {
                    report.falseNegativeExampleIds.add(exampleId.toString());
                }
            }
            return report;
        } catch (SolrServerException e) {
            // Plain concatenation: the id must not be interpreted as a format string.
            throw new ClassifierException("Error fetching the performance report for topic " + str, e);
        }
    }

    /**
     * Computes the arithmetic mean of the {@code Float} values of a field.
     *
     * @param solrDocument the document to read
     * @param str the name of the field
     * @return the mean, or {@code 0} when the field is missing or empty
     */
    protected Float computeMeanValue(SolrDocument solrDocument, String str) {
        Collection<Object> values = solrDocument.getFieldValues(str);
        float mean = 0.0f;
        if (values != null && !values.isEmpty()) {
            for (Object value : values) {
                // Accumulate each value's share, in iteration order.
                mean = mean + (((Float) value).floatValue() / values.size());
            }
        }
        return Float.valueOf(mean);
    }

    /**
     * Computes the sum of the {@code Integer} values of a field.
     *
     * <p>A missing or empty field sums to zero, in line with {@code computeMeanValue}, so callers
     * can unbox the result without a null check.
     *
     * @param solrDocument the document to read
     * @param str the name of the field
     * @return the sum; never {@code null}
     */
    protected Integer computeSumValue(SolrDocument solrDocument, String str) {
        int sum = 0;
        Collection<Object> values = solrDocument.getFieldValues(str);
        if (values != null) {
            for (Object value : values) {
                sum += ((Integer) value).intValue();
            }
        }
        return Integer.valueOf(sum);
    }

    /**
     * Lists the names of the registered chains whose engines include this engine's name.
     *
     * <p>Every chain service obtained from the bundle context is released again after it has
     * been inspected. Chains that are no longer available, or whose engines cannot be
     * determined, are skipped.
     *
     * @return the matching chain names; empty when there are none or when this engine has no
     *         component context
     * @throws InvalidSyntaxException declared by the service lookup
     */
    public List<String> getChainNames() throws InvalidSyntaxException {
        List<String> chainNames = new ArrayList<String>();
        if (this.context == null) {
            return chainNames;
        }
        BundleContext bundleContext = this.context.getBundleContext();
        ServiceReference[] references = bundleContext.getServiceReferences(Chain.class.getName(), (String) null);
        if (references == null) {
            return chainNames;
        }
        for (ServiceReference reference : references) {
            Chain chain = (Chain) bundleContext.getService(reference);
            if (chain == null) {
                // The service went away between lookup and use.
                continue;
            }
            try {
                if (chain.getEngines().contains(getName())) {
                    chainNames.add(chain.getName());
                }
            } catch (ChainException e) {
                // Best effort: a chain that cannot list its engines is simply not reported.
                log.debug("Ignoring chain whose engines could not be determined", e);
            } finally {
                bundleContext.ungetService(reference);
            }
        }
        return chainNames;
    }

    /**
     * Registers every URI-named resource of the given RDF type as a concept.
     *
     * <p>For each such resource, the URI-named objects of the given property are collected and
     * passed to {@code addConcept} together with the resource's own URI. Subjects and objects
     * that are not {@link UriRef}s are ignored.
     *
     * @param graph the graph to read
     * @param uriRef the RDF type selecting the concepts
     * @param uriRef2 the property whose objects are passed along with each concept
     * @return the number of concepts that were added
     * @throws ClassifierException when adding a concept fails
     */
    public int importConceptsFromGraph(Graph graph, UriRef uriRef, UriRef uriRef2) throws ClassifierException {
        int imported = 0;
        Iterator<Triple> typed = graph.filter((NonLiteral) null, Properties.RDF_TYPE, uriRef);
        while (typed.hasNext()) {
            NonLiteral subject = typed.next().getSubject();
            if (!(subject instanceof UriRef)) {
                continue;
            }
            UriRef concept = (UriRef) subject;
            List<String> relatedUris = new ArrayList<String>();
            Iterator<GraphNode> related = new GraphNode(concept, graph).getObjectNodes(uriRef2);
            while (related.hasNext()) {
                // Check the type before casting: literals and blank nodes are skipped.
                Object node = related.next().getNode();
                if (node instanceof UriRef) {
                    relatedUris.add(((UriRef) node).getUnicodeString());
                }
            }
            addConcept(concept.getUnicodeString(), relatedUris);
            imported++;
        }
        return imported;
    }

    /**
     * Stores the given Entityhub on this engine.
     *
     * @param entityhub the instance to store
     */
    protected void bindEntityhub(Entityhub entityhub) {
        this.entityhub = entityhub;
    }

    /**
     * Clears the stored Entityhub, but only when it is the very instance passed in.
     *
     * @param entityhub the instance being released
     */
    protected void unbindEntityhub(Entityhub entityhub) {
        boolean isCurrent = this.entityhub == entityhub;
        if (!isCurrent) {
            return;
        }
        this.entityhub = null;
    }

    /**
     * Stores the given site manager on this engine.
     *
     * @param siteManager the instance to store
     */
    protected void bindReferencedSiteManager(SiteManager siteManager) {
        this.referencedSiteManager = siteManager;
    }

    /**
     * Clears the stored site manager, but only when it is the very instance passed in.
     *
     * @param siteManager the instance being released
     */
    protected void unbindReferencedSiteManager(SiteManager siteManager) {
        boolean isCurrent = this.referencedSiteManager == siteManager;
        if (!isCurrent) {
            return;
        }
        this.referencedSiteManager = null;
    }
}
