package org.apache.stanbol.enhancer.engines.zemanta.impl;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.clerezza.rdf.core.Literal;
import org.apache.clerezza.rdf.core.LiteralFactory;
import org.apache.clerezza.rdf.core.MGraph;
import org.apache.clerezza.rdf.core.NonLiteral;
import org.apache.clerezza.rdf.core.Triple;
import org.apache.clerezza.rdf.core.TripleCollection;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.clerezza.rdf.core.impl.PlainLiteralImpl;
import org.apache.clerezza.rdf.core.impl.SimpleMGraph;
import org.apache.clerezza.rdf.core.impl.TripleImpl;
import org.apache.stanbol.commons.stanboltools.offline.OnlineMode;
import org.apache.stanbol.enhancer.engines.zemanta.ZemantaOntologyEnum;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.InvalidContentException;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.ContentItemHelper;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.rdf.OntologicalClasses;
import org.apache.stanbol.enhancer.servicesapi.rdf.Properties;
import org.apache.stanbol.enhancer.servicesapi.rdf.TechnicalClasses;
import org.osgi.framework.BundleContext;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/stanbol/enhancer/engines/zemanta/impl/ZemantaEnhancementEngine.class */
/**
 * Enhancement engine that sends the text of a content item to the remote Zemanta service
 * (through {@code ZemantaAPIWrapper}) and translates the returned RDF into enhancements:
 * Zemanta "Recognition" resources become text- and entity-enhancements, Zemanta "Category"
 * resources become topic-enhancements.
 *
 * <p>The engine needs a Zemanta API key, configured through {@link #API_KEY_PROPERTY};
 * activation fails with a {@link ConfigurationException} when the key is missing or blank.
 */
public class ZemantaEnhancementEngine extends AbstractEnhancementEngine<IOException, RuntimeException> implements EnhancementEngine, ServiceProperties {
    /** Name of the component property holding the Zemanta API key. */
    public static final String API_KEY_PROPERTY = "org.apache.stanbol.enhancer.engines.zemanta.key";
    /** Base URL prepended to a category title (minus {@link #ZEMANTA_DMOZ_PREFIX}) to build the entity reference. */
    public static final String DMOZ_BASE_URL = "http://www.dmoz.org/";
    /** Prefix of category titles that is stripped before appending the title to {@link #DMOZ_BASE_URL}. */
    public static final String ZEMANTA_DMOZ_PREFIX = "Top/";
    /** Maximum number of characters taken before and after a selection for its selection context. */
    private static final int SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE = 50;
    /** The configured Zemanta API key; set in {@link #activate} and cleared in {@link #deactivate}. */
    private String key;
    /** Factory for typed literals; set in {@link #activate} and cleared in {@link #deactivate}. */
    public LiteralFactory literalFactory;
    /** Bundle context of the component; set in {@link #activate} and cleared in {@link #deactivate}. */
    protected BundleContext bundleContext;
    /**
     * Only written by {@link #bindOnlineMode}/{@link #unbindOnlineMode}, never read in this class.
     * NOTE(review): presumably an OSGi reference that keeps the engine inactive while offline - confirm.
     */
    private OnlineMode onlineMode;
    /** Mime types of the content parts this engine is able to process. */
    protected static final Set<String> SUPPORTED_MIMETYPES = Collections.unmodifiableSet(new HashSet<String>(Arrays.asList("text/plain", "text/html")));
    private static final Logger log = LoggerFactory.getLogger(ZemantaEnhancementEngine.class);
    /** Ordering value reported by {@link #getServiceProperties()}: the extraction-enhancement ordering plus 10. */
    public static final Integer defaultOrder = Integer.valueOf(ServiceProperties.ORDERING_EXTRACTION_ENHANCEMENT.intValue() + 10);

    /**
     * Activates the engine: reads the API key from the component properties and validates it.
     *
     * @param componentContext the component context providing the bundle context and the properties
     * @throws IOException declared for the super class contract
     * @throws ConfigurationException if no (non-blank) API key is configured
     */
    protected void activate(ComponentContext componentContext) throws IOException, ConfigurationException {
        super.activate(componentContext);
        this.bundleContext = componentContext.getBundleContext();
        this.key = (String) componentContext.getProperties().get(API_KEY_PROPERTY);
        checkConfig();
        this.literalFactory = LiteralFactory.getInstance();
    }

    /**
     * Deactivates the engine and releases everything that was set up in {@link #activate}.
     *
     * @param componentContext the component context
     */
    protected void deactivate(ComponentContext componentContext) {
        super.deactivate(componentContext);
        this.literalFactory = null;
        this.key = null;
        this.bundleContext = null;
    }

    /**
     * Validates the configuration.
     *
     * @throws ConfigurationException if the API key is {@code null} or contains only whitespace
     */
    private void checkConfig() throws ConfigurationException {
        if (this.key == null || this.key.trim().length() == 0) {
            throw new ConfigurationException(API_KEY_PROPERTY, String.format(
                    "%s : please configure a Zemanta key to use this engine (e.g. by using the "
                            + "'Configuration' tab of the Apache Felix Web Console).",
                    getClass().getSimpleName()));
        }
    }

    /**
     * Reports whether the content item has a content part with one of the {@link #SUPPORTED_MIMETYPES}.
     *
     * @param contentItem the content item to check
     * @return {@code 2} if a supported content part exists, {@code 0} otherwise
     */
    public int canEnhance(ContentItem contentItem) {
        // NOTE(review): 2 / 0 are presumably EnhancementEngine.ENHANCE_ASYNC / CANNOT_ENHANCE - confirm.
        return ContentItemHelper.getBlob(contentItem, SUPPORTED_MIMETYPES) != null ? 2 : 0;
    }

    /**
     * Sends the text of the content item to Zemanta and writes the resulting enhancements into
     * the metadata of the content item. Content items without any text are skipped with a warning.
     *
     * @param contentItem the content item to enhance
     * @throws InvalidContentException if the text of the content part can not be read
     * @throws EngineException if the request to the Zemanta service fails
     * @throws IllegalStateException if the content item has no content part with a supported mime type
     */
    public void computeEnhancements(ContentItem contentItem) throws EngineException {
        Map.Entry<UriRef, Blob> contentPart = ContentItemHelper.getBlob(contentItem, SUPPORTED_MIMETYPES);
        if (contentPart == null) {
            throw new IllegalStateException("No ContentPart with a supported Mime Type found for ContentItem "
                    + contentItem.getUri() + "(supported: '" + SUPPORTED_MIMETYPES
                    + "') -> this indicates that canEnhance was NOT called and indicates a bug in the used "
                    + "EnhancementJobManager!");
        }
        String text;
        try {
            text = ContentItemHelper.getText(contentPart.getValue());
        } catch (IOException e) {
            throw new InvalidContentException(this, contentItem, e);
        }
        if (text.trim().length() == 0) {
            log.warn("ContentPart {} of ContentItem {} does not contain any text to enhance", contentPart.getKey(), contentItem.getUri());
            return;
        }
        MGraph enhancements = contentItem.getMetadata();
        UriRef contentItemId = contentItem.getUri();
        // The remote call is done before the write lock is acquired, so the lock is only held
        // while the metadata graph is actually modified.
        MGraph results = new SimpleMGraph();
        try {
            results.addAll(new ZemantaAPIWrapper(this.key).enhance(text));
        } catch (IOException e) {
            throw new EngineException("Unable to get Enhancement from remote Zemanta Service", e);
        }
        contentItem.getLock().writeLock().lock();
        try {
            processRecognition(results, enhancements, text, contentItemId);
            processCategories(results, enhancements, contentItemId);
        } finally {
            contentItem.getLock().writeLock().unlock();
        }
    }

    /**
     * Returns the service properties of this engine.
     *
     * @return an unmodifiable map holding the engine ordering ({@link #defaultOrder})
     */
    public Map<String, Object> getServiceProperties() {
        return Collections.unmodifiableMap(Collections.singletonMap("org.apache.stanbol.enhancer.engine.order", (Object) defaultOrder));
    }

    /**
     * Converts the Zemanta categories found in the results into topic enhancements. All topic
     * enhancements are related to a single text enhancement that is created lazily for the
     * first usable category. Categories without a target, with a categorisation scheme other
     * than DMOZ or without a title are skipped with a warning.
     *
     * @param results the RDF data returned by Zemanta
     * @param enhancements the graph the enhancements are written to
     * @param ciId the id of the enhanced content item
     */
    protected void processCategories(MGraph results, MGraph enhancements, UriRef ciId) {
        Iterator<Triple> categories = results.filter(null, Properties.RDF_TYPE, ZemantaOntologyEnum.Category.getUri());
        NonLiteral textAnnotation = null;
        while (categories.hasNext()) {
            NonLiteral category = categories.next().getSubject();
            log.debug("process category {}", category);
            Double confidence = parseConfidence(results, category);
            log.debug(" > confidence :{}", confidence);
            UriRef target = EnhancementEngineHelper.getReference(results, category, ZemantaOntologyEnum.target.getUri());
            if (target == null) {
                log.warn("Unable to process category {} because no target node was found", category);
                continue;
            }
            UriRef scheme = EnhancementEngineHelper.getReference(results, target, ZemantaOntologyEnum.categorization.getUri());
            if (scheme == null || !scheme.equals(ZemantaOntologyEnum.categorization_DMOZ.getUri())) {
                log.warn("Unable to process category " + category + " because categorisation scheme != DMOZ (" + scheme + " != " + ZemantaOntologyEnum.categorization_DMOZ.getUri() + ")");
                continue;
            }
            String title = EnhancementEngineHelper.getString(results, target, ZemantaOntologyEnum.title.getUri());
            if (title == null) {
                log.warn("Unable to process category {} because no title is present", category);
                continue;
            }
            if (textAnnotation == null) {
                // created once and shared by all topic enhancements of this content item
                textAnnotation = EnhancementEngineHelper.createTextEnhancement(enhancements, this, ciId);
                enhancements.add(new TripleImpl(textAnnotation, Properties.DC_TYPE, OntologicalClasses.SKOS_CONCEPT));
            }
            UriRef topic = EnhancementEngineHelper.createTopicEnhancement(enhancements, this, ciId);
            enhancements.add(new TripleImpl(topic, Properties.DC_RELATION, textAnnotation));
            enhancements.add(new TripleImpl(topic, Properties.ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(title)));
            if (title.startsWith(ZEMANTA_DMOZ_PREFIX)) {
                enhancements.add(new TripleImpl(topic, Properties.ENHANCER_ENTITY_REFERENCE,
                        new UriRef(DMOZ_BASE_URL + title.substring(ZEMANTA_DMOZ_PREFIX.length()))));
            }
            if (confidence != null) {
                enhancements.add(new TripleImpl(topic, Properties.ENHANCER_CONFIDENCE, this.literalFactory.createTypedLiteral(confidence)));
            }
            enhancements.add(new TripleImpl(topic, Properties.ENHANCER_ENTITY_TYPE, OntologicalClasses.SKOS_CONCEPT));
            enhancements.add(new TripleImpl(topic, Properties.ENHANCER_ENTITY_TYPE, ZemantaOntologyEnum.Category.getUri()));
        }
    }

    /**
     * Converts the Zemanta recognitions found in the results into enhancements: text
     * enhancements for every occurrence of the anchor in the text and one entity enhancement
     * that is related to them and references the entities linked with owl:sameAs.
     *
     * @param results the RDF data returned by Zemanta
     * @param enhancements the graph the enhancements are written to
     * @param text the text that was sent to Zemanta
     * @param ciId the id of the enhanced content item
     */
    protected void processRecognition(MGraph results, MGraph enhancements, String text, UriRef ciId) {
        Iterator<Triple> recognitions = results.filter(null, Properties.RDF_TYPE, ZemantaOntologyEnum.Recognition.getUri());
        while (recognitions.hasNext()) {
            NonLiteral recognition = recognitions.next().getSubject();
            log.debug("process recognition {}", recognition);
            Double confidence = parseConfidence(results, recognition);
            log.debug(" > confidence :{}", confidence);
            String anchor = EnhancementEngineHelper.getString(results, recognition, ZemantaOntologyEnum.anchor.getUri());
            log.debug(" > anchor :{}", anchor);
            Collection<NonLiteral> textAnnotations = processTextAnnotation(enhancements, text, ciId, anchor, confidence);
            log.debug(" > number of textAnnotations :{}", textAnnotations.size());

            // NOTE(review): 'object' is not checked for null before it is used as the subject of
            // the lookups below; what the helper does for a null subject is not visible here - confirm.
            UriRef object = EnhancementEngineHelper.getReference(results, recognition, ZemantaOntologyEnum.object.getUri());
            log.debug(" > object :{}", object);
            Set<UriRef> sameAs = new HashSet<UriRef>();
            Iterator<UriRef> sameAsRefs = EnhancementEngineHelper.getReferences(results, object, ZemantaOntologyEnum.owlSameAs.getUri());
            while (sameAsRefs.hasNext()) {
                sameAs.add(sameAsRefs.next());
            }
            log.debug(" > sameAs :{}", sameAs);
            String label = findEntityLabel(results, object, sameAs);

            UriRef entityAnnotation = EnhancementEngineHelper.createEntityEnhancement(enhancements, this, ciId);
            if (confidence != null) {
                enhancements.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_CONFIDENCE, this.literalFactory.createTypedLiteral(confidence)));
            }
            for (NonLiteral textAnnotation : textAnnotations) {
                enhancements.add(new TripleImpl(entityAnnotation, Properties.DC_RELATION, textAnnotation));
            }
            for (UriRef entity : sameAs) {
                enhancements.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_REFERENCE, entity));
            }
            if (label != null) {
                enhancements.add(new TripleImpl(entityAnnotation, Properties.ENHANCER_ENTITY_LABEL, new PlainLiteralImpl(label)));
            } else {
                // a literal can not be built from a missing label, so the label triple is left out
                log.warn("No label found for the entity recognised by {}: no entity label is added", recognition);
            }
        }
    }

    /**
     * Determines the label of a recognised entity from the titles of its RDF-typed targets.
     * Only targets that are also contained in the sameAs set are considered; the title of the
     * first such target wins and deviating titles of later targets are logged.
     *
     * @param results the RDF data returned by Zemanta
     * @param object the object of the recognition whose targets are inspected
     * @param sameAs the entities linked with owl:sameAs to the object
     * @return the label or {@code null} if no suitable target provides a title
     */
    private String findEntityLabel(MGraph results, UriRef object, Set<UriRef> sameAs) {
        String label = null;
        Iterator<UriRef> targets = EnhancementEngineHelper.getReferences(results, object, ZemantaOntologyEnum.target.getUri());
        while (targets.hasNext()) {
            UriRef target = targets.next();
            log.debug("    -  target :{}", target);
            UriRef targetType = EnhancementEngineHelper.getReference(results, target, ZemantaOntologyEnum.targetType.getUri());
            log.debug("       o type :{}", targetType);
            if (!ZemantaOntologyEnum.targetType_RDF.getUri().equals(targetType)) {
                continue;
            }
            String title = EnhancementEngineHelper.getString(results, target, ZemantaOntologyEnum.title.getUri());
            log.debug("       o title :{}", title);
            if (!sameAs.contains(target)) {
                log.warn("Found Target with type RDF, that is not linked with owl:sameAs to the others (this: '{} | sameAs: {})", target, sameAs);
                log.warn("  - no Enhancement for {} will be created", target);
            } else if (label == null) {
                label = title;
            } else if (!label.equals(title)) {
                log.warn("Entities marked with owl:sameAs do use different labels '{}' != '{}'!", label, title);
            }
        }
        return label;
    }

    /**
     * Reads the Zemanta confidence of a resource.
     *
     * @param tripleCollection the RDF data to read from
     * @param nonLiteral the resource whose confidence is read
     * @return the confidence or {@code null} if it is missing or not a parsable number
     */
    private static Double parseConfidence(TripleCollection tripleCollection, NonLiteral nonLiteral) {
        String value = EnhancementEngineHelper.getString(tripleCollection, nonLiteral, ZemantaOntologyEnum.confidence.getUri());
        if (value == null) {
            return null;
        }
        try {
            return Double.valueOf(value);
        } catch (NumberFormatException e) {
            log.warn("Unable to parse Float confidence for Literal value '" + value + "'");
            return null;
        }
    }

    /**
     * Collects the text enhancements for all occurrences of the anchor within the text.
     * Existing text annotations that select the same text at the same start position are
     * reused; for every other occurrence a new text enhancement is created.
     *
     * @param enhancements the graph the enhancements are read from and written to
     * @param text the text that was sent to Zemanta
     * @param ciId the id of the enhanced content item
     * @param anchor the text selected by the recognition; may be {@code null} or empty
     * @param confidence the confidence to add to new text enhancements; may be {@code null}
     * @return the (reused and created) text enhancements; empty if the anchor is {@code null},
     *         empty or does not occur in the text
     */
    private Collection<NonLiteral> processTextAnnotation(MGraph enhancements, String text, UriRef ciId, String anchor, Double confidence) {
        Collection<NonLiteral> textAnnotations = new ArrayList<NonLiteral>();
        if (anchor == null || anchor.length() == 0) {
            // A missing anchor can not be searched for, and an empty one matches at every
            // position: indexOf("", from) never returns a negative value, so the loop below
            // would never terminate.
            log.warn("Unable to create text annotations for a missing or empty anchor");
            return textAnnotations;
        }
        int anchorLength = anchor.length();
        Literal anchorLiteral = new PlainLiteralImpl(anchor);
        Map<Integer, Collection<NonLiteral>> existing = searchExistingTextAnnotations(enhancements, anchorLiteral);
        // continuing the search at start + 1 also finds overlapping occurrences
        for (int start = text.indexOf(anchor); start >= 0; start = text.indexOf(anchor, start + 1)) {
            Collection<NonLiteral> known = existing.get(Integer.valueOf(start));
            if (known != null) {
                textAnnotations.addAll(known);
                continue;
            }
            int end = start + anchorLength;
            UriRef textAnnotation = EnhancementEngineHelper.createTextEnhancement(enhancements, this, ciId);
            textAnnotations.add(textAnnotation);
            enhancements.add(new TripleImpl(textAnnotation, Properties.ENHANCER_START, this.literalFactory.createTypedLiteral(Integer.valueOf(start))));
            enhancements.add(new TripleImpl(textAnnotation, Properties.ENHANCER_END, this.literalFactory.createTypedLiteral(Integer.valueOf(end))));
            enhancements.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTED_TEXT, anchorLiteral));
            enhancements.add(new TripleImpl(textAnnotation, Properties.ENHANCER_SELECTION_CONTEXT, new PlainLiteralImpl(selectionContext(text, start, end))));
            if (confidence != null) {
                enhancements.add(new TripleImpl(textAnnotation, Properties.ENHANCER_CONFIDENCE, this.literalFactory.createTypedLiteral(confidence)));
            }
        }
        return textAnnotations;
    }

    /**
     * Cuts the selection context of a selection out of the text: the selection plus up to
     * {@link #SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE} characters on each side. Where the text
     * extends beyond that window, the context is shortened to the nearest space inside the
     * window (if there is one) so that it does not start or end in the middle of a word.
     *
     * @param text the full text
     * @param start the start index (inclusive) of the selection
     * @param end the end index (exclusive) of the selection
     * @return the selection context
     */
    private static String selectionContext(String text, int start, int end) {
        int contextStart;
        if (start <= SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE) {
            contextStart = 0;
        } else {
            int windowStart = start - SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
            contextStart = text.indexOf(' ', windowStart);
            if (contextStart < 0 || contextStart >= start) {
                // no space between the window start and the selection
                contextStart = windowStart;
            }
        }
        int contextEnd;
        if (end + SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE >= text.length()) {
            contextEnd = text.length();
        } else {
            int windowEnd = end + SELECTION_CONTEXT_PREFIX_SUFFIX_SIZE;
            contextEnd = text.lastIndexOf(' ', windowEnd);
            if (contextEnd <= end) {
                // no space between the selection and the window end
                contextEnd = windowEnd;
            }
        }
        return text.substring(contextStart, contextEnd);
    }

    /**
     * Searches the graph for text annotations that select the given text.
     *
     * @param mGraph the graph to search
     * @param literal the selected text to search for
     * @return the matching text annotations grouped by their start position; annotations
     *         without an integer start position are ignored
     */
    private Map<Integer, Collection<NonLiteral>> searchExistingTextAnnotations(MGraph mGraph, Literal literal) {
        Map<Integer, Collection<NonLiteral>> annotationsByStart = new HashMap<Integer, Collection<NonLiteral>>();
        Iterator<Triple> selections = mGraph.filter(null, Properties.ENHANCER_SELECTED_TEXT, literal);
        while (selections.hasNext()) {
            NonLiteral subject = selections.next().getSubject();
            if (!mGraph.contains(new TripleImpl(subject, Properties.RDF_TYPE, TechnicalClasses.ENHANCER_TEXTANNOTATION))) {
                continue;
            }
            Integer start = EnhancementEngineHelper.get(mGraph, subject, Properties.ENHANCER_START, Integer.class, this.literalFactory);
            if (start == null) {
                continue;
            }
            Collection<NonLiteral> annotations = annotationsByStart.get(start);
            if (annotations == null) {
                annotations = new ArrayList<NonLiteral>();
                annotationsByStart.put(start, annotations);
            }
            annotations.add(subject);
        }
        return annotationsByStart;
    }

    /**
     * Stores the given online mode reference.
     *
     * @param onlineMode the reference to store
     */
    protected void bindOnlineMode(OnlineMode onlineMode) {
        this.onlineMode = onlineMode;
    }

    /**
     * Clears the stored online mode reference if it is the given one.
     *
     * @param onlineMode the reference to release
     */
    protected void unbindOnlineMode(OnlineMode onlineMode) {
        if (this.onlineMode == onlineMode) {
            this.onlineMode = null;
        }
    }
}
