/*
 * Decompiled with CFR 0.152.
 */
package org.apache.stanbol.enhancer.engines.tika;

import java.io.BufferedInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.Charset;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import java.util.Arrays;
import java.util.Collections;
import java.util.Dictionary;
import java.util.Map;
import java.util.Set;
import org.apache.clerezza.rdf.core.UriRef;
import org.apache.commons.io.IOUtils;
import org.apache.stanbol.enhancer.engines.tika.handler.MultiHandler;
import org.apache.stanbol.enhancer.engines.tika.handler.PlainTextHandler;
import org.apache.stanbol.enhancer.engines.tika.metadata.OntologyMappings;
import org.apache.stanbol.enhancer.servicesapi.Blob;
import org.apache.stanbol.enhancer.servicesapi.ContentItem;
import org.apache.stanbol.enhancer.servicesapi.ContentItemFactory;
import org.apache.stanbol.enhancer.servicesapi.ContentSink;
import org.apache.stanbol.enhancer.servicesapi.EngineException;
import org.apache.stanbol.enhancer.servicesapi.EnhancementEngine;
import org.apache.stanbol.enhancer.servicesapi.ServiceProperties;
import org.apache.stanbol.enhancer.servicesapi.helper.EnhancementEngineHelper;
import org.apache.stanbol.enhancer.servicesapi.impl.AbstractEnhancementEngine;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.detect.Detector;
import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.BodyContentHandler;
import org.apache.tika.sax.ToXMLContentHandler;
import org.osgi.service.cm.ConfigurationException;
import org.osgi.service.component.ComponentContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class TikaEngine
extends AbstractEnhancementEngine<RuntimeException, RuntimeException>
implements EnhancementEngine,
ServiceProperties {
    /** SLF4J logger for this engine. */
    private final Logger log = LoggerFactory.getLogger(TikaEngine.class);
    /**
     * Configuration key read in {@code activate}; the resulting flag is passed
     * to the {@code PlainTextHandler} that writes the plain text version.
     */
    public static final String SKIP_LINEBREAKS_WITHIN_CONTENT = "stanbol.engines.tika.skipLinebreaks";
    /** Configuration key that toggles the media resource ontology mappings. */
    public static final String MAPPING_MEDIA_RESOURCE = "stanbol.engine.tika.mapping.mediaResource";
    /** Declared default for {@link #MAPPING_MEDIA_RESOURCE}. */
    public static final boolean DEFAULT_MAPPING_MEDIA_RESOURCE_STATE = true;
    /** Configuration key that toggles the Dublin Core mappings. */
    public static final String MAPPING_DUBLIN_CORE_TERMS = "stanbol.engine.tika.mapping.dcTerms";
    /** Declared default for {@link #MAPPING_DUBLIN_CORE_TERMS}. */
    public static final boolean DEFAULT_MAPPING_DUBLIN_CORE_TERMS_STATE = true;
    /** Configuration key that toggles the Nepomuk message mappings. */
    public static final String MAPPING_NEPOMUK_MESSAGE = "stanbol.engine.tika.mapping.nepomukMessage";
    /** Declared default for {@link #MAPPING_NEPOMUK_MESSAGE}. */
    public static final boolean DEFAULT_MAPPING_NEPOMUK_MESSAGE_STATE = true;
    /** Configuration key that toggles the Nepomuk EXIF mappings. */
    public static final String MAPPING_NEPOMUK_EXIF = "stanbol.engine.tika.mapping.nepomukExif";
    /** Declared default for {@link #MAPPING_NEPOMUK_EXIF}. */
    public static final boolean DEFAULT_MAPPING_NEPOMUK_EXIF_STATE = true;
    /** Configuration key that toggles the SKOS mappings. */
    public static final String MAPPING_SKOS = "stanbol.engine.tika.mapping.skos";
    /** Declared default for {@link #MAPPING_SKOS}. */
    public static final boolean DEFAULT_MAPPING_SKOS_STATE = false;
    /** Configuration key that toggles the RDFS mappings. */
    public static final String MAPPING_RDFS = "stanbol.engine.tika.mapping.rdfs";
    /** Declared default for {@link #MAPPING_RDFS}. */
    public static final boolean DEFAULT_MAPPING_RDFS_STATE = false;
    /** Configuration key that toggles the geo mappings. */
    public static final String MAPPING_GEO = "stanbol.engine.tika.mapping.geo";
    /** Declared default for {@link #MAPPING_GEO}. */
    public static final boolean DEFAULT_MAPPING_GEO_STATE = true;
    /** Declared default for {@link #SKIP_LINEBREAKS_WITHIN_CONTENT}. */
    public static final boolean DEFAULT_SKIP_LINEBREAKS = false;
    /** Current line break setting; set in {@code activate}, reset to {@code false} in {@code deactivate}. */
    private boolean skipLinebreaks = false;
    /** Ordering value published by {@code getServiceProperties()}. */
    public static final Integer defaultOrder = ORDERING_PRE_PROCESSING;
    /** Media type of the XHTML blob; content that already has this type gets no XHTML blob. */
    protected static final MediaType XHTML = new MediaType("application", "xhtml+xml");
    /** Charset used for both the plain text and the XHTML blob. */
    protected static final Charset UTF8 = Charset.forName("UTF-8");
    /** Tika configuration; set in {@code activate}, cleared in {@code deactivate}. */
    private TikaConfig config;
    /** Parser built from {@link #config}; set in {@code activate}, cleared in {@code deactivate}. */
    private Parser parser;
    /** Detector taken from {@link #config}; used when the blob's media type is missing or octet-stream. */
    private Detector detector;
    /** Mappings applied to the Tika metadata after parsing; built in {@code activate}. */
    private OntologyMappings ontologyMappings;
    /** Factory for the content sinks; set through the constructor or {@code bindCiFactory}. */
    private ContentItemFactory ciFactory;

    /**
     * Default constructor. Leaves the {@code ciFactory} unset; it is expected
     * to be supplied later through {@code bindCiFactory}.
     */
    public TikaEngine() {
        super();
    }

    /**
     * Package-private constructor that sets the content item factory directly.
     *
     * @param cifactory the factory used to create the content sinks
     */
    TikaEngine(ContentItemFactory cifactory) {
        super();
        this.ciFactory = cifactory;
    }

    /**
     * Reports whether this engine can process the passed content item.
     * The content item is not inspected; the answer is always the same.
     *
     * @param ci the content item (ignored)
     * @return always {@code 2}
     *         (NOTE(review): presumably the asynchronous-enhancement state
     *         constant of {@code EnhancementEngine} - confirm)
     * @throws EngineException never thrown by this implementation
     */
    public int canEnhance(ContentItem ci) throws EngineException {
        final int state = 2;
        return state;
    }

    /**
     * Converts the content of the passed content item with Tika and attaches
     * the results to it.
     * <p>
     * Steps:
     * <ol>
     * <li>Determine the media type (declared or detected). Nothing is done if
     *     it is unknown, is {@code text/plain}, or is not supported by the
     *     parser.</li>
     * <li>Parse the content into a {@code text/plain} sink and, unless the
     *     content already is XHTML, into an {@code application/xhtml+xml}
     *     sink.</li>
     * <li>Add the resulting blobs as parts {@code urn:tika:text:<uuid>} and
     *     {@code urn:tika:xhtml:<uuid>}.</li>
     * <li>Apply the configured ontology mappings to the extracted metadata
     *     while holding the content item's write lock.</li>
     * </ol>
     * The stream opened for media type detection is closed on every path that
     * does not hand it to the parser.
     *
     * @param ci the content item to process
     * @throws EngineException if a content sink cannot be created or parsing
     *         fails with an {@code IOException}, {@code SAXException} or
     *         {@code TikaException}
     */
    public void computeEnhancements(ContentItem ci) throws EngineException {
        MediaTypeAndStream mtas = this.extractMediaType(ci);
        if (mtas.mediaType == null) {
            // Media type could neither be parsed nor detected.
            IOUtils.closeQuietly(mtas.in);
            return;
        }
        MediaType plainMediaType = mtas.mediaType.getBaseType();
        if (plainMediaType.equals(MediaType.TEXT_PLAIN)) {
            // Plain text needs no conversion; release the detection stream.
            IOUtils.closeQuietly(mtas.in);
            return;
        }
        final ParseContext context = new ParseContext();
        context.set(Parser.class, this.parser);
        Set<MediaType> supported = this.parser.getSupportedTypes(context);
        if (!supported.contains(plainMediaType)) {
            // Format not supported by the parser; release the detection stream.
            IOUtils.closeQuietly(mtas.in);
            return;
        }

        // Reuse the stream already opened for detection, if there is one.
        final InputStream in = mtas.in == null ? ci.getStream() : mtas.in;
        final Metadata metadata = new Metadata();
        metadata.set("Content-Type", mtas.mediaType.toString());

        ContentSink plainTextSink;
        try {
            plainTextSink = this.ciFactory.createContentSink(MediaType.TEXT_PLAIN + "; charset=" + UTF8.name());
        } catch (IOException e) {
            IOUtils.closeQuietly(in);
            throw new EngineException("Error while initialising Blob forwriting the text/plain version of the parsed content", e);
        }
        OutputStreamWriter plainTextWriter = new OutputStreamWriter(plainTextSink.getOutputStream(), UTF8);
        BodyContentHandler textHandler = new BodyContentHandler(
                (ContentHandler) new PlainTextHandler(plainTextWriter, false, this.skipLinebreaks));

        ToXMLContentHandler xhtmlHandler;
        final ContentHandler mainHandler;
        ContentSink xhtmlSink = null;
        try {
            if (!plainMediaType.equals(XHTML)) {
                try {
                    xhtmlSink = this.ciFactory.createContentSink(XHTML + "; charset=" + UTF8.name());
                } catch (IOException e) {
                    throw new EngineException("Error while initialising Blob forwriting the application/xhtml+xml version of the parsed content", e);
                }
                try {
                    xhtmlHandler = new ToXMLContentHandler(xhtmlSink.getOutputStream(), UTF8.name());
                } catch (UnsupportedEncodingException e) {
                    throw new EngineException("This system does not support the encoding " + UTF8, e);
                }
                mainHandler = new MultiHandler(new ContentHandler[]{textHandler, xhtmlHandler});
            } else {
                // Content already is XHTML: only the plain text version is produced.
                mainHandler = textHandler;
                xhtmlHandler = null;
                xhtmlSink = null;
            }
            try {
                // The parse call runs as a privileged action.
                AccessController.doPrivileged(new PrivilegedExceptionAction<Object>() {
                    @Override
                    public Object run() throws IOException, SAXException, TikaException {
                        TikaEngine.this.parser.parse(in, mainHandler, metadata, context);
                        return null;
                    }
                });
            } catch (PrivilegedActionException pae) {
                Exception e = pae.getException();
                if (e instanceof IOException || e instanceof SAXException || e instanceof TikaException) {
                    throw new EngineException("Unable to convert ContentItem " + ci.getUri() + " with mimeType '" + ci.getMimeType() + "' to plain text!", e);
                }
                // run() declares no other checked exceptions, so this is a RuntimeException.
                throw RuntimeException.class.cast(e);
            }
        } finally {
            IOUtils.closeQuietly(in);
            IOUtils.closeQuietly(plainTextWriter);
            if (xhtmlSink != null) {
                IOUtils.closeQuietly(xhtmlSink.getOutputStream());
            }
        }

        // Both parts share one random suffix.
        String random = EnhancementEngineHelper.randomUUID().toString();
        UriRef textBlobUri = new UriRef("urn:tika:text:" + random);
        ci.addPart(textBlobUri, plainTextSink.getBlob());
        if (xhtmlHandler != null) {
            UriRef xhtmlBlobUri = new UriRef("urn:tika:xhtml:" + random);
            ci.addPart(xhtmlBlobUri, xhtmlSink.getBlob());
        }
        if (this.log.isInfoEnabled()) {
            for (String name : metadata.names()) {
                this.log.info("{}: {}", name, Arrays.toString(metadata.getValues(name)));
            }
        }
        // The content item's metadata is only modified under its write lock.
        ci.getLock().writeLock().lock();
        try {
            this.ontologyMappings.apply(ci.getMetadata(), ci.getUri(), metadata);
        } finally {
            ci.getLock().writeLock().unlock();
        }
    }

    /**
     * Determines the media type of the passed content item.
     * <p>
     * The type declared by the blob is used when it can be parsed and is not
     * {@code application/octet-stream}. Otherwise the content stream is
     * opened, wrapped in a {@link BufferedInputStream}, and handed to the
     * detector with the content item's URI as the resource name. On success
     * the opened stream stays in the result for reuse. If detection fails
     * with an {@code IOException}, a warning is logged, the stream is closed
     * and the result's stream is {@code null}.
     *
     * @param ci the content item
     * @return the URI, the media type (may be {@code null}) and the stream
     *         opened for detection (may be {@code null})
     */
    private MediaTypeAndStream extractMediaType(ContentItem ci) {
        final MediaTypeAndStream result = new MediaTypeAndStream();
        result.mediaType = this.getMediaType(ci.getBlob());
        result.uri = ci.getUri().getUnicodeString();

        final boolean declaredTypeUsable =
                result.mediaType != null && !result.mediaType.equals(MediaType.OCTET_STREAM);
        if (declaredTypeUsable) {
            return result;
        }

        // No usable declared type: fall back to content-based detection.
        result.in = new BufferedInputStream(ci.getStream());
        final Metadata hints = new Metadata();
        hints.add("resourceName", result.uri);
        try {
            result.mediaType = this.detector.detect(result.in, hints);
        } catch (IOException e) {
            this.log.warn("Exception while detection the MediaType of theparsed ContentItem " + ci.getUri(), (Throwable) e);
            IOUtils.closeQuietly(result.in);
            result.in = null;
        }
        return result;
    }

    /**
     * Builds a Tika {@link MediaType} from the mime type and parameters of
     * the passed blob.
     *
     * @param blob the blob providing the mime type and its parameters
     * @return the media type, or {@code null} (after logging a warning) if
     *         splitting the mime type on {@code '/'} does not yield exactly
     *         two parts
     */
    private MediaType getMediaType(Blob blob) {
        final String[] parts = blob.getMimeType().split("/");
        if (parts.length == 2) {
            final String type = parts[0];
            final String subtype = parts[1];
            return new MediaType(type, subtype, blob.getParameter());
        }
        this.log.warn("Encounterd illegal formatted mediaType '{}'  -> will try to detect the mediaType based on the parsed content!", (Object) blob.getMimeType());
        return null;
    }

    /**
     * Activates the engine: initialises Tika (default configuration, its
     * detector and an {@link AutoDetectParser}), reads the line break option
     * and registers the ontology mappings enabled in the component
     * properties.
     *
     * @param ctx the component context providing the configuration
     * @throws ConfigurationException declared for the super class activation
     */
    protected void activate(ComponentContext ctx) throws ConfigurationException {
        super.activate(ctx);

        this.config = TikaConfig.getDefaultConfig();
        this.detector = this.config.getDetector();
        this.parser = new AutoDetectParser(this.config);

        final Dictionary<?, ?> props = ctx.getProperties();
        this.skipLinebreaks = getBoolean(props, SKIP_LINEBREAKS_WITHIN_CONTENT, DEFAULT_SKIP_LINEBREAKS);

        final OntologyMappings mappings = new OntologyMappings();
        this.ontologyMappings = mappings;
        if (getBoolean(props, MAPPING_MEDIA_RESOURCE, DEFAULT_MAPPING_MEDIA_RESOURCE_STATE)) {
            OntologyMappings.addMediaResourceOntologyMappings(mappings);
        }
        if (getBoolean(props, MAPPING_DUBLIN_CORE_TERMS, DEFAULT_MAPPING_DUBLIN_CORE_TERMS_STATE)) {
            OntologyMappings.addDcMappings(mappings);
        }
        if (getBoolean(props, MAPPING_NEPOMUK_MESSAGE, DEFAULT_MAPPING_NEPOMUK_MESSAGE_STATE)) {
            OntologyMappings.addNepomukMessageMappings(mappings);
        }
        if (getBoolean(props, MAPPING_NEPOMUK_EXIF, DEFAULT_MAPPING_NEPOMUK_EXIF_STATE)) {
            OntologyMappings.addNepomukExifMappings(mappings);
        }
        if (getBoolean(props, MAPPING_SKOS, DEFAULT_MAPPING_SKOS_STATE)) {
            OntologyMappings.addSkosMappings(mappings);
        }
        if (getBoolean(props, MAPPING_RDFS, DEFAULT_MAPPING_RDFS_STATE)) {
            OntologyMappings.addRdfsMappings(mappings);
        }
        if (getBoolean(props, MAPPING_GEO, DEFAULT_MAPPING_GEO_STATE)) {
            OntologyMappings.addGeoMappings(mappings);
        }
    }

    /**
     * Deactivates the engine: drops all state created by {@code activate},
     * resets the line break option and then deactivates the super class.
     *
     * @param ctx the component context
     */
    protected void deactivate(ComponentContext ctx) throws RuntimeException {
        // Release the Tika components.
        this.detector = null;
        this.parser = null;
        this.config = null;
        // Reset the configuration-derived state.
        this.ontologyMappings = null;
        this.skipLinebreaks = DEFAULT_SKIP_LINEBREAKS;
        super.deactivate(ctx);
    }

    /**
     * Reads a boolean option from the passed properties.
     *
     * @param properties the configuration properties
     * @param key the key of the option
     * @param defaultState the value returned when no value is present
     * @return the {@link Boolean} value as is; for any other non-null value
     *         the result of {@link Boolean#parseBoolean(String)} on its
     *         string form; otherwise {@code defaultState}
     */
    private static boolean getBoolean(Dictionary<?, ?> properties, String key, boolean defaultState) {
        final Object raw = properties.get(key);
        if (raw == null) {
            return defaultState;
        }
        if (raw instanceof Boolean) {
            return ((Boolean) raw).booleanValue();
        }
        return Boolean.parseBoolean(raw.toString());
    }

    /**
     * Returns the service properties of this engine.
     *
     * @return an unmodifiable single-entry map that binds
     *         {@code "org.apache.stanbol.enhancer.engine.order"} to
     *         {@code defaultOrder}
     */
    public Map<String, Object> getServiceProperties() {
        final Map<String, Object> ordering =
                Collections.<String, Object>singletonMap("org.apache.stanbol.enhancer.engine.order", defaultOrder);
        return Collections.unmodifiableMap(ordering);
    }

    /**
     * Sets the content item factory used to create the content sinks.
     *
     * @param contentItemFactory the factory to use from now on
     */
    protected void bindCiFactory(ContentItemFactory contentItemFactory) {
        ciFactory = contentItemFactory;
    }

    /**
     * Clears the content item factory, but only if the passed instance is the
     * one currently in use.
     *
     * @param contentItemFactory the factory being unbound
     */
    protected void unbindCiFactory(ContentItemFactory contentItemFactory) {
        if (this.ciFactory != contentItemFactory) {
            // A different factory is bound; leave it untouched.
            return;
        }
        this.ciFactory = null;
    }

    /**
     * Mutable holder for the result of {@code extractMediaType}.
     */
    private static class MediaTypeAndStream {
        /** Unicode string form of the content item's URI. */
        String uri;
        /** Declared or detected media type; {@code null} if neither is available. */
        MediaType mediaType;
        /** Stream opened for detection; {@code null} if none was opened or detection failed. */
        InputStream in;

        /** Only instantiated from within the enclosing class. */
        private MediaTypeAndStream() {
            super();
        }
    }
}

