package org.apache.clerezza.uima.metadatagenerator.mediatype;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import javax.ws.rs.core.MediaType;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;

/* loaded from: input_file:org/apache/clerezza/uima/metadatagenerator/mediatype/TikaTextExtractor.class */
/**
 * {@link MediaTypeTextExtractor} implementation that delegates media type
 * detection and text extraction to an Apache Tika facade.
 */
public class TikaTextExtractor implements MediaTypeTextExtractor {
    private final Tika tika;
    private final TikaConfig config;

    /**
     * Creates an extractor using Tika's default configuration.
     *
     * @throws RuntimeException if the default configuration or the Tika facade cannot be created
     */
    public TikaTextExtractor() {
        try {
            TikaConfig defaultConfig = TikaConfig.getDefaultConfig();
            this.config = defaultConfig;
            this.tika = new Tika(defaultConfig);
        } catch (Exception e) {
            throw new RuntimeException("Error while loading Tika configuration.", e);
        }
    }

    /**
     * Creates an extractor using a Tika configuration loaded from a classpath resource.
     *
     * @param str name of the configuration resource; it is looked up relative to this class,
     *            then through this class' class loader, then through the system class loader
     * @throws IllegalArgumentException if no resource with that name can be found
     * @throws RuntimeException if the configuration cannot be loaded
     */
    public TikaTextExtractor(String str) {
        InputStream configStream = getResourceAsStream(str);
        if (configStream == null) {
            // Fail with the resource name instead of an opaque failure inside the config loader.
            throw new IllegalArgumentException("Tika configuration resource not found: " + str);
        }
        try {
            TikaConfig loadedConfig = new TikaConfig(configStream);
            this.config = loadedConfig;
            this.tika = new Tika(loadedConfig);
        } catch (Exception e) {
            throw new RuntimeException("Error while loading Tika configuration.", e);
        } finally {
            // Close on both the success and the failure path so the stream never leaks.
            closeQuietly(configStream);
        }
    }

    /**
     * Tells whether the given media type is supported, which here means that Tika's
     * string-based {@code detect} returns a non-null value for the media type's primary type.
     *
     * <p>NOTE(review): {@code Tika.detect(String)} appears to be a name-based detection that
     * falls back to a generic type, so this check may accept every media type - confirm
     * against the Tika API before relying on it.
     *
     * @param mediaType the media type to check
     * @return {@code true} if the detection result is not {@code null}
     */
    @Override
    public boolean supports(MediaType mediaType) {
        return this.tika.detect(mediaType.getType()) != null;
    }

    /**
     * Detects the media type of the given bytes and extracts their textual content.
     *
     * @param bArr the raw document content
     * @return every line of the extracted text joined with {@code '\n'}, or {@code null}
     *         when the parser produces no text at all
     * @throws UnsupportedMediaTypeException if the detected media type is rejected
     * @throws RuntimeException if detection or parsing fails with an I/O error; the
     *         underlying {@link IOException} is kept as the cause
     */
    @Override
    public String extract(byte[] bArr) throws UnsupportedMediaTypeException {
        ByteArrayInputStream input = new ByteArrayInputStream(bArr);

        String mimeType;
        try {
            mimeType = this.tika.detect(input);
        } catch (IOException e) {
            throw new RuntimeException("Error while detecting mime type", e);
        }
        // NOTE(review): same string-based detection as in supports(); this may never be null.
        if (this.tika.detect(mimeType) == null) {
            throw new UnsupportedMediaTypeException(String.format("[%s] mime type is not supported", mimeType));
        }

        Metadata metadata = new Metadata();
        metadata.set("Content-Type", mimeType);

        BufferedReader reader = null;
        try {
            reader = new BufferedReader(this.tika.parse(input, metadata));
            return readAll(reader);
        } catch (IOException e) {
            throw new RuntimeException("Error while parsing the provided input", e);
        } finally {
            closeQuietly(reader);
        }
    }

    /**
     * Reads the reader to its end, keeping every line rather than only the last one.
     * Returns {@code null} if the reader yields no line.
     */
    private static String readAll(BufferedReader reader) throws IOException {
        String line = reader.readLine();
        if (line == null) {
            return null;
        }
        StringBuilder text = new StringBuilder(line);
        while ((line = reader.readLine()) != null) {
            // readLine() strips the terminator, so put a separator back between lines.
            text.append('\n').append(line);
        }
        return text.toString();
    }

    /** Closes the resource if it is non-null, ignoring any failure raised by the close itself. */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException ignored) {
            // The resource was only read from; a failed close must not mask the real outcome.
        }
    }

    /**
     * Looks up a resource by name: first relative to this class, then through this class'
     * class loader, then through the system class loader.
     *
     * @return the opened stream, or {@code null} if none of the lookups finds the resource
     */
    private static InputStream getResourceAsStream(String str) {
        InputStream stream = TikaTextExtractor.class.getResourceAsStream(str);
        if (stream == null) {
            stream = TikaTextExtractor.class.getClassLoader().getResourceAsStream(str);
        }
        if (stream == null) {
            stream = ClassLoader.getSystemResourceAsStream(str);
        }
        return stream;
    }
}
