package org.apache.any23.mime;

import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.regex.Pattern;
import org.apache.any23.extractor.csv.CSVReaderBuilder;
import org.apache.any23.mime.purifier.Purifier;
import org.apache.any23.mime.purifier.WhiteSpacesPurifier;
import org.apache.tika.Tika;
import org.apache.tika.config.TikaConfig;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.TikaMetadataKeys;
import org.apache.tika.mime.MimeType;
import org.apache.tika.mime.MimeTypeException;
import org.apache.tika.mime.MimeTypes;
import org.openrdf.rio.RDFFormat;
import org.openrdf.rio.RDFParser;
import org.openrdf.rio.Rio;

/* loaded from: input_file:WEB-INF/lib/apache-any23-mime-1.0.jar:org/apache/any23/mime/TikaMIMETypeDetector.class */
/**
 * {@link MIMETypeDetector} implementation that delegates to Apache Tika and,
 * when Tika and the supplied metadata only yield
 * {@code application/octet-stream}, falls back to sniffing the stream content
 * for N3, N-Quads, Turtle and CSV.
 *
 * <p>The Tika configuration, the {@link Tika} facade and the MIME repository
 * are held in static fields that are lazily initialised by the constructor.
 * NOTE(review): that initialisation is not synchronised; confirm that
 * instances are not first created concurrently.
 */
public class TikaMIMETypeDetector implements MIMETypeDetector {
    private final Purifier purifier;
    public static final String CSV_MIMETYPE = "text/csv";
    public static final String RESOURCE_NAME = "/org/apache/any23/mime/tika-config.xml";

    /** Fallback type returned when nothing more specific can be determined. */
    private static final String OCTET_STREAM = "application/octet-stream";
    private static final String PLAIN_TEXT = "text/plain";
    private static final String XML = "application/xml";
    /** Metadata key under which the caller-declared MIME type is stored. */
    private static final String CONTENT_TYPE = "Content-Type";
    /** Upper bound on the number of characters inspected while sniffing. */
    private static final int MAX_SAMPLE_SIZE = 2048;
    /** Charset used to decode and re-encode the sniffed sample (instead of the platform default). */
    private static final String SAMPLE_CHARSET = "UTF-8";

    private static final Pattern[] N3_PATTERNS = {
        Pattern.compile("^\\S+\\s*<\\S+>\\s*<\\S+>\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*_:\\S+\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*\".*\"(@\\S+)?\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*\".*\"(\\^\\^\\S+)?\\s*\\.")
    };
    private static final Pattern[] NQUADS_PATTERNS = {
        Pattern.compile("^\\S+\\s*<\\S+>\\s*<\\S+>\\s*\\<\\S+>\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*_:\\S+\\s*\\<\\S+>\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*\".*\"(@\\S+)?\\s*\\<\\S+>\\s*\\."),
        Pattern.compile("^\\S+\\s*<\\S+>\\s*\".*\"(\\^\\^\\S+)?\\s*\\<\\S+>\\s*\\.")
    };

    private static TikaConfig config = null;
    private static Tika tika;
    private static MimeTypes types;

    /**
     * Checks whether the first statement read from the stream matches one of
     * the N3 statement patterns.
     *
     * @param inputStream stream to inspect; it is reset after sampling, so it
     *        must support {@code reset()} (presumably a mark has been set by
     *        the caller; verify against callers).
     * @return {@code true} if any N3 pattern is found in the sample.
     * @throws IOException if reading or resetting the stream fails.
     */
    public static boolean checkN3Format(InputStream inputStream) throws IOException {
        return findPattern(N3_PATTERNS, '.', inputStream);
    }

    /**
     * Checks whether the first statement read from the stream matches one of
     * the N-Quads statement patterns.
     *
     * @param inputStream stream to inspect; it is reset after sampling.
     * @return {@code true} if any N-Quads pattern is found in the sample.
     * @throws IOException if reading or resetting the stream fails.
     */
    public static boolean checkNQuadsFormat(InputStream inputStream) throws IOException {
        return findPattern(NQUADS_PATTERNS, '.', inputStream);
    }

    /**
     * Checks whether the first statement read from the stream is accepted by
     * a verifying Turtle parser.
     *
     * @param inputStream stream to inspect; it is reset after sampling.
     * @return {@code true} if the sample parses without error, {@code false}
     *         if the parser raises any exception.
     * @throws IOException if reading or resetting the stream fails.
     */
    public static boolean checkTurtleFormat(InputStream inputStream) throws IOException {
        String sample = extractDataSample(inputStream, '.');
        byte[] sampleBytes = sample.getBytes(SAMPLE_CHARSET);
        RDFParser parser = Rio.createParser(RDFFormat.TURTLE);
        parser.setDatatypeHandling(RDFParser.DatatypeHandling.VERIFY);
        parser.setStopAtFirstError(true);
        parser.setVerifyData(true);
        try {
            parser.parse(new ByteArrayInputStream(sampleBytes), "");
            return true;
        } catch (Exception e) {
            // Any parser failure simply means "this is not Turtle".
            return false;
        }
    }

    /**
     * Checks whether the stream content is CSV by delegating to
     * {@link CSVReaderBuilder#isCSV(InputStream)}.
     *
     * @param inputStream stream to inspect.
     * @return the result of the delegate.
     * @throws IOException if the delegate fails to read the stream.
     */
    public static boolean checkCSVFormat(InputStream inputStream) throws IOException {
        return CSVReaderBuilder.isCSV(inputStream);
    }

    /**
     * Extracts a sample up to {@code breakChar} and reports whether any of
     * the given patterns is found in it.
     */
    private static boolean findPattern(Pattern[] patterns, char breakChar, InputStream inputStream)
            throws IOException {
        String sample = extractDataSample(inputStream, breakChar);
        for (Pattern pattern : patterns) {
            if (pattern.matcher(sample).find()) {
                return true;
            }
        }
        return false;
    }

    /**
     * Reads characters from the stream until {@code breakChar} is met outside
     * of a {@code <...>} or {@code "..."} block, the end of the stream is
     * reached, or {@link #MAX_SAMPLE_SIZE} characters have been collected.
     * The stream is reset once sampling is done, also on failure.
     *
     * @param inputStream stream to sample; must support {@code reset()}.
     * @param breakChar character terminating the sample (included in it).
     * @return the collected sample, possibly empty.
     * @throws IOException if reading or resetting fails.
     */
    private static String extractDataSample(InputStream inputStream, char breakChar) throws IOException {
        BufferedReader reader = new BufferedReader(new InputStreamReader(inputStream, SAMPLE_CHARSET));
        StringBuilder sample = new StringBuilder();
        boolean insideBlock = false;
        reader.mark(MAX_SAMPLE_SIZE);
        // The reset must happen exactly once, after the whole sample has been
        // read: resetting inside the loop would rewind the reader after every
        // character and the same first character would be read over and over.
        try {
            int read;
            // The size limit is checked before reading so that no more than
            // MAX_SAMPLE_SIZE characters are consumed past the reader's mark.
            while (sample.length() < MAX_SAMPLE_SIZE && (read = reader.read()) != -1) {
                if ('<' == read) {
                    insideBlock = true;
                } else if ('>' == read) {
                    insideBlock = false;
                } else if ('"' == read) {
                    insideBlock = !insideBlock;
                }
                sample.append((char) read);
                if (!insideBlock && breakChar == read) {
                    break;
                }
            }
        } finally {
            inputStream.reset();
            reader.reset();
        }
        return sample.toString();
    }

    /**
     * Creates a detector using the given purifier, lazily initialising the
     * shared Tika configuration from {@link #RESOURCE_NAME} if needed.
     *
     * @param purifier purifier applied to every non-null input stream before
     *        detection.
     * @throws RuntimeException if the Tika configuration cannot be loaded.
     */
    public TikaMIMETypeDetector(Purifier purifier) {
        this.purifier = purifier;
        if (config == null) {
            // Only open the configuration resource when it is actually needed.
            config = loadTikaConfig();
        }
        if (types == null) {
            types = config.getMimeRepository();
        }
        if (tika == null) {
            tika = new Tika(config);
        }
    }

    /**
     * Creates a detector using a {@link WhiteSpacesPurifier}.
     */
    public TikaMIMETypeDetector() {
        this(new WhiteSpacesPurifier());
    }

    /**
     * Guesses the MIME type of a resource from its name, content and declared
     * type.
     *
     * @param str resource name, or {@code null} if unknown.
     * @param inputStream resource content, or {@code null} if unavailable;
     *        when {@code null}, content sniffing is skipped.
     * @param mIMEType MIME type declared for the resource, or {@code null}.
     * @return the guessed type; {@code application/octet-stream} when nothing
     *         more specific is found.
     * @throws RuntimeException wrapping any {@link IOException} raised while
     *         purifying or inspecting the stream.
     */
    @Override // org.apache.any23.mime.MIMETypeDetector
    public MIMEType guessMIMEType(String str, InputStream inputStream, MIMEType mIMEType) {
        if (inputStream != null) {
            try {
                this.purifier.purify(inputStream);
            } catch (IOException e) {
                throw new RuntimeException("Error while purifying the provided input", e);
            }
        }
        Metadata metadata = new Metadata();
        if (mIMEType != null) {
            metadata.set(CONTENT_TYPE, mIMEType.getFullType());
        }
        if (str != null) {
            metadata.set(TikaMetadataKeys.RESOURCE_NAME_KEY, str);
        }
        try {
            String guessed = guessMimeTypeByInputAndMeta(inputStream, metadata);
            // Content sniffing needs a stream; without one the generic guess
            // is the best answer available.
            if (inputStream == null || !OCTET_STREAM.equals(guessed)) {
                return MIMEType.parse(guessed);
            }
            return MIMEType.parse(sniffContentType(inputStream));
        } catch (IOException e) {
            throw new RuntimeException("Error while retrieving mime type.", e);
        }
    }

    /**
     * Tries the content-based checks in order (N3, N-Quads, Turtle, CSV) and
     * returns the MIME type of the first one that succeeds, or
     * {@code application/octet-stream} if none does.
     */
    private static String sniffContentType(InputStream inputStream) throws IOException {
        if (checkN3Format(inputStream)) {
            return RDFFormat.N3.getDefaultMIMEType();
        }
        if (checkNQuadsFormat(inputStream)) {
            return RDFFormat.NQUADS.getDefaultMIMEType();
        }
        if (checkTurtleFormat(inputStream)) {
            return RDFFormat.TURTLE.getDefaultMIMEType();
        }
        if (checkCSVFormat(inputStream)) {
            return CSV_MIMETYPE;
        }
        return OCTET_STREAM;
    }

    /**
     * Loads the Tika configuration from {@link #RESOURCE_NAME}, closing the
     * resource stream afterwards.
     *
     * @throws RuntimeException if the resource is missing or cannot be parsed.
     */
    private TikaConfig loadTikaConfig() {
        InputStream resource = getResourceAsStream();
        if (resource == null) {
            throw new RuntimeException(
                    "Error while loading Tika configuration.",
                    new IOException("Cannot find resource " + RESOURCE_NAME + " on the classpath."));
        }
        try {
            return new TikaConfig(resource);
        } catch (Exception e) {
            throw new RuntimeException("Error while loading Tika configuration.", e);
        } finally {
            try {
                resource.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing the resource fails.
            }
        }
    }

    /**
     * Locates the Tika configuration resource, trying this class first, then
     * its class loader and finally the system class loader.
     *
     * @return the resource stream, or {@code null} if it cannot be found.
     */
    private InputStream getResourceAsStream() {
        InputStream resource = TikaMIMETypeDetector.class.getResourceAsStream(RESOURCE_NAME);
        if (resource != null) {
            return resource;
        }
        // ClassLoader lookups expect a name without the leading '/', unlike
        // Class.getResourceAsStream which treats it as "absolute".
        String loaderName = RESOURCE_NAME.startsWith("/") ? RESOURCE_NAME.substring(1) : RESOURCE_NAME;
        ClassLoader loader = TikaMIMETypeDetector.class.getClassLoader();
        if (loader != null) {
            resource = loader.getResourceAsStream(loaderName);
            if (resource != null) {
                return resource;
            }
        }
        return ClassLoader.getSystemResourceAsStream(loaderName);
    }

    /**
     * Guesses the MIME type using, in order: Tika detection on the stream
     * content, the declared {@code Content-Type}, and the resource name.
     *
     * @param inputStream content to inspect, may be {@code null}.
     * @param metadata metadata carrying the optional declared type and
     *        resource name.
     * @return the first specific type found; a plain declared type if nothing
     *         better is found; otherwise {@code application/octet-stream}.
     * @throws IOException if Tika fails to read the stream.
     */
    private String guessMimeTypeByInputAndMeta(InputStream inputStream, Metadata metadata) throws IOException {
        if (inputStream != null) {
            String detected = tika.detect(inputStream);
            if (detected != null && !isGenericMIMEType(detected)) {
                return detected;
            }
        }

        String plainDeclaredType = null;
        String declaredType = metadata.get(CONTENT_TYPE);
        if (declaredType != null) {
            try {
                MimeType declared = types.forName(declaredType);
                if (declared != null) {
                    if (!isPlainMIMEType(declared.getName())) {
                        return declared.getName();
                    }
                    // Remember the plain type as a last resort before octet-stream.
                    plainDeclaredType = declared.getName();
                }
            } catch (MimeTypeException ignored) {
                // A malformed declared type is not fatal: fall through to the
                // resource-name based guess.
            }
        }

        String resourceName = metadata.get(TikaMetadataKeys.RESOURCE_NAME_KEY);
        if (resourceName != null) {
            MimeType byName = types.getMimeType(resourceName);
            if (byName != null) {
                return byName.getName();
            }
        }
        return plainDeclaredType != null ? plainDeclaredType : OCTET_STREAM;
    }

    /** Returns whether the type is {@code application/octet-stream} or {@code text/plain}. */
    private boolean isPlainMIMEType(String str) {
        return OCTET_STREAM.equals(str) || PLAIN_TEXT.equals(str);
    }

    /** Returns whether the type is plain (see above) or {@code application/xml}. */
    private boolean isGenericMIMEType(String str) {
        return isPlainMIMEType(str) || XML.equals(str);
    }
}
