package org.apache.any23;

import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.Locale;
import org.apache.any23.configuration.Configuration;
import org.apache.any23.configuration.DefaultConfiguration;
import org.apache.any23.extractor.ExtractionException;
import org.apache.any23.extractor.ExtractionParameters;
import org.apache.any23.extractor.ExtractorFactory;
import org.apache.any23.extractor.ExtractorGroup;
import org.apache.any23.extractor.ExtractorRegistryImpl;
import org.apache.any23.extractor.SingleDocumentExtraction;
import org.apache.any23.extractor.SingleDocumentExtractionReport;
import org.apache.any23.http.AcceptHeaderBuilder;
import org.apache.any23.http.DefaultHTTPClient;
import org.apache.any23.http.DefaultHTTPClientConfiguration;
import org.apache.any23.http.HTTPClient;
import org.apache.any23.mime.MIMETypeDetector;
import org.apache.any23.mime.TikaMIMETypeDetector;
import org.apache.any23.mime.purifier.WhiteSpacesPurifier;
import org.apache.any23.source.DocumentSource;
import org.apache.any23.source.FileDocumentSource;
import org.apache.any23.source.HTTPDocumentSource;
import org.apache.any23.source.LocalCopyFactory;
import org.apache.any23.source.MemCopyFactory;
import org.apache.any23.source.StringDocumentSource;
import org.apache.any23.writer.TripleHandler;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/any23/Any23.class */
/**
 * Facade that wires together a {@link Configuration}, an {@link ExtractorGroup}, a
 * {@link MIMETypeDetector}, an {@link HTTPClient} and a {@link LocalCopyFactory}, and runs a
 * {@link SingleDocumentExtraction} for every {@code extract(...)} call.
 *
 * <p>Typical usage is: construct an instance, optionally adjust the HTTP user agent / HTTP client /
 * cache factory / MIME type detector through the setters, then call one of the {@code extract}
 * overloads.
 *
 * <p>NOTE(review): no synchronization is visible in this class; the lazy HTTP client initialization
 * in {@link #getHTTPClient()} is presumably not safe for concurrent first use - confirm before
 * sharing an instance across threads.
 */
public class Any23 {
    /** Core version string, read from the default configuration property {@code any23.core.version}. */
    public static final String VERSION = DefaultConfiguration.singleton().getPropertyOrFail("any23.core.version");

    /** Default HTTP user agent, read from the default configuration property {@code any23.http.user.agent.default}. */
    public static final String DEFAULT_HTTP_CLIENT_USER_AGENT = DefaultConfiguration.singleton().getPropertyOrFail("any23.http.user.agent.default");

    protected static final Logger logger = LoggerFactory.getLogger(Any23.class);

    /** Configuration handed to every {@link SingleDocumentExtraction} created by this instance. */
    private final Configuration configuration;

    /** User agent used when {@link #setHTTPUserAgent(String)} is called with {@code null}. */
    private final String defaultUserAgent;

    private MIMETypeDetector mimeTypeDetector;
    private HTTPClient httpClient;

    /** Becomes {@code true} once {@link #getHTTPClient()} has called {@code init} on the client. */
    private boolean httpClientInitialized;

    /** Extractor factories used for extraction and for building the HTTP accept header. */
    private final ExtractorGroup factories;

    private LocalCopyFactory streamCache;

    /** User agent set through {@link #setHTTPUserAgent(String)}; {@code null} until that call. */
    private String userAgent;

    /**
     * Primary constructor; all other constructors delegate here.
     *
     * <p>Installs a {@link TikaMIMETypeDetector} backed by a {@link WhiteSpacesPurifier}, a
     * {@link DefaultHTTPClient} (not yet initialized) and a {@link MemCopyFactory} as defaults.
     *
     * @param configuration the configuration to use; must not be {@code null}
     * @param extractorGroup the extractors to apply, or {@code null} to use the group returned by
     *     {@code ExtractorRegistryImpl.getInstance().getExtractorGroup()}
     * @throws NullPointerException if {@code configuration} is {@code null}
     */
    public Any23(Configuration configuration, ExtractorGroup extractorGroup) {
        // Validate before allocating any collaborator so a bad argument fails fast and cheaply.
        if (configuration == null) {
            throw new NullPointerException("configuration must be not null.");
        }
        this.configuration = configuration;
        this.mimeTypeDetector = new TikaMIMETypeDetector(new WhiteSpacesPurifier());
        this.httpClient = new DefaultHTTPClient();
        this.httpClientInitialized = false;
        if (logger.isDebugEnabled()) {
            logger.debug(configuration.getConfigurationDump());
        }
        this.defaultUserAgent = configuration.getPropertyOrFail("any23.http.user.agent.default");
        this.factories = extractorGroup == null ? ExtractorRegistryImpl.getInstance().getExtractorGroup() : extractorGroup;
        // Assign the field directly instead of calling the public, overridable setCacheFactory(...):
        // invoking an overridable method here would run subclass code on a half-constructed object.
        this.streamCache = new MemCopyFactory();
    }

    /**
     * Creates an instance with the default configuration singleton and the given extractors.
     *
     * @param extractorGroup the extractors to apply, or {@code null} for the registry default
     */
    public Any23(ExtractorGroup extractorGroup) {
        this((Configuration) DefaultConfiguration.singleton(), extractorGroup);
    }

    /**
     * Creates an instance whose extractor group is looked up in the extractor registry by name.
     *
     * @param configuration the configuration to use; must not be {@code null}
     * @param strArr names passed to the extractor registry, or {@code null} for the registry default
     * @throws NullPointerException if {@code configuration} is {@code null}
     */
    public Any23(Configuration configuration, String... strArr) {
        this(configuration, strArr == null ? null : ExtractorRegistryImpl.getInstance().getExtractorGroup(Arrays.asList(strArr)));
    }

    /**
     * Creates an instance with the default configuration singleton and extractors looked up by name.
     *
     * @param strArr names passed to the extractor registry, or {@code null} for the registry default
     */
    public Any23(String... strArr) {
        this((Configuration) DefaultConfiguration.singleton(), strArr);
    }

    /**
     * Creates an instance with the given configuration and the registry's default extractor group.
     *
     * @param configuration the configuration to use; must not be {@code null}
     * @throws NullPointerException if {@code configuration} is {@code null}
     */
    public Any23(Configuration configuration) {
        this(configuration, (String[]) null);
    }

    /** Creates an instance with the default configuration singleton and default extractor group. */
    public Any23() {
        this((Configuration) DefaultConfiguration.singleton());
    }

    /**
     * Sets the HTTP user agent. Must be called before the HTTP client is first obtained through
     * {@link #getHTTPClient()}.
     *
     * @param str the user agent, or {@code null} to fall back to the configured default user agent
     * @throws IllegalStateException if the HTTP client has already been initialized
     * @throws IllegalArgumentException if the effective user agent is empty or only whitespace
     */
    public void setHTTPUserAgent(String str) {
        if (this.httpClientInitialized) {
            throw new IllegalStateException("Cannot change HTTP configuration after client has been initialized");
        }
        if (str == null) {
            str = this.defaultUserAgent;
        }
        if (str.trim().isEmpty()) {
            throw new IllegalArgumentException(String.format(Locale.ROOT, "Invalid user agent: '%s'", str));
        }
        this.userAgent = str;
    }

    /**
     * Returns the user agent set through {@link #setHTTPUserAgent(String)}.
     *
     * @return the current user agent, or {@code null} if none has been set yet
     */
    public String getHTTPUserAgent() {
        return this.userAgent;
    }

    /**
     * Replaces the HTTP client. Only allowed before the client has been initialized.
     *
     * @param hTTPClient the client to use; must not be {@code null}
     * @throws NullPointerException if {@code hTTPClient} is {@code null}
     * @throws IllegalStateException if the current client has already been initialized
     */
    public void setHTTPClient(HTTPClient hTTPClient) {
        if (hTTPClient == null) {
            throw new NullPointerException("httpClient cannot be null.");
        }
        if (this.httpClientInitialized) {
            throw new IllegalStateException("Cannot change HTTP configuration after client has been initialized");
        }
        this.httpClient = hTTPClient;
    }

    /**
     * Returns the HTTP client, initializing it on first use with a
     * {@link DefaultHTTPClientConfiguration} built from the accept header of the configured
     * extractors. After the first successful call the HTTP settings can no longer be changed.
     *
     * @return the initialized HTTP client
     * @throws IOException if no user agent has been set yet
     */
    public HTTPClient getHTTPClient() throws IOException {
        if (!this.httpClientInitialized) {
            if (this.userAgent == null) {
                throw new IOException("Must call " + Any23.class.getSimpleName() + ".setHTTPUserAgent(String) before extracting from HTTP IRI");
            }
            // NOTE(review): the user agent is only checked for presence here; it is not handed to
            // the client configuration in this class - confirm where the client picks it up.
            this.httpClient.init(new DefaultHTTPClientConfiguration(getAcceptHeader()));
            this.httpClientInitialized = true;
        }
        return this.httpClient;
    }

    /**
     * Sets the factory that is passed to each extraction as its local copy factory.
     *
     * @param localCopyFactory the factory to use; must not be {@code null}
     * @throws NullPointerException if {@code localCopyFactory} is {@code null}
     */
    public void setCacheFactory(LocalCopyFactory localCopyFactory) {
        if (localCopyFactory == null) {
            throw new NullPointerException("cache cannot be null.");
        }
        this.streamCache = localCopyFactory;
    }

    /**
     * Sets the MIME type detector passed to each extraction.
     *
     * <p>NOTE(review): {@code null} is accepted without a check and forwarded as-is to
     * {@link SingleDocumentExtraction}; presumably this disables detection - confirm.
     *
     * @param mIMETypeDetector the detector to use
     */
    public void setMIMETypeDetector(MIMETypeDetector mIMETypeDetector) {
        this.mimeTypeDetector = mIMETypeDetector;
    }

    /**
     * Creates a document source for an IRI. The scheme is matched case-insensitively:
     * {@code file:} yields a {@link FileDocumentSource}, {@code http:} and {@code https:} yield an
     * {@link HTTPDocumentSource} using {@link #getHTTPClient()}.
     *
     * @param str the document IRI; must not be {@code null}
     * @return a document source for the IRI
     * @throws NullPointerException if {@code str} is {@code null}
     * @throws URISyntaxException if a {@code file:} IRI cannot be parsed as a {@link URI}
     * @throws IOException if the HTTP client cannot be obtained
     * @throws IllegalArgumentException if the IRI uses any other (or no) scheme
     */
    public DocumentSource createDocumentSource(String str) throws URISyntaxException, IOException {
        if (str == null) {
            throw new NullPointerException("documentIRI cannot be null.");
        }
        // Lower-case once; only used for scheme matching, the original IRI is what gets passed on.
        final String lowerCased = str.toLowerCase(Locale.ROOT);
        if (lowerCased.startsWith("file:")) {
            return new FileDocumentSource(new File(new URI(str)));
        }
        if (lowerCased.startsWith("http:") || lowerCased.startsWith("https:")) {
            return new HTTPDocumentSource(getHTTPClient(), str);
        }
        throw new IllegalArgumentException(String.format(Locale.ROOT, "Unsupported protocol for document IRI: '%s' . Check that document IRI contains a protocol.", str));
    }

    /**
     * Runs an extraction over a document source. All other {@code extract} overloads end up here.
     *
     * @param extractionParameters parameters passed to {@code SingleDocumentExtraction.run}; the
     *     convenience overloads of this class pass {@code null}
     * @param documentSource the document to extract from
     * @param tripleHandler the handler given to the extraction
     * @param str the parser encoding set on the extraction; the convenience overloads pass {@code null}
     * @return a report holding the matching extractors, parser encoding, detected MIME type,
     *     validation report and per-extractor issues of the run
     * @throws IOException propagated from the extraction
     * @throws ExtractionException propagated from the extraction
     */
    public ExtractionReport extract(ExtractionParameters extractionParameters, DocumentSource documentSource, TripleHandler tripleHandler, String str) throws IOException, ExtractionException {
        final SingleDocumentExtraction extraction = new SingleDocumentExtraction(this.configuration, documentSource, this.factories, tripleHandler);
        extraction.setMIMETypeDetector(this.mimeTypeDetector);
        extraction.setLocalCopyFactory(this.streamCache);
        extraction.setParserEncoding(str);
        final SingleDocumentExtractionReport runReport = extraction.run(extractionParameters);
        // The getters are read after run(...), in the same order as the report's constructor arguments.
        return new ExtractionReport(extraction.getMatchingExtractors(), extraction.getParserEncoding(), extraction.getDetectedMIMEType(), runReport.getValidationReport(), runReport.getExtractorToIssues());
    }

    /**
     * Extracts from in-memory content wrapped in a {@link StringDocumentSource}.
     *
     * <p>The four strings are forwarded in order to the {@code StringDocumentSource} constructor;
     * presumably content, document IRI, content type and encoding - TODO confirm.
     *
     * @param str first {@code StringDocumentSource} argument
     * @param str2 second {@code StringDocumentSource} argument
     * @param str3 third {@code StringDocumentSource} argument
     * @param str4 fourth {@code StringDocumentSource} argument
     * @param tripleHandler the handler given to the extraction
     * @return the extraction report
     * @throws IOException propagated from the extraction
     * @throws ExtractionException propagated from the extraction
     */
    public ExtractionReport extract(String str, String str2, String str3, String str4, TripleHandler tripleHandler) throws IOException, ExtractionException {
        return extract(new StringDocumentSource(str, str2, str3, str4), tripleHandler);
    }

    /**
     * Extracts from in-memory content wrapped in a {@link StringDocumentSource}.
     *
     * <p>The two strings are forwarded in order to the {@code StringDocumentSource} constructor;
     * presumably content and document IRI - TODO confirm.
     *
     * @param str first {@code StringDocumentSource} argument
     * @param str2 second {@code StringDocumentSource} argument
     * @param tripleHandler the handler given to the extraction
     * @return the extraction report
     * @throws IOException propagated from the extraction
     * @throws ExtractionException propagated from the extraction
     */
    public ExtractionReport extract(String str, String str2, TripleHandler tripleHandler) throws IOException, ExtractionException {
        return extract(new StringDocumentSource(str, str2), tripleHandler);
    }

    /**
     * Extracts from a local file wrapped in a {@link FileDocumentSource}.
     *
     * @param file the file to extract from
     * @param tripleHandler the handler given to the extraction
     * @return the extraction report
     * @throws IOException propagated from the extraction
     * @throws ExtractionException propagated from the extraction
     */
    public ExtractionReport extract(File file, TripleHandler tripleHandler) throws IOException, ExtractionException {
        return extract(new FileDocumentSource(file), tripleHandler);
    }

    /**
     * Extracts from the document identified by an IRI, resolved through
     * {@link #createDocumentSource(String)}.
     *
     * @param extractionParameters parameters for the run
     * @param str the document IRI
     * @param tripleHandler the handler given to the extraction
     * @return the extraction report
     * @throws IOException propagated from source creation or the extraction
     * @throws ExtractionException if the IRI is syntactically invalid (wrapping the
     *     {@link URISyntaxException}) or propagated from the extraction
     */
    public ExtractionReport extract(ExtractionParameters extractionParameters, String str, TripleHandler tripleHandler) throws IOException, ExtractionException {
        final DocumentSource source;
        try {
            source = createDocumentSource(str);
        } catch (URISyntaxException e) {
            throw new ExtractionException("Error while extracting data from document IRI.", e);
        }
        return extract(extractionParameters, source, tripleHandler);
    }

    /**
     * Extracts from the document identified by an IRI, passing {@code null} extraction parameters.
     *
     * @param str the document IRI
     * @param tripleHandler the handler given to the extraction
     * @return the extraction report
     * @throws IOException propagated from source creation or the extraction
     * @throws ExtractionException if the IRI is syntactically invalid or propagated from the extraction
     */
    public ExtractionReport extract(String str, TripleHandler tripleHandler) throws IOException, ExtractionException {
        return extract((ExtractionParameters) null, str, tripleHandler);
    }

    /**
     * Extracts from a document source with an explicit parser encoding and {@code null}
     * extraction parameters.
     *
     * @param documentSource the document to extract from
     * @param tripleHandler the handler given to the extraction
     * @param str the parser encoding set on the extraction
     * @return the extraction report
     * @throws IOException propagated from the extraction
     * @throws ExtractionException propagated from the extraction
     */
    public ExtractionReport extract(DocumentSource documentSource, TripleHandler tripleHandler, String str) throws IOException, ExtractionException {
        return extract(null, documentSource, tripleHandler, str);
    }

    /**
     * Extracts from a document source with {@code null} extraction parameters and {@code null}
     * parser encoding.
     *
     * @param documentSource the document to extract from
     * @param tripleHandler the handler given to the extraction
     * @return the extraction report
     * @throws IOException propagated from the extraction
     * @throws ExtractionException propagated from the extraction
     */
    public ExtractionReport extract(DocumentSource documentSource, TripleHandler tripleHandler) throws IOException, ExtractionException {
        return extract(null, documentSource, tripleHandler, null);
    }

    /**
     * Extracts from a document source with the given parameters and {@code null} parser encoding.
     *
     * @param extractionParameters parameters for the run
     * @param documentSource the document to extract from
     * @param tripleHandler the handler given to the extraction
     * @return the extraction report
     * @throws IOException propagated from the extraction
     * @throws ExtractionException propagated from the extraction
     */
    public ExtractionReport extract(ExtractionParameters extractionParameters, DocumentSource documentSource, TripleHandler tripleHandler) throws IOException, ExtractionException {
        return extract(extractionParameters, documentSource, tripleHandler, null);
    }

    /**
     * Builds the HTTP accept header from the MIME types supported by every configured extractor
     * factory.
     *
     * @return the header string produced by {@link AcceptHeaderBuilder}
     */
    // Raw collection types are kept deliberately: the element type of the MIME type collections is
    // declared outside this file, so the suppression is limited to this one method.
    @SuppressWarnings({"rawtypes", "unchecked"})
    private String getAcceptHeader() {
        ArrayList supportedTypes = new ArrayList();
        Iterator factoryIterator = this.factories.iterator();
        while (factoryIterator.hasNext()) {
            ExtractorFactory factory = (ExtractorFactory) factoryIterator.next();
            supportedTypes.addAll(factory.getSupportedMIMETypes());
        }
        return new AcceptHeaderBuilder(supportedTypes).getAcceptHeader();
    }
}
