package org.apache.tika.pipes.fetcher.http;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetAddress;
import java.net.MalformedURLException;
import java.net.URI;
import java.net.URL;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.StandardCopyOption;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.commons.io.IOUtils;
import org.apache.commons.io.output.UnsynchronizedByteArrayOutputStream;
import org.apache.http.ConnectionClosedException;
import org.apache.http.Header;
import org.apache.http.HttpConnection;
import org.apache.http.HttpEntity;
import org.apache.http.HttpHeaders;
import org.apache.http.HttpInetConnection;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.config.RequestConfig;
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.client.protocol.HttpClientContext;
import org.apache.http.impl.conn.ConnectionShutdownException;
import org.apache.http.util.EntityUtils;
import org.apache.tika.client.HttpClientFactory;
import org.apache.tika.config.Field;
import org.apache.tika.config.Initializable;
import org.apache.tika.config.InitializableProblemHandler;
import org.apache.tika.config.Param;
import org.apache.tika.exception.TikaConfigException;
import org.apache.tika.exception.TikaException;
import org.apache.tika.exception.TikaTimeoutException;
import org.apache.tika.io.TemporaryResources;
import org.apache.tika.io.TikaInputStream;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.metadata.Property;
import org.apache.tika.pipes.fetcher.AbstractFetcher;
import org.apache.tika.pipes.fetcher.RangeFetcher;
import org.apache.tika.utils.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/tika/pipes/fetcher/http/HttpFetcher.class */
/**
 * Fetcher that retrieves a resource with an HTTP GET, spools the response body to a local
 * temporary file and records details of the exchange in the caller's {@link Metadata}
 * (status code, content type/encoding, number of redirects, final target URL, remote IP
 * address and any response headers selected via {@link #setHttpHeaders(List)}).
 *
 * <p>Two clients are built in {@link #initialize(Map)}: the regular one and a second one with
 * content compression disabled, which is used for a single retry when the first attempt fails
 * with a "Premature end of Content-Length delimited message" error.
 *
 * <p>The configuration setters are expected to be called before {@link #initialize(Map)};
 * connection-level settings are forwarded to the {@link HttpClientFactory} and only take effect
 * when the clients are built.
 */
public class HttpFetcher extends AbstractFetcher implements Initializable, RangeFetcher {
    // NOTE: the public static fields below are intentionally left non-final so that their
    // declared shape stays exactly as external code may already rely on.

    /** Prefix for metadata keys that carry values taken from the HTTP response headers. */
    public static String HTTP_HEADER_PREFIX = "http-header:";
    /** Prefix for metadata keys that describe the HTTP connection/fetch itself. */
    public static String HTTP_FETCH_PREFIX = "http-connection:";
    /** HTTP status code of the response. */
    public static Property HTTP_STATUS_CODE = Property.externalInteger(HTTP_HEADER_PREFIX + "status-code");
    /** Number of redirects that were followed; 0 if there were none. */
    public static Property HTTP_NUM_REDIRECTS = Property.externalInteger(HTTP_FETCH_PREFIX + "num-redirects");
    /** URL that was finally fetched, i.e. the last redirect location or the requested URL. */
    public static Property HTTP_TARGET_URL = Property.externalText(HTTP_FETCH_PREFIX + "target-url");
    /** IP address of the remote end of the connection, when it is available. */
    public static Property HTTP_TARGET_IP_ADDRESS = Property.externalText(HTTP_FETCH_PREFIX + "target-ip-address");
    /** Set to "true" when the body was longer than the configured max spool size. */
    public static Property HTTP_FETCH_TRUNCATED = Property.externalBoolean(HTTP_FETCH_PREFIX + "fetch-truncated");
    /** Content encoding reported by the response entity. */
    public static Property HTTP_CONTENT_ENCODING = Property.externalText(HTTP_HEADER_PREFIX + "content-encoding");
    /** Content type reported by the response entity. */
    public static Property HTTP_CONTENT_TYPE = Property.externalText(HTTP_HEADER_PREFIX + "content-type");

    private static final String USER_AGENT = "User-Agent";

    private HttpClient httpClient;
    /** Client with content compression disabled; used only for the retry in {@code execute}. */
    private HttpClient noCompressHttpClient;
    Logger LOG = LoggerFactory.getLogger(HttpFetcher.class);
    private HttpClientFactory httpClientFactory = new HttpClientFactory();
    private int maxRedirects = 10;
    /** Overall time budget in milliseconds for one request; values below 0 disable it. */
    private long overallTimeout = -1;
    /** Maximum number of body bytes to spool; values below 0 mean no limit. */
    private long maxSpoolSize = -1;
    /** Maximum number of bytes of an error response body to include in exception messages. */
    private int maxErrMsgSize = 10000;
    /** Names of response headers to copy into the metadata. */
    private final Set<String> httpHeaders = new HashSet<>();
    private String userAgent = null;

    /**
     * Fetches the complete resource at the given URL.
     *
     * @param fetchKey URL to GET
     * @param metadata metadata object that is updated with details of the HTTP exchange
     * @return stream over the spooled response body
     * @throws IOException if the request fails or the status code is outside 200-299
     * @throws TikaException declared by the fetcher contract
     * @throws TikaTimeoutException if the overall timeout elapsed during the request
     */
    public InputStream fetch(String fetchKey, Metadata metadata) throws IOException, TikaException {
        HttpGet get = new HttpGet(fetchKey);
        // NOTE(review): a request-level RequestConfig is believed to replace, not merge with,
        // the client's default RequestConfig in HttpClient 4.x -- confirm that the timeouts
        // configured through HttpClientFactory still apply to this request.
        RequestConfig requestConfig = RequestConfig.custom()
                .setMaxRedirects(this.maxRedirects)
                .setRedirectsEnabled(true)
                .build();
        get.setConfig(requestConfig);
        if (!StringUtils.isBlank(this.userAgent)) {
            get.setHeader(USER_AGENT, this.userAgent);
        }
        return execute(get, metadata, this.httpClient, true);
    }

    /**
     * Fetches a byte range of the resource at the given URL by sending a
     * {@code Range: bytes=<startRange>-<endRange>} header.
     *
     * <p>Unlike {@link #fetch(String, Metadata)} this method does not set a per-request
     * configuration, so the configured max redirects value is not applied here.
     *
     * @param fetchKey URL to GET
     * @param startRange first value of the range header
     * @param endRange second value of the range header
     * @param metadata metadata object that is updated with details of the HTTP exchange
     * @return stream over the spooled response body
     * @throws IOException if the request fails or the status code is outside 200-299
     * @throws TikaTimeoutException if the overall timeout elapsed during the request
     */
    public InputStream fetch(String fetchKey, long startRange, long endRange, Metadata metadata) throws IOException {
        HttpGet get = new HttpGet(fetchKey);
        if (!StringUtils.isBlank(this.userAgent)) {
            get.setHeader(USER_AGENT, this.userAgent);
        }
        get.setHeader(HttpHeaders.RANGE, "bytes=" + startRange + "-" + endRange);
        return execute(get, metadata, this.httpClient, true);
    }

    /**
     * Executes the request, records response details in the metadata and spools the body.
     *
     * <p>Whatever the outcome, the timeout timer is cancelled and the response is consumed and
     * closed before this method returns or throws.
     *
     * @param httpGet request to execute
     * @param metadata metadata to update
     * @param client client to execute the request with
     * @param retryOnBadLength whether to retry once with the no-compression client when the
     *     body ends prematurely
     * @return stream over the spooled response body
     * @throws IOException on I/O failure, a non-2xx status code or a response without a body
     * @throws TikaTimeoutException if the overall timeout fired and the request then failed
     */
    private InputStream execute(final HttpGet httpGet, Metadata metadata, HttpClient client,
                                boolean retryOnBadLength) throws IOException {
        HttpClientContext context = HttpClientContext.create();
        HttpResponse response = null;
        final AtomicBoolean timedOut = new AtomicBoolean(false);
        Timer timer = null;
        try {
            if (this.overallTimeout > -1) {
                // Abort the request from a timer thread once the overall budget is used up;
                // the flag lets the catch block below tell a timeout from other I/O failures.
                TimerTask abortTask = new TimerTask() {
                    @Override
                    public void run() {
                        timedOut.set(true);
                        httpGet.abort();
                    }
                };
                timer = new Timer(false);
                timer.schedule(abortTask, this.overallTimeout);
            }
            response = client.execute(httpGet, context);
            updateMetadata(httpGet.getURI().toString(), response, context, metadata);

            int statusCode = response.getStatusLine().getStatusCode();
            if (statusCode < 200 || statusCode > 299) {
                throw new IOException("bad status code: " + statusCode + " :: " + responseToString(response));
            }
            HttpEntity entity = response.getEntity();
            if (entity == null) {
                // A successful response without an entity would otherwise surface as a
                // NullPointerException.
                throw new IOException("no content in response with status code " + statusCode
                        + " for " + httpGet.getURI());
            }
            try (InputStream content = entity.getContent()) {
                return spool(content, metadata);
            }
        } catch (ConnectionClosedException e) {
            boolean prematureEnd = e.getMessage() != null
                    && e.getMessage().contains("Premature end of Content-Length delimited message");
            if (!retryOnBadLength || !prematureEnd) {
                throw e;
            }
            this.LOG.warn("premature end of content-length delimited message; retrying with content compression disabled for {}", httpGet.getURI());
            // Retry once only: the nested call passes false so that it cannot recurse again.
            return execute(httpGet, metadata, this.noCompressHttpClient, false);
        } catch (IOException e) {
            if (timedOut.get()) {
                TikaTimeoutException timeout =
                        new TikaTimeoutException("Overall timeout after " + this.overallTimeout + "ms");
                // Keep the underlying failure available for diagnostics.
                timeout.addSuppressed(e);
                throw timeout;
            }
            throw e;
        } finally {
            if (timer != null) {
                timer.cancel();
                timer.purge();
            }
            if (response != null) {
                // Make sure the entity is fully consumed before the response is closed.
                EntityUtils.consumeQuietly(response.getEntity());
                if (response instanceof CloseableHttpResponse) {
                    ((CloseableHttpResponse) response).close();
                }
            }
        }
    }

    /**
     * Copies the response body to a temporary file and returns a stream backed by that file.
     * When a max spool size is configured, at most that many bytes are copied and
     * {@link #HTTP_FETCH_TRUNCATED} is set if the body had more data.
     *
     * @param inputStream response body; not closed by this method
     * @param metadata metadata passed to the temporary-file and stream factories
     * @return stream over the spooled temporary file
     * @throws IOException if creating or writing the temporary file fails; the temporary
     *     resources are released before the exception propagates
     */
    private InputStream spool(InputStream inputStream, Metadata metadata) throws IOException {
        long start = System.currentTimeMillis();
        TemporaryResources tmp = new TemporaryResources();
        try {
            Path tmpFile = tmp.createTempFile(metadata);
            if (this.maxSpoolSize < 0) {
                Files.copy(inputStream, tmpFile, StandardCopyOption.REPLACE_EXISTING);
            } else {
                try (OutputStream out = Files.newOutputStream(tmpFile)) {
                    long copied = IOUtils.copyLarge(inputStream, out, 0L, this.maxSpoolSize);
                    // Exactly maxSpoolSize bytes copied and at least one more byte available
                    // means the body was cut short.
                    if (copied == this.maxSpoolSize && inputStream.read() != -1) {
                        metadata.set(HTTP_FETCH_TRUNCATED, "true");
                    }
                }
            }
            this.LOG.debug("took {} ms to copy to local tmp file", System.currentTimeMillis() - start);
            return TikaInputStream.get(tmpFile, metadata, tmp);
        } catch (IOException | RuntimeException e) {
            // Ownership of the temporary file was never handed to a returned stream, so it
            // has to be released here or it would be left behind.
            try {
                tmp.close();
            } catch (IOException | RuntimeException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Records details of the response and of the connection in the metadata. This is best
     * effort: values that cannot be determined are simply not set.
     *
     * @param str URL that was requested
     * @param httpResponse response to describe; nothing is recorded if it is {@code null}
     * @param httpClientContext context the request was executed with
     * @param metadata metadata to update
     */
    private void updateMetadata(String str, HttpResponse httpResponse, HttpClientContext httpClientContext, Metadata metadata) {
        if (httpResponse == null) {
            return;
        }
        if (httpResponse.getStatusLine() != null) {
            metadata.set(HTTP_STATUS_CODE, httpResponse.getStatusLine().getStatusCode());
        }
        HttpEntity entity = httpResponse.getEntity();
        if (entity != null) {
            if (entity.getContentEncoding() != null) {
                metadata.set(HTTP_CONTENT_ENCODING, entity.getContentEncoding().getValue());
            }
            if (entity.getContentType() != null) {
                metadata.set(HTTP_CONTENT_TYPE, entity.getContentType().getValue());
            }
        }

        // Copy the configured response headers; repeated headers become multiple values.
        for (String headerName : this.httpHeaders) {
            Header[] headers = httpResponse.getHeaders(headerName);
            if (headers == null || headers.length == 0) {
                continue;
            }
            String key = HTTP_HEADER_PREFIX + headerName;
            for (Header header : headers) {
                metadata.add(key, header.getValue());
            }
        }

        List<URI> redirectLocations = httpClientContext.getRedirectLocations();
        if (redirectLocations == null || redirectLocations.isEmpty()) {
            metadata.set(HTTP_NUM_REDIRECTS, 0);
            metadata.set(HTTP_TARGET_URL, str);
        } else {
            metadata.set(HTTP_NUM_REDIRECTS, redirectLocations.size());
            URI lastLocation = redirectLocations.get(redirectLocations.size() - 1);
            if (lastLocation != null) {
                try {
                    URL url = lastLocation.toURL();
                    metadata.set(HTTP_TARGET_URL, url.toString());
                    metadata.set("resourceName", url.getFile());
                } catch (MalformedURLException | IllegalArgumentException e) {
                    // Metadata is best effort; report the problem instead of hiding it.
                    this.LOG.warn("could not convert final redirect location to a URL: {}", lastLocation, e);
                }
            }
        }

        HttpConnection connection = httpClientContext.getConnection();
        if (connection instanceof HttpInetConnection) {
            try {
                InetAddress remoteAddress = ((HttpInetConnection) connection).getRemoteAddress();
                if (remoteAddress != null) {
                    metadata.set(HTTP_TARGET_IP_ADDRESS, remoteAddress.getHostAddress());
                }
            } catch (ConnectionShutdownException e) {
                this.LOG.warn("connection shutdown while trying to get target URL: {}", str);
            }
        }
    }

    /**
     * Reads up to the configured max error message size of the response body as UTF-8 text,
     * for inclusion in an error message. The entity is always consumed afterwards.
     *
     * @param httpResponse response whose body should be read
     * @return the (possibly truncated) body, or an empty string if there is no entity or it
     *     could not be read
     */
    private String responseToString(HttpResponse httpResponse) {
        HttpEntity entity = httpResponse.getEntity();
        if (entity == null) {
            return "";
        }
        try (InputStream content = entity.getContent()) {
            UnsynchronizedByteArrayOutputStream buffer = new UnsynchronizedByteArrayOutputStream();
            IOUtils.copyLarge(content, buffer, 0L, this.maxErrMsgSize);
            return buffer.toString(StandardCharsets.UTF_8);
        } catch (IOException e) {
            this.LOG.warn("IOException trying to read error message", e);
            return "";
        } catch (NullPointerException ignored) {
            // Defensive: building an error message must never itself fail, e.g. if the entity
            // hands back a null content stream.
            return "";
        } finally {
            EntityUtils.consumeQuietly(entity);
        }
    }

    /**
     * Forwards the user name to the client factory.
     *
     * @param userName user name
     */
    @Field
    public void setUserName(String userName) {
        this.httpClientFactory.setUserName(userName);
    }

    /**
     * Forwards the password to the client factory.
     *
     * @param password password
     */
    @Field
    public void setPassword(String password) {
        this.httpClientFactory.setPassword(password);
    }

    /**
     * Forwards the NT domain to the client factory.
     *
     * @param ntDomain NT domain
     */
    @Field
    public void setNtDomain(String ntDomain) {
        this.httpClientFactory.setNtDomain(ntDomain);
    }

    /**
     * Forwards the authentication scheme to the client factory.
     *
     * @param authScheme authentication scheme
     */
    @Field
    public void setAuthScheme(String authScheme) {
        this.httpClientFactory.setAuthScheme(authScheme);
    }

    /**
     * Forwards the proxy host to the client factory.
     *
     * @param proxyHost proxy host
     */
    @Field
    public void setProxyHost(String proxyHost) {
        this.httpClientFactory.setProxyHost(proxyHost);
    }

    /**
     * Forwards the proxy port to the client factory.
     *
     * @param proxyPort proxy port
     */
    @Field
    public void setProxyPort(int proxyPort) {
        this.httpClientFactory.setProxyPort(proxyPort);
    }

    /**
     * Forwards the connect timeout to the client factory.
     *
     * @param connectTimeout connect timeout
     */
    @Field
    public void setConnectTimeout(int connectTimeout) {
        this.httpClientFactory.setConnectTimeout(connectTimeout);
    }

    /**
     * Forwards the request timeout to the client factory.
     *
     * @param requestTimeout request timeout
     */
    @Field
    public void setRequestTimeout(int requestTimeout) {
        this.httpClientFactory.setRequestTimeout(requestTimeout);
    }

    /**
     * Forwards the socket timeout to the client factory.
     *
     * @param socketTimeout socket timeout
     */
    @Field
    public void setSocketTimeout(int socketTimeout) {
        this.httpClientFactory.setSocketTimeout(socketTimeout);
    }

    /**
     * Forwards the maximum number of connections to the client factory.
     *
     * @param maxConnections maximum number of connections
     */
    @Field
    public void setMaxConnections(int maxConnections) {
        this.httpClientFactory.setMaxConnections(maxConnections);
    }

    /**
     * Forwards the maximum number of connections per route to the client factory.
     *
     * @param maxConnectionsPerRoute maximum number of connections per route
     */
    @Field
    public void setMaxConnectionsPerRoute(int maxConnectionsPerRoute) {
        this.httpClientFactory.setMaxConnectionsPerRoute(maxConnectionsPerRoute);
    }

    /**
     * Sets the maximum number of body bytes to spool to the temporary file.
     *
     * @param maxSpoolSize limit in bytes; a value below 0 disables the limit
     */
    @Field
    public void setMaxSpoolSize(long maxSpoolSize) {
        this.maxSpoolSize = maxSpoolSize;
    }

    /**
     * Sets the maximum number of redirects used by {@link #fetch(String, Metadata)}.
     *
     * @param maxRedirects maximum number of redirects
     */
    @Field
    public void setMaxRedirects(int maxRedirects) {
        this.maxRedirects = maxRedirects;
    }

    /**
     * Replaces the set of response header names whose values are copied into the metadata
     * under {@code HTTP_HEADER_PREFIX + name}.
     *
     * @param headers header names; {@code null} clears the set
     */
    @Field
    public void setHttpHeaders(List<String> headers) {
        this.httpHeaders.clear();
        if (headers != null) {
            this.httpHeaders.addAll(headers);
        }
    }

    /**
     * Sets the overall time budget for a single request, after which the request is aborted.
     *
     * @param overallTimeout timeout in milliseconds; a value below 0 disables it
     */
    @Field
    public void setOverallTimeout(long overallTimeout) {
        this.overallTimeout = overallTimeout;
    }

    /**
     * Sets how many bytes of an error response body are included in exception messages.
     *
     * @param maxErrMsgSize maximum number of bytes to read
     */
    @Field
    public void setMaxErrMsgSize(int maxErrMsgSize) {
        this.maxErrMsgSize = maxErrMsgSize;
    }

    /**
     * Sets the value of the {@code User-Agent} request header.
     *
     * @param userAgent header value; when blank, no header is set by this fetcher
     */
    @Field
    public void setUserAgent(String userAgent) {
        this.userAgent = userAgent;
    }

    /**
     * Builds the regular client and, from a copy of the factory, the client with content
     * compression disabled.
     *
     * @param map initialization parameters; not used by this implementation
     * @throws TikaConfigException if a client cannot be built
     */
    public void initialize(Map<String, Param> map) throws TikaConfigException {
        this.httpClient = this.httpClientFactory.build();
        HttpClientFactory noCompressFactory = this.httpClientFactory.copy();
        noCompressFactory.setDisableContentCompression(true);
        this.noCompressHttpClient = noCompressFactory.build();
    }

    /**
     * No checks are performed.
     *
     * @param initializableProblemHandler ignored
     * @throws TikaConfigException never thrown by this implementation
     */
    public void checkInitialization(InitializableProblemHandler initializableProblemHandler) throws TikaConfigException {
    }

    /**
     * Replaces the factory used to build the clients; must be called before
     * {@link #initialize(Map)} to have an effect.
     *
     * @param httpClientFactory factory to use
     */
    void setHttpClientFactory(HttpClientFactory httpClientFactory) {
        this.httpClientFactory = httpClientFactory;
    }
}
