package org.apache.streams.urls;

import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import java.io.IOException;
import java.io.Serializable;
import java.net.HttpURLConnection;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.codec.net.URLCodec;
import org.apache.commons.validator.routines.UrlValidator;
import org.apache.streams.urls.LinkDetails;
import org.joda.time.DateTime;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/streams/urls/LinkResolver.class */
/**
 * Follows the HTTP redirect chain of a single URL by hand and records the outcome
 * (final URL, status, redirects, cookies, timing) in a {@link LinkDetails} instance.
 *
 * <p>Typical use: construct with the URL, call {@link #run()}, then read
 * {@link #getLinkDetails()}.
 */
public class LinkResolver implements Serializable {
    private static final int MAX_ALLOWED_REDIRECTS = 30;
    private static final int DEFAULT_HTTP_TIMEOUT = 10000;
    /** Number of times {@link #run()} tries to resolve the original URL before giving up. */
    private static final int MAX_RESOLVE_ATTEMPTS = 3;
    private static final String LOCATION_IDENTIFIER = "location";
    private static final String SET_COOKIE_IDENTIFIER = "set-cookie";
    private static final Logger LOGGER = LoggerFactory.getLogger(LinkResolver.class);

    /** Browser-like request headers sent to every host that is not listed in {@link #BOTS_ARE_OK}. */
    private static final Map<String, String> SPOOF_HTTP_HEADERS = buildSpoofHeaders();

    /** Hosts that are contacted without the spoofed browser headers. */
    private static final Collection<String> BOTS_ARE_OK = Collections.singleton("t.co");

    /** Pre-compiled patterns matching the {@code utm_*} tracking parameters stripped by {@link #cleanURL(String)}. */
    private static final List<Pattern> URL_TRACKING_TO_REMOVE = buildTrackingPatterns();

    /** Lower-cased hosts for which the per-domain wait time has already been requested by this instance. */
    private Collection<String> domainsSensitiveTo = new HashSet<>();
    private final LinkDetails linkDetails = new LinkDetails();

    /**
     * Creates a resolver for the given URL; nothing is fetched until {@link #run()} is called.
     *
     * @param str the URL to resolve, stored as the original URL of the link details
     */
    public LinkResolver(String str) {
        this.linkDetails.setOriginalURL(str);
    }

    /**
     * @return the mutable details object this resolver fills in
     */
    public LinkDetails getLinkDetails() {
        return this.linkDetails;
    }

    /**
     * Resolves the original URL and populates the link details.
     *
     * <p>Resolution is attempted up to {@value #MAX_RESOLVE_ATTEMPTS} times while no final URL
     * has been recorded. Afterwards the final URL has its tracking parameters removed and the
     * normalized URL, the URL tokens and the elapsed time are stored.
     *
     * @throws NullPointerException if the original URL is {@code null}
     */
    public void run() {
        Preconditions.checkNotNull(this.linkDetails.getOriginalURL());
        this.linkDetails.setStartTime(DateTime.now());

        for (int attempt = 0;
             attempt < MAX_RESOLVE_ATTEMPTS && this.linkDetails.getFinalURL() == null;
             attempt++) {
            if (this.linkDetails.getLinkStatus() != LinkDetails.LinkStatus.SUCCESS) {
                unwindLink(this.linkDetails.getOriginalURL());
            }
        }

        if (currentRedirectCount() == 0 || this.linkDetails.getRedirected() == null) {
            this.linkDetails.setRedirected(false);
        }

        this.linkDetails.setFinalURL(cleanURL(this.linkDetails.getFinalURL()));
        if (!Strings.isNullOrEmpty(this.linkDetails.getFinalURL())) {
            this.linkDetails.setNormalizedURL(normalizeURL(this.linkDetails.getFinalURL()));
        }
        if (!Strings.isNullOrEmpty(this.linkDetails.getNormalizedURL())) {
            this.linkDetails.setUrlParts(tokenizeURL(this.linkDetails.getNormalizedURL()));
        }
        updateTookInMillis();
    }

    /**
     * Stores the number of milliseconds elapsed since the recorded start time.
     *
     * @throws NullPointerException if no start time has been recorded
     */
    protected void updateTookInMillis() {
        DateTime startTime = Preconditions.checkNotNull(this.linkDetails.getStartTime());
        this.linkDetails.setTookInMills(Long.valueOf(DateTime.now().getMillis() - startTime.getMillis()));
    }

    /**
     * Requests {@code str} without automatic redirect following, records the result in the link
     * details and, when the response is a redirect carrying a {@code Location} header, recurses
     * into the redirect target.
     *
     * <p>The link status is set to {@code MALFORMED_URL} for invalid URLs, {@code LOOP} when a
     * URL is revisited or more than {@value #MAX_ALLOWED_REDIRECTS} redirects were followed,
     * {@code ERROR} on I/O failures and {@code EXCEPTION} on any other failure.
     *
     * @param str the URL to request
     * @throws NullPointerException if {@code str} is {@code null}
     */
    public void unwindLink(String str) {
        Preconditions.checkNotNull(this.linkDetails);
        Preconditions.checkNotNull(str);

        if (!new UrlValidator().isValid(str)) {
            this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.MALFORMED_URL);
            return;
        }

        long redirectCount = currentRedirectCount();
        boolean revisited = redirectCount > 0
                && (this.linkDetails.getOriginalURL().equals(str) || this.linkDetails.getRedirects().contains(str));
        if (revisited || redirectCount > MAX_ALLOWED_REDIRECTS) {
            this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.LOOP);
            return;
        }

        if (!this.linkDetails.getOriginalURL().equals(str)) {
            this.linkDetails.getRedirects().add(str);
        }

        HttpURLConnection connection = null;
        String redirectTarget = null;
        try {
            URL url = new URL(str);
            waitForDomain(url);

            connection = (HttpURLConnection) url.openConnection();
            prepareRequest(connection, url);
            connection.connect();

            int responseCode = connection.getResponseCode();
            this.linkDetails.setFinalResponseCode(Long.valueOf(responseCode));

            Map<String, List<String>> headers = createCaseInsensitiveMap(connection.getHeaderFields());
            rememberCookie(headers);
            redirectTarget = applyResponse(connection, responseCode, headers);
        } catch (MalformedURLException e) {
            LOGGER.debug("Malformed URL while resolving {}", str, e);
            this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.MALFORMED_URL);
        } catch (IOException e) {
            LOGGER.debug("I/O failure while resolving {}", str, e);
            this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.ERROR);
        } catch (Exception e) {
            LOGGER.debug("Unexpected failure while resolving {}", str, e);
            this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.EXCEPTION);
        } finally {
            // Release the connection on every path, including the failure paths.
            if (connection != null) {
                connection.disconnect();
            }
        }

        // Recurse only after the current connection has been released.
        if (redirectTarget != null) {
            unwindLink(redirectTarget);
        }
    }

    /** Returns the recorded redirect count, treating a missing value as zero. */
    private long currentRedirectCount() {
        Long count = this.linkDetails.getRedirectCount();
        return count == null ? 0L : count.longValue();
    }

    /**
     * Sleeps for the time reported by {@code LinkResolverHelperFunctions.waitTimeForDomain} the
     * first time this instance contacts a host. Best effort: failures never abort the request.
     */
    private void waitForDomain(URL url) {
        try {
            String host = url.getHost().toLowerCase(Locale.ROOT);
            // add() reports whether the host was new, replacing a separate contains() check.
            if (this.domainsSensitiveTo.add(host)) {
                long waitTimeForDomain = LinkResolverHelperFunctions.waitTimeForDomain(url.getHost());
                if (waitTimeForDomain > 0) {
                    LOGGER.debug("Waiting for domain: {}", Long.valueOf(waitTimeForDomain));
                    Thread.sleep(waitTimeForDomain);
                }
            }
        } catch (InterruptedException e) {
            // Preserve the interrupt for callers instead of silently discarding it.
            Thread.currentThread().interrupt();
            LOGGER.debug("Interrupted while waiting for domain {}", url.getHost());
        } catch (Exception e) {
            LOGGER.debug("Unable to apply wait time for domain {}", url.getHost(), e);
        }
    }

    /** Applies headers, timeouts, redirect policy and previously collected cookies to the request. */
    private void prepareRequest(HttpURLConnection connection, URL url) {
        if (!BOTS_ARE_OK.contains(url.getHost())) {
            connection.addRequestProperty("Host", url.getHost());
            for (Map.Entry<String, String> header : SPOOF_HTTP_HEADERS.entrySet()) {
                connection.addRequestProperty(header.getKey(), header.getValue());
            }
            // NOTE(review): the original code also added a "Referrer" header here, but only when the
            // host WAS in BOTS_ARE_OK, which can never hold inside this branch, so it was never sent.
            // The unreachable statement was dropped; confirm the intended condition (and the header
            // name, which HTTP spells "Referer") before re-introducing it.
        }

        connection.setReadTimeout(DEFAULT_HTTP_TIMEOUT);
        connection.setConnectTimeout(DEFAULT_HTTP_TIMEOUT);
        connection.setInstanceFollowRedirects(false);

        if (this.linkDetails.getCookies() != null) {
            for (String cookie : this.linkDetails.getCookies()) {
                // Limit 2 keeps only the "name=value" pair; a limit of 1 would never split and
                // would send the Set-Cookie attributes (Path, Expires, ...) back to the server.
                connection.addRequestProperty("Cookie", cookie.split(";", 2)[0]);
            }
        }
    }

    /** Stores the first {@code Set-Cookie} value of the response, if any, for subsequent requests. */
    private void rememberCookie(Map<String, List<String>> headers) {
        if (headers.containsKey(SET_COOKIE_IDENTIFIER) && this.linkDetails.getCookies() != null) {
            this.linkDetails.getCookies().add(headers.get(SET_COOKIE_IDENTIFIER).get(0));
        }
    }

    /**
     * Translates the HTTP response into link details.
     *
     * @return the URL to follow next when the response is a redirect with a {@code Location}
     *         header, otherwise {@code null}
     * @throws MalformedURLException if a relative redirect target cannot be resolved
     */
    private String applyResponse(HttpURLConnection connection, int responseCode,
                                 Map<String, List<String>> headers) throws MalformedURLException {
        String requestedUrl = connection.getURL().toString();
        switch (responseCode) {
            case 200:
                this.linkDetails.setFinalURL(requestedUrl);
                this.linkDetails.setDomain(connection.getURL().getHost());
                this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.SUCCESS);
                return null;
            case 300:
            case 301:
            case 302:
            case 303:
            case 304:
            case 306:
            case 307:
            case 308:
                if (!this.linkDetails.getOriginalURL().equalsIgnoreCase(requestedUrl)) {
                    this.linkDetails.setFinalURL(requestedUrl);
                }
                if (!headers.containsKey(LOCATION_IDENTIFIER)) {
                    LOGGER.info("Headers: {}", headers);
                    this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.REDIRECT_ERROR);
                    return null;
                }
                this.linkDetails.setRedirected(Boolean.TRUE);
                this.linkDetails.setRedirectCount(Long.valueOf(currentRedirectCount() + 1));
                return resolveRedirectTarget(connection.getURL(), connection.getHeaderField(LOCATION_IDENTIFIER));
            case 305:
                // Deliberately leaves the link status untouched.
                return null;
            case 401:
                this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.UNAUTHORIZED);
                return null;
            case 403:
                this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.FORBIDDEN);
                return null;
            case 404:
                this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.NOT_FOUND);
                return null;
            case 500:
            case 501:
            case 502:
            case 503:
            case 504:
            case 505:
                this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.HTTP_ERROR_STATUS);
                return null;
            default:
                LOGGER.info("Unrecognized HTTP Response Code: {}", Long.valueOf(responseCode));
                this.linkDetails.setLinkStatus(LinkDetails.LinkStatus.NOT_FOUND);
                return null;
        }
    }

    /**
     * Returns {@code location} unchanged when it is already a valid absolute URL; otherwise
     * resolves it against the URL that issued the redirect, since servers may send relative
     * {@code Location} values.
     */
    private static String resolveRedirectTarget(URL base, String location) throws MalformedURLException {
        if (location == null || new UrlValidator().isValid(location)) {
            return location;
        }
        return new URL(base, location).toString();
    }

    /**
     * Copies the header map with every key lower-cased so lookups do not depend on the casing
     * used by the server. Entries with a {@code null} key or value are skipped.
     */
    private Map<String, List<String>> createCaseInsensitiveMap(Map<String, List<String>> map) {
        Map<String, List<String>> lowerCased = new HashMap<>();
        for (Map.Entry<String, List<String>> header : map.entrySet()) {
            if (header.getKey() != null && header.getValue() != null) {
                lowerCased.put(header.getKey().toLowerCase(Locale.ROOT), header.getValue());
            }
        }
        return lowerCased;
    }

    /**
     * Removes the {@code utm_*} tracking parameters from the URL and flags the link as tracked
     * when anything was removed.
     *
     * @param str the URL to clean, may be {@code null}
     * @return the cleaned URL, or {@code null} when {@code str} is {@code null}
     */
    private String cleanURL(String str) {
        if (str == null) {
            return null;
        }
        String cleaned = str;
        for (Pattern trackingParameter : URL_TRACKING_TO_REMOVE) {
            cleaned = trackingParameter.matcher(cleaned).replaceAll("");
        }
        if (cleaned.length() < str.length()) {
            this.linkDetails.setTracked(Boolean.TRUE);
            cleaned = restoreQuerySeparator(str, cleaned);
        }
        return cleaned;
    }

    /**
     * Repairs the query separator after tracking parameters were stripped: when the leading
     * parameter is removed together with its '?', the next parameter is left starting with '&amp;'
     * (for example {@code /?utm_source=x&id=5} becomes {@code /&id=5}); that '&amp;' is turned back
     * into '?'.
     */
    private static String restoreQuerySeparator(String original, String cleaned) {
        int queryStart = original.indexOf('?');
        boolean separatorLost = queryStart >= 0
                && cleaned.length() > queryStart
                && cleaned.charAt(queryStart) == '&'
                && cleaned.regionMatches(0, original, 0, queryStart);
        if (!separatorLost) {
            return cleaned;
        }
        return cleaned.substring(0, queryStart) + '?' + cleaned.substring(queryStart + 1);
    }

    /**
     * URL-decodes the given URL and strips its leading scheme ({@code "scheme://"}).
     *
     * @param str the URL to normalize, may be {@code null}
     * @return the decoded URL without scheme; {@code null} for {@code null} input; the input
     *         unchanged when it cannot be decoded
     */
    public static String normalizeURL(String str) {
        if (str == null) {
            return null;
        }
        try {
            String decoded = new URLCodec().decode(str);
            // Strip only the leading scheme; splitting on every "://" would truncate URLs that
            // carry another URL in their path or query string.
            int schemeEnd = decoded.indexOf("://");
            return schemeEnd >= 0 ? decoded.substring(schemeEnd + 3) : decoded;
        } catch (Exception e) {
            LOGGER.warn("Error decoding URL {}. Decoding skipped.", str, e);
            return str;
        }
    }

    /**
     * Normalizes the URL via {@link #normalizeURL(String)} and splits it on runs of '/' into
     * lower-cased tokens. Because this method normalizes its argument itself, an already
     * normalized URL passed in is decoded a second time.
     *
     * @param str the URL to tokenize, may be {@code null}
     * @return the tokens in order of appearance; empty when {@code str} is {@code null}
     */
    public static List<String> tokenizeURL(String str) {
        List<String> tokens = new ArrayList<>();
        String normalized = normalizeURL(str);
        if (normalized == null) {
            return tokens;
        }
        for (String part : normalized.split("/+")) {
            tokens.add(part.toLowerCase(Locale.ROOT));
        }
        return tokens;
    }

    /** Builds the read-only set of browser-like request headers. */
    private static Map<String, String> buildSpoofHeaders() {
        Map<String, String> headers = new HashMap<>();
        headers.put("Connection", "Keep-Alive");
        headers.put("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_9_0) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/31.0.1650.48 Safari/537.36");
        headers.put("Accept-Language", "en-US,en;q=0.8,zh;q=0.6");
        return Collections.unmodifiableMap(headers);
    }

    /** Compiles one pattern per tracking parameter, once, instead of on every clean-up call. */
    private static List<Pattern> buildTrackingPatterns() {
        String[] parameterNames = {"utm_source", "utm_medium", "utm_term", "utm_content", "utm_campaign"};
        List<Pattern> patterns = new ArrayList<>(parameterNames.length);
        for (String parameterName : parameterNames) {
            patterns.add(Pattern.compile("([\\?&])" + parameterName + "(=)[^&?]*"));
        }
        return Collections.unmodifiableList(patterns);
    }
}
