package org.apache.droids.parse.html;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.droids.LinkTask;
import org.apache.droids.api.Link;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

/* loaded from: input_file:org/apache/droids/parse/html/LinkExtractor.class */
/**
 * SAX handler that collects outgoing links from a parsed document.
 *
 * <p>The handler is configured with a map from element name to the attribute of that
 * element holding the link (matched case-insensitively against the SAX local name).
 * Every matching attribute value is resolved against the current base URI, de-duplicated
 * through a per-document history of URI strings, and stored as a {@link LinkTask} whose
 * depth is one more than the depth of the originating link.
 *
 * <p>A {@code <base href="...">} element replaces the base URI used for resolution; only
 * the first one that parses successfully is honoured.
 *
 * <p>NOTE(review): anchor text is accumulated from all character data seen since the
 * last added link and is attached to the most recently added link whenever any tracked
 * element closes, even if that element did not itself produce the link. Nested or
 * duplicate links can therefore receive text that belongs to another element.
 */
public class LinkExtractor extends DefaultHandler {
    private static final String BASE_ELEMENT = "base";
    private static final String BASE_ATTRIBUTE = "href";
    /** Scheme prefix of script pseudo-URLs, which are never followed. */
    private static final String JAVASCRIPT_SCHEME = "javascript:";

    protected final Log log = LogFactory.getLog(getClass());

    /** Link of the document being parsed; parent of every extracted link. */
    private final Link base;
    /** Element name -> attribute name carrying the link. */
    private final Map<String, String> elements;
    /** Extracted links, in document order. */
    private final ArrayList<Link> links = new ArrayList<>();
    /** URI that relative references are resolved against. */
    private URI baseUri;
    /** URI strings already seen; reset per document and released in {@link #endDocument()}. */
    private Set<String> history = null;
    /** True until a valid {@code <base>} element has been processed. */
    private boolean checkBase = true;
    /** URI resolved by {@link #startElement} and handed over to {@link #addOutlinkURI(String)}. */
    private URI link = null;
    /** Character data collected since the last added link. */
    private StringBuilder anchorText = new StringBuilder();

    /**
     * Creates an extractor for one document.
     *
     * @param link the link of the document being parsed; its URI is the initial base URI
     * @param map  element name to link-attribute name, e.g. {@code "a" -> "href"}
     */
    public LinkExtractor(Link link, Map<String, String> map) {
        this.base = link;
        this.elements = map;
        this.baseUri = link.getURI();
    }

    /** Starts a fresh history seeded with the document's own URI so it is never re-added. */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startDocument() throws SAXException {
        this.history = new HashSet<>();
        this.history.add(this.base.getURI().toString());
    }

    /**
     * Handles a {@code <base>} element (until one has been accepted) and extracts a link
     * from every configured element/attribute pair that matches this element.
     *
     * @param str        namespace URI (unused)
     * @param str2       local name, compared case-insensitively
     * @param str3       qualified name (unused)
     * @param attributes attributes of the element
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void startElement(String str, String str2, String str3, Attributes attributes) throws SAXException {
        if (this.checkBase && BASE_ELEMENT.equalsIgnoreCase(str2)) {
            updateBaseUri(attributes.getValue(BASE_ATTRIBUTE));
        }
        for (Map.Entry<String, String> entry : this.elements.entrySet()) {
            String elementName = entry.getKey();
            if (!elementName.equalsIgnoreCase(str2)) {
                continue;
            }
            String value = attributes.getValue(entry.getValue());
            if (value == null) {
                continue;
            }
            this.link = getURI(value);
            this.log.debug("Found element: " + elementName + " with link: " + this.link);
            if (this.link != null) {
                // Routed through the public method so subclasses overriding it still see every link.
                addOutlinkURI(this.link.toString());
                this.link = null;
                this.anchorText = new StringBuilder();
            }
        }
    }

    /** Appends character data to the pending anchor text. */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void characters(char[] cArr, int i, int i2) {
        this.anchorText.append(cArr, i, i2);
    }

    /**
     * When a tracked element closes, assigns the collected text to the most recently
     * added link (once per configured key matching the element name).
     */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endElement(String str, String str2, String str3) {
        for (String elementName : this.elements.keySet()) {
            if (elementName.equalsIgnoreCase(str2)) {
                addAnchorText(this.anchorText.toString());
            }
        }
    }

    /** Releases the history; the collected links stay available via {@link #getLinks()}. */
    @Override // org.xml.sax.helpers.DefaultHandler, org.xml.sax.ContentHandler
    public void endDocument() throws SAXException {
        this.history = null;
        this.log.debug("Found " + this.links.size() + " outliks");
    }

    /** Sets the whitespace-normalised text as anchor text of the last added link, if any. */
    private void addAnchorText(String str) {
        if (this.links.isEmpty()) {
            return;
        }
        // Only LinkTask instances are ever added to the list, so the cast is safe.
        LinkTask linkTask = (LinkTask) this.links.get(this.links.size() - 1);
        linkTask.setAnchorText(str.replaceAll("\\s+", " ").trim());
        this.log.debug("Adding anchor: " + linkTask.getAnchorText() + " on link: " + linkTask);
    }

    /**
     * Registers an outgoing link unless its URI string has been seen before.
     *
     * <p>When called from {@link #startElement} the already resolved URI is reused;
     * any other caller's string is resolved the same way attribute values are
     * (relative references against the base URI, script pseudo-URLs skipped).
     * {@code null} or unusable input is ignored.
     *
     * @param str the URI of the link to add
     */
    public void addOutlinkURI(String str) {
        if (str == null) {
            return;
        }
        URI target = this.link;
        if (target == null || !target.toString().equals(str)) {
            // Not the hand-over from startElement: honour the argument instead of the field.
            target = getURI(str);
        }
        if (target == null) {
            return;
        }
        if (this.history == null) {
            // Used outside a startDocument/endDocument cycle.
            this.history = new HashSet<>();
        }
        if (this.history.add(target.toString())) {
            this.links.add(new LinkTask(this.base, target, this.base.getDepth() + 1));
            // Parentheses matter: without them "+ 1" is appended as text to the depth.
            this.log.debug("Added outlink: " + target + " with depth: " + (this.base.getDepth() + 1));
        }
    }

    /** @return the links extracted so far (live collection, in document order) */
    public Collection<Link> getLinks() {
        return this.links;
    }

    /** @return the element-name to attribute-name map this extractor was configured with */
    public Map<String, String> getElements() {
        return this.elements;
    }

    /**
     * Applies a {@code <base href>} value. A relative value is resolved against the
     * document's own URI so that later resolution still yields absolute URIs.
     */
    private void updateBaseUri(String href) {
        if (href == null) {
            return;
        }
        try {
            URI declared = new URI(href.trim());
            URI documentUri = this.base.getURI();
            this.baseUri = documentUri != null ? documentUri.resolve(declared) : declared;
            this.log.debug("Found base URI: " + this.baseUri);
            this.checkBase = false;
        } catch (URISyntaxException e) {
            this.log.error("Base URI not valid: " + href, e);
        }
    }

    /**
     * Turns a raw attribute value into a fragment-less URI.
     *
     * @param str raw attribute value, never {@code null}
     * @return the parsed (if it contains {@code ":/"}) or base-resolved URI, or
     *         {@code null} for {@code javascript:} pseudo-URLs and unparsable values
     */
    private URI getURI(String str) {
        // Surrounding whitespace is not part of the URL; inner whitespace is percent-encoded.
        String cleaned = str.trim().replaceAll("\\s", "%20");
        // Locale-independent, and only the scheme itself - not paths like "javascript/app.js".
        if (cleaned.regionMatches(true, 0, JAVASCRIPT_SCHEME, 0, JAVASCRIPT_SCHEME.length())) {
            return null;
        }
        // indexOf/substring instead of split("#")[0], which yields an empty array for "#".
        int fragmentStart = cleaned.indexOf('#');
        String withoutFragment = fragmentStart < 0 ? cleaned : cleaned.substring(0, fragmentStart);
        try {
            if (withoutFragment.contains(":/")) {
                return new URI(withoutFragment);
            }
            if (this.baseUri == null) {
                this.log.error("URI not valid: " + cleaned);
                return null;
            }
            return this.baseUri.resolve(withoutFragment);
        } catch (URISyntaxException | IllegalArgumentException e) {
            this.log.error("URI not valid: " + cleaned, e);
            return null;
        }
    }
}
