package nyla.solutions.global.patterns.creational.builder.mapped;

import java.io.InputStream;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.util.HashSet;
import java.util.Set;
import nyla.solutions.global.exception.SystemException;
import nyla.solutions.global.net.http.HTTP;
import nyla.solutions.global.patterns.cache.CacheFarm;
import nyla.solutions.global.patterns.command.commas.CommasConstants;
import nyla.solutions.global.util.Debugger;
import nyla.solutions.global.util.Text;
import nyla.solutions.global.xml.DOM4J;
import nyla.solutions.global.xml.DomJsoupHtmlStragety;
import org.w3c.dom.Document;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

/* loaded from: input_file:nyla/solutions/global/patterns/creational/builder/mapped/WebCrawlerMappedKeyDirector.class */
public class WebCrawlerMappedKeyDirector<K, V> extends MappedKeyDirector<K, V> {
    private String xmlCacheKey = "xml";
    private String documentCacheKey = "document";
    private Set<Object> crawledPaths = new HashSet();
    private String ignorePathRegExp = CommasConstants.ROOT_SERVICE_NAME;

    @Override // nyla.solutions.global.patterns.creational.builder.mapped.MappedKeyDirector
    public void constructDocument(String str, MappedKeyEngineer<K, V> mappedKeyEngineer) {
        CacheFarm.getCache().remove(this.documentCacheKey);
        try {
            crawl(str, mappedKeyEngineer);
            CacheFarm.getCache().remove(this.documentCacheKey);
            this.crawledPaths.clear();
        } catch (Throwable th) {
            CacheFarm.getCache().remove(this.documentCacheKey);
            this.crawledPaths.clear();
            throw th;
        }
    }

    protected void crawl(String str, MappedKeyEngineer<K, V> mappedKeyEngineer) {
        Debugger.println(this, "checked =" + str);
        if (Text.matches(str, this.ignorePathRegExp) || this.crawledPaths.contains(str)) {
            Debugger.println(this, "skipped =" + str);
            return;
        }
        Debugger.println(this, "processing =" + str);
        try {
            URL url = new URL(str);
            URLConnection openConnection = url.openConnection();
            String contentType = openConnection.getContentType();
            Document document = new DomJsoupHtmlStragety().toDocument(openConnection.getInputStream());
            CacheFarm.getCache().put(this.documentCacheKey, document);
            CacheFarm.getCache().put(this.xmlCacheKey, new DOM4J(document));
            mappedKeyEngineer.construct(str, super.constructMapToText(str));
            if (HTTP.isHtmlContentType(contentType)) {
                NodeList elementsByTagName = document.getElementsByTagName("a");
                for (int i = 0; i < elementsByTagName.getLength(); i++) {
                    NamedNodeMap attributes = elementsByTagName.item(i).getAttributes();
                    if (attributes != null) {
                        String nodeValue = attributes.getNamedItem("href").getNodeValue();
                        if (!mustSkip(nodeValue)) {
                            try {
                                crawl(HTTP.toURL(url, nodeValue).toString(), mappedKeyEngineer);
                            } catch (MalformedURLException e) {
                                Debugger.printWarn(this, "skipped parent url=" + url + " link=" + nodeValue + " " + e);
                            }
                        }
                    }
                }
            }
        } catch (Exception e2) {
            throw new SystemException("unable to process url=" + str + " error=" + Debugger.stackTrace(e2));
        }
    }

    private boolean mustSkip(String str) {
        if (!Text.matches(str, this.ignorePathRegExp) && !this.crawledPaths.contains(str)) {
            return false;
        }
        Debugger.println(this, "skipped =" + str);
        return true;
    }

    public String getXmlCacheKey() {
        return this.xmlCacheKey;
    }

    public void setXmlCacheKey(String str) {
        this.xmlCacheKey = str;
    }
}
