package org.apache.droids.robot.crawler;

import java.io.IOException;
import java.net.URI;
import java.util.Collection;
import java.util.LinkedHashMap;
import org.apache.droids.api.ContentEntity;
import org.apache.droids.api.Link;
import org.apache.droids.api.ManagedContentEntity;
import org.apache.droids.api.Parse;
import org.apache.droids.api.Parser;
import org.apache.droids.api.Protocol;
import org.apache.droids.api.TaskValidator;
import org.apache.droids.api.Worker;
import org.apache.droids.exception.DroidsException;
import org.apache.droids.helper.factories.HandlerFactory;
import org.apache.droids.helper.factories.URLFiltersFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:org/apache/droids/robot/crawler/CrawlingWorker.class */
/**
 * {@link Worker} that processes a single {@link Link} on behalf of a {@link CrawlingDroid}.
 *
 * <p>For each link the worker resolves a {@link Protocol} for the link's URI, loads the
 * content, looks up a {@link Parser} for the reported mime type, parses the content, adds
 * the filtered outlinks to the droid's queue and finally passes the content to the
 * configured {@link HandlerFactory}.
 */
public class CrawlingWorker implements Worker<Link> {
    private static final Logger log = LoggerFactory.getLogger(CrawlingWorker.class);

    private final CrawlingDroid droid;

    // Package-private and mutable on purpose: it is assigned through setHandlerFactory(...)
    // after construction, and the original visibility is kept for existing package-level users.
    HandlerFactory handlerFactory;

    /**
     * Creates a worker bound to the given droid.
     *
     * @param crawlingDroid the droid supplying the protocol factory, parser factory, queue,
     *                      URL filters and link validator used by this worker
     */
    public CrawlingWorker(CrawlingDroid crawlingDroid) {
        this.droid = crawlingDroid;
    }

    /**
     * Loads, parses and handles the content behind {@code link}.
     *
     * <p>Processing stops early (with a log message, without throwing) when no protocol is
     * available for the URI or when the protocol reports the URI as not allowed. When the
     * content has no mime type, or no parser is registered for it, the content is loaded but
     * neither parsed nor handled. {@code finish()} is always invoked on the loaded entity.
     *
     * @param link the link to process
     * @throws DroidsException propagated from loading, parsing or handling
     * @throws IOException     propagated from loading, parsing or handling
     */
    @Override
    public void execute(Link link) throws DroidsException, IOException {
        log.debug("Starting {}", getClass().getCanonicalName());

        URI uri = link.getURI();
        Protocol protocol = this.droid.getProtocolFactory().getProtocol(uri);
        if (protocol == null) {
            log.warn("Unsupported protocol scheme '{}'", uri.getScheme());
            return;
        }
        if (!protocol.isAllowed(uri)) {
            log.info("Stopping processing since bots are not allowed for {}", uri);
            return;
        }

        log.info("Loading {}", uri);
        ManagedContentEntity entity = protocol.load(uri);
        try {
            String mimeType = entity.getMimeType();
            log.debug("Content type {}", mimeType);
            if (mimeType == null) {
                log.info("Missing content type... can't parse...");
                return; // the finally block below still runs
            }

            Parser parser = this.droid.getParserFactory().getParser(mimeType);
            if (parser == null) {
                log.debug("Could not find parser for {}", mimeType);
                return; // the finally block below still runs
            }

            Parse parse = parser.parse(entity, link);
            if (parse.getOutlinks() != null) {
                this.droid.getQueue().addAll(getFilteredOutlinks(parse));
            }
            // The parse result is attached before the entity is handed to the handlers.
            entity.setParse(parse);
            handle(entity, link);
        } finally {
            entity.finish();
        }
    }

    /**
     * Passes the content to the configured {@link HandlerFactory}, keyed by the link's URI.
     *
     * @param contentEntity the loaded (and parsed) content
     * @param link          the link the content was loaded from
     * @throws DroidsException propagated from the handler factory
     * @throws IOException     propagated from the handler factory
     */
    protected void handle(ContentEntity contentEntity, Link link) throws DroidsException, IOException {
        getHandlerFactory().handle(link.getURI(), contentEntity);
    }

    /**
     * Selects the outlinks of {@code parse} that should be queued.
     *
     * <p>A link is kept when the droid's URL filters accept its id, no link with the same id
     * has been kept already, and the droid's link validator (if one is configured) validates
     * it. Kept links are returned in the order they were first encountered.
     *
     * @param parse the parse result; its outlinks must not be {@code null}
     * @return the accepted links, without duplicates by id
     */
    protected Collection<Link> getFilteredOutlinks(Parse parse) {
        URLFiltersFactory filters = this.droid.getFiltersFactory();
        TaskValidator<Link> validator = this.droid.getLinkValidator();

        // Keyed by link id to drop duplicates; LinkedHashMap keeps first-seen order.
        LinkedHashMap<String, Link> accepted = new LinkedHashMap<String, Link>();
        for (Link outlink : parse.getOutlinks()) {
            String id = outlink.getId();
            if (!filters.accept(id) || accepted.containsKey(id)) {
                continue;
            }
            // A missing validator means every filtered link is accepted.
            if (validator == null || validator.validate(outlink)) {
                accepted.put(id, outlink);
            }
        }
        return accepted.values();
    }

    /**
     * Returns the handler factory used by {@link #handle(ContentEntity, Link)}.
     *
     * @return the configured handler factory, or {@code null} if none has been set
     */
    public HandlerFactory getHandlerFactory() {
        return this.handlerFactory;
    }

    /**
     * Sets the handler factory used by {@link #handle(ContentEntity, Link)}.
     *
     * @param handlerFactory the handler factory to use
     */
    public void setHandlerFactory(HandlerFactory handlerFactory) {
        this.handlerFactory = handlerFactory;
    }
}
