package gov.nasa.pds.validate.crawler;

import gov.nasa.pds.tools.validate.Target;
import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedHashSet;
import java.util.List;
import org.apache.commons.io.FilenameUtils;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;

/* loaded from: input_file:gov/nasa/pds/validate/crawler/URLCrawler.class */
/**
 * Crawler that discovers targets by fetching an HTML page from a URL and
 * inspecting the anchor ({@code <a>}) elements it contains.
 *
 * <p>Relies on the {@code fileFilter} and {@code getDirectories} members
 * inherited from {@link Crawler}.
 */
public class URLCrawler extends Crawler {
    /**
     * Creates a crawler; both arguments are passed unchanged to the
     * {@link Crawler} constructor.
     *
     * @param z    forwarded to the superclass (presumably the
     *             "include directories" flag -- confirm against {@link Crawler})
     * @param list forwarded to the superclass (presumably file name
     *             filter patterns -- confirm against {@link Crawler})
     */
    public URLCrawler(boolean z, List<String> list) {
        super(z, list);
    }

    /**
     * Fetches the page at {@code url} and collects one {@link Target} per
     * qualifying link, in document order and without duplicates.
     *
     * <p>A link is considered only if its absolute form contains the text of
     * {@code url}. Links whose extension is exactly three characters long are
     * treated as files and kept when {@code fileFilter} accepts them. Any other
     * link is treated as a directory and kept only when {@code getDirectories}
     * is set, the link has no fragment ({@code #}) or query ({@code ?}), and
     * it does not point at the parent of {@code url}.
     *
     * @param url the page to crawl
     * @return the discovered targets as a new list
     * @throws IOException if the page cannot be fetched or a link is not a
     *         well-formed URL
     */
    @Override // gov.nasa.pds.validate.crawler.Crawler
    public List<Target> crawl(URL url) throws IOException {
        final String base = url.toString();
        // getParent() yields null when the URL path has no parent (for
        // example "http://host/"); in that case no link can be the
        // "parent directory" entry, so the check below must not dereference it.
        final String parentPath = new File(url.getFile()).getParent();

        Document page = Jsoup.connect(base).get();
        // Insertion-ordered set: removes duplicate links while keeping page order.
        LinkedHashSet<Target> targets = new LinkedHashSet<Target>();

        for (Element anchor : page.select("a")) {
            // "abs:href" asks jsoup for the link resolved against the page's base URI.
            String href = anchor.attr("abs:href");
            if (!href.contains(base)) {
                continue;
            }

            if (FilenameUtils.getExtension(href).length() == 3) {
                // Three-character extension: handle the link as a file.
                if (this.fileFilter.accept(new File(href))) {
                    targets.add(new Target(new URL(href), false));
                }
                continue;
            }

            if (!this.getDirectories || href.indexOf('#') != -1 || href.indexOf('?') != -1) {
                continue;
            }

            URL link = new URL(href);
            boolean pointsToParent = parentPath != null
                    && parentPath.equalsIgnoreCase(new File(link.getFile()).toString());
            if (!pointsToParent) {
                targets.add(new Target(link, true));
            }
        }
        return new ArrayList<Target>(targets);
    }
}
