package gov.nasa.pds.harvest.search.crawler;

import gov.nasa.jpl.oodt.cas.crawl.ProductCrawler;
import gov.nasa.jpl.oodt.cas.crawl.action.CrawlerAction;
import gov.nasa.jpl.oodt.cas.crawl.action.CrawlerActionRepo;
import gov.nasa.jpl.oodt.cas.metadata.Metadata;
import gov.nasa.jpl.oodt.cas.metadata.exceptions.MetExtractionException;
import gov.nasa.pds.harvest.search.constants.Constants;
import gov.nasa.pds.harvest.search.crawler.actions.LidCheckerAction;
import gov.nasa.pds.harvest.search.crawler.actions.LogMissingReqMetadataAction;
import gov.nasa.pds.harvest.search.crawler.actions.TitleLengthCheckerAction;
import gov.nasa.pds.harvest.search.crawler.metadata.extractor.BundleMetExtractor;
import gov.nasa.pds.harvest.search.crawler.metadata.extractor.CollectionMetExtractor;
import gov.nasa.pds.harvest.search.crawler.metadata.extractor.Pds4MetExtractor;
import gov.nasa.pds.harvest.search.crawler.metadata.extractor.Pds4MetExtractorConfig;
import gov.nasa.pds.harvest.search.doc.SearchDocState;
import gov.nasa.pds.harvest.search.logging.ToolsLevel;
import gov.nasa.pds.harvest.search.logging.ToolsLogRecord;
import gov.nasa.pds.harvest.search.policy.DirectoryFilter;
import gov.nasa.pds.harvest.search.policy.FileFilter;
import gov.nasa.pds.harvest.search.stats.HarvestSolrStats;
import gov.nasa.pds.harvest.search.util.LidVid;
import gov.nasa.pds.harvest.search.util.XMLExtractor;
import java.io.File;
import java.net.MalformedURLException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
import net.sf.saxon.trans.XPathException;
import org.apache.commons.io.filefilter.AndFileFilter;
import org.apache.commons.io.filefilter.FileFilterUtils;
import org.apache.commons.io.filefilter.NotFileFilter;
import org.xml.sax.SAXParseException;

/* loaded from: input_file:gov/nasa/pds/harvest/search/crawler/PDSProductCrawler.class */
/**
 * Product crawler that screens files as candidate PDS4 product labels and
 * extracts their metadata.
 *
 * <p>For each file, {@link #passesPreconditions(File)} parses the file as XML,
 * reads its logical identifier, version and product class, and decides whether
 * the file should be processed. {@link #getMetadataForProduct(File)} then picks
 * a metadata extractor based on the product class recorded by that check.</p>
 *
 * <p>The crawler keeps per-file state ({@code objectType}) between those two
 * calls, so the methods are expected to be invoked in that order for one file
 * at a time.</p>
 */
public class PDSProductCrawler extends ProductCrawler {
    /** Logger shared by all crawler instances. */
    private static final Logger log = Logger.getLogger(PDSProductCrawler.class.getName());

    /** Extractor configuration; {@code null} when the no-arg constructor is used. */
    private Pds4MetExtractorConfig metExtractorConfig;

    /** Actions loaded into a fresh action repository on every {@link #crawl(File)}. */
    private final List<CrawlerAction> crawlerActions;

    /** Product class of the file most recently examined by {@link #passesPreconditions(File)}. */
    private String objectType;

    /** When {@code true}, files whose modification time is unchanged are skipped. */
    protected boolean inPersistanceMode;

    /** Last-modified timestamp recorded for each file seen in persistence mode. */
    protected Map<File, Long> touchedFiles;

    /** State object supplied through {@link #setCounter(SearchDocState)}; not read in this class. */
    private SearchDocState searchDocState;

    /**
     * Creates a crawler without an extractor configuration.
     */
    public PDSProductCrawler() {
        this(null);
    }

    /**
     * Creates a crawler with the given extractor configuration.
     *
     * <p>Sets the required metadata keys, installs a default file filter that
     * accepts regular files matching {@code "*"}, and registers the default
     * crawler actions.</p>
     *
     * @param pds4MetExtractorConfig configuration handed to the metadata
     *        extractors; may be {@code null}
     */
    public PDSProductCrawler(Pds4MetExtractorConfig pds4MetExtractorConfig) {
        this.objectType = "";
        this.metExtractorConfig = pds4MetExtractorConfig;
        this.crawlerActions = new ArrayList<CrawlerAction>();
        this.inPersistanceMode = false;
        this.touchedFiles = new HashMap<File, Long>();
        setRequiredMetadata(Arrays.asList("version_id", "logical_identifier", Constants.OBJECT_TYPE));

        AndFileFilter defaultFileFilter = new AndFileFilter();
        defaultFileFilter.addFileFilter(FileFilterUtils.fileFileFilter());
        defaultFileFilter.addFileFilter(new WildcardOSFilter("*"));
        this.FILE_FILTER = defaultFileFilter;

        this.crawlerActions.add(new LogMissingReqMetadataAction(getRequiredMetadata()));
        this.crawlerActions.add(new LidCheckerAction());
        this.crawlerActions.add(new TitleLengthCheckerAction());
    }

    /**
     * Returns the extractor configuration.
     *
     * @return the current configuration, possibly {@code null}
     */
    public Pds4MetExtractorConfig getMetExtractorConfig() {
        return this.metExtractorConfig;
    }

    /**
     * Replaces the extractor configuration.
     *
     * @param pds4MetExtractorConfig the new configuration
     */
    public void setMetExtractorConfig(Pds4MetExtractorConfig pds4MetExtractorConfig) {
        this.metExtractorConfig = pds4MetExtractorConfig;
    }

    /**
     * Enables or disables persistence mode.
     *
     * @param z {@code true} to skip files whose modification time has not
     *        changed since they were last seen
     */
    public void setInPersistanceMode(boolean z) {
        this.inPersistanceMode = z;
    }

    /**
     * Rebuilds the file filter from the given policy filter.
     *
     * <p>The resulting filter always requires a regular file. A non-empty
     * include list takes precedence; the exclude list is only consulted when
     * the include list is empty. A {@code null} argument yields a filter that
     * accepts every regular file.</p>
     *
     * @param fileFilter include/exclude patterns, or {@code null}
     */
    public void setFileFilter(FileFilter fileFilter) {
        AndFileFilter combined = new AndFileFilter();
        combined.addFileFilter(FileFilterUtils.fileFileFilter());
        if (fileFilter != null) {
            if (!fileFilter.getInclude().isEmpty()) {
                combined.addFileFilter(new WildcardOSFilter(fileFilter.getInclude()));
            } else if (!fileFilter.getExclude().isEmpty()) {
                combined.addFileFilter(new NotFileFilter(new WildcardOSFilter(fileFilter.getExclude())));
            }
        }
        this.FILE_FILTER = combined;
    }

    /**
     * Installs a directory filter that rejects directories matching the
     * exclude patterns.
     *
     * <p>The current directory filter is left untouched when the argument is
     * {@code null} or its exclude list is empty.</p>
     *
     * @param directoryFilter exclude patterns for directories, or {@code null}
     */
    public void setDirectoryFilter(DirectoryFilter directoryFilter) {
        // A null filter is treated like an empty one, matching setFileFilter's null tolerance.
        if (directoryFilter == null || directoryFilter.getExclude().isEmpty()) {
            return;
        }
        AndFileFilter combined = new AndFileFilter();
        combined.addFileFilter(FileFilterUtils.directoryFileFilter());
        combined.addFileFilter(new NotFileFilter(new WildcardOSFilter(directoryFilter.getExclude())));
        this.DIR_FILTER = combined;
    }

    /**
     * Does nothing; this crawler adds no extra metadata to a product.
     *
     * @param file the product file
     * @param metadata the metadata gathered for the product
     */
    protected void addKnownMetadata(File file, Metadata metadata) {
    }

    /**
     * Crawls the given location.
     *
     * <p>A new action repository is built from the registered actions on every
     * call, so actions added after a previous crawl are picked up. An
     * {@link IllegalArgumentException} raised by the underlying crawl is logged
     * at SEVERE level and not propagated.</p>
     *
     * @param file the file or directory to crawl
     */
    public void crawl(File file) {
        CrawlerActionRepo actionRepo = new CrawlerActionRepo();
        actionRepo.loadActions(this.crawlerActions);
        setActionRepo(actionRepo);
        try {
            super.crawl(file);
        } catch (IllegalArgumentException e) {
            log.log(new ToolsLogRecord(ToolsLevel.SEVERE, e.getMessage()));
        }
    }

    /**
     * Registers one additional crawler action.
     *
     * @param crawlerAction the action to append
     */
    public void addAction(CrawlerAction crawlerAction) {
        this.crawlerActions.add(crawlerAction);
    }

    /**
     * Registers several additional crawler actions.
     *
     * @param list the actions to append, in order
     */
    public void addActions(List<CrawlerAction> list) {
        this.crawlerActions.addAll(list);
    }

    /**
     * Returns the registered crawler actions.
     *
     * @return the live internal list (not a copy)
     */
    public List<CrawlerAction> getActions() {
        return this.crawlerActions;
    }

    /**
     * Extracts metadata from a product file.
     *
     * <p>The extractor is chosen from the product class stored by the last
     * {@link #passesPreconditions(File)} call: a bundle extractor, a collection
     * extractor, or the general PDS4 extractor otherwise.</p>
     *
     * @param file the product file
     * @return the extracted metadata, or an empty {@link Metadata} when
     *         extraction fails (the failure is logged at SEVERE level)
     */
    protected Metadata getMetadataForProduct(File file) {
        try {
            if (this.objectType.equalsIgnoreCase(Constants.BUNDLE)) {
                return new BundleMetExtractor(this.metExtractorConfig).extractMetadata(file);
            }
            if (this.objectType.equalsIgnoreCase(Constants.COLLECTION)) {
                return new CollectionMetExtractor(this.metExtractorConfig).extractMetadata(file);
            }
            return new Pds4MetExtractor(this.metExtractorConfig).extractMetadata(file);
        } catch (MetExtractionException e) {
            log.log(new ToolsLogRecord(ToolsLevel.SEVERE, "Error while gathering metadata: " + e.getMessage(), file));
            return new Metadata();
        }
    }

    /**
     * Decides whether a file should be processed as a product.
     *
     * <p>A file is rejected when any of the following holds:</p>
     * <ul>
     *   <li>persistence mode is on and the file's modification time matches
     *       the one recorded earlier;</li>
     *   <li>the file is contained in {@code Constants.collections};</li>
     *   <li>the file cannot be parsed (counted as a bad file);</li>
     *   <li>its identifier is listed in {@code Constants.nonPrimaryMembers}
     *       without a version, or with the same version (counted as skipped);</li>
     *   <li>it has no product class, or one the configuration does not list
     *       (counted as skipped).</li>
     * </ul>
     *
     * <p>Side effects: resets and then sets {@code objectType}, updates
     * {@code touchedFiles} in persistence mode, and increments the
     * {@link HarvestSolrStats} counters.</p>
     *
     * @param file the candidate file
     * @return {@code true} if the file should be processed
     */
    protected boolean passesPreconditions(File file) {
        if (this.inPersistanceMode && isUnchangedSinceLastSeen(file)) {
            return false;
        }
        if (Constants.collections.contains(file)) {
            return false;
        }
        log.log(new ToolsLogRecord(ToolsLevel.DEBUG, "Begin processing.", file));
        this.objectType = "";

        XMLExtractor extractor = new XMLExtractor();
        try {
            extractor.parse(file);
        } catch (XPathException e) {
            if (e.getException() instanceof SAXParseException) {
                // Report the parser's own message together with the offending line number.
                SAXParseException parseError = (SAXParseException) e.getException();
                log.log(new ToolsLogRecord(ToolsLevel.SEVERE, parseError.getMessage(), file.toString(), parseError.getLineNumber()));
            } else {
                log.log(new ToolsLogRecord(ToolsLevel.SEVERE, "Parse failure: " + e.getMessage(), file));
            }
            HarvestSolrStats.numBadFiles++;
            return false;
        }

        try {
            String lid = extractor.getValueFromDoc(Constants.coreXpathsMap.get("logical_identifier"));
            String version = extractor.getValueFromDoc(Constants.coreXpathsMap.get("version_id"));
            int memberIndex = Constants.nonPrimaryMembers.indexOf(new LidVid(lid));
            if (memberIndex != -1) {
                LidVid member = Constants.nonPrimaryMembers.get(memberIndex);
                // A version-less entry matches every version of the identifier.
                if (!member.hasVersion() || member.getVersion().equals(version)) {
                    log.log(new ToolsLogRecord(ToolsLevel.SKIP, "Not a primary member.", file));
                    HarvestSolrStats.numFilesSkipped++;
                    return false;
                }
            }
        } catch (Exception e) {
            log.log(new ToolsLogRecord(ToolsLevel.SEVERE, "Problem extracting LIDVID: " + e.getMessage(), file));
            HarvestSolrStats.numBadFiles++;
            return false;
        }

        try {
            this.objectType = extractor.getValueFromDoc(Constants.coreXpathsMap.get("product_class"));
            if ("".equals(this.objectType)) {
                log.log(new ToolsLogRecord(ToolsLevel.SKIP, "No product_class element found.", file));
                HarvestSolrStats.numFilesSkipped++;
                return false;
            }
            // NOTE: with a null configuration this dereference throws and is
            // reported by the catch block below as a 'product_class' problem.
            if (this.metExtractorConfig.hasObjectType(this.objectType)) {
                HarvestSolrStats.numGoodFiles++;
                return true;
            }
            log.log(new ToolsLogRecord(ToolsLevel.SKIP, "'" + this.objectType + "' is not an object type found in the policy file.", file));
            HarvestSolrStats.numFilesSkipped++;
            return false;
        } catch (Exception e) {
            log.log(new ToolsLogRecord(ToolsLevel.SEVERE, "Problem getting 'product_class': " + e.getMessage(), file));
            HarvestSolrStats.numBadFiles++;
            return false;
        }
    }

    /**
     * Checks the file against its recorded modification time and records the
     * current one when it differs or the file has not been seen before.
     *
     * @param file the candidate file
     * @return {@code true} if the file was seen before with the same
     *         modification time
     */
    private boolean isUnchangedSinceLastSeen(File file) {
        long lastModified = file.lastModified();
        Long recorded = this.touchedFiles.get(file);
        if (recorded != null && recorded.longValue() == lastModified) {
            return true;
        }
        this.touchedFiles.put(file, Long.valueOf(lastModified));
        return false;
    }

    /**
     * Sets the search service URL by forwarding it to {@code setFilemgrUrl}.
     *
     * @param str the URL string
     * @throws MalformedURLException if {@code setFilemgrUrl} rejects the URL
     */
    public void setSearchUrl(String str) throws MalformedURLException {
        setFilemgrUrl(str);
    }

    /**
     * Stores the given search document state on this crawler.
     *
     * @param searchDocState the state object to keep
     */
    public void setCounter(SearchDocState searchDocState) {
        this.searchDocState = searchDocState;
    }
}
