package gov.nasa.pds.harvest.crawler;

import gov.nasa.pds.harvest.cfg.model.BundleCfg;
import gov.nasa.pds.harvest.cfg.model.Configuration;
import gov.nasa.pds.harvest.dao.RegistryManager;
import gov.nasa.pds.harvest.util.out.WriterManager;
import gov.nasa.pds.harvest.util.xml.XmlIs;
import gov.nasa.pds.registry.common.meta.Metadata;
import gov.nasa.pds.registry.common.util.CloseUtils;
import gov.nasa.pds.registry.common.util.xml.XmlDomUtils;
import gov.nasa.pds.registry.common.util.xml.XmlNamespaces;
import java.io.File;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Iterator;
import java.util.Set;
import java.util.function.BiPredicate;
import java.util.stream.Stream;
import org.w3c.dom.Document;

/* loaded from: input_file:BOOT-INF/classes/gov/nasa/pds/harvest/crawler/ProductProcessor.class */
/**
 * Walks a bundle directory, selects label files and, for every product that
 * passes the configured product-class filters, extracts its metadata and
 * hands it to {@code save(...)} inherited from {@link BaseProcessor}.
 */
public class ProductProcessor extends BaseProcessor {

    /** Maximum directory depth visited by {@link #process(BundleCfg)}. */
    private static final int MAX_WALK_DEPTH = 20;

    /** Files longer than this many bytes are skipped instead of being parsed. */
    private static final long MAX_XML_FILE_LENGTH = 10000000L;

    /**
     * Predicate used by the file walk in {@link #process(BundleCfg)}.
     *
     * <p>A path is accepted when {@code XmlIs.aLabel} accepts its string form
     * and, if product directories are configured, the URI of its parent
     * directory contains one of those directory names somewhere after the
     * bundle-directory prefix.
     */
    public static class FileMatcher implements BiPredicate<Path, BasicFileAttributes> {
        /** Directory name fragments to include; {@code null} means "accept every label". */
        private final Set<String> includeDirs;

        /** Length of the bundle directory URI; matching starts at this offset. */
        private final int startIndex;

        /**
         * @param bundleCfg bundle configuration supplying the root directory
         *                  ({@code dir}) and the optional include list
         *                  ({@code productDirs})
         */
        public FileMatcher(BundleCfg bundleCfg) {
            this.includeDirs = bundleCfg.productDirs;
            this.startIndex = new File(bundleCfg.dir).toPath().toUri().toString().length();
        }

        /**
         * Decides whether a visited path should be processed.
         *
         * @param path                visited path
         * @param basicFileAttributes attributes of the path (not used)
         * @return {@code true} if the path is a label located in an included directory
         */
        @Override // java.util.function.BiPredicate
        public boolean test(Path path, BasicFileAttributes basicFileAttributes) {
            if (!XmlIs.aLabel(path.toString())) {
                return false;
            }
            if (this.includeDirs == null) {
                return true;
            }

            // Path.getParent() returns null for a path without a parent component;
            // such a path cannot be inside any include directory.
            Path parent = path.getParent();
            if (parent == null) {
                return false;
            }

            // NOTE(review): toLowerCase() uses the default locale. The include
            // directories are presumably lower-cased the same way where the
            // configuration is read - confirm before switching to a fixed locale.
            String parentUri = parent.toUri().toString().toLowerCase();
            for (String dir : this.includeDirs) {
                if (parentUri.indexOf(dir, this.startIndex) >= 0) {
                    return true;
                }
            }
            return false;
        }
    }

    /**
     * @param configuration harvest configuration passed to {@link BaseProcessor}
     * @throws Exception propagated from the {@link BaseProcessor} constructor
     */
    public ProductProcessor(Configuration configuration) throws Exception {
        super(configuration);
    }

    /**
     * Walks the bundle directory (following links, at most
     * {@link #MAX_WALK_DEPTH} levels deep) and calls {@link #onFile(File)} for
     * every path accepted by {@link FileMatcher}.
     *
     * @param bundleCfg bundle configuration describing the directory to walk
     * @throws Exception if the walk itself or {@link #onFile(File)} fails
     */
    public void process(BundleCfg bundleCfg) throws Exception {
        this.log.info("Processing products...");
        Stream<Path> stream = null;
        try {
            stream = Files.find(new File(bundleCfg.dir).toPath(), MAX_WALK_DEPTH,
                    new FileMatcher(bundleCfg), FileVisitOption.FOLLOW_LINKS);
            Iterator<Path> it = stream.iterator();
            while (it.hasNext()) {
                onFile(it.next().toFile());
            }
        } finally {
            // Close exactly once, on both the normal and the exceptional path.
            CloseUtils.close(stream);
        }
    }

    /**
     * Parses one label file and processes its metadata unless the file is too
     * large or its root element is filtered out.
     *
     * <p>Failures while reading or processing the file are logged and counted
     * in {@code failedFileCount}; they are not rethrown.
     *
     * @param file label file to process
     * @throws Exception declared for callers; failures raised inside the
     *                   parsing/processing section are caught and counted
     */
    public void onFile(File file) throws Exception {
        Counter counter = RegistryManager.getInstance().getCounter();
        try {
            if (file.length() > MAX_XML_FILE_LENGTH) {
                this.log.warn("File is too big to parse: " + file.getAbsolutePath());
                counter.skippedFileCount++;
                return;
            }

            Document doc = XmlDomUtils.readXml(this.dbf, file);
            String rootElement = doc.getDocumentElement().getNodeName();
            if (!isAcceptedProductClass(rootElement)) {
                return;
            }

            try {
                processMetadata(file, doc);
            } catch (Exception e) {
                this.log.error(e.getMessage());
                counter.failedFileCount++;
            }
        } catch (Exception e2) {
            this.log.warn(e2.getMessage());
            counter.failedFileCount++;
        }
    }

    /**
     * Applies the product-class filters to a label's root element name.
     *
     * <p>Bundles and collections are always rejected here. Otherwise the
     * include list, when present, takes precedence over the exclude list.
     *
     * @param rootElement name of the label's root element
     * @return {@code true} if the product should be processed
     */
    private boolean isAcceptedProductClass(String rootElement) {
        if ("Product_Bundle".equals(rootElement) || "Product_Collection".equals(rootElement)) {
            return false;
        }
        if (this.config.filters.prodClassInclude != null) {
            return this.config.filters.prodClassInclude.contains(rootElement);
        }
        return this.config.filters.prodClassExclude == null
                || !this.config.filters.prodClassExclude.contains(rootElement);
    }

    /**
     * Extracts metadata from a parsed label and saves it.
     *
     * <p>The product is skipped (and counted in {@code skippedFileCount}) when
     * neither its LIDVID nor its LID is present in the product references
     * cache and overwrite mode is off. Labels whose root element is
     * {@code Product_Metadata_Supplemental} are additionally passed to the
     * supplemental writer after saving.
     *
     * @param file     label file the document was read from
     * @param document parsed label
     * @throws Exception if any extractor, the save step or the supplemental writer fails
     */
    private void processMetadata(File file, Document document) throws Exception {
        Counter counter = RegistryManager.getInstance().getCounter();
        Metadata meta = this.basicExtractor.extract(file, document);
        meta.setNodeName(this.config.nodeName);

        LidVidCache prodRefsCache = RefsCache.getInstance().getProdRefsCache();
        boolean notReferenced = !prodRefsCache.containsLidVid(meta.lidvid)
                && !prodRefsCache.containsLid(meta.lid);
        boolean overwrite = RegistryManager.getInstance().isOverwrite();
        if (notReferenced && !overwrite) {
            this.log.info("Skipping product " + file.getAbsolutePath() + " (LIDVID/LID is not in collection inventory or already exists in registry database)");
            counter.skippedFileCount++;
            return;
        }

        this.log.info("Processing product " + file.getAbsolutePath());
        this.refExtractor.addRefs(meta.intRefs, document);
        this.xpathExtractor.extract(document, meta.fields);
        XmlNamespaces namespaces = this.autogenExtractor.extract(file, meta.fields);
        this.searchExtractor.extract(document, meta.fields);
        this.fileDataExtractor.extract(file, meta, this.config.fileInfo.fileRef);
        save(meta, namespaces);

        if ("Product_Metadata_Supplemental".equals(document.getDocumentElement().getNodeName())) {
            WriterManager.getInstance().getSupplementalWriter().write(file);
        }
    }
}
