package gov.nasa.pds.harvest.crawler;

import gov.nasa.pds.harvest.cfg.model.BundleCfg;
import gov.nasa.pds.harvest.cfg.model.Configuration;
import gov.nasa.pds.harvest.dao.RegistryDao;
import gov.nasa.pds.harvest.dao.RegistryManager;
import gov.nasa.pds.registry.common.es.service.CollectionInventoryWriter;
import gov.nasa.pds.registry.common.meta.CollectionMetadataExtractor;
import gov.nasa.pds.registry.common.meta.Metadata;
import gov.nasa.pds.registry.common.util.xml.XmlDomUtils;
import gov.nasa.pds.registry.common.util.xml.XmlNamespaces;
import java.io.File;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.BasicFileAttributes;
import java.util.Iterator;
import java.util.Locale;
import java.util.Set;
import java.util.function.BiPredicate;
import java.util.stream.Stream;
import org.w3c.dom.Document;

/* loaded from: input_file:BOOT-INF/classes/gov/nasa/pds/harvest/crawler/CollectionProcessor.class */
/**
 * Finds collection label files under a bundle directory and processes each
 * {@code Product_Collection} label: extracts its metadata, saves it, and
 * handles the inventory files the label references.
 */
public class CollectionProcessor extends BaseProcessor {
    /** Maximum directory depth searched below the bundle directory. */
    private static final int MAX_SEARCH_DEPTH = 2;

    /** Labels larger than this many bytes are skipped rather than parsed. */
    private static final long MAX_LABEL_SIZE_BYTES = 10000000L;

    /** Root element name that identifies a collection label. */
    private static final String COLLECTION_ROOT_ELEMENT = "Product_Collection";

    private final CollectionInventoryProcessor invProc;
    private final CollectionInventoryWriter invWriter;
    private final CollectionMetadataExtractor collectionExtractor;

    /** Number of collections counted by the current / most recent {@link #process} call. */
    private int collectionCount;

    /**
     * File matcher accepting paths whose file name ends with {@code .xml} and
     * contains {@code collection}, compared case-insensitively.
     */
    public static class CollectionMatcher implements BiPredicate<Path, BasicFileAttributes> {
        private CollectionMatcher() {
        }

        @Override // java.util.function.BiPredicate
        public boolean test(Path path, BasicFileAttributes basicFileAttributes) {
            // getFileName() is null for a path with zero elements (e.g. a filesystem root).
            Path fileName = path.getFileName();
            if (fileName == null) {
                return false;
            }
            // Locale.ROOT keeps the match independent of the default locale
            // (e.g. Turkish lower-cases 'I' to a dotless 'i').
            String name = fileName.toString().toLowerCase(Locale.ROOT);
            return name.endsWith(".xml") && name.contains("collection");
        }
    }

    /**
     * Creates a processor using the given harvest configuration.
     *
     * @param configuration harvest configuration; its {@code registryCfg} and
     *                      {@code refsCfg.primaryOnly} values are read here
     * @throws Exception if the base processor or a helper cannot be created
     */
    public CollectionProcessor(Configuration configuration) throws Exception {
        super(configuration);
        this.invWriter = new CollectionInventoryWriter(configuration.registryCfg);
        this.invProc = new CollectionInventoryProcessor(configuration.refsCfg.primaryOnly);
        this.collectionExtractor = new CollectionMetadataExtractor();
    }

    /**
     * Processes every matching collection label found under the bundle directory.
     *
     * @param bundleCfg bundle configuration supplying the directory and the
     *                  optional LID / LIDVID filters
     * @return number of collections that passed all filters and were handled
     *         (including ones skipped because they were already registered)
     * @throws Exception if the directory walk or any label processing fails
     */
    public int process(BundleCfg bundleCfg) throws Exception {
        this.collectionCount = 0;
        Path bundleDir = new File(bundleCfg.dir).toPath();
        // Files.find holds open directory handles; the stream must be closed.
        try (Stream<Path> labels = Files.find(bundleDir, MAX_SEARCH_DEPTH, new CollectionMatcher())) {
            // Explicit iterator: onCollection throws checked exceptions, so a lambda won't do.
            Iterator<Path> it = labels.iterator();
            while (it.hasNext()) {
                onCollection(it.next().toFile(), bundleCfg);
            }
        }
        return this.collectionCount;
    }

    /**
     * Parses a candidate label and processes it if its root element is
     * {@code Product_Collection}. Files over the size limit are logged and skipped.
     */
    private void onCollection(File file, BundleCfg bundleCfg) throws Exception {
        if (file.length() > MAX_LABEL_SIZE_BYTES) {
            this.log.warn("File is too big to parse: " + file.getAbsolutePath());
            return;
        }
        Document doc = XmlDomUtils.readXml(this.dbf, file);
        if (COLLECTION_ROOT_ELEMENT.equals(doc.getDocumentElement().getNodeName())) {
            processMetadata(file, doc, bundleCfg);
        }
    }

    /**
     * Applies the bundle's collection filters and the collection refs cache,
     * then either skips an already registered collection or extracts and saves
     * its metadata. Inventory files are processed in both cases.
     */
    private void processMetadata(File file, Document document, BundleCfg bundleCfg) throws Exception {
        Metadata meta = this.basicExtractor.extract(file, document);
        meta.setNodeName(this.config.nodeName);

        // A null filter means "no restriction".
        if (bundleCfg.collectionLids != null && !bundleCfg.collectionLids.contains(meta.lid)) {
            return;
        }
        if (bundleCfg.collectionLidVids != null && !bundleCfg.collectionLidVids.contains(meta.lidvid)) {
            return;
        }

        // Only collections present in the collection refs cache (by LIDVID or LID) are handled.
        LidVidCache refsCache = RefsCache.getInstance().getCollectionRefsCache();
        if (!refsCache.containsLidVid(meta.lidvid) && !refsCache.containsLid(meta.lid)) {
            return;
        }

        this.log.info("Processing collection " + file.getAbsolutePath());
        this.collectionCount++;

        RegistryDao registryDao = RegistryManager.getInstance().getRegistryDao();
        Counter counter = RegistryManager.getInstance().getCounter();
        if (registryDao.idExists(meta.lidvid)) {
            this.log.warn("Collection " + meta.lidvid + " already registered. Skipping.");
            // Inventory files are still passed to the inventory processor, but not written.
            processInventoryFiles(file, document, meta, false);
            counter.skippedFileCount++;
            return;
        }

        this.refExtractor.addRefs(meta.intRefs, document);
        this.xpathExtractor.extract(document, meta.fields);
        XmlNamespaces namespaces = this.autogenExtractor.extract(file, meta.fields);
        this.searchExtractor.extract(document, meta.fields);
        this.fileDataExtractor.extract(file, meta, this.config.fileInfo.fileRef);
        save(meta, namespaces);
        processInventoryFiles(file, document, meta, true);
    }

    /**
     * Handles each inventory file named in the collection label. Names are
     * resolved against the label's parent directory.
     *
     * @param file           the collection label file
     * @param document       parsed label
     * @param metadata       metadata of the collection (its LIDVID is used when writing)
     * @param writeInventory when {@code true} the inventory is also written via the
     *                       inventory writer; in both cases the file is passed to
     *                       {@code cacheNonRegisteredInventory}
     */
    private void processInventoryFiles(File file, Document document, Metadata metadata, boolean writeInventory)
            throws Exception {
        Set<String> inventoryFileNames = this.collectionExtractor.extractInventoryFileNames(document);
        if (inventoryFileNames == null) {
            return;
        }
        File labelDir = file.getParentFile();
        for (String inventoryFileName : inventoryFileNames) {
            File inventoryFile = new File(labelDir, inventoryFileName);
            if (writeInventory) {
                this.invWriter.writeCollectionInventory(metadata.lidvid, inventoryFile, this.jobId);
            }
            this.invProc.cacheNonRegisteredInventory(inventoryFile);
        }
    }
}
