package gov.nasa.pds.harvest.proc;

import gov.nasa.pds.harvest.cfg.HarvestCfg;
import gov.nasa.pds.harvest.dao.RegistryManager;
import gov.nasa.pds.harvest.job.Job;
import gov.nasa.pds.registry.common.es.service.MissingFieldsProcessor;
import gov.nasa.pds.registry.common.meta.AutogenExtractor;
import gov.nasa.pds.registry.common.meta.BasicMetadataExtractor;
import gov.nasa.pds.registry.common.meta.BundleMetadataExtractor;
import gov.nasa.pds.registry.common.meta.FileMetadataExtractor;
import gov.nasa.pds.registry.common.meta.InternalReferenceExtractor;
import gov.nasa.pds.registry.common.meta.Metadata;
import gov.nasa.pds.registry.common.meta.MetadataNormalizer;
import gov.nasa.pds.registry.common.meta.SearchMetadataExtractor;
import gov.nasa.pds.registry.common.util.doc.RegistryDocWriter;
import gov.nasa.pds.registry.common.util.xml.XmlDomUtils;
import gov.nasa.pds.registry.common.util.xml.XmlNamespaces;
import java.io.File;
import java.util.Iterator;
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.w3c.dom.Document;

/* loaded from: input_file:BOOT-INF/classes/gov/nasa/pds/harvest/proc/ProductProcessor.class */
/**
 * Parses a product XML file, runs the metadata extractors over the parsed
 * document and hands the resulting {@link Metadata} to a {@link RegistryDocWriter}.
 *
 * <p>Files longer than {@code MAX_XML_FILE_LENGTH} bytes are not parsed; a warning
 * is logged and the file is skipped.
 */
public class ProductProcessor {
    /** Largest file length, in bytes, that will be parsed. */
    private static final long MAX_XML_FILE_LENGTH = 10_000_000L;

    /** Root element name for which bundle member references are also collected. */
    private static final String BUNDLE_ROOT_ELEMENT = "Product_Bundle";

    /**
     * Parser features that are switched off so that parsing a file never pulls in
     * external entities or external DTDs (XXE hardening).
     */
    private static final String[] DISABLED_PARSER_FEATURES = {
        "http://xml.org/sax/features/external-general-entities",
        "http://xml.org/sax/features/external-parameter-entities",
        "http://apache.org/xml/features/nonvalidating/load-external-dtd"
    };

    private final Logger log;
    private final DocumentBuilderFactory dbf;
    private final BundleMetadataExtractor bundleExtractor;
    private final BasicMetadataExtractor basicExtractor;
    private final InternalReferenceExtractor refExtractor;
    private final AutogenExtractor autogenExtractor;
    private final SearchMetadataExtractor searchExtractor;
    private final FileMetadataExtractor fileDataExtractor;
    private final MissingFieldsProcessor mfProc;
    private final MetadataNormalizer metaNormalizer;
    private final RegistryDocWriter writer;

    /**
     * Creates a processor and all extractors it uses.
     *
     * @param harvestCfg configuration; its {@code processDataFiles}, {@code storeLabels}
     *        and {@code storeJsonLabels} values are passed to the file metadata extractor
     * @param registryDocWriter destination for the extracted metadata
     * @throws IllegalArgumentException if either argument is {@code null}
     * @throws Exception if one of the collaborators cannot be created
     */
    public ProductProcessor(HarvestCfg harvestCfg, RegistryDocWriter registryDocWriter) throws Exception {
        if (harvestCfg == null) {
            throw new IllegalArgumentException("Configuration is null");
        }
        if (registryDocWriter == null) {
            throw new IllegalArgumentException("Writer is null");
        }
        this.writer = registryDocWriter;
        this.log = LogManager.getLogger(getClass());

        this.dbf = DocumentBuilderFactory.newInstance();
        this.dbf.setNamespaceAware(false);
        // The parsed files come from disk and are not under our control, so do not
        // let them make the parser fetch external entities or DTDs.
        disableExternalEntities(this.dbf);

        this.basicExtractor = new BasicMetadataExtractor();
        this.refExtractor = new InternalReferenceExtractor();
        this.autogenExtractor = new AutogenExtractor();
        this.searchExtractor = new SearchMetadataExtractor();

        this.fileDataExtractor = new FileMetadataExtractor();
        this.fileDataExtractor.setProcessDataFiles(harvestCfg.processDataFiles);
        this.fileDataExtractor.setStoreLabels(harvestCfg.storeLabels, harvestCfg.storeJsonLabels);

        this.bundleExtractor = new BundleMetadataExtractor();

        RegistryManager registryManager = RegistryManager.getInstance();
        this.mfProc = registryManager.createMissingFieldsProcessor();
        this.metaNormalizer = registryManager.createMetadataNormalizer();
    }

    /**
     * Parses one file and writes its metadata. A file longer than
     * {@code MAX_XML_FILE_LENGTH} bytes is skipped with a warning.
     *
     * @param file XML file to process
     * @param job job supplying the node name, file reference rules and job id
     * @throws IllegalArgumentException if {@code file} or {@code job} is {@code null}
     * @throws Exception if parsing, extraction or writing fails
     */
    public void processFile(File file, Job job) throws Exception {
        // Same null policy as the constructor; also avoids parsing a file only to
        // fail afterwards on a missing job.
        if (file == null) {
            throw new IllegalArgumentException("File is null");
        }
        if (job == null) {
            throw new IllegalArgumentException("Job is null");
        }

        if (file.length() > MAX_XML_FILE_LENGTH) {
            this.log.warn("File is too big to parse: {}", file.getAbsolutePath());
            return;
        }

        Document document = XmlDomUtils.readXml(this.dbf, file);
        processMetadata(file, document, job);
    }

    /**
     * Runs every extractor over an already parsed document, then the missing-fields
     * processor and the normalizer, and finally writes the metadata.
     *
     * @param file file the document was read from
     * @param document parsed XML document
     * @param job job supplying the node name, file reference rules and job id
     * @throws Exception if any extraction step or the write fails
     */
    private void processMetadata(File file, Document document, Job job) throws Exception {
        Metadata meta = this.basicExtractor.extract(file, document);
        meta.setNodeName(job.nodeName);

        this.log.info("Processing {}", file.getAbsolutePath());

        String rootElement = document.getDocumentElement().getNodeName();
        if (BUNDLE_ROOT_ELEMENT.equals(rootElement)) {
            addCollectionRefs(meta, document);
        }

        this.refExtractor.addRefs(meta.intRefs, document);
        XmlNamespaces namespaces = this.autogenExtractor.extract(file, meta.fields);
        this.searchExtractor.extract(document, meta.fields);
        this.fileDataExtractor.extract(file, meta, job.fileRefRules);

        this.mfProc.processDoc(meta.fields, namespaces);
        this.metaNormalizer.normalizeValues(meta.fields);

        this.writer.write(meta, job.jobId);
    }

    /**
     * Adds a reference to {@code metadata.intRefs} for every bundle member entry
     * found in the document.
     *
     * @param metadata metadata whose internal references are extended
     * @param document parsed bundle document
     * @throws Exception if the bundle member entries cannot be extracted or added
     */
    private void addCollectionRefs(Metadata metadata, Document document) throws Exception {
        for (BundleMetadataExtractor.BundleMemberEntry entry
                : this.bundleExtractor.extractBundleMemberEntries(document)) {
            this.bundleExtractor.addRefs(metadata.intRefs, entry);
        }
    }

    /**
     * Enables secure processing on the factory and disables resolution of external
     * entities and external DTDs.
     *
     * @param factory factory to configure
     */
    private void disableExternalEntities(DocumentBuilderFactory factory) {
        setFeatureIfSupported(factory, XMLConstants.FEATURE_SECURE_PROCESSING, true);
        for (String feature : DISABLED_PARSER_FEATURES) {
            setFeatureIfSupported(factory, feature, false);
        }
    }

    /**
     * Sets one parser feature. An implementation that does not know the feature is
     * reported with a warning instead of failing construction, so the processor
     * keeps working with every parser it worked with before.
     *
     * @param factory factory to configure
     * @param feature feature URI
     * @param value desired feature state
     */
    private void setFeatureIfSupported(DocumentBuilderFactory factory, String feature, boolean value) {
        try {
            factory.setFeature(feature, value);
        } catch (ParserConfigurationException ex) {
            this.log.warn("XML parser does not support feature {}; continuing without it", feature, ex);
        }
    }
}
