package gov.nasa.pds.harvest.crawler;

import gov.nasa.pds.harvest.cfg.model.Configuration;
import gov.nasa.pds.harvest.meta.AutogenExtractor;
import gov.nasa.pds.harvest.meta.BasicMetadataExtractor;
import gov.nasa.pds.harvest.meta.FileMetadataExtractor;
import gov.nasa.pds.harvest.meta.InternalReferenceExtractor;
import gov.nasa.pds.harvest.meta.Metadata;
import gov.nasa.pds.harvest.meta.XPathExtractor;
import gov.nasa.pds.harvest.util.DocWriter;
import gov.nasa.pds.harvest.util.xml.XmlDomUtils;
import java.io.File;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.w3c.dom.Document;

/* loaded from: input_file:BOOT-INF/classes/gov/nasa/pds/harvest/crawler/MetadataProcessor.class */
/**
 * Runs the metadata extraction steps for a single XML file and passes the
 * collected {@link Metadata} to a {@link DocWriter}.
 *
 * <p>The extractors are created once per processor instance and reused for
 * every call to {@link #process(File, Counter)}.
 */
public class MetadataProcessor {
    /** Shared logger; one instance per class rather than one per processor object. */
    private static final Logger LOG = LogManager.getLogger(MetadataProcessor.class);

    private final Configuration config;
    private final DocWriter writer;
    private final InternalReferenceExtractor refExtractor;
    private final AutogenExtractor autogenExtractor;
    private final FileMetadataExtractor fileDataExtractor;
    private final BasicMetadataExtractor basicExtractor = new BasicMetadataExtractor();
    private final XPathExtractor xpathExtractor = new XPathExtractor();

    /**
     * Creates a processor that writes extracted metadata to {@code docWriter}.
     *
     * @param docWriter     destination for the extracted metadata
     * @param configuration source of the {@code internalRefs} and {@code autogen}
     *                      settings; also passed as a whole to the file metadata extractor
     * @throws Exception if any of the extractors cannot be constructed
     */
    public MetadataProcessor(DocWriter docWriter, Configuration configuration) throws Exception {
        this.writer = docWriter;
        this.config = configuration;
        this.refExtractor = new InternalReferenceExtractor(configuration.internalRefs);
        this.autogenExtractor = new AutogenExtractor(configuration.autogen);
        this.fileDataExtractor = new FileMetadataExtractor(configuration);
    }

    /**
     * Parses {@code file} as XML, extracts its metadata, writes the result and
     * increments the product counter keyed by the document's root element name.
     *
     * <p>The counter is updated only after the write has completed, so a file
     * that fails at any earlier step is not counted.
     *
     * @param file    XML file to process
     * @param counter holder of the per-root-element product counters
     * @throws Exception if parsing, validation, extraction or writing fails
     */
    public void process(File file, Counter counter) throws Exception {
        // Parameterized form lets Log4j skip message formatting when INFO is disabled.
        LOG.info("Processing file {}", file.toURI().getPath());

        // NOTE(review): the factory is used with its default DTD / external-entity
        // settings. If input files can come from untrusted sources, consider
        // hardening it against XXE - confirm XmlDomUtils.readXml does not already do so.
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        factory.setNamespaceAware(false);
        Document doc = XmlDomUtils.readXml(factory, file);
        String rootElement = doc.getDocumentElement().getNodeName();

        Metadata meta = this.basicExtractor.extract(doc);
        // Reject files without lid / vid before running the remaining extractors.
        validate(meta, file);

        meta.intRefs = this.refExtractor.extract(doc);
        this.xpathExtractor.extract(doc, meta.fields);
        // Auto-generated fields are extracted only when an autogen section is configured.
        if (this.config.autogen != null) {
            this.autogenExtractor.extract(file, meta.fields);
        }
        this.fileDataExtractor.extract(file, meta);

        this.writer.write(meta);
        counter.prodCounters.inc(rootElement);
    }

    /**
     * Closes the underlying {@link DocWriter}.
     *
     * @throws Exception if the writer fails to close
     */
    public void close() throws Exception {
        this.writer.close();
    }

    /**
     * Ensures the extracted metadata carries both a logical identifier and a
     * version id.
     *
     * @param metadata metadata produced by the basic extractor
     * @param file     source file, used only for the error message
     * @throws Exception if {@code lid} or {@code vid} is {@code null} or empty
     */
    private void validate(Metadata metadata, File file) throws Exception {
        if (isNullOrEmpty(metadata.lid)) {
            throw new Exception("Missing logical identifier: " + file.toURI().getPath());
        }
        if (isNullOrEmpty(metadata.vid)) {
            throw new Exception("Missing version id: " + file.toURI().getPath());
        }
    }

    /** Returns {@code true} when {@code value} is {@code null} or has zero length. */
    private static boolean isNullOrEmpty(String value) {
        return value == null || value.isEmpty();
    }
}
