package gov.nasa.pds.harvest.crawler;

import gov.nasa.pds.harvest.cfg.model.Configuration;
import gov.nasa.pds.harvest.dao.RegistryManager;
import gov.nasa.pds.harvest.meta.XPathExtractor;
import gov.nasa.pds.harvest.util.PackageIdGenerator;
import gov.nasa.pds.registry.common.es.service.MissingFieldsProcessor;
import gov.nasa.pds.registry.common.meta.AutogenExtractor;
import gov.nasa.pds.registry.common.meta.BasicMetadataExtractor;
import gov.nasa.pds.registry.common.meta.FileMetadataExtractor;
import gov.nasa.pds.registry.common.meta.InternalReferenceExtractor;
import gov.nasa.pds.registry.common.meta.Metadata;
import gov.nasa.pds.registry.common.meta.MetadataNormalizer;
import gov.nasa.pds.registry.common.meta.SearchMetadataExtractor;
import gov.nasa.pds.registry.common.util.xml.XmlNamespaces;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/* loaded from: input_file:BOOT-INF/classes/gov/nasa/pds/harvest/crawler/BaseProcessor.class */
/**
 * Shared state for harvest processors: the configuration, a set of metadata
 * extractors, an XML document builder factory and the job (package) id.
 *
 * <p>Most members are {@code protected}, so this class appears to be meant
 * as a base for concrete processors defined elsewhere.
 */
public class BaseProcessor {
    /**
     * Upper bound for XML file length. Not referenced inside this class;
     * presumably a byte count checked by subclasses (confirm against callers).
     */
    protected static final long MAX_XML_FILE_LENGTH = 10_000_000L;

    /** Configuration handed to the constructor. */
    protected Configuration config;

    // Metadata extractors, all instantiated by the constructor.
    protected BasicMetadataExtractor basicExtractor;
    protected AutogenExtractor autogenExtractor;
    protected FileMetadataExtractor fileDataExtractor;
    protected InternalReferenceExtractor refExtractor;
    protected SearchMetadataExtractor searchExtractor;
    protected XPathExtractor xpathExtractor;

    // Collaborators obtained from the RegistryManager; used only by save().
    private final MissingFieldsProcessor mfProc;
    private final MetadataNormalizer metaNormalizer;

    /** Package id obtained from {@link PackageIdGenerator} at construction time. */
    protected String jobId;

    /** Logger named after the runtime class, so subclasses log under their own name. */
    protected Logger log = LogManager.getLogger(getClass());

    /** XML parser factory; the constructor switches namespace awareness off. */
    protected DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance();

    /**
     * Creates the extractors, applies the optional {@code autogen} and
     * {@code fileInfo} configuration sections, and fetches the missing-fields
     * processor, metadata normalizer and job id from their singletons.
     *
     * @param configuration harvest configuration; its {@code autogen} and
     *                      {@code fileInfo} members may be {@code null}
     * @throws Exception if any collaborator fails to initialize
     */
    public BaseProcessor(Configuration configuration) throws Exception {
        config = configuration;
        dbf.setNamespaceAware(false);

        basicExtractor = new BasicMetadataExtractor();
        refExtractor = new InternalReferenceExtractor();
        searchExtractor = new SearchMetadataExtractor();
        xpathExtractor = new XPathExtractor();
        autogenExtractor = newAutogenExtractor(configuration);
        fileDataExtractor = newFileDataExtractor(configuration);

        RegistryManager registry = RegistryManager.getInstance();
        mfProc = registry.createMissingFieldsProcessor();
        metaNormalizer = registry.createMetadataNormalizer();

        jobId = PackageIdGenerator.getInstance().getPackageId();
    }

    /** Builds the autogen extractor, applying class filters when an autogen section is configured. */
    private static AutogenExtractor newAutogenExtractor(Configuration configuration) throws Exception {
        AutogenExtractor extractor = new AutogenExtractor();
        if (configuration.autogen == null) {
            return extractor;
        }
        extractor.setClassFilters(
                configuration.autogen.classFilterIncludes,
                configuration.autogen.classFilterExcludes);
        return extractor;
    }

    /** Builds the file metadata extractor, applying the fileInfo options when that section is configured. */
    private static FileMetadataExtractor newFileDataExtractor(Configuration configuration) throws Exception {
        FileMetadataExtractor extractor = new FileMetadataExtractor();
        if (configuration.fileInfo == null) {
            return extractor;
        }
        extractor.setProcessDataFiles(configuration.fileInfo.processDataFiles);
        extractor.setStoreLabels(
                configuration.fileInfo.storeLabels,
                configuration.fileInfo.storeJsonLabels);
        return extractor;
    }

    /**
     * Runs the metadata fields through the missing-fields processor and the
     * value normalizer, then hands the metadata to the registry writer.
     *
     * <p>Declared {@code public} here; a decompiler note on the original
     * indicated the access modifier was changed from {@code protected}.
     *
     * @param metadata      metadata whose {@code fields} are processed and written
     * @param xmlNamespaces namespaces passed to the missing-fields processor
     * @throws Exception if processing, normalization or writing fails
     */
    public void save(Metadata metadata, XmlNamespaces xmlNamespaces) throws Exception {
        mfProc.processDoc(metadata.fields, xmlNamespaces);
        metaNormalizer.normalizeValues(metadata.fields);

        RegistryManager registry = RegistryManager.getInstance();
        registry.getRegistryWriter().write(metadata);
    }
}
