package gov.nasa.pds.harvest.crawler;

import gov.nasa.pds.harvest.cfg.model.Configuration;
import gov.nasa.pds.harvest.crawler.ProductCrawler;
import gov.nasa.pds.harvest.util.CounterMap;
import gov.nasa.pds.harvest.util.DocWriter;
import gov.nasa.pds.harvest.util.ExceptionUtils;
import gov.nasa.pds.harvest.util.xml.XmlStreamUtils;
import java.io.File;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.apache.tika.Tika;
import org.apache.tika.mime.MimeTypes;

/* loaded from: input_file:BOOT-INF/classes/gov/nasa/pds/harvest/crawler/FileProcessor.class */
/**
 * Crawler callback that handles each file handed over by a {@link ProductCrawler}.
 *
 * <p>For every file the MIME type is detected with Tika. Files detected as
 * {@code MimeTypes.XML} that do not exceed {@link #MAX_XML_FILE_LENGTH} bytes and
 * that pass the configured root-element filters are passed to the
 * {@link MetadataProcessor}. Everything else is logged and counted as skipped.
 */
public class FileProcessor implements ProductCrawler.Callback {
    /** XML files whose length exceeds this many bytes are skipped instead of parsed. */
    private static final long MAX_XML_FILE_LENGTH = 10_000_000L;

    private final Configuration cfg;
    private final MetadataProcessor metaProcessor;
    /** Set to {@code true} once {@link #onFile(File)} has caught an exception. */
    private boolean stoppedOnError = false;
    private final Logger LOG = LogManager.getLogger(getClass());
    private final XmlStreamUtils xmlUtils = new XmlStreamUtils();
    private final Counter counter = new Counter();
    private final Tika tika = new Tika();

    /**
     * Creates a file processor.
     *
     * @param configuration harvest configuration; its {@code directories} filters are
     *                      consulted for every XML file
     * @param docWriter     writer passed on to the {@link MetadataProcessor}
     * @throws Exception if the {@link MetadataProcessor} cannot be created
     */
    public FileProcessor(Configuration configuration, DocWriter docWriter) throws Exception {
        this.cfg = configuration;
        this.metaProcessor = new MetadataProcessor(docWriter, configuration);
    }

    /**
     * @return {@code true} if a previous {@link #onFile(File)} call failed with an exception
     */
    public boolean stoppedOnError() {
        return this.stoppedOnError;
    }

    /**
     * @return the current value of the skipped-file counter
     */
    public int getSkippedFileCount() {
        return this.counter.skippedFileCount;
    }

    /**
     * @return the {@code prodCounters} map held by this processor's counter
     */
    public CounterMap getProdTypeCounter() {
        return this.counter.prodCounters;
    }

    /**
     * Processes one file reported by the crawler.
     *
     * <p>Any exception is logged (message only), the file is counted as skipped, the
     * "stopped on error" flag is raised and {@code false} is returned.
     *
     * @param file file to process
     * @return {@code true} if processing completed without an exception, {@code false} otherwise
     */
    @Override
    public boolean onFile(File file) {
        try {
            processFile(file);
            return true;
        } catch (Exception e) {
            this.LOG.error(ExceptionUtils.getMessage(e));
            this.counter.skippedFileCount++;
            this.stoppedOnError = true;
            return false;
        }
    }

    /**
     * Closes the underlying {@link MetadataProcessor}.
     *
     * @throws Exception if closing the metadata processor fails
     */
    public void close() throws Exception {
        this.metaProcessor.close();
    }

    /**
     * Dispatches a file by its detected MIME type. Only XML is handled; any other
     * type is logged as unsupported and counted as skipped.
     */
    private void processFile(File file) throws Exception {
        String mimeType = this.tika.detect(file);
        if (!MimeTypes.XML.equals(mimeType)) {
            this.LOG.warn("Unsupported MIME type: " + mimeType + " (" + file.toURI().getPath() + ")");
            this.counter.skippedFileCount++;
            return;
        }
        processXmlFile(file);
    }

    /**
     * Hands an XML file to the metadata processor unless it is too big or is
     * rejected by the root-element filters.
     */
    private void processXmlFile(File file) throws Exception {
        if (file.length() > MAX_XML_FILE_LENGTH) {
            this.LOG.warn("File is too big to parse: " + file.toURI().getPath());
            this.counter.skippedFileCount++;
            return;
        }
        if (includeXmlFile(file)) {
            this.metaProcessor.process(file, this.counter);
        }
    }

    /**
     * Applies the configured root-element filters to an XML file.
     *
     * <p>When an include filter is configured it alone decides (the exclude filter is
     * not consulted). Otherwise the exclude filter is applied if present. With no
     * filter configured every file is accepted and the file is not read here.
     *
     * @return {@code true} if the file should be processed
     */
    private boolean includeXmlFile(File file) throws Exception {
        boolean hasIncludes = this.cfg.directories.prodFilterIncludes != null;
        boolean hasExcludes = this.cfg.directories.prodFilterExcludes != null;
        if (!hasIncludes && !hasExcludes) {
            // No filter configured: accept without touching the file.
            return true;
        }

        String rootElement = readRootElement(file);
        if (rootElement == null) {
            // Already logged and counted as skipped by readRootElement.
            return false;
        }

        // The include filter takes precedence over the exclude filter.
        if (hasIncludes) {
            return this.cfg.directories.prodFilterIncludes.contains(rootElement);
        }
        return !this.cfg.directories.prodFilterExcludes.contains(rootElement);
    }

    /**
     * Reads the root element name of an XML file. If none is returned, the file is
     * logged as invalid and counted as skipped.
     *
     * @return the root element name, or {@code null} if it could not be determined
     */
    private String readRootElement(File file) throws Exception {
        String rootElement = this.xmlUtils.getRootElement(file);
        if (rootElement == null) {
            this.LOG.warn("Invalid XML file: " + file.getAbsolutePath());
            this.counter.skippedFileCount++;
        }
        return rootElement;
    }
}
