/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.reader.xml;

import de.julielab.jcore.types.casmultiplier.JCoReURI;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.nio.file.FileSystem;
import java.nio.file.FileSystems;
import java.nio.file.FileVisitOption;
import java.nio.file.Files;
import java.nio.file.LinkOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.Optional;
import java.util.stream.Stream;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.ducc.Workitem;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.fit.descriptor.ResourceMetaData;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Collection reader that hands out the <em>locations</em> of XML input files instead of their
 * contents.
 *
 * <p>On initialization the reader collects a queue of URIs, either from a single file
 * ({@value #PARAM_INPUT_FILE}) or from the direct children of a directory
 * ({@value #PARAM_INPUT_DIR}) whose names match {@value #PARAM_FILE_NAME_REGEX}. Files whose URI
 * ends in {@code .zip} are opened as archives and their matching entries are queued instead of
 * the archive itself. Each call to {@link #getNext(CAS)} removes one URI from the queue and
 * stores it in the CAS as a {@code JCoReURI} feature structure; when
 * {@value #PARAM_SEND_CAS_TO_LAST} is enabled a DUCC {@code Workitem} describing the progress is
 * added as well.
 */
@ResourceMetaData(name="JCoRe XML Multiplier Reader", description="Reads Medline/PubMed XML blobs as downloaded from the NCBI FTP. Each blob is one large XML file containing a PubmedArticleSet. This component is UIMA DUCC compatible and forwards the work item CAS to the CAS consumer in order to indicate the finishing of the current XML blob. It also sets the work item feature 'lastBlock' to true if there are not more work items and, thus, the processing comes to an end.")
public class XMLMultiplierReader
extends CollectionReader_ImplBase {
    public static final String PARAM_INPUT_DIR = "InputDirectory";
    public static final String PARAM_INPUT_FILE = "InputFile";
    public static final String PARAM_FILE_NAME_REGEX = "FileNameRegex";
    public static final String PARAM_SEARCH_IN_ZIP = "SearchInZipFiles";
    public static final String PARAM_SEND_CAS_TO_LAST = "SendCasToLast";

    /** File name suffix (including the dot) that marks a ZIP archive; compared case-insensitively. */
    private static final String ZIP_EXTENSION = ".zip";

    private static final Logger LOGGER = LoggerFactory.getLogger(XMLMultiplierReader.class);

    /** URIs that still have to be handed out by {@link #getNext(CAS)}. */
    private Deque<URI> inputUris;

    /** Number of URIs handed out so far; also used as the DUCC block index. */
    private int currentIndex = 0;

    @ConfigurationParameter(name=PARAM_INPUT_DIR, mandatory=false)
    private String directoryName;

    /** Value of the {@value #PARAM_INPUT_FILE} parameter; non-null means single-file mode. */
    @ConfigurationParameter(name=PARAM_INPUT_FILE, mandatory=false)
    private String isSingleFileProcessing;

    // The second default used to be ".*\\xml\\.gz": "\x" starts a hexadecimal escape in
    // java.util.regex, so matching against it threw a PatternSyntaxException.
    @ConfigurationParameter(name=PARAM_FILE_NAME_REGEX, description="If a directory is given, all inputUris with a name matching one of these regular expressions will be read, others will be discarded. Defaults to {'.*\\.xml', '.*\\.xml\\.gz'}.", defaultValue={".*\\.xml", ".*\\.xml\\.gz"})
    private String[] fileNameRegex = new String[]{".*\\.xml", ".*\\.xml\\.gz"};

    @ConfigurationParameter(name=PARAM_SEARCH_IN_ZIP, mandatory=false, description="If set to true, contents of ZIP files in the given input directory will also be searched for files matching the specified file name regular expression. Defaults to false.", defaultValue={"false"})
    private boolean searchZip;

    @ConfigurationParameter(name=PARAM_SEND_CAS_TO_LAST, mandatory=false, defaultValue={"false"}, description="UIMA DUCC relevant parameter when using a CAS multiplier. When set to true, the worker CAS from the collection reader is forwarded to the last component in the pipeline. This can be used to send information about the progress to the CAS consumer in order to have it perform batch operations. For this purpose, a feature structure of type WorkItem from the DUCC library is added to the worker CAS. This feature structure has information about the current progress.")
    private boolean sendCasToLast;

    /**
     * Logs the component configuration, reads {@value #PARAM_SEND_CAS_TO_LAST} and collects the
     * input URIs.
     *
     * @throws ResourceInitializationException if the input location is missing or unreadable
     */
    public void initialize() throws ResourceInitializationException {
        try {
            if (LOGGER.isInfoEnabled()) {
                LOGGER.info("Component configuration:");
                for (String name : this.getUimaContext().getConfigParameterNames()) {
                    LOGGER.info("{}: {}", name, this.getConfigParameterValue(name));
                }
            }
            this.sendCasToLast = Optional.ofNullable((Boolean) this.getConfigParameterValue(PARAM_SEND_CAS_TO_LAST)).orElse(false);
            this.getInputFiles();
        } catch (Throwable e) {
            // Log here as well so the failure is visible even if the caller drops the exception.
            LOGGER.error("Exception or error while initializing reader: ", e);
            throw e;
        }
    }

    /**
     * Removes the next URI from the queue and stores it in the given CAS as a {@code JCoReURI}.
     * If {@value #PARAM_SEND_CAS_TO_LAST} is enabled, a {@code Workitem} carrying the block index
     * and the last-block flag is added too.
     *
     * @param cas the CAS to fill
     * @throws CollectionException if the JCas cannot be obtained or the URI cannot be stored
     * @throws java.util.NoSuchElementException if no URI is left, i.e. {@link #hasNext()} is false
     */
    public void getNext(CAS cas) throws CollectionException {
        try {
            URI uri = this.inputUris.removeFirst();
            LOGGER.debug("Reading URI {}", uri);
            try {
                JCoReURI fileType = new JCoReURI(cas.getJCas());
                fileType.setUri(uri.toString());
                fileType.addToIndexes();
            } catch (Exception e) {
                LOGGER.error("Exception with URI: {}", uri, e);
                throw new CollectionException(e);
            }
            if (this.sendCasToLast) {
                Workitem workitem = new Workitem(cas.getJCas());
                workitem.setSendToLast(true);
                workitem.setBlockindex(this.currentIndex);
                // The URI was already removed above, so an empty queue means this is the last one.
                if (!this.hasNext()) {
                    workitem.setLastBlock(true);
                }
                workitem.addToIndexes();
            }
            ++this.currentIndex;
        } catch (CASException e) {
            LOGGER.error("Could not get the JCAS from the CAS: ", e);
            throw new CollectionException(e);
        } catch (Throwable e) {
            LOGGER.warn("Exception or error while filling CAS: ", e);
            throw e;
        }
    }

    /**
     * (Re)builds the queue of input URIs and resets the progress counter.
     *
     * <p>In single-file mode only that file is queued. Otherwise the direct children of the input
     * directory are listed (no recursion into sub-directories); children whose URI ends in
     * {@code .zip} are searched for matching entries, all others are queued as they are.
     * URIs are pushed onto the head of the deque, so they are handed out in reverse discovery
     * order.
     *
     * @throws ResourceInitializationException if the directory parameter is missing, the
     *         directory does not exist, is not a directory or cannot be listed, or an archive
     *         cannot be read
     */
    private void getInputFiles() throws ResourceInitializationException {
        this.inputUris = new ArrayDeque<URI>();
        this.currentIndex = 0;
        if (this.isSingleProcessing()) {
            this.getSingleFile();
            return;
        }
        this.directoryName = (String) this.getConfigParameterValue(PARAM_INPUT_DIR);
        if (this.directoryName == null) {
            throw new ResourceInitializationException("resource_data_not_valid", new Object[]{"null", PARAM_INPUT_DIR});
        }
        if (this.getConfigParameterValue(PARAM_FILE_NAME_REGEX) != null) {
            this.fileNameRegex = (String[]) this.getConfigParameterValue(PARAM_FILE_NAME_REGEX);
        }
        this.searchZip = Optional.ofNullable((Boolean) this.getConfigParameterValue(PARAM_SEARCH_IN_ZIP)).orElse(false);
        File inputDirectory = new File(this.directoryName.trim());
        if (!inputDirectory.exists()) {
            throw new ResourceInitializationException(new FileNotFoundException("The directory " + inputDirectory.getAbsolutePath() + " does not exist."));
        }
        if (!inputDirectory.isDirectory()) {
            throw new ResourceInitializationException(new IllegalArgumentException("The file " + inputDirectory.getAbsolutePath() + " is not a directory."));
        }
        File[] candidates = inputDirectory.listFiles((dir, name) -> this.matchesFileNameRegex(name));
        if (candidates == null) {
            // File.listFiles returns null (instead of throwing) when an I/O error occurs.
            throw new ResourceInitializationException(new IOException("The directory " + inputDirectory.getAbsolutePath() + " could not be listed."));
        }
        for (File candidate : candidates) {
            URI uri = candidate.toURI();
            // The check is done on the URI: directory URIs end in '/', so a directory is never
            // mistaken for an archive.
            if (XMLMultiplierReader.hasZipExtension(uri.toString())) {
                this.collectZipEntries(uri);
            } else {
                this.inputUris.push(uri);
            }
        }
        LOGGER.debug("Found {} input files.", this.inputUris.size());
    }

    /**
     * Queues the URIs of all regular files inside the given ZIP archive whose file name matches
     * one of the configured regular expressions.
     *
     * @param archiveUri {@code file:} URI of the archive
     * @throws ResourceInitializationException if the archive cannot be opened or traversed
     */
    private void collectZipEntries(URI archiveUri) throws ResourceInitializationException {
        LOGGER.debug("Searching ZIP archive {} for eligible documents", archiveUri);
        // The explicit ClassLoader cast selects the (Path, ClassLoader) overload; a bare null is
        // ambiguous on JDKs that also offer newFileSystem(Path, Map).
        try (FileSystem fs = FileSystems.newFileSystem(Paths.get(archiveUri), (ClassLoader) null)) {
            for (Path rootDir : fs.getRootDirectories()) {
                // Files.walk holds open directory handles until the stream is closed.
                try (Stream<Path> walk = Files.walk(rootDir)) {
                    walk.filter(Files::isRegularFile).forEach(entry -> {
                        LOGGER.trace("Current ZIP archive entry: {}", entry);
                        // Regex-only match: archives nested inside the archive are not searched,
                        // so they must not be queued as if they were XML documents.
                        if (this.matchesAnyRegex(entry.getFileName().toString())) {
                            this.inputUris.push(entry.toUri());
                        }
                    });
                }
            }
        } catch (IOException e) {
            LOGGER.error("Could not read from {}", archiveUri, e);
            throw new ResourceInitializationException(e);
        }
    }

    /**
     * Decides whether a directory entry is of interest: either its name matches one of the
     * configured regular expressions, or ZIP searching is enabled and the name ends in
     * {@code .zip}.
     *
     * @param name plain file name (no path)
     * @return {@code true} if the entry should be considered
     */
    private boolean matchesFileNameRegex(String name) {
        if (this.searchZip && XMLMultiplierReader.hasZipExtension(name)) {
            return true;
        }
        return this.matchesAnyRegex(name);
    }

    /**
     * @param name plain file name (no path)
     * @return {@code true} if the whole name matches at least one configured regular expression
     */
    private boolean matchesAnyRegex(String name) {
        for (String regex : this.fileNameRegex) {
            if (name.matches(regex)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Case-insensitive, locale-independent test for a trailing {@code .zip}.
     *
     * @param name file name or URI string
     * @return {@code true} if {@code name} ends in {@code .zip} in any letter case
     */
    private static boolean hasZipExtension(String name) {
        int suffixLength = ZIP_EXTENSION.length();
        // regionMatches returns false for a negative offset, i.e. for names shorter than the suffix.
        return name.regionMatches(true, name.length() - suffixLength, ZIP_EXTENSION, 0, suffixLength);
    }

    /**
     * Queues the single file given by {@value #PARAM_INPUT_FILE}.
     *
     * @throws ResourceInitializationException if the file does not exist or is a directory
     */
    private void getSingleFile() throws ResourceInitializationException {
        LOGGER.info("XML reader is used in SINGLE FILE mode.");
        String singleFile = (String) this.getConfigParameterValue(PARAM_INPUT_FILE);
        if (singleFile == null) {
            return;
        }
        File file = new File(singleFile.trim());
        if (!file.exists() || file.isDirectory()) {
            // Two arguments (offending value, parameter name), parallel to the missing-directory case.
            throw new ResourceInitializationException("resource_data_not_valid", new Object[]{file.getAbsolutePath() + " (file does not exist or is a directory)", PARAM_INPUT_FILE});
        }
        this.inputUris.push(file.toURI());
    }

    /**
     * @return {@code true} if {@value #PARAM_INPUT_FILE} is configured (now or on an earlier call)
     */
    private boolean isSingleProcessing() {
        Object value = this.getConfigParameterValue(PARAM_INPUT_FILE);
        if (value != null) {
            this.isSingleFileProcessing = (String) value;
        }
        return this.isSingleFileProcessing != null;
    }

    /**
     * @return {@code true} while there are URIs left to hand out
     */
    public boolean hasNext() {
        return !this.inputUris.isEmpty();
    }

    /**
     * Reports how many URIs have been handed out relative to the total number collected.
     *
     * @return a single-element array with the progress in entities
     */
    public Progress[] getProgress() {
        // The queue shrinks with every getNext call, so the total is "done + remaining".
        int total = this.currentIndex + this.inputUris.size();
        return new Progress[]{new ProgressImpl(this.currentIndex, total, "entities")};
    }

    /** Nothing to release: archives are closed right after they have been searched. */
    public void close() {
    }
}

