/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.reader.pmc;

import de.julielab.jcore.reader.pmc.NXMLURIIterator;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.net.URI;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Optional;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.commons.lang.StringUtils;
import org.apache.uima.UimaContext;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Common base for collection readers that consume PubMed Central NXML files.
 *
 * <p>This class handles the configuration shared by its subclasses: it resolves the input
 * location, reads the optional whitelist file and creates the {@link NXMLURIIterator} that
 * enumerates the documents to read. Subclasses are expected to pull URIs from
 * {@link #pmcFiles} and to maintain {@link #completed}; this class itself only resets the
 * counter in {@link #initialize(UimaContext)} and reports it in {@link #getProgress()}.
 */
public abstract class PMCReaderBase extends JCasCollectionReader_ImplBase {
    public static final String PARAM_INPUT = "Input";
    public static final String PARAM_RECURSIVELY = "SearchRecursively";
    public static final String PARAM_SEARCH_ZIP = "SearchInZipFiles";
    public static final String PARAM_WHITELIST = "WhitelistFile";

    private static final Logger log = LoggerFactory.getLogger(PMCReaderBase.class);

    /** NXML file or directory to read from. */
    @ConfigurationParameter(name = PARAM_INPUT, description = "The path to an NXML file or a directory with NXML files and possibly subdirectories holding more NXML files.")
    protected File input;

    /** Whether subdirectories of {@link #input} are searched as well. */
    @ConfigurationParameter(name = PARAM_RECURSIVELY, defaultValue = {"false"}, mandatory = false, description = "If set to true, subdirectories of the given input directory Input are also searched for NXML files. Defaults to false.")
    protected boolean searchRecursively;

    /** Whether ZIP files found among the input are opened and searched as well. */
    @ConfigurationParameter(name = PARAM_SEARCH_ZIP, defaultValue = {"false"}, mandatory = false, description = "If set to true, ZIP files found among the input are opened and also searched for NXML files. Defaults to false.")
    protected boolean searchZip;

    /** Optional file listing the names of the files that should be read; may be {@code null}. */
    @ConfigurationParameter(name = PARAM_WHITELIST, mandatory = false, description = "A file listing the file names that should be read. All other files will be discarded. The file name must be given without any extensions and subdirectories. For example, the file \"Neural_Regen_Res/PMC2847692.nxml.gz\" would be represented as \"PMC2847692\" in the whitelist file. Each file name must appear on a line of its own. An empty file will cause nothing to be read. A file containing only the keyword \"all\" will behave as if no file was given at all.")
    protected File whitelistFile;

    /** Iterator over the URIs of the documents to read; {@code null} after {@link #close()}. */
    protected Iterator<URI> pmcFiles;

    /** Number of completed documents as reported by {@link #getProgress()}. */
    protected int completed;

    /**
     * Reads the configuration parameters from the component context, loads the whitelist and
     * creates the iterator over the input files.
     *
     * @param context the UIMA context; used here to enumerate the parameter names for logging
     * @throws ResourceInitializationException if the {@code Input} parameter is missing, or if
     *         reading the whitelist or creating the file iterator fails with an
     *         {@link IOException}
     */
    public void initialize(UimaContext context) throws ResourceInitializationException {
        if (log.isInfoEnabled()) {
            log.info("Component configuration:");
            for (String configName : context.getConfigParameterNames()) {
                log.info("    {}: {}", configName, getConfigParameterValue(configName));
            }
        }

        // Fail with the declared exception type rather than a bare NullPointerException from
        // new File(null) when the input parameter was not provided.
        final String inputPath = (String) getConfigParameterValue(PARAM_INPUT);
        if (inputPath == null) {
            throw new ResourceInitializationException(
                    new IllegalArgumentException("The mandatory parameter " + PARAM_INPUT + " is not set."));
        }
        this.input = new File(inputPath);
        this.searchRecursively = Optional.ofNullable((Boolean) getConfigParameterValue(PARAM_RECURSIVELY)).orElse(false);
        this.searchZip = Optional.ofNullable((Boolean) getConfigParameterValue(PARAM_SEARCH_ZIP)).orElse(false);
        this.whitelistFile = Optional.ofNullable((String) getConfigParameterValue(PARAM_WHITELIST)).map(File::new).orElse(null);

        log.info("Reading PubmedCentral NXML file(s) from {}", this.input);
        try {
            final Set<String> whitelist = readWhitelist(this.whitelistFile);
            this.pmcFiles = new NXMLURIIterator(this.input, whitelist, this.searchRecursively, this.searchZip);
        } catch (IOException e) {
            throw new ResourceInitializationException(e);
        }
        this.completed = 0;
    }

    /**
     * Loads the whitelist entries, one per non-blank line of the given UTF-8 file.
     *
     * <p>Entries are trimmed so that stray leading or trailing whitespace in the whitelist file
     * does not prevent an entry from matching a file name.
     *
     * @param whitelistFile the whitelist file; may be {@code null}
     * @return the whitelist entries; a set containing only the keyword {@code "all"} if no file
     *         was given or the given file does not exist
     * @throws IOException if the whitelist file cannot be read
     */
    private Set<String> readWhitelist(File whitelistFile) throws IOException {
        if (whitelistFile == null || !whitelistFile.exists()) {
            if (whitelistFile != null) {
                // A configured but missing whitelist most likely is a configuration mistake;
                // keep the lenient fallback but make it visible.
                log.warn("The whitelist file {} does not exist, no whitelist will be applied.", whitelistFile);
            }
            final Set<String> noRestriction = new HashSet<String>();
            noRestriction.add("all");
            return noRestriction;
        }

        final Set<String> whitelist;
        try (BufferedReader br = Files.newBufferedReader(whitelistFile.toPath(), StandardCharsets.UTF_8)) {
            whitelist = br.lines()
                    .filter(line -> !StringUtils.isBlank(line))
                    .map(String::trim)
                    .collect(Collectors.toSet());
        }
        log.debug("Read whitelist with {} entries from {}", whitelist.size(), whitelistFile);
        return whitelist;
    }

    /**
     * Tells whether there are more documents to read.
     *
     * @return {@code true} if the file iterator has another element; {@code false} if it is
     *         exhausted or this reader has been closed
     */
    public boolean hasNext() {
        return this.pmcFiles != null && this.pmcFiles.hasNext();
    }

    /**
     * Reports the reading progress.
     *
     * @return a single progress entry in the unit {@code "documents"} holding the value of
     *         {@link #completed}; the total is given as {@code -1}
     */
    public Progress[] getProgress() {
        return new Progress[] {new ProgressImpl(this.completed, -1, "documents")};
    }

    /**
     * Drops the reference to the file iterator. Afterwards, {@link #hasNext()} returns
     * {@code false}.
     */
    public void close() {
        this.pmcFiles = null;
    }
}

