package org.apache.ctakes.core.cr;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.lang.reflect.Array;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.ctakes.core.config.ConfigParameterConstants;
import org.apache.ctakes.core.patient.PatientNoteStore;
import org.apache.ctakes.core.pipeline.ProgressManager;
import org.apache.ctakes.core.resource.FileLocator;
import org.apache.ctakes.core.util.BannerWriter;
import org.apache.ctakes.core.util.NumberedSuffixComparator;
import org.apache.ctakes.core.util.doc.DocIdUtil;
import org.apache.ctakes.core.util.doc.NoteSpecs;
import org.apache.ctakes.core.util.doc.SourceMetadataUtil;
import org.apache.log4j.Logger;
import org.apache.uima.UimaContext;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.impl.CollectionReaderDescription_impl;
import org.apache.uima.fit.component.JCasCollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceConfigurationException;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.resource.ResourceManager;
import org.apache.uima.resource.metadata.ConfigurationParameterDeclarations;
import org.apache.uima.resource.metadata.ConfigurationParameterSettings;
import org.apache.uima.resource.metadata.NameValuePair;
import org.apache.uima.resource.metadata.ResourceMetaData;
import org.apache.uima.resource.metadata.impl.ConfigurationParameterDeclarations_impl;
import org.apache.uima.resource.metadata.impl.ConfigurationParameterSettings_impl;
import org.apache.uima.resource.metadata.impl.PropertyXmlInfo;
import org.apache.uima.resource.metadata.impl.XmlizationInfo;
import org.apache.uima.util.InvalidXMLException;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

/* loaded from: input_file:org/apache/ctakes/core/cr/AbstractFileTreeReader.class */
public abstract class AbstractFileTreeReader extends JCasCollectionReader_ImplBase {
    /** Name of the optional parameter that turns pipeline banners on or off. */
    public static final String PARAM_WRITE_BANNER = "WriteBanner";

    // Raw parameter text; initialize() enables banners when it equals "yes" or "true", ignoring case.
    @ConfigurationParameter(name = PARAM_WRITE_BANNER, description = "Write a large banner at each major step of the pipeline.", mandatory = false, defaultValue = {"no"})
    private String _writeBannerChoice;

    // Path handed to FileLocator.getFile(..) in initialize(); it may resolve to a single file or to a directory.
    @ConfigurationParameter(name = "InputDirectory", description = ConfigParameterConstants.DESC_INPUTDIR)
    private String _rootDirPath;
    /** Name of the optional character-encoding parameter. */
    public static final String PARAM_ENCODING = "Encoding";
    // Not referenced anywhere else in this file.
    public static final String UNICODE = "unicode";

    // Exposed through getValidEncoding(), which substitutes "Unknown" when this is null or empty.
    @ConfigurationParameter(name = "Encoding", description = "The character encoding used by the input files.", mandatory = false)
    private String _encoding;
    /** Name of the optional file-extension filter parameter. */
    public static final String PARAM_EXTENSIONS = "Extensions";

    // Normalized by createValidExtensions(..) in initialize(); a lone "*" or ".*" means "accept every file".
    @ConfigurationParameter(name = "Extensions", description = "The extensions of the files that the collection reader will read.", defaultValue = {"*"}, mandatory = false)
    private String[] _explicitExtensions;
    /** Parameter names for the text clean-up and patient-level options declared further below. */
    public static final String PARAM_KEEP_CR = "KeepCR";
    public static final String CR_TO_SPACE = "CRtoSpace";
    public static final String PATIENT_LEVEL = "PatientLevel";
    public static final String STRIP_QUOTES = "StripQuotes";
    /** Placeholder text available to this class and its subclasses. */
    protected static final String UNKNOWN = "Unknown";
    // State derived from the parameters during initialize().
    private boolean _writeBanner;
    private File _rootDir;
    private Collection<String> _validExtensions;
    // Files to read, in the order produced by getDescendentFiles(..) (or a single file).
    private List<File> _files;
    // Maps each file to the name of the directory that acts as its patient identifier.
    private Map<File, String> _filePatients;
    // Index into _files of the next file that getNext(..) will read.
    private int _currentIndex;
    private Comparator<File> _fileComparator;
    private static final Logger LOGGER = Logger.getLogger("AbstractFileTreeReader");
    // NOTE(review): "hh" is SimpleDateFormat's 12-hour clock field and the pattern has no am/pm marker;
    // confirm whether "HH" was intended. SimpleDateFormat is documented as not synchronized, and this
    // single instance is handed out by getDateFormat().
    private static final DateFormat DATE_FORMAT = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
    // Matches a carriage return immediately followed by a line feed.
    private static final Pattern CR_LF = Pattern.compile("\\r\\n");

    // When false, handleTextEol(..) rewrites CR+LF pairs to a bare LF.
    @ConfigurationParameter(name = PARAM_KEEP_CR, description = "Keep windows-format carriage return characters at line endings.  This will only keep existing characters, it will not add them.", mandatory = false)
    private boolean _keepCrChar = true;

    // NOTE(review): no code in this file reads this field, so the parameter currently has no effect here;
    // confirm whether handling was intended.
    @ConfigurationParameter(name = CR_TO_SPACE, description = "Change windows-format CR + LF character sequences to LF + <Space>.", mandatory = false)
    private boolean _crToSpace = false;

    // Directory depth (root = 0) whose directory names become patient identifiers in getDescendentFiles(..).
    // initialize() resets it to 0 when the root directory contains no sub-directories.
    @ConfigurationParameter(name = PATIENT_LEVEL, description = "The level in the directory hierarchy at which patient identifiers exist.Default value is 1; directly under root input directory.", mandatory = false)
    private int _patientLevel = 1;

    // Checked by handleQuotedDoc(String) before any quote replacement is attempted.
    @ConfigurationParameter(name = STRIP_QUOTES, description = "Replace document-enclosing quote characters with space characters.", mandatory = false)
    private boolean _stripQuotes = false;
    // Per-patient document counts accumulated by getDescendentFiles(..) and pushed to PatientNoteStore in initialize().
    private Map<String, Integer> _patientDocCounts = new HashMap();

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/ctakes/core/cr/AbstractFileTreeReader$FileComparator.class */
    /**
     * Orders files by comparing their simple names (not their full paths) with a
     * {@link NumberedSuffixComparator}.
     */
    public static class FileComparator implements Comparator<File> {
        /** Comparator applied to the two file names. */
        private final Comparator<String> __delegate = new NumberedSuffixComparator();

        private FileComparator() {
        }

        /**
         * @param file  first file
         * @param file2 second file
         * @return the delegate's comparison of the two file names
         */
        @Override // java.util.Comparator
        public int compare(File file, File file2) {
            final String firstName = file.getName();
            final String secondName = file2.getName();
            return this.__delegate.compare(firstName, secondName);
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:org/apache/ctakes/core/cr/AbstractFileTreeReader$ReaderMetadata.class */
    /**
     * Metadata object that the enclosing reader installs on itself in its constructor
     * (built by createMetaData()). It declares its own fields and accessors for the
     * descriptive properties and for the configuration parameter declarations/settings,
     * and carries its own validation of parameter settings against the declarations.
     */
    public static final class ReaderMetadata extends CollectionReaderDescription_impl implements ResourceMetaData {
        static final long serialVersionUID = 3408359518094534817L;
        private String mUUID;
        private String mName;
        private String mDescription;
        private String mVersion;
        private String mVendor;
        private String mCopyright;
        private ConfigurationParameterDeclarations mConfigurationParameterDeclarations;
        private ConfigurationParameterSettings mConfigurationParameterSettings;
        // Not referenced anywhere else within this nested class.
        private static final XmlizationInfo XMLIZATION_INFO = new XmlizationInfo("resourceMetaData", new PropertyXmlInfo[]{new PropertyXmlInfo("name", false), new PropertyXmlInfo("description"), new PropertyXmlInfo("version"), new PropertyXmlInfo("vendor"), new PropertyXmlInfo("copyright"), new PropertyXmlInfo("configurationParameterDeclarations", (String) null), new PropertyXmlInfo("configurationParameterSettings", (String) null)});

        /** Starts with empty parameter declarations and empty parameter settings. */
        private ReaderMetadata() {
            this.mConfigurationParameterDeclarations = new ConfigurationParameterDeclarations_impl();
            this.mConfigurationParameterSettings = new ConfigurationParameterSettings_impl();
        }

        /** No-op: the body is empty. */
        public void resolveImports() throws InvalidXMLException {
        }

        /** No-op: the body is empty and the resource manager is ignored. */
        public void resolveImports(ResourceManager resourceManager) throws InvalidXMLException {
        }

        public String getUUID() {
            return this.mUUID;
        }

        public void setUUID(String str) {
            this.mUUID = str;
        }

        public String getName() {
            return this.mName;
        }

        public void setName(String str) {
            this.mName = str;
        }

        public String getVersion() {
            return this.mVersion;
        }

        public void setVersion(String str) {
            this.mVersion = str;
        }

        public String getDescription() {
            return this.mDescription;
        }

        public void setDescription(String str) {
            this.mDescription = str;
        }

        public String getVendor() {
            return this.mVendor;
        }

        public void setVendor(String str) {
            this.mVendor = str;
        }

        public String getCopyright() {
            return this.mCopyright;
        }

        public void setCopyright(String str) {
            this.mCopyright = str;
        }

        public ConfigurationParameterSettings getConfigurationParameterSettings() {
            return this.mConfigurationParameterSettings;
        }

        public void setConfigurationParameterSettings(ConfigurationParameterSettings configurationParameterSettings) {
            this.mConfigurationParameterSettings = configurationParameterSettings;
        }

        public ConfigurationParameterDeclarations getConfigurationParameterDeclarations() {
            return this.mConfigurationParameterDeclarations;
        }

        public void setConfigurationParameterDeclarations(ConfigurationParameterDeclarations configurationParameterDeclarations) {
            this.mConfigurationParameterDeclarations = configurationParameterDeclarations;
        }

        /**
         * Checks the current settings against the current declarations.
         * If at least one ungrouped setting exists, only the ungrouped settings are
         * validated; otherwise every group's settings are validated in turn.
         *
         * @throws ResourceConfigurationException if a setting names an undeclared parameter
         *                                        or its value type does not match the declaration
         */
        public void validateConfigurationParameterSettings() throws ResourceConfigurationException {
            ConfigurationParameterDeclarations configurationParameterDeclarations = getConfigurationParameterDeclarations();
            ConfigurationParameterSettings configurationParameterSettings = getConfigurationParameterSettings();
            NameValuePair[] parameterSettings = configurationParameterSettings.getParameterSettings();
            if (parameterSettings.length > 0) {
                // Ungrouped settings present: validate them with a null group name and stop.
                validateConfigurationParameterSettings(parameterSettings, (String) null, configurationParameterDeclarations);
                return;
            }
            for (Map.Entry entry : configurationParameterSettings.getSettingsForGroups().entrySet()) {
                String str = (String) entry.getKey();
                NameValuePair[] nameValuePairArr = (NameValuePair[]) entry.getValue();
                // Groups mapped to a null array are skipped.
                if (nameValuePairArr != null) {
                    validateConfigurationParameterSettings(nameValuePairArr, str, configurationParameterDeclarations);
                }
            }
        }

        /**
         * Validates one array of settings: each must correspond to a declared parameter
         * and pass the data type check.
         *
         * @param nameValuePairArr                   settings to validate
         * @param str                                group name, or null for ungrouped settings
         * @param configurationParameterDeclarations declarations to look parameters up in
         * @throws ResourceConfigurationException on an undeclared parameter or a type mismatch
         */
        protected void validateConfigurationParameterSettings(NameValuePair[] nameValuePairArr, String str, ConfigurationParameterDeclarations configurationParameterDeclarations) throws ResourceConfigurationException {
            for (int i = 0; i < nameValuePairArr.length; i++) {
                String name = nameValuePairArr[i].getName();
                org.apache.uima.resource.metadata.ConfigurationParameter configurationParameter = configurationParameterDeclarations.getConfigurationParameter(str, name);
                if (configurationParameter == null) {
                    // The message key depends on whether a group name was supplied.
                    if (str != null) {
                        throw new ResourceConfigurationException("nonexistent_parameter_in_group", new Object[]{name, str, getName()});
                    }
                    throw new ResourceConfigurationException("nonexistent_parameter", new Object[]{name, getName()});
                }
                validateConfigurationParameterDataTypeMatch(configurationParameter, nameValuePairArr[i]);
            }
        }

        /**
         * Verifies that a setting's value has the class that its declaration calls for.
         * A multi-valued parameter requires an array value, and the array's component type
         * is what gets compared.
         *
         * @param configurationParameter the declaration
         * @param nameValuePair          the setting; its value may be replaced (see below)
         * @throws ResourceConfigurationException if an array was required but not supplied,
         *                                        or the classes differ
         */
        protected void validateConfigurationParameterDataTypeMatch(org.apache.uima.resource.metadata.ConfigurationParameter configurationParameter, NameValuePair nameValuePair) throws ResourceConfigurationException {
            String name = configurationParameter.getName();
            String type = configurationParameter.getType();
            // NOTE(review): the value is dereferenced without a null check.
            Class<?> cls = nameValuePair.getValue().getClass();
            if (configurationParameter.isMultiValued()) {
                if (!cls.isArray()) {
                    throw new ResourceConfigurationException("array_required", new Object[]{name, getName()});
                }
                cls = cls.getComponentType();
                // Special case: an empty Object[] is swapped for an empty array of the declared type and accepted.
                if (Array.getLength(nameValuePair.getValue()) == 0 && cls.equals(Object.class)) {
                    nameValuePair.setValue(Array.newInstance((Class<?>) getClassForParameterType(type), 0));
                    return;
                }
            }
            // Identity comparison: the value's class must be exactly the declared type's class.
            if (cls != getClassForParameterType(type)) {
                throw new ResourceConfigurationException("parameter_type_mismatch", new Object[]{getName(), cls.getName(), name, type});
            }
        }

        /**
         * Maps a declared parameter type name to a Java class.
         *
         * @param str one of "String", "Boolean", "Integer" or "Float"
         * @return the matching wrapper/String class, or null for any other name
         */
        protected Class getClassForParameterType(String str) {
            if ("String".equals(str)) {
                return String.class;
            }
            if ("Boolean".equals(str)) {
                return Boolean.class;
            }
            if ("Integer".equals(str)) {
                return Integer.class;
            }
            if ("Float".equals(str)) {
                return Float.class;
            }
            return null;
        }
    }

    /**
     * Creates the reader and installs the fixed metadata produced by createMetaData().
     */
    public AbstractFileTreeReader() {
        super();
        setMetaData(createMetaData());
    }

    /**
     * Reads the content of one file into the given JCas. getNext(..) calls this after the
     * JCas has been populated through getJCasBuilder(..) and after the current index has
     * been advanced.
     *
     * @param jCas the JCas to fill
     * @param file the file to read
     * @throws IOException declared for implementations that fail to read the file
     */
    protected abstract void readFile(JCas jCas, File file) throws IOException;

    /**
     * Supplies the comparator used to sort files and directories while the tree is walked.
     *
     * @return a new {@link FileComparator}
     */
    protected Comparator<File> createFileComparator() {
        final Comparator<File> comparator = new FileComparator();
        return comparator;
    }

    /**
     * @return the class-wide date format instance; createDocumentTime(..) formats with it
     */
    public DateFormat getDateFormat() {
        final DateFormat sharedFormat = DATE_FORMAT;
        return sharedFormat;
    }

    /**
     * @return the internal list of files to read (not a copy); null until initialize(..) has set it
     */
    protected List<File> getFiles() {
        final List<File> files = this._files;
        return files;
    }

    /**
     * @return index of the next file that getNext(..) will read
     */
    protected int getCurrentIndex() {
        final int index = this._currentIndex;
        return index;
    }

    /**
     * Moves the read position. The value is stored as given, without range checking.
     *
     * @param i index of the next file to read
     */
    protected void setCurrentIndex(int i) {
        _currentIndex = i;
    }

    /**
     * Looks up the patient identifier recorded for a file.
     *
     * @param file a file to look up
     * @return the recorded identifier, or "UnknownPatient" when the file has no entry
     */
    protected String getPatientId(File file) {
        final String patientId = _filePatients.getOrDefault(file, "UnknownPatient");
        return patientId;
    }

    /**
     * @return the number of files to read; 0 (with an error logged) when the file list has not been set
     */
    public int getNoteCount() {
        if (_files == null) {
            LOGGER.error("Not yet initialized");
            return 0;
        }
        return _files.size();
    }

    /**
     * @return the resolved root file or directory; null (with an error logged) when it has not been set
     */
    protected File getRootDir() {
        if (_rootDir == null) {
            LOGGER.error("Not yet initialized");
            return null;
        }
        return _rootDir;
    }

    /**
     * @return the absolute path of the root; "Unknown" (with an error logged) when no root is available
     */
    protected String getRootPath() {
        final File root = getRootDir();
        if (root == null) {
            LOGGER.error("Not yet initialized");
            return "Unknown";
        }
        return root.getAbsolutePath();
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * @return the configured encoding, or "Unknown" when none was configured (null or empty).
     *         When the root has not been set, an error is logged and "Unknown" is returned.
     */
    public final String getValidEncoding() {
        if (_rootDir == null) {
            LOGGER.error("Not yet initialized");
            return "Unknown";
        }
        final boolean hasEncoding = _encoding != null && !_encoding.isEmpty();
        if (hasEncoding) {
            return _encoding;
        }
        return "Unknown";
    }

    /**
     * @return the normalized extensions; an empty list (with an error logged) when they have not been set
     */
    protected Collection<String> getValidExtensions() {
        if (_validExtensions == null) {
            LOGGER.error("Not yet initialized");
            return Collections.emptyList();
        }
        return _validExtensions;
    }

    /**
     * Resolves the configured input location and builds the list of files to read.
     * <ul>
     * <li>Root is a single file: that file is the only document and its parent directory
     * name is its patient identifier.</li>
     * <li>Root is a directory with no listable children: the file list is left empty and the
     * method returns immediately, before progress is initialized.</li>
     * <li>Otherwise: the tree is walked; when the root contains no sub-directory the patient
     * level is forced to 0.</li>
     * </ul>
     *
     * @param uimaContext the UIMA context passed on to the superclass
     * @throws ResourceInitializationException wrapping a FileNotFoundException raised while resolving the input
     */
    public void initialize(UimaContext uimaContext) throws ResourceInitializationException {
        super.initialize(uimaContext);
        this._writeBanner = this._writeBannerChoice.equalsIgnoreCase("yes")
                || this._writeBannerChoice.equalsIgnoreCase("true");
        if (this._writeBanner) {
            BannerWriter.writeHello();
        }
        try {
            this._rootDir = FileLocator.getFile(this._rootDirPath);
            this._validExtensions = createValidExtensions(this._explicitExtensions);
            this._currentIndex = 0;
            if (!this._rootDir.isFile()) {
                final File[] children = this._rootDir.listFiles();
                if (children == null || children.length == 0) {
                    // Nothing to read.
                    this._filePatients = Collections.emptyMap();
                    this._files = Collections.emptyList();
                    return;
                }
                boolean hasSubDirectory = false;
                for (File child : children) {
                    if (child.isDirectory()) {
                        hasSubDirectory = true;
                        break;
                    }
                }
                if (!hasSubDirectory) {
                    // Flat directory: the root directory itself is the patient level.
                    this._patientLevel = 0;
                }
                this._filePatients = new HashMap<>();
                this._fileComparator = createFileComparator();
                this._files = getDescendentFiles(this._rootDir, this._validExtensions, 0);
                // Publish the per-patient document counts gathered during the walk.
                for (Map.Entry<String, Integer> patientCount : this._patientDocCounts.entrySet()) {
                    PatientNoteStore.getInstance()
                            .setWantedDocCount(patientCount.getKey(), patientCount.getValue().intValue());
                }
            } else {
                final String patientName = this._rootDir.getParentFile().getName();
                this._files = Collections.singletonList(this._rootDir);
                this._filePatients = Collections.singletonMap(this._rootDir, patientName);
                PatientNoteStore.getInstance().setWantedDocCount(patientName, 1);
            }
            ProgressManager.getInstance().initializeProgress(this._rootDirPath, this._files.size());
        } catch (FileNotFoundException fnfE) {
            LOGGER.error("No Directory found at " + this._rootDirPath);
            throw new ResourceInitializationException(fnfE);
        }
    }

    /**
     * Normalizes the configured extensions so that each one starts with a dot.
     *
     * @param strArr extensions as configured, with or without a leading dot
     * @return the dotted extensions in the given order; an empty collection when nothing was
     *         given or when the only entry is the wildcard "*" or ".*"
     */
    protected static Collection<String> createValidExtensions(String... strArr) {
        if (strArr == null || strArr.length == 0) {
            return Collections.emptyList();
        }
        if (strArr.length == 1) {
            final String only = strArr[0];
            if (only.equals("*") || only.equals(".*")) {
                return Collections.emptyList();
            }
        }
        final List<String> dotted = new ArrayList<>(strArr.length);
        for (String extension : strArr) {
            dotted.add(extension.startsWith(".") ? extension : '.' + extension);
        }
        return dotted;
    }

    /**
     * Recursively collects the readable files beneath a directory. Files directly in the
     * directory come first (sorted), followed by the files of each sub-directory (directories
     * visited in sorted order). Hidden files and files failing the extension filter are skipped.
     * When the depth equals the patient level, every collected file is recorded under this
     * directory's name and the patient's document count is increased accordingly.
     *
     * @param file       directory to scan
     * @param collection valid extensions; empty means all files
     * @param i          depth of the directory, the root being 0
     * @return the collected files, or an empty list when the directory has no listable children
     */
    private List<File> getDescendentFiles(File file, Collection<String> collection, int i) {
        final File[] children = file.listFiles();
        if (children == null || children.length == 0) {
            return Collections.emptyList();
        }
        final List<File> subDirectories = new ArrayList<>();
        final List<File> descendents = new ArrayList<>();
        for (File child : children) {
            if (child.isDirectory()) {
                subDirectories.add(child);
                continue;
            }
            if (isExtensionValid(child, collection) && !child.isHidden()) {
                descendents.add(child);
            }
        }
        subDirectories.sort(this._fileComparator);
        descendents.sort(this._fileComparator);
        for (File subDirectory : subDirectories) {
            descendents.addAll(getDescendentFiles(subDirectory, collection, i + 1));
        }
        if (i == this._patientLevel) {
            final String patientName = file.getName();
            this._patientDocCounts.merge(patientName, descendents.size(), Integer::sum);
            for (File descendent : descendents) {
                this._filePatients.put(descendent, patientName);
            }
        }
        return descendents;
    }

    /**
     * Decides whether a file passes the extension filter. The first extension that the file
     * name ends with decides the outcome: the file is accepted unless its whole name is that
     * extension, in which case a warning is logged and the file is rejected.
     *
     * @param file       file to check
     * @param collection valid extensions; an empty collection accepts every file
     * @return true if the file should be read
     */
    protected static boolean isExtensionValid(File file, Collection<String> collection) {
        if (collection.isEmpty()) {
            return true;
        }
        final String fileName = file.getName();
        for (String extension : collection) {
            if (!fileName.endsWith(extension)) {
                continue;
            }
            if (fileName.equals(extension)) {
                LOGGER.warn("File " + file.getPath() + " name exactly matches extension " + extension + " so it will not be read.");
                return false;
            }
            return true;
        }
        return false;
    }

    /**
     * Derives a document id from a file name by removing its extension. The longest listed
     * extension that the name ends with is removed; when none applies, everything from the
     * last dot onward is removed; a name without a dot is returned unchanged.
     *
     * @param file       file whose name is used
     * @param collection candidate extensions
     * @return the file name without its extension
     */
    protected static String createDocumentID(File file, Collection<String> collection) {
        final String fileName = file.getName();
        String longestMatch = "";
        for (String extension : collection) {
            if (extension.length() > longestMatch.length() && fileName.endsWith(extension)) {
                longestMatch = extension;
            }
        }
        final int cut = longestMatch.isEmpty()
                ? fileName.lastIndexOf('.')
                : fileName.length() - longestMatch.length();
        if (cut < 0) {
            return fileName;
        }
        return fileName.substring(0, cut);
    }

    /**
     * Computes the path of a file's parent directory relative to the root directory.
     * The parent only counts as lying beneath the root when its path continues with a file
     * separator right after the root path, so a sibling directory that merely shares a name
     * prefix with the root (root {@code /data/in}, parent {@code /data/input2}) does not match.
     *
     * @param file  the document file
     * @param file2 the root directory
     * @return the relative parent path, or an empty string when the file sits directly in the
     *         root, is not beneath the root, has no parent path, or no root is available
     */
    protected String createDocumentIdPrefix(File file, File file2) {
        final String parentPath = file.getParent();
        if (parentPath == null || file2 == null) {
            return "";
        }
        final String rootPath = file2.getPath();
        if (parentPath.equals(rootPath)) {
            return "";
        }
        // A file-system root such as "/" already ends with the separator; do not double it.
        final String rootPrefix = rootPath.endsWith(File.separator) ? rootPath : rootPath + File.separator;
        if (!parentPath.startsWith(rootPrefix)) {
            return "";
        }
        return parentPath.substring(rootPrefix.length());
    }

    /**
     * Derives the document type from a document id: the text after the last underscore.
     *
     * @param str the document id
     * @return the text after the last underscore, or {@link NoteSpecs#ID_NAME_CLINICAL_NOTE}
     *         when there is no underscore or the id ends with one
     */
    protected String createDocumentType(String str) {
        final int underscore = str.lastIndexOf('_');
        final boolean hasTypeSuffix = underscore >= 0 && underscore != str.length() - 1;
        if (hasTypeSuffix) {
            return str.substring(underscore + 1);
        }
        return NoteSpecs.ID_NAME_CLINICAL_NOTE;
    }

    /**
     * Formats the file's last-modified time with the format from getDateFormat().
     *
     * @param file the document file
     * @return the formatted last-modified time
     */
    protected String createDocumentTime(File file) {
        final DateFormat formatter = getDateFormat();
        final Long lastModifiedMillis = Long.valueOf(file.lastModified());
        return formatter.format(lastModifiedMillis);
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * @return true if carriage return characters are to be left in the document text
     */
    public final boolean isKeepCrChar() {
        final boolean keepCr = _keepCrChar;
        return keepCr;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Normalizes line endings of document text. Unless carriage returns are being kept,
     * every CR+LF pair is replaced with a single LF. Non-empty text is then guaranteed to
     * end with a newline.
     *
     * @param str the document text
     * @return the adjusted text; empty text is returned as is
     */
    public final String handleTextEol(String str) {
        String text = str;
        final boolean stripCr = !isKeepCrChar() && !text.isEmpty() && text.contains("\r");
        if (stripCr) {
            LOGGER.debug("Removing Carriage-Return characters ...");
            text = CR_LF.matcher(text).replaceAll("\n");
        }
        final boolean needsFinalNewline = !text.isEmpty() && !text.endsWith("\n");
        if (needsFinalNewline) {
            // Make sure the text ends with a newline.
            text = text + "\n";
        }
        return text;
    }

    /* JADX INFO: Access modifiers changed from: protected */
    /**
     * Blanks out quote characters that enclose the whole document, when quote stripping is
     * enabled. Enclosing double quotes are handled first, then enclosing single quotes.
     *
     * @param str the document text
     * @return the text with enclosing quotes replaced by spaces, or the text unchanged when
     *         stripping is disabled or the text is empty
     */
    public final String handleQuotedDoc(String str) {
        if (!this._stripQuotes || str.isEmpty()) {
            return str;
        }
        final String withoutDoubleQuotes = handleQuotedDoc(str, '\"');
        return handleQuotedDoc(withoutDoubleQuotes, '\'');
    }

    /**
     * Replaces a pair of document-enclosing quote characters with spaces, keeping the text
     * length (and so all character offsets) unchanged. The text counts as enclosed when,
     * ignoring surrounding whitespace, it starts and ends with the quote character at two
     * distinct positions.
     * <p>
     * Text whose trimmed form is a single quote character is returned unchanged: one
     * character cannot enclose anything, and treating it as both the opening and the closing
     * quote previously caused a StringIndexOutOfBoundsException.
     *
     * @param str the document text
     * @param c   the quote character to look for; expected to be a non-whitespace character
     * @return the text with the enclosing quotes blanked, or the original text when it is not enclosed
     */
    private static String handleQuotedDoc(String str, char c) {
        final String trimmed = str.trim();
        final boolean enclosed = trimmed.length() >= 2
                && trimmed.charAt(0) == c
                && trimmed.charAt(trimmed.length() - 1) == c;
        if (!enclosed) {
            return str;
        }
        LOGGER.debug("Replacing document-enclosing quote characters " + c + " ...");
        // Only whitespace precedes the opening quote and follows the closing one, so the first
        // and last occurrences in the untrimmed text are exactly the enclosing pair.
        final StringBuilder blanked = new StringBuilder(str);
        blanked.setCharAt(str.indexOf(c), ' ');
        blanked.setCharAt(str.lastIndexOf(c), ' ');
        return blanked.toString();
    }

    /**
     * Creates a builder preloaded with the metadata derived from a file: document id, id
     * prefix, type, time, patient id and absolute path. The document text is set through
     * {@code nullDocText()}.
     *
     * @param file the document file
     * @return a builder ready to populate a JCas
     */
    protected org.apache.ctakes.core.util.doc.JCasBuilder getJCasBuilder(File file) {
        final String docId = createDocumentID(file, getValidExtensions());
        final String docIdPrefix = createDocumentIdPrefix(file, getRootDir());
        final String docType = createDocumentType(docId);
        final String docTime = createDocumentTime(file);
        return new org.apache.ctakes.core.util.doc.JCasBuilder()
                .setDocId(docId)
                .setDocIdPrefix(docIdPrefix)
                .setDocType(docType)
                .setDocTime(docTime)
                .setPatientId(getPatientId(file))
                .setDocPath(file.getAbsolutePath())
                .nullDocText();
    }

    /**
     * Reports whether another file remains to be read. When banners are enabled, the
     * "process" banner is written on any call made while the index is still 0, and the
     * "finished" banner on any call made once all files are consumed. Once all files are
     * consumed the progress manager is also marked complete.
     *
     * @return true if getNext(..) can be called for another file
     */
    public boolean hasNext() {
        if (this._writeBanner && this._currentIndex == 0) {
            BannerWriter.writeProcess();
        }
        if (this._currentIndex < this._files.size()) {
            return true;
        }
        ProgressManager.getInstance().updatePatientId(ProgressManager.PROGRESS_COMPLETE);
        ProgressManager.getInstance().updateDocId(ProgressManager.PROGRESS_COMPLETE);
        ProgressManager.getInstance().updateProgress(this._files.size());
        if (this._writeBanner) {
            BannerWriter.writeFinished();
        }
        return false;
    }

    /**
     * Loads the next file into the JCas: fills in the metadata through the builder, reports
     * progress, advances the index and finally delegates to readFile(..). The index is
     * advanced before the file is read, so it has already moved on if reading fails.
     *
     * @param jCas the JCas to fill
     * @throws IOException         if readFile(..) fails
     * @throws CollectionException declared by the reader contract
     */
    public void getNext(JCas jCas) throws IOException, CollectionException {
        final File nextFile = this._files.get(this._currentIndex);
        getJCasBuilder(nextFile).populate(jCas);
        final String patientId = SourceMetadataUtil.getPatientIdentifier(jCas);
        ProgressManager.getInstance().updatePatientId(patientId);
        final String docId = DocIdUtil.getDocumentID(jCas);
        ProgressManager.getInstance().updateDocId(docId);
        ProgressManager.getInstance().updateProgress(this._currentIndex);
        this._currentIndex += 1;
        readFile(jCas, nextFile);
    }

    /**
     * @return a single-element array holding the current index out of the total file count,
     *         measured in "entities"
     */
    public Progress[] getProgress() {
        final Progress fileProgress = new ProgressImpl(this._currentIndex, this._files.size(), "entities");
        return new Progress[]{fileProgress};
    }

    /**
     * Builds the fixed metadata that the constructor installs on every reader instance.
     *
     * @return a new {@link ReaderMetadata} carrying the reader's identifying properties
     */
    private static ResourceMetaData createMetaData() {
        final ReaderMetadata metadata = new ReaderMetadata();
        // Identification
        metadata.setUUID("AFTR");
        metadata.setName("AbstractFileTreeReader");
        metadata.setVersion("1");
        // Descriptive text
        metadata.setDescription("Abstract for reader of files in a directory tree");
        metadata.setVendor("ctakes");
        metadata.setCopyright("2017");
        return metadata;
    }
}
