/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.reader.file.main;

import de.julielab.jcore.types.Date;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import de.julielab.jcore.types.pubmed.Header;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Serializable;
import java.io.StringReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Optional;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import org.apache.commons.io.FileUtils;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.CASException;
import org.apache.uima.collection.CollectionException;
import org.apache.uima.collection.CollectionReader_ImplBase;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.resource.ResourceInitializationException;
import org.apache.uima.util.Level;
import org.apache.uima.util.Progress;
import org.apache.uima.util.ProgressImpl;

public class FileReader
extends CollectionReader_ImplBase {
    public static final String DIRECTORY_INPUT = "InputDirectory";
    public static final String FILENAME_AS_DOC_ID = "UseFilenameAsDocId";
    public static final String PUBLICATION_DATES_FILE = "PublicationDatesFile";
    public static final String ALLOWED_FILE_EXTENSIONS = "AllowedFileExtensions";
    public static final String SENTENCE_PER_LINE = "SentencePerLine";
    public static final String TOKEN_BY_TOKEN = "TokenByToken";
    public static final String FILE_NAME_SPLIT_UNDERSCORE = "FileNameSplitUnderscore";
    public static final String DIRECTORY_SUBDIRS = "ReadSubDirs";
    public static final String DIRECTORY_ORIG_FILES = "OriginalFolder";
    public static final String ORIG_FILES_EXT = "OriginalFileExt";
    private ArrayList<File> files;
    private int fileIndex;
    @ConfigurationParameter(name="InputDirectory", mandatory=true)
    private File inputDirectory;
    @ConfigurationParameter(name="UseFilenameAsDocId", mandatory=false)
    private boolean useFilenameAsDocId;
    @ConfigurationParameter(name="PublicationDatesFile", mandatory=false)
    private File publicationDatesFile;
    @ConfigurationParameter(name="SentencePerLine", mandatory=false)
    private boolean sentencePerLine;
    @ConfigurationParameter(name="TokenByToken", mandatory=false)
    private boolean tokenByToken;
    @ConfigurationParameter(name="FileNameSplitUnderscore", mandatory=false)
    private boolean fileNameSplitUnderscore;
    @ConfigurationParameter(name="AllowedFileExtensions", mandatory=false)
    private String[] allowedExtensionsArray;
    @ConfigurationParameter(name="ReadSubDirs", mandatory=false)
    private boolean useSubDirs;
    @ConfigurationParameter(name="OriginalFolder", mandatory=false)
    private File origFolder;
    @ConfigurationParameter(name="OriginalFileExt", mandatory=false)
    private String origFileExt;

    public void initialize() throws ResourceInitializationException {
        Boolean subdir;
        Boolean spl;
        this.inputDirectory = new File(((String)this.getConfigParameterValue(DIRECTORY_INPUT)).trim());
        if (this.getConfigParameterValue(PUBLICATION_DATES_FILE) != null) {
            this.publicationDatesFile = new File(((String)this.getConfigParameterValue(PUBLICATION_DATES_FILE)).trim());
        }
        this.sentencePerLine = (spl = (Boolean)this.getConfigParameterValue(SENTENCE_PER_LINE)) == null ? false : spl;
        Boolean tokspl = (Boolean)this.getConfigParameterValue(TOKEN_BY_TOKEN);
        this.tokenByToken = null == tokspl ? false : tokspl;
        Boolean fnsu = (Boolean)this.getConfigParameterValue(FILE_NAME_SPLIT_UNDERSCORE);
        this.fileNameSplitUnderscore = null == fnsu ? false : fnsu;
        Boolean filenameAsDocId = (Boolean)this.getConfigParameterValue(FILENAME_AS_DOC_ID);
        this.useFilenameAsDocId = null == filenameAsDocId ? false : filenameAsDocId;
        this.allowedExtensionsArray = (String[])this.getConfigParameterValue(ALLOWED_FILE_EXTENSIONS);
        HashSet<String> allowedExtensions = new HashSet<String>();
        if (null != this.allowedExtensionsArray) {
            for (int i = 0; i < this.allowedExtensionsArray.length; ++i) {
                String allowedExtension = this.allowedExtensionsArray[i];
                allowedExtensions.add(allowedExtension);
            }
        }
        this.useSubDirs = null == (subdir = (Boolean)this.getConfigParameterValue(DIRECTORY_SUBDIRS)) ? false : subdir;
        String sentfoo = (String)this.getConfigParameterValue(DIRECTORY_ORIG_FILES);
        this.origFolder = null == sentfoo ? null : new File(sentfoo.trim());
        String sentfile_ext = (String)this.getConfigParameterValue(ORIG_FILES_EXT);
        if (null == sentfile_ext) {
            this.origFileExt = "txt";
        } else {
            this.origFileExt = sentfile_ext;
            if (sentfile_ext.startsWith(".")) {
                this.origFileExt = sentfile_ext.substring(1);
            }
        }
        if (!this.inputDirectory.exists()) {
            throw new ResourceInitializationException("annotator_resource_not_found", new Object[]{this.inputDirectory.getAbsolutePath()});
        }
        this.fileIndex = 0;
        this.files = new ArrayList();
        try {
            this.createFileListByType(this.inputDirectory, allowedExtensions);
        }
        catch (IOException e) {
            e.printStackTrace();
        }
    }

    public boolean hasNext() {
        return this.fileIndex < this.files.size();
    }

    public void getNext(CAS aCAS) throws IOException, CollectionException {
        Serializable p;
        Object sent;
        JCas jcas;
        try {
            jcas = aCAS.getJCas();
        }
        catch (CASException e) {
            throw new CollectionException((Throwable)e);
        }
        File file = this.files.get(this.fileIndex++);
        String text = FileUtils.readFileToString((File)file, (String)"UTF-8");
        Pattern nws = Pattern.compile("[^\\s]+", 258);
        String origText = null;
        if (this.origFolder != null) {
            File origFile = new File(this.origFolder, this.getFileName(file) + "." + this.origFileExt);
            origText = FileUtils.readFileToString((File)origFile, (String)"UTF-8");
        }
        if (this.sentencePerLine) {
            String line;
            BufferedReader rdr = new BufferedReader(new StringReader(text));
            ArrayList<String> lines = new ArrayList<String>();
            ArrayList<Integer> start = new ArrayList<Integer>();
            ArrayList<Integer> end = new ArrayList<Integer>();
            Integer tmp = 0;
            while ((line = rdr.readLine()) != null) {
                if (!Pattern.matches("\\s*", line)) {
                    lines.add(line);
                    start.add(tmp);
                    end.add(tmp + line.length());
                }
                tmp = tmp + (line.length() + 1);
            }
            rdr.close();
            int index_tmp = 0;
            Integer i = 0;
            while (i < lines.size()) {
                boolean addSent2index22 = true;
                sent = new Sentence(jcas);
                if (origText != null) {
                    Optional<String> newLine = Stream.of(((String)lines.get(i)).split("\\s+")).map(x -> Pattern.quote(x)).reduce((x, y) -> x + "\\s*" + y);
                    p = Pattern.compile(newLine.get(), 256);
                    Matcher m = ((Pattern)p).matcher(origText);
                    if (m.find(index_tmp)) {
                        int newStart = m.start();
                        int newEnd = m.end();
                        index_tmp = m.end() + 1;
                        sent.setBegin(newStart);
                        sent.setEnd(newEnd);
                    } else {
                        addSent2index22 = false;
                    }
                } else {
                    sent.setBegin(((Integer)start.get(i)).intValue());
                    sent.setEnd(((Integer)end.get(i)).intValue());
                }
                sent.setComponentId(((Object)((Object)this)).getClass().getName() + " : Sentence per Line Mode");
                if (addSent2index22) {
                    sent.addToIndexes();
                }
                Integer addSent2index22 = i;
                i = i + 1;
                sent = i;
            }
        }
        if (this.tokenByToken) {
            ArrayList<String> tokensList = new ArrayList<String>();
            ArrayList<Integer> tokStart = new ArrayList<Integer>();
            ArrayList<Integer> tokEnd = new ArrayList<Integer>();
            Integer tmpTok = 0;
            Integer globalNumberOfToken = 0;
            Integer numberOfTokens = 0;
            Matcher m = nws.matcher(text);
            while (m.find()) {
                String token = m.group();
                int start = m.start();
                int end = m.end();
                tokensList.add(token);
                tokStart.add(start);
                tokEnd.add(end);
                sent = numberOfTokens;
                numberOfTokens = numberOfTokens + 1;
                p = numberOfTokens;
            }
            globalNumberOfToken = globalNumberOfToken + numberOfTokens;
            int index_tmp = 0;
            Integer j = 0;
            while (j < tokensList.size()) {
                boolean addToken2index = true;
                Token token = new Token(jcas);
                if (origText != null) {
                    int newEnd;
                    String tok = (String)tokensList.get(j);
                    int newStart = origText.indexOf(tok, index_tmp);
                    index_tmp = newEnd = newStart + tok.length();
                    token.setBegin(newStart);
                    token.setEnd(newEnd);
                } else {
                    token.setBegin(((Integer)tokStart.get(j)).intValue());
                    token.setEnd(((Integer)tokEnd.get(j)).intValue());
                }
                token.setComponentId(((Object)((Object)this)).getClass().getName() + " : Tokenized Mode");
                if (addToken2index) {
                    token.addToIndexes();
                }
                Integer n = j;
                Integer n2 = j = Integer.valueOf(j + 1);
            }
        }
        if (origText != null) {
            jcas.setDocumentText(origText);
        } else {
            jcas.setDocumentText(text);
        }
        if (this.useFilenameAsDocId) {
            String filename = this.getFileName(file);
            Header header = new Header(jcas);
            header.setDocId(filename);
            this.addDateForID(header, jcas, filename);
            header.addToIndexes();
        }
    }

    private void addDateForID(Header header, JCas jCas, String id) {
        if (this.publicationDatesFile != null && this.publicationDatesFile.exists() && this.publicationDatesFile.isFile()) {
            try {
                BufferedReader br = new BufferedReader(new java.io.FileReader(this.publicationDatesFile));
                String line = "";
                while ((line = br.readLine()) != null) {
                    String[] tokens = line.split("\\s+");
                    if (tokens.length != 2 || !tokens[0].equals(id) || tokens[1].length() != 7) continue;
                    Date pubDate = new Date(jCas);
                    int year = 0;
                    int month = 0;
                    try {
                        year = Integer.parseInt(tokens[1].substring(0, 4));
                        month = Integer.parseInt(tokens[1].substring(5));
                    }
                    catch (NumberFormatException numberFormatException) {
                        // empty catch block
                    }
                    if (month != 0) {
                        pubDate.setMonth(month);
                    }
                    if (year != 0) {
                        pubDate.setYear(year);
                        pubDate.addToIndexes();
                    }
                    break;
                }
            }
            catch (FileNotFoundException e) {
                e.printStackTrace();
            }
            catch (IOException e) {
                e.printStackTrace();
            }
        }
    }

    public void close() throws IOException {
    }

    public Progress[] getProgress() {
        return new Progress[]{new ProgressImpl(this.fileIndex, this.files.size(), "entities")};
    }

    private String[] createFileListByType(File inputDirectory, Set<String> allowedExtensions) throws IOException {
        String[] path = new File(inputDirectory.getPath()).list();
        for (int i = 0; i < path.length; ++i) {
            File file = new File(inputDirectory.getAbsolutePath() + "/" + path[i]);
            if (!this.useSubDirs && file.isDirectory()) continue;
            String CurrentExtension = path[i].substring(path[i].lastIndexOf(46) + 1);
            if (allowedExtensions.isEmpty() || allowedExtensions.contains(CurrentExtension)) {
                this.files.add(file);
            }
            if (!this.useSubDirs || !file.isDirectory()) continue;
            this.createFileListByType(file, allowedExtensions);
        }
        return path;
    }

    private String getFileName(File fi) {
        int extUnderScoreIndex;
        String filename = fi.getName();
        int extDotIndex = filename.lastIndexOf(46);
        if (extDotIndex > 0) {
            filename = filename.substring(0, extDotIndex);
        }
        if (this.fileNameSplitUnderscore && (extUnderScoreIndex = filename.lastIndexOf(95)) > 0) {
            filename = filename.substring(0, extUnderScoreIndex);
        }
        return filename;
    }
}

