/*
 * Decompiled with CFR 0.152.
 */
package de.julielab.jcore.consumer.txt;

import de.julielab.java.utilities.FileUtilities;
import de.julielab.jcore.types.Header;
import de.julielab.jcore.types.POSTag;
import de.julielab.jcore.types.Sentence;
import de.julielab.jcore.types.Token;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;
import java.util.zip.ZipEntry;
import java.util.zip.ZipException;
import java.util.zip.ZipOutputStream;
import org.apache.uima.UimaContext;
import org.apache.uima.analysis_component.JCasAnnotator_ImplBase;
import org.apache.uima.analysis_engine.AnalysisEngineProcessException;
import org.apache.uima.cas.CASException;
import org.apache.uima.cas.CASRuntimeException;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.cas.text.AnnotationIndex;
import org.apache.uima.fit.descriptor.ConfigurationParameter;
import org.apache.uima.jcas.JCas;
import org.apache.uima.jcas.JFSIndexRepository;
import org.apache.uima.jcas.cas.FSArray;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SentenceTokenConsumer
extends JCasAnnotator_ImplBase {
    public static final String PARAM_OUTPUT_DIR = "outDirectory";
    public static final String PARAM_DELIMITER = "delimiter";
    public static final String PARAM_LOWERCASE = "lowercase";
    public static final String PARAM_MODE = "mode";
    public static final String PARAM_GZIP = "gzip";
    public static final String PARAM_ZIP_ARCHIVE = "zipArchive";
    public static final String PARAM_ZIP_MAX_SIZE = "maxZipSize";
    public static final String PARAM_ZIP_PREFIX = "zipFilePrefix";
    private static final Logger LOGGER = LoggerFactory.getLogger(SentenceTokenConsumer.class);
    private static final String DEFAULT_DELIMITER = "";
    private static final boolean DEFAULT_PARAM_POS_TAG = false;
    private final byte[] linesepBytes = System.getProperty("line.separator").getBytes(StandardCharsets.UTF_8);
    int docs = 0;
    @ConfigurationParameter(name="outDirectory", description="The directory where to write the text files to.")
    private File directory;
    @ConfigurationParameter(name="delimiter", mandatory=false, description="If this parameter is given, each token will have its part of speech tag appended where the PoS tag is delimited from the token by the string given with this parameter.")
    private String delimiter;
    @ConfigurationParameter(name="lowercase", mandatory=false, defaultValue={"false"}, description="If set to true, this parameter causes all written text output to be lowercased. Defaults to false.")
    private Boolean lowercase;
    @ConfigurationParameter(name="mode", mandatory=false, description="Possible values: TOKEN and DOCUMENT. The first prints out tokens with one sentence per line, the second just prints out the CAS document text without changing it in any way.")
    private Mode mode;
    @ConfigurationParameter(name="gzip", mandatory=false, defaultValue={"false"}, description="If set to true, the output files are stored in the GZIP format. The .gz extension is automatically appended. Defaults to false.")
    private Boolean gzip;
    @ConfigurationParameter(name="zipArchive", mandatory=false, defaultValue={"false"}, description="If set to true, this parameter causes the output files to be stored in ZIP archives. The maximum size in terms of entries of each archive is given by the maxZipSize parameter and defaults to 10,000. The archive names are built using the prefix specified with the zipFilePrefix parameter followed by a serially added number and the host name.")
    private Boolean zip;
    @ConfigurationParameter(name="maxZipSize", mandatory=false, defaultValue={"10000"}, description="If the parameter zipArchive is set to true, ZIP archives will be written with a maximum number of entries to be specified with this paramter. Defaults to 10,000.")
    private Integer zipSize;
    @ConfigurationParameter(name="zipFilePrefix", mandatory=false, defaultValue={"TXTConsumerArchive"}, description="Specifies the base name for ZIP archives that are created in case the zipArchive parameter is enabled.")
    private String zipFilePrefix;
    private boolean addPOSTAG;
    private OutputStream currentArchive;
    private int archiveNumber = 1;
    private int currentArchiveSize = 0;

    public void initialize(UimaContext aContext) {
        LOGGER.info("INITIALIZING TXT Consumer ...");
        String dirName = (String)aContext.getConfigParameterValue(PARAM_OUTPUT_DIR);
        this.directory = new File(dirName);
        if (!this.directory.exists()) {
            this.directory.mkdir();
        }
        LOGGER.info("Writing txt files to output directory '" + this.directory + "'");
        this.delimiter = (String)aContext.getConfigParameterValue(PARAM_DELIMITER);
        if (this.delimiter == null) {
            this.delimiter = DEFAULT_DELIMITER;
        }
        this.lowercase = (Boolean)Optional.ofNullable(aContext.getConfigParameterValue(PARAM_LOWERCASE)).orElse(false);
        this.gzip = (Boolean)aContext.getConfigParameterValue(PARAM_GZIP);
        if (this.gzip == null) {
            this.gzip = false;
        }
        if (aContext.getConfigParameterValue(PARAM_DELIMITER) != null) {
            this.addPOSTAG = true;
            LOGGER.info("Adding POSTags ...");
        } else {
            this.addPOSTAG = false;
        }
        this.zip = (Boolean)Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ZIP_ARCHIVE)).orElse(false);
        this.zipSize = (Integer)Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ZIP_MAX_SIZE)).orElse(10000);
        this.zipFilePrefix = (String)Optional.ofNullable(aContext.getConfigParameterValue(PARAM_ZIP_PREFIX)).orElse("TXTConsumerArchive");
        String mode = (String)aContext.getConfigParameterValue(PARAM_MODE);
        if (mode == null) {
            mode = Mode.TOKEN.name();
        }
        this.mode = Mode.valueOf(mode);
    }

    private OutputStream createNextArchiveStream() throws IOException {
        File outputfile = new File(this.directory.getCanonicalPath() + File.separator + this.zipFilePrefix + this.archiveNumber + "-" + this.getHostName() + "-" + this.getPid() + "-" + Thread.currentThread().getName() + ".zip");
        if (outputfile.exists()) {
            throw new IllegalStateException("The next file to write for the current thread '" + Thread.currentThread().getName() + "' should be " + outputfile.getAbsolutePath() + ", but this file does already exist.");
        }
        this.currentArchive = new ZipOutputStream(new BufferedOutputStream(new FileOutputStream(outputfile)));
        ++this.archiveNumber;
        this.currentArchiveSize = 0;
        return this.currentArchive;
    }

    public void process(JCas jcas) throws AnalysisEngineProcessException {
        LOGGER.trace("Processing next document ... ");
        try {
            String fileId = this.getDocID(jcas);
            if (fileId == null) {
                fileId = new Integer(this.docs++).toString();
            }
            if (this.mode == Mode.TOKEN) {
                FSIterator sentenceIterator = jcas.getAnnotationIndex(Sentence.type).iterator();
                AnnotationIndex tokenIndex = jcas.getAnnotationIndex(Token.type);
                ArrayList<String> sentences = new ArrayList<String>();
                while (sentenceIterator.hasNext()) {
                    Sentence sentence = (Sentence)sentenceIterator.next();
                    FSIterator tokIterator = tokenIndex.subiterator((AnnotationFS)sentence);
                    String sentenceText = DEFAULT_DELIMITER;
                    while (tokIterator.hasNext()) {
                        if (this.addPOSTAG) {
                            sentenceText = this.returnWithPOSTAG(tokIterator, sentenceText);
                            continue;
                        }
                        sentenceText = this.returnWithoutPOSTAG(tokIterator, sentenceText);
                    }
                    sentences.add(sentenceText);
                }
                this.writeSentences2File(fileId, sentences);
            } else if (this.mode == Mode.DOCUMENT) {
                File outputFile = new File(this.directory.getCanonicalPath() + File.separator + fileId + ".txt" + (this.gzip != false ? ".gz" : DEFAULT_DELIMITER));
                LOGGER.trace("Writing the verbatim CAS document text to {}", (Object)outputFile);
                this.writeSentences2File(fileId, Arrays.asList(jcas.getDocumentText()));
            }
        }
        catch (IOException | CASException | CASRuntimeException e) {
            LOGGER.error("Error while writing: ", e);
            throw new AnalysisEngineProcessException(e);
        }
    }

    private String returnWithoutPOSTAG(FSIterator tokIterator, String sentenceText) {
        Token token = (Token)tokIterator.next();
        String tokenText = token.getCoveredText();
        sentenceText = ((String)sentenceText).equals(DEFAULT_DELIMITER) ? tokenText : (String)sentenceText + " " + tokenText;
        return sentenceText;
    }

    private String returnWithPOSTAG(FSIterator tokIterator, String sentenceText) {
        Token token = (Token)tokIterator.next();
        String tokenText = token.getCoveredText();
        POSTag posTag = null;
        FSArray postags = token.getPosTag();
        if (postags != null && postags.size() > 0) {
            posTag = (POSTag)postags.get(0);
        }
        String postagText = posTag.getValue();
        sentenceText = ((String)sentenceText).equals(DEFAULT_DELIMITER) ? tokenText + this.delimiter + postagText : (String)sentenceText + " " + tokenText + this.delimiter + postagText;
        return sentenceText;
    }

    public String getDocID(JCas jcas) throws CASException {
        String docID = null;
        JFSIndexRepository indexes = jcas.getJFSIndexRepository();
        for (Header h : indexes.getAnnotationIndex(Header.type)) {
            docID = h.getDocId();
        }
        return docID;
    }

    public void collectionProcessComplete() throws AnalysisEngineProcessException {
        if (this.currentArchive != null) {
            try {
                this.currentArchive.close();
            }
            catch (IOException e) {
                throw new AnalysisEngineProcessException();
            }
        }
    }

    /*
     * WARNING - Removed try catching itself - possible behaviour change.
     */
    private void writeSentences2File(String fileId, List<String> sentences) throws IOException {
        OutputStream os = null;
        boolean zipContentWritten = false;
        try {
            File outputFile = new File(this.directory.getCanonicalPath() + File.separator + fileId + ".txt" + (this.gzip != false ? ".gz" : DEFAULT_DELIMITER));
            OutputStream outputStream = os = this.zip != false ? this.currentArchive : FileUtilities.getOutputStreamToFile((File)outputFile);
            if (this.zip.booleanValue()) {
                if (os == null) {
                    os = this.createNextArchiveStream();
                }
                try {
                    ((ZipOutputStream)os).putNextEntry(new ZipEntry(outputFile.getName()));
                    zipContentWritten = true;
                }
                catch (ZipException e) {
                    if (e.getMessage().contains("duplicate")) {
                        LOGGER.warn("The file {} is already present in the current ZIP archive. Thus, the current file is omitted.", (Object)outputFile.getName());
                    }
                    throw e;
                }
            }
            if (!this.zip.booleanValue() || zipContentWritten) {
                for (String text : sentences) {
                    byte[] bytes = this.lowercase != false ? text.toLowerCase().getBytes(StandardCharsets.UTF_8) : text.getBytes(StandardCharsets.UTF_8);
                    os.write(bytes, 0, bytes.length);
                    os.write(this.linesepBytes, 0, this.linesepBytes.length);
                }
            }
        }
        finally {
            if (zipContentWritten) {
                ((ZipOutputStream)os).closeEntry();
                ++this.currentArchiveSize;
                if (this.currentArchiveSize >= this.zipSize) {
                    os.close();
                    this.createNextArchiveStream();
                }
            } else if (!this.zip.booleanValue()) {
                os.close();
            }
        }
    }

    private long getPid() {
        return ProcessHandle.current().pid();
    }

    private String getHostName() {
        String hostName;
        try {
            InetAddress address = InetAddress.getLocalHost();
            hostName = address.getHostName();
        }
        catch (UnknownHostException e) {
            throw new IllegalStateException(e);
        }
        return hostName;
    }

    private static enum Mode {
        TOKEN,
        DOCUMENT;

    }
}

