/*
 * Decompiled with CFR 0.152.
 */
package net.sf.okapi.steps.encodingconversion;

import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import net.sf.okapi.common.BOMNewlineEncodingDetector;
import net.sf.okapi.common.Event;
import net.sf.okapi.common.HTMLCharacterEntities;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.IResource;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.exceptions.OkapiException;
import net.sf.okapi.common.exceptions.OkapiIOException;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.pipeline.annotations.StepParameterMapping;
import net.sf.okapi.common.pipeline.annotations.StepParameterType;
import net.sf.okapi.common.resource.RawDocument;
import net.sf.okapi.steps.encodingconversion.Parameters;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@UsingParameters(value=Parameters.class)
public class EncodingConversionStep
extends BasePipelineStep {
    private final Logger logger = LoggerFactory.getLogger(((Object)((Object)this)).getClass());
    private static final int MAXBUF = 1024;
    private Parameters params = new Parameters();
    private String outFormat;
    private CharsetEncoder outputEncoder;
    private boolean useCER;
    private CharBuffer buffer;
    private Pattern pattern;
    private Pattern xmlEncDecl;
    private Pattern xmlDecl;
    private Pattern htmlEncDecl;
    private Pattern htmlDecl;
    private Pattern htmlHead;
    private String prevBuf;
    private boolean isXML;
    private boolean isHTML;
    private URI outputURI;
    private URI inputURI;
    private String outputEncoding;
    private HTMLCharacterEntities entities = new HTMLCharacterEntities();

    /**
     * Stores the URI of the output document. Mapped to the pipeline's
     * {@code OUTPUT_URI} step parameter.
     *
     * @param outputURI the output location; used to create the output file when
     *                  this step is the last output step
     */
    @StepParameterMapping(parameterType=StepParameterType.OUTPUT_URI)
    public void setOutputURI(URI outputURI) {
        this.outputURI = outputURI;
    }

    /**
     * Stores the URI of the input document. Mapped to the pipeline's
     * {@code INPUT_URI} step parameter.
     *
     * @param inputURI the input location; its path extension is used to guess
     *                 whether the document is XML or HTML
     */
    @StepParameterMapping(parameterType=StepParameterType.INPUT_URI)
    public void setInputURI(URI inputURI) {
        this.inputURI = inputURI;
    }

    /**
     * Stores the name of the character set the document is converted to.
     * Mapped to the pipeline's {@code OUTPUT_ENCODING} step parameter.
     *
     * @param outputEncoding the charset name used for the output writer and
     *                       written into XML/HTML encoding declarations
     */
    @StepParameterMapping(parameterType=StepParameterType.OUTPUT_ENCODING)
    public void setOutputEncoding(String outputEncoding) {
        this.outputEncoding = outputEncoding;
    }

    /**
     * Returns the human-readable description of this step.
     *
     * @return a fixed description string
     */
    public String getDescription() {
        return "Convert the character set encoding of a text-based file. Expects: raw document. Sends back: raw document.";
    }

    /**
     * Returns the display name of this step.
     *
     * @return the fixed name {@code "Encoding Conversion"}
     */
    public String getName() {
        return "Encoding Conversion";
    }

    /**
     * Returns the parameters object currently used by this step.
     *
     * @return the current {@link Parameters} instance
     */
    public IParameters getParameters() {
        return this.params;
    }

    /**
     * Replaces the parameters object used by this step.
     *
     * @param params the new parameters; must be a {@link Parameters} instance,
     *               otherwise the cast throws {@link ClassCastException}
     */
    public void setParameters(IParameters params) {
        this.params = (Parameters)params;
    }

    /**
     * Prepares the per-batch state: allocates the working buffer, compiles the
     * patterns used to find XML/HTML encoding declarations, builds the
     * un-escaping pattern from the enabled un-escape options, and selects the
     * format used when writing escaped characters.
     *
     * @param event the start-batch event
     * @return the same event, unchanged
     */
    protected Event handleStartBatch(Event event) {
        this.buffer = CharBuffer.allocate(MAXBUF);

        // Patterns locating (or helping to insert) encoding declarations.
        this.xmlEncDecl = Pattern.compile("((<\\?xml)(.*?)(encoding(\\s*?)=(\\s*?)(\\'|\\\")))", Pattern.DOTALL);
        this.xmlDecl = Pattern.compile("((<\\?xml)(.*?)(version(\\s*?)=(\\s*?)(\\'|\\\")))", Pattern.DOTALL);
        this.htmlEncDecl = Pattern.compile("(<meta)([^>]*?)(content)(\\s*?)=(\\s*?)[\\'|\\\"](\\s*?)text/html(\\s*?);(\\s*?)charset(\\s*?)=(\\s*?)([^\\s]+?)(\\s|\\\"|\\')", Pattern.DOTALL | Pattern.CASE_INSENSITIVE);
        this.htmlDecl = Pattern.compile("(<html)", Pattern.CASE_INSENSITIVE);
        this.htmlHead = Pattern.compile("<head>", Pattern.CASE_INSENSITIVE);

        // One alternation branch per enabled un-escape option.
        StringBuilder expr = new StringBuilder();
        if (this.params.getUnescapeNCR()) {
            expr.append("&#([0-9]*?);|&#[xX]([0-9a-fA-F]*?);");
        }
        if (this.params.getUnescapeCER()) {
            if (expr.length() > 0) {
                expr.append("|");
            }
            expr.append("(&\\w*?;)");
        }
        if (this.params.getUnescapeJava()) {
            if (expr.length() > 0) {
                expr.append("|");
            }
            expr.append("(\\\\[Uu]([0-9a-fA-F]{1,4}))");
        }
        if (expr.length() == 0) {
            // Nothing to un-escape: the conversion loop skips that stage entirely.
            this.pattern = null;
        } else {
            this.pattern = Pattern.compile(expr.toString(), Pattern.CASE_INSENSITIVE);
            this.entities.ensureInitialization(false);
        }

        // Output escape notation; character entity references are only used for notation 3.
        this.useCER = false;
        switch (this.params.getEscapeNotation()) {
            case 1: {
                this.outFormat = "&#x%x;";
                break;
            }
            case 2: {
                this.outFormat = "&#%d;";
                break;
            }
            case 4: {
                this.outFormat = "\\u%04X";
                break;
            }
            case 5: {
                this.outFormat = "\\u%04x";
                break;
            }
            case 6: {
                this.outFormat = this.params.getUserFormat();
                break;
            }
            case 3: {
                this.useCER = true;
                this.entities.ensureInitialization(false);
                // Characters without a named entity use the default format below.
                this.outFormat = "&#x%X;";
                break;
            }
            default: {
                this.outFormat = "&#x%X;";
            }
        }
        return event;
    }

    /**
     * Converts the raw document carried by {@code event} to the output encoding
     * and replaces the event's resource with a new raw document that points to
     * the converted file.
     *
     * <p>The input is read in chunks into the working buffer. The first chunk
     * has its XML/HTML encoding declaration rewritten, each chunk is optionally
     * un-escaped, and every character is then written either as-is or in the
     * configured escape notation.
     *
     * <p>The output goes to the file created from the output URI when this is
     * the last output step, otherwise to a temporary file.
     *
     * @param event the event whose resource is the {@link RawDocument} to convert
     * @return the same event, now carrying the converted raw document
     * @throws OkapiIOException if the temporary output file cannot be created
     * @throws OkapiException if reading or writing fails
     */
    protected Event handleRawDocument(Event event) {
        RawDocument rawDoc = (RawDocument)event.getResource();
        BufferedReader reader = null;
        OutputStreamWriter writer = null;
        try {
            // Start clean: a previous document that failed midway may have left
            // a carried-over tail or a partially filled buffer behind.
            this.prevBuf = null;
            this.buffer.clear();
            this.isXML = false;
            this.isHTML = false;
            String ext = this.inputURI == null ? null : Util.getExtension((String)this.inputURI.getPath());
            if (ext != null && !Util.isEmpty((String)ext)) {
                this.isHTML = ext.toLowerCase().startsWith(".htm");
                this.isXML = ext.equalsIgnoreCase(".xml");
            }

            InputStream is = rawDoc.getStream();
            BOMNewlineEncodingDetector detector = new BOMNewlineEncodingDetector(is, rawDoc.getEncoding());
            detector.detectAndRemoveBom();
            rawDoc.setEncoding(detector.getEncoding());
            String inputEncoding = rawDoc.getEncoding();
            if (!detector.isAutodetected()) {
                // The detector did not auto-detect the encoding: look for a declaration in the first chunk.
                reader = new BufferedReader(rawDoc.getReader());
                reader.read(this.buffer);
                String detectedEncoding = this.checkDeclaration(inputEncoding);
                if (!detectedEncoding.equalsIgnoreCase(inputEncoding)) {
                    inputEncoding = detectedEncoding;
                }
                reader.close();
                reader = null;
            }
            // NOTE(review): the declared encoding found above is only logged; the reader
            // below is still opened through rawDoc with the encoding set from the
            // detector. Confirm whether rawDoc.setEncoding(inputEncoding) is intended here.
            reader = new BufferedReader(rawDoc.getReader());
            this.logger.info("Input encoding: {}", (Object)inputEncoding);

            File outFile;
            if (this.isLastOutputStep()) {
                outFile = rawDoc.createOutputFile(this.outputURI);
            } else {
                try {
                    outFile = File.createTempFile("~okapi-40_okp-enc_", ".tmp");
                }
                catch (IOException e) {
                    throw new OkapiIOException("Cannot create temporary output.", e);
                }
                catch (SecurityException e) {
                    throw new OkapiIOException("Cannot create temporary output.", e);
                }
            }

            FileOutputStream fileOut = new FileOutputStream(outFile);
            try {
                writer = new OutputStreamWriter((OutputStream)new BufferedOutputStream(fileOut), this.outputEncoding);
            }
            finally {
                if (writer == null) {
                    // The writer could not be created (e.g. unsupported encoding): do not leak the file handle.
                    fileOut.close();
                }
            }
            this.outputEncoder = Charset.forName(this.outputEncoding).newEncoder();
            this.logger.info("Output encoding: {}", (Object)this.outputEncoding);
            Util.writeBOMIfNeeded((Writer)writer, (boolean)this.params.getBOMonUTF8(), (String)this.outputEncoding);

            this.transcodeContent(reader, writer, inputEncoding);

            reader.close();
            reader = null;
            writer.close();
            writer = null;
            rawDoc.finalizeOutput();
            RawDocument newDoc = new RawDocument(outFile.toURI(), this.outputEncoding, rawDoc.getSourceLocale(), rawDoc.getTargetLocale());
            event.setResource((IResource)newDoc);
            return event;
        }
        catch (IOException e) {
            throw new OkapiException((Throwable)e);
        }
        finally {
            // Handles are still open here only when an exception is already propagating.
            // Close both, and log close failures instead of throwing so the original
            // exception is not masked.
            if (writer != null) {
                try {
                    writer.close();
                }
                catch (IOException e) {
                    this.logger.warn("Cannot close the output writer: {}", (Object)e.getMessage());
                }
            }
            if (reader != null) {
                try {
                    reader.close();
                }
                catch (IOException e) {
                    this.logger.warn("Cannot close the input reader: {}", (Object)e.getMessage());
                }
            }
        }
    }

    /**
     * Reads the whole input chunk by chunk, applies the declaration rewrite
     * (first chunk only) and the optional un-escaping, and writes the result.
     *
     * @param reader the opened input
     * @param writer the opened output
     * @param inputEncoding the input encoding name passed to the declaration check
     * @throws IOException if reading or writing fails
     */
    private void transcodeContent(BufferedReader reader, Writer writer, String inputEncoding) throws IOException {
        CharBuffer single = CharBuffer.allocate(1);
        boolean firstChunk = true;
        while (true) {
            this.buffer.clear();
            // Text held back from the previous chunk goes in front of the new data.
            String carried = this.prevBuf;
            this.prevBuf = null;
            if (carried != null) {
                this.buffer.append(carried);
            }
            boolean atEnd = reader.read(this.buffer) == -1;
            if (atEnd) {
                if (carried == null) {
                    return;
                }
                // Nothing more will come: process the held-back tail as the final chunk.
                this.buffer.limit(this.buffer.position());
            }
            if (firstChunk) {
                this.checkDeclaration(inputEncoding);
                firstChunk = false;
            }
            if (this.pattern != null) {
                if (!atEnd) {
                    // An escape sequence may be cut by the chunk boundary: hold its start back.
                    this.checkSplitSequence();
                }
                this.unescape();
            }
            int count = this.buffer.position();
            this.buffer.position(0);
            if (!atEnd && count > 0 && Character.isHighSurrogate(this.buffer.get(count - 1))) {
                // Keep a trailing high surrogate for the next chunk so that it is
                // examined together with its low surrogate.
                --count;
                String held = String.valueOf(this.buffer.get(count));
                this.prevBuf = this.prevBuf == null ? held : held + this.prevBuf;
            }
            for (int i = 0; i < count; ++i) {
                char c = this.buffer.get(i);
                if (i + 1 < count && !this.params.getEscapeAll()) {
                    char next = this.buffer.get(i + 1);
                    // A surrogate pair must be tested as a whole: a lone surrogate is never encodable.
                    if (Character.isSurrogatePair(c, next) && this.outputEncoder.canEncode(new String(new char[]{c, next}))) {
                        writer.write(c);
                        writer.write(next);
                        ++i;
                        continue;
                    }
                }
                this.writeConverted(writer, c, single);
            }
        }
    }

    /**
     * Writes one character, either directly or in escaped form.
     *
     * <p>The character is written as-is when the output encoding supports it
     * and it is not subject to the escape-all option (which applies to
     * characters above U+007F). Otherwise it is written as a character entity
     * reference, as escaped bytes, or in the configured escape format. When
     * none of these applies, the hexadecimal NCR form {@code &#xHHHH;} is used.
     *
     * @param writer the output
     * @param c the character to write
     * @param scratch a one-character buffer reused for byte-level encoding
     * @throws IOException if writing fails
     */
    private void writeConverted(Writer writer, char c, CharBuffer scratch) throws IOException {
        boolean canEncode = this.outputEncoder.canEncode(c);
        if (!canEncode && this.params.getReportUnsupported()) {
            // %X needs an integer argument; a boxed char would raise IllegalFormatConversionException.
            this.logger.warn(String.format("Un-supported character: U+%04X ('%c')", (int)c, Character.valueOf(c)));
        }
        if (canEncode && (!this.params.getEscapeAll() || c <= '\u007f')) {
            writer.write(c);
            return;
        }
        if (this.useCER) {
            String name = this.entities.getName(c);
            if (name != null) {
                writer.write("&" + name + ";");
                return;
            }
        } else if (this.params.getUseBytes()) {
            if (canEncode) {
                scratch.put(0, c);
                scratch.position(0);
                ByteBuffer encoded = this.outputEncoder.encode(scratch);
                for (int j = 0; j < encoded.limit(); ++j) {
                    // Format each byte as an unsigned value.
                    writer.write(String.format(this.outFormat, encoded.get(j) & 0xFF));
                }
                return;
            }
        } else {
            writer.write(String.format(this.outFormat, (int)c));
            return;
        }
        // Fallback: no named entity, or the bytes of an unsupported character cannot be produced.
        writer.write(String.format("&#x%X;", (int)c));
    }

    /**
     * Looks for an XML or HTML encoding declaration in the text currently held
     * in the working buffer (from index 0 to the current position), rewrites it
     * to name the output encoding, and reloads the buffer with the result.
     *
     * <p>When no declaration exists but the document is known to be XML or
     * HTML, a declaration is inserted instead. The buffer is re-allocated if
     * the rewritten text no longer fits. On return the buffer's position and
     * limit are both at the end of the rewritten text.
     *
     * @param defEncoding the encoding to report when no declaration is found
     * @return the encoding named by the declaration; {@code "UTF-8"} for XML
     *         without an encoding pseudo-attribute; otherwise {@code defEncoding}
     */
    private String checkDeclaration(String defEncoding) {
        this.buffer.limit(this.buffer.position());
        this.buffer.position(0);
        StringBuilder text = new StringBuilder(this.buffer.toString());
        String encoding = defEncoding;

        Matcher m = this.xmlEncDecl.matcher(text);
        if (m.find()) {
            // XML declaration with an encoding: replace the value between the quotes.
            this.isXML = true;
            int valueStart = m.end();
            String delim = String.valueOf(text.charAt(valueStart - 1));
            int valueEnd = text.indexOf(delim, valueStart);
            if (valueEnd != -1) {
                encoding = text.substring(valueStart, valueEnd);
                text.replace(valueStart, valueEnd, this.outputEncoding);
            }
        } else {
            m = this.xmlDecl.matcher(text);
            if (m.find()) {
                // XML declaration without an encoding: add one after the version value.
                this.isXML = true;
                encoding = "UTF-8";
                int valueStart = m.end();
                String delim = String.valueOf(text.charAt(valueStart - 1));
                int valueEnd = text.indexOf(delim, valueStart);
                if (valueEnd != -1) {
                    text.insert(valueEnd + 1, " encoding=\"" + this.outputEncoding + "\"");
                }
            } else if (this.isXML) {
                // Known to be XML but no declaration at all: prepend one.
                encoding = "UTF-8";
                text.insert(0, "<?xml version=\"1.0\" encoding=\"" + this.outputEncoding + "\" ?>");
            }
        }

        // The matcher is created after the XML edits, so its offsets refer to the edited text.
        m = this.htmlEncDecl.matcher(text);
        if (m.find()) {
            this.isHTML = true;
            encoding = m.group(11);
            // Replace exactly the captured charset value. Searching for the value string
            // from the start of the tag could hit an earlier identical substring.
            text.replace(m.start(11), m.end(11), this.outputEncoding);
        } else if (this.isHTML) {
            m = this.htmlHead.matcher(text);
            if (m.find()) {
                text.insert(m.end(), String.format("<meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"></meta>", this.outputEncoding));
            } else {
                int n;
                m = this.htmlDecl.matcher(text);
                if (m.find() && (n = text.indexOf(">", m.end())) != -1) {
                    // No <head>: create one right after the opening <html ...> tag.
                    text.insert(n + 1, String.format("<head><meta http-equiv=\"Content-Type\" content=\"text/html; charset=%s\"></meta></head>", this.outputEncoding));
                }
            }
        }

        // Reload the buffer with the (possibly longer) text.
        int len = text.length();
        if (len > this.buffer.capacity()) {
            this.buffer = CharBuffer.allocate(len);
        } else {
            this.buffer.clear();
        }
        this.buffer.append(text.toString());
        this.buffer.limit(len);
        return encoding;
    }

    /**
     * Guards against an escape sequence being cut by the end of the current
     * chunk. The last ten characters of the buffer content are scanned
     * backwards for {@code '&'} or {@code '\'}. If one is found, the text from
     * that character onwards is moved to {@code prevBuf} and the buffer is
     * truncated just before it. On return the buffer's position and limit are
     * both at the end of the retained text.
     */
    private void checkSplitSequence() {
        int end = this.buffer.position();
        this.buffer.position(0);
        this.prevBuf = null;

        int cut = end;
        int lowest = Math.max(0, end - 10);
        for (int idx = end - 1; idx >= lowest; --idx) {
            char c = this.buffer.charAt(idx);
            if (c == '&' || c == '\\') {
                // Possible start of an escape sequence: hold it back.
                this.prevBuf = this.buffer.subSequence(idx, end).toString();
                cut = idx;
                break;
            }
        }
        this.buffer.position(cut);
        this.buffer.limit(cut);
    }

    /**
     * Replaces, in the working buffer (index 0 to the current position), every
     * escape sequence matched by {@code pattern} with the character it denotes.
     *
     * <p>Sequences that cannot be resolved are left untouched, as are those
     * that denote {@code "}, {@code &}, {@code '}, {@code <} or {@code >}.
     * On return the buffer's position is at the end of the resulting text.
     */
    private void unescape() {
        int len = this.buffer.position();
        this.buffer.position(0);
        Matcher m = this.pattern.matcher(this.buffer);
        StringBuilder out = new StringBuilder(len);
        int pos = 0;
        boolean found = false;
        while (m.find(pos)) {
            found = true;
            if (m.start() > pos) {
                // Copy the plain text between the previous match and this one.
                out.append(this.buffer.subSequence(pos, m.start()));
            }
            pos = m.end();
            String seq = m.group();
            int value = this.decodeEscape(seq);
            if (EncodingConversionStep.mustStayEscaped(value)) {
                out.append(seq);
            } else {
                // appendCodePoint emits a surrogate pair for values above U+FFFF.
                out.appendCodePoint(value);
            }
        }
        if (found) {
            if (pos < len) {
                out.append(this.buffer.subSequence(pos, len));
            }
            // The result is never longer than the original text, so it fits.
            this.buffer.clear();
            this.buffer.append(out);
        } else {
            this.buffer.position(len);
        }
    }

    /**
     * Resolves one matched escape sequence to a code point.
     *
     * <p>The notation is decided from the leading characters: {@code &#x} or
     * {@code &#X} for a hexadecimal NCR, {@code &#} for a decimal NCR, a
     * backslash for a Java-style escape, anything else for a named entity.
     * Deciding this way keeps named entities that contain an {@code x} or
     * {@code u} (such as {@code &exist;}) from being parsed as numbers.
     *
     * @param seq the full matched sequence, delimiters included
     * @return the code point; {@code -1} when the digits are missing or out of
     *         {@code int} range; for named entities, whatever the entity
     *         lookup returns
     */
    private int decodeEscape(String seq) {
        try {
            if (seq.startsWith("&#")) {
                String digits = seq.substring(2, seq.length() - 1);
                if (digits.startsWith("x") || digits.startsWith("X")) {
                    return Integer.parseInt(digits.substring(1), 16);
                }
                return Integer.parseInt(digits);
            }
            if (seq.charAt(0) == '\\') {
                // Backslash, 'u' or 'U', then one to four hexadecimal digits.
                return Integer.parseInt(seq.substring(2), 16);
            }
        }
        catch (NumberFormatException e) {
            // Empty or overflowing digits (e.g. "&#;"): leave the sequence as it is.
            return -1;
        }
        return this.entities.lookupName(seq.substring(1, seq.length() - 1));
    }

    /**
     * Tells whether an escape must be kept in its escaped form.
     *
     * @param value the decoded code point, or a negative value when unresolved
     * @return {@code true} for values that are not valid code points and for
     *         the characters {@code "}, {@code &}, {@code '}, {@code <} and {@code >}
     */
    private static boolean mustStayEscaped(int value) {
        if (!Character.isValidCodePoint(value)) {
            return true;
        }
        switch (value) {
            case '"':
            case '&':
            case '\'':
            case '<':
            case '>': {
                return true;
            }
            default: {
                return false;
            }
        }
    }
}

