/*
 * Decompiled with CFR 0.152.
 */
package edu.northwestern.at.morphadorner.tools.addcharacteroffsets;

import edu.northwestern.at.utils.CharUtils;
import edu.northwestern.at.utils.FileUtils;
import edu.northwestern.at.utils.PatternReplacer;
import edu.northwestern.at.utils.UnicodeReader;
import java.io.BufferedOutputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;

/**
 * Command-line tool that reads an adorned XML file line by line and
 * produces two derived files:
 * <ul>
 *   <li>an adorned output file in which every line containing a
 *       {@code <w ...>} tag gets an extra {@code cof="N"} attribute, and</li>
 *   <li>an unadorned output file containing the word text, the
 *       whitespace from {@code <c>} lines, and all other lines verbatim.</li>
 * </ul>
 * {@code N} is the length of the unadorned text accumulated so far, measured
 * in Java {@code char}s at the moment the word is appended. Line separators
 * inserted into the unadorned text count towards that length.
 */
public class AddCharacterOffsets {
    /** Platform line separator used when building the unadorned text. */
    protected static final String LINE_SEPARATOR = System.getProperty("line.separator");

    /** Pattern splitting a word line into left text, attributes, word text and right text. */
    protected static String wPattern = "^(.*)<w (.*)>(.*)</w>(.*)$";
    protected static PatternReplacer wreplacer = new PatternReplacer(wPattern, "");

    /** Group indices for {@link #wPattern}. */
    protected static final int LEFT = 1;
    protected static final int ATTRS = 2;
    protected static final int WORD = 3;
    protected static final int RIGHT = 4;

    /** Pattern splitting a {@code <c>} line into the text before the tag and the tag's content. */
    protected static String cPattern = "^(.*)<c>(.*)</c>$";

    /** Group indices for {@link #cPattern}. */
    protected static final int CLEFT = 1;
    protected static final int CDATA = 2;
    // NOTE(review): cPattern declares only two capture groups, so index 3 is
    // never used in this class; kept because it is part of the protected API.
    protected static final int CRIGHT = 3;
    protected static PatternReplacer creplacer = new PatternReplacer(cPattern, "");

    /** Width after which a line break is inserted into the unadorned text. */
    protected static final int MAXLINEWIDTH = 80;

    /**
     * Program entry point.
     *
     * @param args input adorned file, adorned output file, unadorned output
     *             file; with fewer than three arguments the usage text is
     *             printed and the JVM exits with status 1
     */
    public static void main(String[] args) {
        if (args.length >= 3) {
            new AddCharacterOffsets(args);
        } else {
            AddCharacterOffsets.displayUsage();
            System.exit(1);
        }
    }

    /** Prints the command-line usage text to standard output. */
    public static void displayUsage() {
        System.out.println();
        System.out.println("Usage:");
        System.out.println();
        System.out.println("java edu.northwestern.at.morphadorner.tools.addcharacteroffsets.AddCharacterOffsets adornedinput.xml adornedoutput.xml unadornedoutput.xml");
        System.out.println();
        System.out.println("adornedinput.xml -- Standard MorphAdorner adorned output file.");
        System.out.println("adornedoutput.xml -- Derived adorned file with character");
        System.out.println("offsets added to tags.");
        System.out.println("unadornedoutput.xml -- Derived unadorned file whose word");
        System.out.println("offsets are given in adornedoutput.xml file.");
        System.out.println();
        System.out.println("The derived adorned output file adornedoutput.xml adds a");
        System.out.println("cof= attribute to each <w> tag. The cof= attribute");
        System.out.println("specifies the character (not byte) offset of each word in");
        System.out.println("the unadornedoutput.xml file. The latter file removes the");
        System.out.println("<w> and <c> tags from the adorned input file and outputs");
        System.out.println("the word and whitespace text as specified by the <w> and");
        System.out.println("<c> tags.");
    }

    /**
     * Runs the conversion.
     *
     * <p>The adorned output is written line by line while reading; the
     * unadorned text is accumulated in memory and written once the input has
     * been fully processed. Any exception is caught and its stack trace
     * printed; the input and adorned-output streams are closed in every
     * case.</p>
     *
     * @param args {@code args[0]} input file name, {@code args[1]} adorned
     *             output file name, {@code args[2]} unadorned output file name
     */
    public AddCharacterOffsets(String[] args) {
        String inputFileName = args[0];
        String longOutputFileName = args[1];
        String shortOutputFileName = args[2];
        try {
            // Local accumulator only, so no synchronization is required.
            StringBuilder sb = new StringBuilder();

            // The underlying file streams are registered as separate resources
            // so they are released even if a wrapping constructor throws.
            try (FileInputStream inputStream = new FileInputStream(new File(inputFileName));
                 BufferedReader in = new BufferedReader(new UnicodeReader(inputStream, "utf-8"));
                 FileOutputStream outputStream = new FileOutputStream(longOutputFileName, false);
                 PrintWriter longPrintWriter = new PrintWriter(
                     new OutputStreamWriter(new BufferedOutputStream(outputStream), "utf-8"))) {

                // Width of the unadorned line currently being built.
                int lineWidth = 0;
                // Width of the line that was just wrapped; restored when the
                // wrap is undone for a punctuation token.
                int lastLineWidth = 0;
                // True while word/whitespace text has been emitted without a
                // terminating line separator.
                boolean needEOL = false;
                // True when the next emitted text starts a new unadorned line.
                boolean needBlanks = false;
                // True until the first word after a non-word line is seen.
                boolean firstWord = true;

                String line = in.readLine();
                while (line != null) {
                    if (line.indexOf("<w ") >= 0) {
                        // NOTE(review): assumes matchGroups returns all groups
                        // for every line containing "<w " -- confirm behavior
                        // of PatternReplacer when the pattern does not match.
                        String[] groupValues = wreplacer.matchGroups(line);
                        String wordText = groupValues[WORD];
                        if (needBlanks) {
                            if (CharUtils.isPunctuation(wordText) && !firstWord) {
                                // Undo the line break just inserted so the
                                // punctuation stays attached to the prior text.
                                sb.setLength(sb.length() - LINE_SEPARATOR.length());
                                lineWidth = lastLineWidth;
                            } else {
                                // Start the new line with the text that
                                // precedes the tag on its input line.
                                sb.append(groupValues[LEFT]);
                                needBlanks = false;
                                lineWidth += groupValues[LEFT].length();
                            }
                        }
                        // Offset of this word within the unadorned text.
                        int charPos = sb.length();
                        line = groupValues[LEFT] + "<w " + groupValues[ATTRS]
                            + " cof=\"" + charPos + "\">" + wordText + "</w>"
                            + groupValues[RIGHT];
                        sb.append(wordText);
                        needEOL = true;
                        lineWidth += wordText.length();
                        if (lineWidth > MAXLINEWIDTH) {
                            sb.append(LINE_SEPARATOR);
                            lastLineWidth = lineWidth;
                            lineWidth = 0;
                            needBlanks = true;
                        }
                        firstWord = false;
                    } else if (line.indexOf("<c>") >= 0) {
                        String[] groupValues = creplacer.matchGroups(line);
                        if (needBlanks) {
                            // At the start of a line the text preceding the
                            // tag is emitted instead of the tag's content.
                            sb.append(groupValues[CLEFT]);
                            needBlanks = false;
                            lineWidth += groupValues[CLEFT].length();
                        } else {
                            sb.append(groupValues[CDATA]);
                        }
                        needEOL = true;
                        lineWidth += groupValues[CDATA].length();
                        if (lineWidth > MAXLINEWIDTH) {
                            sb.append(LINE_SEPARATOR);
                            lastLineWidth = lineWidth;
                            lineWidth = 0;
                            needBlanks = true;
                        }
                    } else {
                        // Any other line is copied verbatim on a line of its own.
                        if (needEOL) {
                            sb.append(LINE_SEPARATOR);
                            needEOL = false;
                        }
                        sb.append(line);
                        sb.append(LINE_SEPARATOR);
                        needBlanks = true;
                        lastLineWidth = 0;
                        lineWidth = 0;
                        firstWord = true;
                    }
                    // Word lines carry the added cof= attribute here; all
                    // other lines are written unchanged.
                    longPrintWriter.println(line);
                    line = in.readLine();
                }
            }
            FileUtils.writeTextFile(shortOutputFileName, false, sb.toString(), "utf-8");
        }
        catch (Exception e) {
            e.printStackTrace();
        }
    }
}

