package org.apache.uima.annotator.dict_annot.dictionary.impl;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.StringTokenizer;
import org.apache.uima.UIMAFramework;
import org.apache.uima.analysis_engine.AnalysisEngine;
import org.apache.uima.cas.CAS;
import org.apache.uima.cas.FSIterator;
import org.apache.uima.cas.Type;
import org.apache.uima.cas.text.AnnotationFS;
import org.apache.uima.internal.util.CommandLineParser;
import org.apache.uima.pear.tools.PackageInstaller;
import org.apache.uima.util.FileUtils;
import org.apache.uima.util.XMLInputSource;

/* loaded from: input_file:org/apache/uima/annotator/dict_annot/dictionary/impl/DictionaryCreator.class */
/**
 * Command line tool that converts a text file containing one dictionary entry per line
 * into a dictionary XML file.
 *
 * <p>Each input line is split into tokens, either with a {@link StringTokenizer} or with a
 * tokenizer analysis engine installed from a PEAR package. The tokens of a line are joined
 * with the multi-word separator and written as a single {@code <key>} element.
 *
 * <p>The generated file contains the placeholder type name
 * {@code ADD DICTIONARY OUTPUT TYPE HERE}, which has to be replaced by hand afterwards.
 */
public class DictionaryCreator {
    private static final String INPUT_FILE_PARAM = "-input";
    private static final String OUTPUT_FILE_PARAM = "-output";
    private static final String INPUT_FILE_ENCODING_PARAM = "-encoding";
    private static final String INPUT_FILE_LANGUAGE_PARAM = "-lang";
    private static final String TOKENIZER_PARAM = "-tokenizer";
    private static final String TOKEN_TYPE_PARAM = "-tokenType";
    private static final String SEPARATOR_CHAR_PARAM = "-separator";

    /** Delimiter used to split input lines when no separator is given. */
    private static final String SEPARATOR_CHAR = " ";

    /** Multi-word separator written to the dictionary when no separator is given. */
    private static final String OUTPUT_SEPARATOR_CHAR = "|";

    /** Characters that must be replaced by an entity reference in XML output. */
    private static final HashMap<Character, String> entities = new HashMap<>(5);

    static {
        entities.put('<', "&lt;");
        entities.put('>', "&gt;");
        entities.put('&', "&amp;");
        entities.put('\"', "&quot;");
        entities.put('\'', "&apos;");
    }

    /**
     * Creates a command line parser that knows all parameters of this tool.
     * Every parameter expects an argument.
     *
     * @return the configured parser
     */
    private static final CommandLineParser createCmdLineParser() {
        CommandLineParser parser = new CommandLineParser();
        parser.addParameter(INPUT_FILE_PARAM, true);
        parser.addParameter(INPUT_FILE_LANGUAGE_PARAM, true);
        parser.addParameter(OUTPUT_FILE_PARAM, true);
        parser.addParameter(INPUT_FILE_ENCODING_PARAM, true);
        parser.addParameter(TOKENIZER_PARAM, true);
        parser.addParameter(TOKEN_TYPE_PARAM, true);
        parser.addParameter(SEPARATOR_CHAR_PARAM, true);
        return parser;
    }

    /** Prints the command line usage to standard output. */
    private static final void printUsage() {
        System.out.println("Usage: java org.apache.uima.annotator.dict_annot.dictionary.impl.DictionaryCreator -input <InputFile> -encoding <InputFileEncoding> -output <OutputFile> [-tokenizer <TokenizerPear> -tokenType <tokenType>] [-separator <separatorChar>] ");
        System.out.println("Additional optional parameters:");
        System.out.println("  -lang <dictionaryLanguage>");
    }

    /**
     * Checks that all mandatory parameters are present. Every problem found is reported
     * on standard error, so the user sees all of them in a single run.
     *
     * @param commandLineParser parser that has already parsed the command line
     * @return {@code true} if the command line is complete, {@code false} otherwise
     */
    private static final boolean checkCmdLineSyntax(CommandLineParser commandLineParser) {
        boolean valid = true;
        if (!commandLineParser.isInArgsList(INPUT_FILE_PARAM)) {
            System.err.println("InputFile parameter -input is missing");
            valid = false;
        }
        if (!commandLineParser.isInArgsList(INPUT_FILE_ENCODING_PARAM)) {
            System.err.println("InputFile encoding parameter -encoding is missing");
            valid = false;
        }
        if (!commandLineParser.isInArgsList(OUTPUT_FILE_PARAM)) {
            System.err.println("OutputFile parameter -output is missing");
            valid = false;
        }
        if (commandLineParser.isInArgsList(TOKENIZER_PARAM) && !commandLineParser.isInArgsList(TOKEN_TYPE_PARAM)) {
            System.err.println("If a tokenizer is used, the -tokenType paramter must be specified");
            valid = false;
        }
        return valid;
    }

    /**
     * Command line entry point. Terminates the JVM with exit code -1 if the command line
     * cannot be parsed, is incomplete, or the dictionary cannot be created.
     *
     * @param args command line arguments, see {@link #printUsage()}
     */
    public static void main(String[] args) {
        CommandLineParser parser = createCmdLineParser();
        try {
            parser.parseCmdLine(args);
        } catch (Exception e) {
            // Do not continue with a half-parsed command line: the arguments read below
            // would be null and only produce a confusing follow-up failure.
            System.err.println("Error parsing command line: " + e.getMessage());
            printUsage();
            System.exit(-1);
        }
        if (!checkCmdLineSyntax(parser)) {
            printUsage();
            System.exit(-1);
        }

        String inputFile = parser.getParamArgument(INPUT_FILE_PARAM);
        String language = parser.getParamArgument(INPUT_FILE_LANGUAGE_PARAM);
        String encoding = parser.getParamArgument(INPUT_FILE_ENCODING_PARAM);
        String outputFile = parser.getParamArgument(OUTPUT_FILE_PARAM);
        String tokenizerFile = parser.getParamArgument(TOKENIZER_PARAM);
        String tokenType = parser.getParamArgument(TOKEN_TYPE_PARAM);
        String separatorChar = parser.getParamArgument(SEPARATOR_CHAR_PARAM);
        try {
            createDictionary(inputFile, encoding, outputFile, language, tokenizerFile, tokenType, separatorChar);
            System.out.println("The dictionary was sucessfully created at: " + outputFile);
        } catch (Exception e) {
            e.printStackTrace();
            // Report the failure to the calling shell/script as well.
            System.exit(-1);
        }
    }

    /**
     * Creates a dictionary XML file from a text file with one entry per line.
     *
     * @param inputFile path of the text file to read
     * @param encoding character encoding of the input file
     * @param outputFile path of the dictionary XML file to write (always written as UTF-8)
     * @param language optional language id written to the dictionary and passed to the
     *        tokenizer; may be {@code null}
     * @param tokenizerFile optional path of a tokenizer PEAR package; if {@code null} the
     *        lines are split with a {@link StringTokenizer}
     * @param tokenType name of the token annotation type produced by the tokenizer;
     *        required if {@code tokenizerFile} is given
     * @param separatorChar optional separator; if given it is used both to split the input
     *        lines and as multi-word separator of the dictionary, if {@code null} the
     *        input is split at blanks and {@code |} is used as multi-word separator
     * @return always {@code true}; failures are reported by exception
     * @throws Exception if a file cannot be read or written, or the tokenizer cannot be
     *         created or fails
     */
    public static boolean createDictionary(String inputFile, String encoding, String outputFile, String language, String tokenizerFile, String tokenType, String separatorChar) throws Exception {
        if (inputFile == null || !new File(inputFile).canRead()) {
            throw new Exception("Error: Input file " + inputFile + " cannot be read!");
        }

        AnalysisEngine tokenizer = null;
        Type tokenAnnotationType = null;
        CAS cas = null;
        File tempInstallDir = null;
        try {
            if (tokenizerFile != null) {
                File pearFile = new File(tokenizerFile);
                if (!pearFile.canRead()) {
                    throw new Exception("Error: Tokenizer file " + tokenizerFile + " cannot be read!");
                }
                if (tokenType == null) {
                    throw new Exception("Error: Tokenizer tokenType not specified");
                }
                try {
                    tempInstallDir = new File(System.getProperty("java.io.tmpdir"), "~tokenizer_temp_install");
                    tempInstallDir.deleteOnExit();
                    tempInstallDir.mkdir();
                    String descriptorPath = PackageInstaller.installPackage(tempInstallDir, pearFile, true).getComponentPearDescPath();
                    tokenizer = UIMAFramework.produceAnalysisEngine(UIMAFramework.getXMLParser().parseResourceSpecifier(new XMLInputSource(descriptorPath)));
                    cas = tokenizer.newCAS();
                    tokenAnnotationType = cas.getTypeSystem().getType(tokenType);
                } catch (Exception e) {
                    throw new Exception("Error creating tokenizer: " + e.getMessage(), e);
                }
                if (tokenAnnotationType == null) {
                    // Fail early with a clear message instead of failing later inside the
                    // annotation index lookup.
                    throw new Exception("Error: Tokenizer tokenType " + tokenType + " is not defined in the tokenizer type system");
                }
            }

            String inputSeparator;
            String outputSeparator;
            if (separatorChar == null) {
                inputSeparator = SEPARATOR_CHAR;
                outputSeparator = OUTPUT_SEPARATOR_CHAR;
            } else {
                inputSeparator = separatorChar;
                outputSeparator = separatorChar;
            }

            writeDictionary(inputFile, encoding, outputFile, language, tokenizer, cas, tokenAnnotationType, inputSeparator, outputSeparator);
        } finally {
            // Release the tokenizer and its temporary installation on success and on failure.
            try {
                if (tokenizer != null) {
                    tokenizer.destroy();
                }
            } finally {
                if (tempInstallDir != null) {
                    removeTempInstallDir(tempInstallDir);
                }
            }
        }
        return true;
    }

    /**
     * Reads the input file line by line and writes the dictionary XML file. All streams
     * are closed when the method returns, whether it completes normally or not.
     */
    private static void writeDictionary(String inputFile, String encoding, String outputFile, String language, AnalysisEngine tokenizer, CAS cas, Type tokenAnnotationType, String inputSeparator, String outputSeparator) throws Exception {
        try (FileInputStream in = new FileInputStream(inputFile);
                BufferedReader reader = new BufferedReader(new InputStreamReader(in, encoding));
                FileOutputStream out = new FileOutputStream(outputFile);
                BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(out, "UTF-8"))) {
            writer.write("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n");
            writer.write("<dictionary xmlns=\"http://incubator.apache.org/uima\" xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\" xsi:schemaLocation=\"dictionary.xsd\">\n");
            writer.write("<typeCollection>\n");
            // The separator is user input, escape it so that e.g. '"' or '&' cannot break the XML.
            writer.write("<dictionaryMetaData caseNormalization=\"true\" multiWordEntries=\"true\" multiWordSeparator=\"" + replaceXMLEntities(outputSeparator) + "\"/>\n");
            if (language != null) {
                writer.write("<languageId>" + replaceXMLEntities(language) + "</languageId>\n");
            }
            writer.write("<typeDescription>\n");
            writer.write("<typeName> ADD DICTIONARY OUTPUT TYPE HERE</typeName>\n");
            writer.write("</typeDescription>\n");
            writer.write("<entries>\n");

            String line;
            while ((line = reader.readLine()) != null) {
                String key = buildKey(line, tokenizer, cas, tokenAnnotationType, language, inputSeparator, outputSeparator);
                writer.write("<entry>\n");
                writer.write("<key>" + replaceXMLEntities(key) + "</key>\n");
                writer.write("</entry>\n");
            }

            writer.write("</entries>\n");
            writer.write("</typeCollection>\n");
            writer.write("</dictionary>\n");
        }
    }

    /**
     * Tokenizes one input line and joins the tokens with the output separator. The
     * tokenizer engine is used if one is given, otherwise the line is split at the
     * characters of the input separator.
     */
    private static String buildKey(String line, AnalysisEngine tokenizer, CAS cas, Type tokenAnnotationType, String language, String inputSeparator, String outputSeparator) throws Exception {
        StringBuilder joined = new StringBuilder();
        if (tokenizer != null) {
            cas.setDocumentText(line);
            if (language != null) {
                cas.setDocumentLanguage(language);
            }
            tokenizer.process(cas);
            FSIterator tokens = cas.getAnnotationIndex(tokenAnnotationType).iterator();
            while (tokens.hasNext()) {
                joined.append(((AnnotationFS) tokens.next()).getCoveredText());
                joined.append(outputSeparator);
            }
            // The CAS is reused for the next line.
            cas.reset();
        } else {
            StringTokenizer tokens = new StringTokenizer(line, inputSeparator);
            while (tokens.hasMoreTokens()) {
                joined.append(tokens.nextToken());
                joined.append(outputSeparator);
            }
        }
        String key = joined.toString().trim();
        // Drop the separator that was appended after the last token.
        if (key.endsWith(outputSeparator)) {
            key = key.substring(0, key.length() - outputSeparator.length());
        }
        return key;
    }

    /**
     * Removes the temporary tokenizer installation. Files that cannot be deleted right
     * away are scheduled for deletion at JVM exit. Cleanup is best effort: a failure is
     * reported as a warning so it cannot hide an exception of the actual dictionary creation.
     */
    private static void removeTempInstallDir(File tempInstallDir) {
        try {
            FileUtils.deleteRecursive(tempInstallDir);
            ArrayList<?> leftovers = FileUtils.getFiles(tempInstallDir, true);
            if (leftovers == null) {
                return;
            }
            for (Object leftover : leftovers) {
                ((File) leftover).deleteOnExit();
            }
        } catch (Exception e) {
            System.err.println("Warning: could not remove temporary directory " + tempInstallDir + ": " + e.getMessage());
        }
    }

    /**
     * Replaces the characters {@code < > & " '} by their XML entity references.
     *
     * @param text text to escape
     * @return the escaped text
     */
    private static String replaceXMLEntities(String text) {
        StringBuilder escaped = new StringBuilder(text.length());
        for (int i = 0; i < text.length(); i++) {
            char current = text.charAt(i);
            String entity = entities.get(current);
            if (entity != null) {
                escaped.append(entity);
            } else {
                escaped.append(current);
            }
        }
        return escaped.toString();
    }
}
