/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.pipe;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.util.Replacement;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.regex.Pattern;

/**
 * A {@link Pipe} that normalizes the string data of an {@link Instance} and then
 * rewrites it with one or more {@link ReplacementSet}s.
 *
 * <p>Normalization (see {@link #pipe(Instance)}) lower-cases the text, decodes a
 * few XML entities, replaces every character that is not a letter, a number, a
 * hyphen or an apostrophe with a space, and collapses whitespace runs. Each
 * replacement set is then applied, in the order it was loaded, to the
 * space-separated tokens of the result.
 *
 * <p>Replacement sets are populated from text files, one {@link Replacement}
 * per line, via {@link #loadReplacements(String)} and
 * {@link #loadDeletions(String)}.
 */
public class NGramPreprocessor
extends Pipe
implements Serializable {
    /** Replacement sets, applied by {@link #pipe(Instance)} in insertion order. */
    public ArrayList<ReplacementSet> replacementSets = new ArrayList<ReplacementSet>();
    private static final long serialVersionUID = 1L;
    private static final int CURRENT_SERIAL_VERSION = 1;

    /** Matches any single character that is not a letter, a number, '-' or '\''. */
    private static final Pattern NON_TOKEN_CHARS = Pattern.compile("[^\\p{L}\\p{N}\\-\\']");
    /** Matches a run of one or more whitespace characters. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    /**
     * Reads a file and registers one new {@link ReplacementSet} containing a
     * {@code Replacement} built from each line via {@code new Replacement(line)}.
     *
     * @param filename path of the file to read
     * @return the number of lines read, i.e. the number of replacements added
     * @throws IOException if the file cannot be opened or read; in that case no
     *         set is registered
     */
    public int loadReplacements(String filename) throws IOException {
        return this.loadReplacementFile(filename, false);
    }

    /**
     * Reads a file and registers one new {@link ReplacementSet} containing a
     * {@code Replacement} built from each line via
     * {@code new Replacement(line, "")}, i.e. with an empty second argument.
     *
     * @param filename path of the file to read
     * @return the number of lines read, i.e. the number of replacements added
     * @throws IOException if the file cannot be opened or read; in that case no
     *         set is registered
     */
    public int loadDeletions(String filename) throws IOException {
        return this.loadReplacementFile(filename, true);
    }

    /**
     * Shared loader behind {@link #loadReplacements(String)} and
     * {@link #loadDeletions(String)}: builds one set from all lines of the file
     * and appends it to {@link #replacementSets}.
     *
     * @param filename path of the file to read
     * @param asDeletions when {@code true}, each line is passed to the
     *        two-argument {@code Replacement} constructor with {@code ""};
     *        otherwise the one-argument constructor is used
     * @return the number of lines read
     * @throws IOException if the file cannot be opened or read
     */
    private int loadReplacementFile(String filename, boolean asDeletions) throws IOException {
        ReplacementSet set = new ReplacementSet();
        int totalReplacements = 0;
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the replacement files.
        BufferedReader in = new BufferedReader(new FileReader(filename));
        try {
            String line;
            while ((line = in.readLine()) != null) {
                Replacement replacement = asDeletions
                    ? new Replacement(line, "")
                    : new Replacement(line);
                set.addReplacement(replacement);
                ++totalReplacements;
            }
        } finally {
            // Close the reader even when reading or parsing a line fails.
            in.close();
        }
        // Only a fully loaded set is registered.
        this.replacementSets.add(set);
        return totalReplacements;
    }

    /**
     * Normalizes the instance's data and applies every replacement set to it.
     *
     * <p>The data is cast to {@code String}, so an instance carrying any other
     * non-null data type fails with {@code ClassCastException}, and null data
     * fails with {@code NullPointerException}.
     *
     * @param instance the instance whose data is rewritten in place
     * @return the same instance, with its data replaced by the processed string
     */
    @Override
    public Instance pipe(Instance instance) {
        String input = (String)instance.getData();
        // NOTE(review): toLowerCase() uses the default locale; confirm that
        // locale-specific casing (e.g. Turkish dotless i) is acceptable here.
        input = input.toLowerCase();
        // Decode entities. Only the apostrophe survives the filter below; the
        // quote and angle brackets are immediately turned into spaces, so these
        // entities effectively act as token separators.
        input = input.replace("&apos;", "'");
        input = input.replace("&quot;", "\"");
        input = input.replace("&gt;", ">");
        input = input.replace("&lt;", "<");
        input = NON_TOKEN_CHARS.matcher(input).replaceAll(" ");
        input = WHITESPACE_RUN.matcher(input).replaceAll(" ");
        for (ReplacementSet set : this.replacementSets) {
            input = set.applyReplacements(input);
        }
        instance.setData(input);
        return instance;
    }

    /** Writes the serial version tag followed by the list of replacement sets. */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.writeInt(CURRENT_SERIAL_VERSION);
        out.writeObject(this.replacementSets);
    }

    /** Reads back the layout produced by {@code writeObject}. */
    @SuppressWarnings("unchecked")
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        // The version tag is not interpreted, but it must be consumed to keep
        // the stream positioned at the serialized list.
        in.readInt();
        this.replacementSets = (ArrayList<ReplacementSet>)in.readObject();
    }

    /**
     * A group of {@link Replacement}s indexed by their first token, applied
     * together in a single left-to-right pass over a token sequence.
     *
     * <p>This is a non-static inner class, so every instance is bound to the
     * enclosing {@code NGramPreprocessor}.
     */
    public class ReplacementSet
    implements Serializable {
        /** Maps a first token to the replacements registered under it, in insertion order. */
        HashMap<String, ArrayList<Replacement>> replacementIndex = new HashMap<String, ArrayList<Replacement>>();
        private static final long serialVersionUID = 1L;
        private static final int CURRENT_SERIAL_VERSION = 1;

        /**
         * Registers a replacement under the key returned by its
         * {@code getFirstToken()}.
         *
         * @param replacement the replacement to add
         */
        public void addReplacement(Replacement replacement) {
            String key = replacement.getFirstToken();
            ArrayList<Replacement> bucket = this.replacementIndex.get(key);
            if (bucket == null) {
                bucket = new ArrayList<Replacement>();
                this.replacementIndex.put(key, bucket);
            }
            bucket.add(replacement);
        }

        /**
         * Splits the input on single spaces and rebuilds it token by token.
         *
         * <p>At each position the replacements indexed under the current token
         * are tried in insertion order by calling
         * {@code apply(tokens, position, output)}; the first one that returns a
         * position beyond the current one ends the search and scanning resumes
         * from the returned position. If the position is unchanged afterwards,
         * the token itself is copied to the output followed by a space.
         *
         * @param input space-separated text
         * @return the rewritten text; every token copied by this method is
         *         followed by a single space, including the last one
         */
        public String applyReplacements(String input) {
            String[] tokens = input.split(" ");
            StringBuilder output = new StringBuilder();
            int position = 0;
            while (position < tokens.length) {
                String token = tokens[position];
                int initialPosition = position;
                ArrayList<Replacement> candidates = this.replacementIndex.get(token);
                if (candidates != null) {
                    for (Replacement replacement : candidates) {
                        // presumably apply() writes its substitution to output
                        // when it matches; verify against Replacement.
                        position = replacement.apply(tokens, position, output);
                        if (position > initialPosition) break;
                    }
                }
                if (position == initialPosition) {
                    // No replacement moved the cursor: keep the token as is.
                    output.append(token).append(' ');
                    ++position;
                }
            }
            return output.toString();
        }

        /** Writes the serial version tag followed by the replacement index. */
        private void writeObject(ObjectOutputStream out) throws IOException {
            out.writeInt(CURRENT_SERIAL_VERSION);
            out.writeObject(this.replacementIndex);
        }

        /** Reads back the layout produced by {@code writeObject}. */
        @SuppressWarnings("unchecked")
        private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
            // The version tag is not interpreted, but it must be consumed to
            // keep the stream positioned at the serialized map.
            in.readInt();
            this.replacementIndex = (HashMap<String, ArrayList<Replacement>>)in.readObject();
        }
    }
}

