/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.pipe;

import cc.mallet.pipe.Pipe;
import cc.mallet.types.Instance;
import cc.mallet.types.Token;
import cc.mallet.types.TokenSequence;
import cc.mallet.util.CharSequenceLexer;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Pipe that converts SGML-style marked-up text into two parallel token
 * sequences: the lexed text tokens (stored as the instance data) and, for each
 * token, the label of the tag region it was found in (stored as the target).
 *
 * <p>Only tags whose content (the text between {@code <} / {@code </} and
 * {@code >}) is contained in {@link #allowedTags} are treated as markup. An
 * allowed opening tag switches the current label to the tag content; any
 * allowed closing tag switches the label back to {@link #backgroundTag}.
 * Tags that are not allowed are not treated as boundaries, so their text is
 * left in the span handed to the lexer.
 *
 * <p>The allowed-tag set is kept as a raw {@code Set} so existing callers that
 * pass differently parameterized sets keep compiling.
 */
public class SelectiveSGML2TokenSequence
extends Pipe
implements Serializable {
    /** Matches one opening or closing tag, e.g. {@code <b>} or {@code </b>}. */
    Pattern sgmlPattern = Pattern.compile("</?([^>]*)>");
    /** Tokenizer applied to every stretch of text between recognized tags. */
    CharSequenceLexer lexer;
    /** Label given to tokens that are not inside an allowed tag. */
    String backgroundTag;
    /** Tag contents that are recognized as markup; queried with {@code contains(String)}. */
    Set allowedTags;
    private static final long serialVersionUID = 1L;
    private static final int CURRENT_SERIAL_VERSION = 0;

    /**
     * @param lexer2 tokenizer used for the text between tags
     * @param backgroundTag label for tokens outside every allowed tag
     * @param allowed set of tag contents that are treated as markup
     */
    public SelectiveSGML2TokenSequence(CharSequenceLexer lexer2, String backgroundTag, Set allowed) {
        this.lexer = lexer2;
        this.backgroundTag = backgroundTag;
        this.allowedTags = allowed;
    }

    /**
     * @param regex token pattern used to build the {@link CharSequenceLexer}
     * @param backgroundTag label for tokens outside every allowed tag
     * @param allowed set of tag contents that are treated as markup
     */
    public SelectiveSGML2TokenSequence(String regex, String backgroundTag, Set allowed) {
        this(new CharSequenceLexer(regex), backgroundTag, allowed);
    }

    /**
     * Uses a default-constructed {@link CharSequenceLexer} and {@code "O"} as background tag.
     *
     * @param allowed set of tag contents that are treated as markup
     */
    public SelectiveSGML2TokenSequence(Set allowed) {
        this(new CharSequenceLexer(), "O", allowed);
    }

    /**
     * Uses {@code "O"} as background tag.
     *
     * @param lex tokenizer used for the text between tags
     * @param allowed set of tag contents that are treated as markup
     */
    public SelectiveSGML2TokenSequence(CharSequenceLexer lex, Set allowed) {
        this(lex, "O", allowed);
    }

    /**
     * Splits the carrier's character data at every allowed tag, lexes each text
     * span and labels the resulting tokens with the tag in effect for that span.
     *
     * <p>On return the carrier's data and source are the token sequence of the
     * text and its target is the equally long sequence of tag labels.
     *
     * @param carrier instance whose data is a {@link CharSequence}
     * @return the same instance, with data, target and source replaced
     * @throws ClassCastException if the carrier's data is {@code null} or not a {@link CharSequence}
     */
    @Override
    public Instance pipe(Instance carrier) {
        Object data = carrier.getData();
        if (!(data instanceof CharSequence)) {
            // Report null data with the same exception instead of failing on getClass().
            String actualType = data == null ? "null" : data.getClass().getName();
            throw new ClassCastException("carrier.data is a " + actualType + " not a CharSequence");
        }
        CharSequence string = (CharSequence)data;
        TokenSequence dataTokens = new TokenSequence();
        TokenSequence targetTokens = new TokenSequence();
        Matcher matcher = this.sgmlPattern.matcher(string);
        String tag = this.backgroundTag;
        int textStart = 0;
        boolean done = false;
        while (!done) {
            String nextTag = this.backgroundTag;
            int nextStart = textStart;
            int textEnd;
            done = !this.findNextValidMatch(matcher);
            if (done) {
                // subSequence's end index is exclusive, so the trailing text runs
                // to length(); length() - 1 would drop the final character.
                textEnd = string.length();
            } else {
                String sgml = matcher.group();
                if (sgml.charAt(1) != '/') {
                    // Opening tag: the label is everything between '<' and '>'.
                    nextTag = sgml.substring(1, sgml.length() - 1);
                }
                // A closing tag leaves nextTag at the background label.
                nextStart = matcher.end();
                textEnd = matcher.start();
            }
            if (textEnd - textStart > 0) {
                this.lexer.setCharSequence(string.subSequence(textStart, textEnd));
                while (this.lexer.hasNext()) {
                    dataTokens.add(new Token((String)this.lexer.next()));
                    targetTokens.add(new Token(tag));
                }
            }
            textStart = nextStart;
            tag = nextTag;
        }
        carrier.setData(dataTokens);
        carrier.setTarget(targetTokens);
        carrier.setSource(dataTokens);
        return carrier;
    }

    /**
     * Advances the matcher to the next tag whose content is in {@link #allowedTags},
     * skipping every tag that is not. Implemented as a loop so that long runs of
     * disallowed tags cannot exhaust the call stack.
     *
     * @param m3 matcher over the SGML pattern; left positioned on the accepted match
     * @return {@code true} if an allowed tag was found, {@code false} once the input is exhausted
     */
    private boolean findNextValidMatch(Matcher m3) {
        while (m3.find()) {
            String sgml = m3.group();
            // Skip the leading "<" or "</" and the trailing ">" to get the tag content.
            int first = sgml.charAt(1) == '/' ? 2 : 1;
            String tagName = sgml.substring(first, sgml.length() - 1);
            if (this.allowedTags.contains(tagName)) {
                return true;
            }
        }
        return false;
    }

    /**
     * @return a multi-line description listing the SGML pattern, the lexer
     *         pattern, the background tag and the allowed tag set
     */
    @Override
    public String toString() {
        StringBuilder ret = new StringBuilder();
        ret.append("sgml pattern: ").append(this.sgmlPattern.toString());
        ret.append("\nlexer: ").append(this.lexer.getPattern().toString());
        // append(String) prints "null" for a null tag rather than throwing.
        ret.append("\nbg tag: ").append(this.backgroundTag);
        ret.append("\nallowedHash: ").append(this.allowedTags).append("\n");
        return ret.toString();
    }

    /**
     * Writes the serial version number followed by the pattern, lexer,
     * background tag and allowed tag set, in that order.
     */
    private void writeObject(ObjectOutputStream out) throws IOException {
        out.writeInt(CURRENT_SERIAL_VERSION);
        out.writeObject(this.sgmlPattern);
        out.writeObject(this.lexer);
        out.writeObject(this.backgroundTag);
        out.writeObject(this.allowedTags);
    }

    /**
     * Restores the fields in the order written by {@code writeObject}.
     */
    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        // The version number must be consumed to keep the stream aligned; it is not otherwise used.
        int version = in.readInt();
        this.sgmlPattern = (Pattern)in.readObject();
        this.lexer = (CharSequenceLexer)in.readObject();
        this.backgroundTag = (String)in.readObject();
        this.allowedTags = (Set)in.readObject();
    }
}

