/*
 * Decompiled with CFR 0.152.
 */
package com.aliasi.tokenizer;

import com.aliasi.tokenizer.ModifiedTokenizerFactory;
import com.aliasi.tokenizer.ModifyTokenTokenizerFactory;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.Strings;
import java.io.ObjectInput;
import java.io.Serializable;

/**
 * A tokenizer factory whose {@link #modifyToken(String)} maps a token to its
 * four-character Soundex encoding (see {@link #soundexEncoding(String)}).
 *
 * <p>NOTE(review): the superclass {@code ModifyTokenTokenizerFactory} is not
 * visible here; presumably it applies {@code modifyToken} to each token
 * produced by the base factory passed to the constructor.
 *
 * <p>Only characters in the range 0..255 take part in the encoding; all
 * per-character decisions are precomputed into lookup tables when the class
 * is initialized.
 */
public class SoundexTokenizerFactory
extends ModifyTokenTokenizerFactory
implements Serializable {
    static final long serialVersionUID = -7062805184862100578L;

    /** Number of entries in each lookup table: one per char value in 0..255. */
    private static final int TABLE_SIZE = 256;

    /** Length of every encoding returned by {@link #soundexEncoding(String)}. */
    private static final int ENCODING_LENGTH = 4;

    /**
     * Value given to the "previous code" after a vowel. {@link #soundexCode(char)}
     * never returns it, so the next coded letter always differs from it and is
     * emitted even if it repeats the code seen before the vowel.
     */
    private static final char VOWEL_SEPARATOR = '7';

    /**
     * Sentinel stored in the tables for "no code". It is final because its
     * value is written into {@link #INITIAL_CODES} and {@link #CODES} once at
     * class initialization and compared against on every encoding call;
     * reassigning it afterwards would make those comparisons disagree with
     * the tables.
     */
    static final char NON_CHAR_CODE = (char) 255;

    /**
     * For each char value: {@link #NON_CHAR_CODE} if it is not a letter,
     * otherwise its de-accented, upper-cased form.
     */
    static final char[] INITIAL_CODES = new char[TABLE_SIZE];

    /**
     * For each char value: {@link #NON_CHAR_CODE} if it is not a letter,
     * otherwise {@link #soundexCode(char)} of its {@link #INITIAL_CODES}
     * entry (which is itself {@link #NON_CHAR_CODE} for letters that have no
     * digit class).
     */
    static final char[] CODES = new char[TABLE_SIZE];

    /** For each char value: whether its {@link #INITIAL_CODES} entry is A, E, I, O or U. */
    static final boolean[] VOWELS = new boolean[TABLE_SIZE];

    static {
        // A single pass fills all three tables; each VOWELS entry depends only
        // on the INITIAL_CODES entry computed in the same iteration.
        for (int i = 0; i < TABLE_SIZE; ++i) {
            char c = (char) i;
            if (Character.isLetter(c)) {
                char initial = Character.toUpperCase(Strings.deAccentLatin1(c));
                INITIAL_CODES[i] = initial;
                CODES[i] = soundexCode(initial);
            } else {
                INITIAL_CODES[i] = NON_CHAR_CODE;
                CODES[i] = NON_CHAR_CODE;
            }
            VOWELS[i] = isVowel(INITIAL_CODES[i]);
        }
    }

    /**
     * Constructs a Soundex tokenizer factory over the given base factory.
     *
     * @param factory base tokenizer factory, handed to the superclass
     */
    public SoundexTokenizerFactory(TokenizerFactory factory) {
        super(factory);
    }

    /**
     * Returns the Soundex encoding of the given token.
     *
     * @param token token to encode
     * @return result of {@link #soundexEncoding(String)} for the token
     */
    @Override
    public String modifyToken(String token) {
        return soundexEncoding(token);
    }

    /**
     * Returns this class's string form followed by the base factory's string
     * form, with continuation lines of the latter indented by four spaces.
     */
    @Override
    public String toString() {
        return getClass().toString()
            + "\n  base factory="
            + baseTokenizerFactory().toString().replace("\n", "\n    ");
    }

    /**
     * Supplies the object that Java serialization writes in place of this
     * factory.
     *
     * @return a {@link Serializer} wrapping this factory
     */
    Object writeReplace() {
        return new Serializer(this);
    }

    /**
     * Computes the four-character Soundex encoding of a token.
     *
     * <ol>
     *   <li>Leading characters that are above 255 or are not letters are
     *       skipped. If nothing remains, the result is {@code "0000"}.</li>
     *   <li>The first remaining character contributes its
     *       {@link #INITIAL_CODES} entry as the first output character.</li>
     *   <li>Each following character is examined until four output
     *       characters exist or the token ends. Characters above 255 are
     *       ignored. A character without a digit code is dropped; if it is a
     *       vowel it also resets the remembered previous code, otherwise
     *       (non-letters and uncoded non-vowel letters) the previous code is
     *       kept. A character whose digit code equals the previous code is
     *       dropped; any other digit code is appended.</li>
     *   <li>The result is padded on the right with {@code '0'}.</li>
     * </ol>
     *
     * @param token token to encode; must not be {@code null}
     * @return the four-character encoding
     * @throws NullPointerException if {@code token} is {@code null}
     */
    public static String soundexEncoding(String token) {
        final int length = token.length();

        // Locate the first character that is a letter in the 0..255 range.
        int pos = 0;
        while (pos < length && !hasInitialCode(token.charAt(pos))) {
            ++pos;
        }
        if (pos == length) {
            return "0000";
        }

        char first = token.charAt(pos);
        char[] encoding = new char[ENCODING_LENGTH];
        encoding[0] = INITIAL_CODES[first];
        // The first letter's own digit code suppresses an immediately
        // following letter of the same class.
        char lastCode = CODES[first];
        int filled = 1;

        for (++pos; filled < ENCODING_LENGTH && pos < length; ++pos) {
            char c = token.charAt(pos);
            if (c >= TABLE_SIZE) {
                continue; // outside the tables: ignored entirely
            }
            char code = CODES[c];
            if (code == NON_CHAR_CODE) {
                // Only vowels break a run of identical codes; other uncoded
                // characters leave lastCode as it was.
                if (VOWELS[c]) {
                    lastCode = VOWEL_SEPARATOR;
                }
            } else if (code != lastCode) {
                encoding[filled++] = code;
                lastCode = code;
            }
        }

        while (filled < ENCODING_LENGTH) {
            encoding[filled++] = '0';
        }
        return new String(encoding);
    }

    /**
     * Returns the Soundex digit for an upper-case letter.
     *
     * @param upperCaseLetter letter to classify
     * @return {@code '1'}..{@code '6'} for the letters listed in the switch,
     *         otherwise {@link #NON_CHAR_CODE}
     */
    static char soundexCode(char upperCaseLetter) {
        switch (upperCaseLetter) {
            case 'B':
            case 'F':
            case 'P':
            case 'V':
                return '1';
            case 'C':
            case 'G':
            case 'J':
            case 'K':
            case 'Q':
            case 'S':
            case 'X':
            case 'Z':
                return '2';
            case 'D':
            case 'T':
                return '3';
            case 'L':
                return '4';
            case 'M':
            case 'N':
                return '5';
            case 'R':
                return '6';
            default:
                return NON_CHAR_CODE;
        }
    }

    /** Tells whether the char is within the tables and has a non-sentinel initial code. */
    private static boolean hasInitialCode(char c) {
        return c < TABLE_SIZE && INITIAL_CODES[c] != NON_CHAR_CODE;
    }

    /** Tells whether an initial code is one of the upper-case vowels A, E, I, O, U. */
    private static boolean isVowel(char initialCode) {
        return initialCode == 'A'
            || initialCode == 'E'
            || initialCode == 'I'
            || initialCode == 'O'
            || initialCode == 'U';
    }

    /**
     * Serialization stand-in returned by {@link SoundexTokenizerFactory#writeReplace()}.
     *
     * <p>NOTE(review): {@code AbstractSerializer} is not visible here;
     * presumably it writes and reads the base factory and then calls
     * {@link #read(ObjectInput, TokenizerFactory)} to rebuild the object.
     */
    static class Serializer
    extends ModifiedTokenizerFactory.AbstractSerializer<SoundexTokenizerFactory> {
        static final long serialVersionUID = 2496844521092643488L;

        /**
         * Constructs a serializer for the given factory.
         *
         * @param factory factory to serialize
         */
        public Serializer(SoundexTokenizerFactory factory) {
            super(factory);
        }

        /** Constructs a serializer with no factory (delegates with {@code null}). */
        public Serializer() {
            this(null);
        }

        /**
         * Builds a new Soundex factory over the supplied base factory; nothing
         * is read from {@code in} by this method.
         *
         * @param in object input (unused here)
         * @param baseFactory base tokenizer factory for the new instance
         * @return a new {@link SoundexTokenizerFactory} wrapping {@code baseFactory}
         */
        @Override
        public Object read(ObjectInput in, TokenizerFactory baseFactory) {
            return new SoundexTokenizerFactory(baseFactory);
        }
    }
}

