package com.github.dolphineor.filter.simhash;

import com.github.dolphineor.util.Logs;
import com.github.dolphineor.util.RegexLanguageUtils;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
import org.nlpcn.commons.lang.dic.DicManager;
import org.nlpcn.commons.lang.tire.GetWord;
import org.nlpcn.commons.lang.tire.domain.Forest;
import org.nlpcn.commons.lang.tire.library.Library;
import org.nlpcn.commons.lang.util.StringUtil;

/* loaded from: input_file:com/github/dolphineor/filter/simhash/SimHash.class */
/**
 * SimHash fingerprint of a piece of text.
 *
 * <p>The text is stripped of HTML tags, split into tokens, and every token is hashed to
 * {@code hashbits} bits. Each bit position collects a +1/-1 vote per token; the fingerprint
 * has a 1 in every position whose vote total is non-negative. Two fingerprints can then be
 * compared with {@link #hammingDistance(SimHash)}.
 *
 * <p>The {@code logger} used in {@link #subByDistance(SimHash, int)} is inherited from
 * {@code Logs}.
 */
public class SimHash extends Logs {
    /** Word dictionary handed to {@code GetWord} for text that is not classified as English. */
    private static Forest forest;

    static {
        forest = null;
        try {
            forest = Library.makeForest(DicManager.class.getResourceAsStream("/finger.dic"));
        } catch (Exception e) {
            // NOTE(review): on failure the forest stays null and is still passed to GetWord
            // later; the failure is only reported on stderr here.
            e.printStackTrace();
        }
    }

    /** Text being fingerprinted; replaced by its HTML-stripped form by {@link #simHash()}. */
    private String tokens;

    /** Fingerprint computed at construction time. */
    private final BigInteger intSimHash;

    /** Width of the fingerprint in bits. */
    private final int hashbits;

    /**
     * Builds a 64-bit fingerprint of {@code str}.
     *
     * @param str text to fingerprint
     */
    public SimHash(String str) {
        this(str, 64);
    }

    /**
     * Builds a fingerprint of {@code str} that is {@code i} bits wide.
     *
     * @param str text to fingerprint
     * @param i   fingerprint width in bits
     */
    public SimHash(String str, int i) {
        this.hashbits = i;
        this.tokens = str;
        this.intSimHash = simHash();
    }

    /**
     * Computes the fingerprint of the current text.
     *
     * <p>HTML tags are removed first (the stripped text is stored back into this instance).
     * When {@code RegexLanguageUtils} reports the text as English it is split with a
     * {@link StringTokenizer} and tokens are hashed as-is; otherwise words are pulled from
     * {@code GetWord}, empty words are skipped, and each word is lower-cased before hashing.
     *
     * @return a non-negative value with bit {@code b} set when the vote total for position
     *         {@code b} is {@code >= 0}
     */
    public BigInteger simHash() {
        int[] weights = new int[this.hashbits];
        this.tokens = StringUtil.rmHtmlTag(this.tokens);

        if (RegexLanguageUtils.INSTANCE.isEnglish(this.tokens)) {
            StringTokenizer tokenizer = new StringTokenizer(this.tokens);
            while (tokenizer.hasMoreTokens()) {
                vote(weights, hash(tokenizer.nextToken()));
            }
        } else {
            GetWord words = new GetWord(forest, this.tokens);
            String word;
            while ((word = words.getFrontWords()) != null) {
                if (word.length() != 0) {
                    vote(weights, hash(word.toLowerCase()));
                }
            }
        }

        BigInteger fingerprint = BigInteger.ZERO;
        for (int bit = 0; bit < this.hashbits; bit++) {
            // A tie (including "no tokens at all") counts as a 1 bit.
            if (weights[bit] >= 0) {
                fingerprint = fingerprint.setBit(bit);
            }
        }
        return fingerprint;
    }

    /** Adds one token's vote: +1 where the token hash has a 1 bit, -1 where it has a 0 bit. */
    private static void vote(int[] weights, BigInteger tokenHash) {
        for (int bit = 0; bit < weights.length; bit++) {
            if (tokenHash.testBit(bit)) {
                weights[bit]++;
            } else {
                weights[bit]--;
            }
        }
    }

    /**
     * Hashes one token to at most {@code hashbits} bits (before the final length mix-in).
     *
     * <p>Starts from the first character shifted left by 7, then for every character
     * multiplies by 1000003, XORs the character in and masks to {@code hashbits} bits;
     * finally XORs in the token length.
     *
     * @param str token to hash
     * @return zero for a {@code null} or empty token, otherwise a non-negative hash
     */
    private BigInteger hash(String str) {
        if (str == null || str.length() == 0) {
            return BigInteger.ZERO;
        }
        BigInteger multiplier = BigInteger.valueOf(1000003L);
        BigInteger mask = BigInteger.ONE.shiftLeft(this.hashbits).subtract(BigInteger.ONE);

        BigInteger acc = BigInteger.valueOf(str.charAt(0) << 7);
        for (int k = 0; k < str.length(); k++) {
            acc = acc.multiply(multiplier).xor(BigInteger.valueOf(str.charAt(k))).and(mask);
        }
        // The masked accumulator and the length are both non-negative, so the result can
        // never be -1; no remapping of that value is needed.
        return acc.xor(BigInteger.valueOf(str.length()));
    }

    /**
     * Counts the bit positions in which this fingerprint and {@code simHash} differ.
     *
     * @param simHash fingerprint to compare against
     * @return number of differing bits
     */
    public int hammingDistance(SimHash simHash) {
        // Both fingerprints are non-negative, so bitCount() is exactly the number of 1 bits.
        return this.intSimHash.xor(simHash.intSimHash).bitCount();
    }

    /**
     * Counts the character positions at which two equal-length strings differ.
     *
     * @param str  first string
     * @param str2 second string
     * @return number of differing positions, or {@code -1} when the lengths differ
     */
    public int getDistance(String str, String str2) {
        if (str.length() != str2.length()) {
            return -1;
        }
        int differences = 0;
        for (int pos = 0; pos < str.length(); pos++) {
            if (str.charAt(pos) != str2.charAt(pos)) {
                differences++;
            }
        }
        return differences;
    }

    /**
     * Splits the fingerprint of {@code simHash} into fixed-width segments.
     *
     * <p>The segment width is {@code hashbits / (i + 1)}, using this instance's
     * {@code hashbits}. Bits are read from the least significant end; within a segment the
     * lowest bit read becomes the most significant digit of the segment value. Trailing bits
     * that do not fill a whole segment are dropped.
     *
     * <p>All {@code hashbits} positions are walked, so fingerprints with leading zero bits
     * produce the same number of segments as any other fingerprint.
     *
     * @param simHash fingerprint whose bits are split
     * @param i       value that determines the segment width; must satisfy
     *                {@code 0 <= i < hashbits}
     * @return the segment values, lowest-order segment first
     * @throws IllegalArgumentException if {@code i} is negative or not smaller than
     *                                  {@code hashbits} (the segment width would be zero)
     */
    public List<BigInteger> subByDistance(SimHash simHash, int i) {
        if (i < 0 || i >= this.hashbits) {
            throw new IllegalArgumentException(
                    "distance must be in [0, " + this.hashbits + "), got " + i);
        }
        int segmentWidth = this.hashbits / (i + 1);
        List<BigInteger> segments = new ArrayList<>(this.hashbits / segmentWidth);
        StringBuilder bits = new StringBuilder(segmentWidth);

        for (int bit = 0; bit < this.hashbits; bit++) {
            bits.append(simHash.intSimHash.testBit(bit) ? '1' : '0');
            if ((bit + 1) % segmentWidth == 0) {
                BigInteger segment = new BigInteger(bits.toString(), 2);
                this.logger.info("----{}", segment);
                bits.setLength(0);
                segments.add(segment);
            }
        }
        return segments;
    }
}
