/*
 * Decompiled with CFR 0.152.
 */
package org.openimaj.text.nlp.language;

import gnu.trove.map.hash.TIntDoubleHashMap;
import gnu.trove.map.hash.TIntIntHashMap;
import gnu.trove.procedure.TIntIntProcedure;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.zip.GZIPInputStream;
import javax.annotation.Resource;
import no.uib.cipr.matrix.DenseMatrix;
import no.uib.cipr.matrix.Matrix;
import org.openimaj.io.IOUtils;
import org.openimaj.text.nlp.language.LanguageModel;

@Resource
public class LanguageDetector {
    public static final String LANGUAGE_MODEL_BINARY = "/org/openimaj/text/language/language.model.binary.gz";
    private LanguageModel languageModel;
    TIntDoubleHashMap logFacCache = new TIntDoubleHashMap();

    public LanguageDetector() throws IOException {
        this.loadFromBinary();
    }

    private void loadFromBinary() throws IOException {
        this.languageModel = IOUtils.read(new GZIPInputStream(LanguageDetector.class.getResourceAsStream(LANGUAGE_MODEL_BINARY)), IOUtils.newInstance(LanguageModel.class));
    }

    public WeightedLocale classify(String text) {
        DenseMatrix fv = this.tokenize(text);
        WeightedLocale locale = this.naiveBayesClassify(fv);
        return locale;
    }

    private WeightedLocale naiveBayesClassify(DenseMatrix fv) {
        DenseMatrix nbWorkspace = new DenseMatrix(1, this.languageModel.naiveBayesPTC.numColumns());
        double logFVSum = this.sumLogFactorial(fv);
        fv.mult((Matrix)this.languageModel.naiveBayesPTC, (Matrix)nbWorkspace);
        DenseMatrix pdc = nbWorkspace;
        pdc.add((Matrix)this.languageModel.naiveBayesPC);
        double[] pdData = pdc.getData();
        int bestIndex = -1;
        double best = 0.0;
        double sum = 0.0;
        for (int i = 0; i < pdc.numColumns(); ++i) {
            double correctedScore = pdData[i] - logFVSum;
            sum += correctedScore;
            if (bestIndex != -1 && !(correctedScore > best)) continue;
            bestIndex = i;
            best = correctedScore;
        }
        return new WeightedLocale(this.languageModel.naiveBayesClasses[bestIndex], best / sum);
    }

    private double sumLogFactorial(DenseMatrix fv) {
        double sum = 0.0;
        double[] data = fv.getData();
        for (int i = 0; i < fv.numColumns(); ++i) {
            int fvi = (int)data[i];
            if (this.logFacCache.contains(fvi)) {
                sum += this.logFacCache.get(fvi);
                continue;
            }
            for (int j = 1; j < fvi + 1; ++j) {
                sum += Math.log(j);
            }
        }
        return sum;
    }

    private DenseMatrix tokenize(String text) {
        byte[] ords = text.getBytes(StandardCharsets.UTF_8);
        int state = 0;
        TIntIntHashMap statecount = new TIntIntHashMap();
        for (byte letter : ords) {
            state = this.languageModel.tk_nextmove[(state << 8) + (letter & 0xFF)];
            statecount.adjustOrPutValue(state, 1, 1);
        }
        final double[][] fv = new double[1][this.languageModel.naiveBayesNFeats];
        statecount.forEachEntry(new TIntIntProcedure(){

            public boolean execute(int state, int statecount) {
                int[] indexes = (int[])((LanguageDetector)LanguageDetector.this).languageModel.tk_output.get(state);
                if (indexes == null) {
                    return true;
                }
                for (int i : indexes) {
                    double[] dArray = fv[0];
                    int n = i;
                    dArray[n] = dArray[n] + (double)statecount;
                }
                return true;
            }
        });
        return new DenseMatrix(fv);
    }

    public LanguageModel getLanguageModel() {
        return this.languageModel;
    }

    public static void main(String[] args) throws IOException {
        LanguageDetector lm = new LanguageDetector();
        System.out.println("Available languages: ");
        for (String string : lm.languageModel.naiveBayesClasses) {
            System.out.println(string + ": " + new Locale(string).getDisplayLanguage());
        }
    }

    public static class WeightedLocale {
        public String language;
        public double confidence;

        public WeightedLocale(String language, double best) {
            this.language = language;
            this.confidence = best;
        }

        public String toString() {
            return String.format("%s: %f", this.language.toString(), this.confidence);
        }

        public Locale getLocale() {
            return new Locale(this.language);
        }

        public Map<String, Object> asMap() {
            HashMap<String, Object> map = new HashMap<String, Object>();
            map.put("language", this.language);
            map.put("confidence", this.confidence);
            return map;
        }

        public static WeightedLocale fromMap(Map<String, Object> map) {
            return new WeightedLocale((String)map.get("language"), (Double)map.get("confidence"));
        }
    }
}

