/*
 * Decompiled with CFR 0.152.
 */
package cc.mallet.topics;

import cc.mallet.topics.WordEmbeddingRunnable;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureSequence;
import cc.mallet.types.IDSorter;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.util.CommandOption;
import cc.mallet.util.Randoms;
import java.io.File;
import java.io.PrintWriter;
import java.util.Arrays;
import java.util.Formatter;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class WordEmbeddings {
    static CommandOption.String inputFile = new CommandOption.String(WordEmbeddings.class, "input", "FILENAME", true, null, "The filename from which to read the list of training instances.  Use - for stdin.  The instances must be FeatureSequence or FeatureSequenceWithBigrams, not FeatureVector", null);
    static CommandOption.String outputFile = new CommandOption.String(WordEmbeddings.class, "output", "FILENAME", true, "weights.txt", "The filename to write text-formatted word vectors.", null);
    static CommandOption.Integer numDimensions = new CommandOption.Integer(WordEmbeddings.class, "num-dimensions", "INTEGER", true, 50, "The number of dimensions to fit.", null);
    static CommandOption.Integer windowSizeOption = new CommandOption.Integer(WordEmbeddings.class, "window-size", "INTEGER", true, 5, "The number of adjacent words to consider.", null);
    static CommandOption.Integer numThreads = new CommandOption.Integer(WordEmbeddings.class, "num-threads", "INTEGER", true, 1, "The number of threads for parallel training.", null);
    static CommandOption.Integer numSamples = new CommandOption.Integer(WordEmbeddings.class, "num-samples", "INTEGER", true, 5, "The number of negative samples to use in training.", null);
    static CommandOption.String exampleWord = new CommandOption.String(WordEmbeddings.class, "example-word", "STRING", true, null, "If defined, periodically show the closest vectors to this word.", null);
    Alphabet vocabulary;
    int numWords;
    int numColumns;
    double[] weights;
    double[] squaredGradientSums;
    int stride;
    int[] wordCounts;
    double[] samplingDistribution;
    int[] samplingTable;
    int samplingTableSize = 100000000;
    double samplingSum = 0.0;
    int totalWords = 0;
    double maxExpValue = 6.0;
    double minExpValue = -6.0;
    double[] sigmoidCache;
    int sigmoidCacheSize = 1000;
    int windowSize = 5;
    String queryWord = "the";
    Randoms random = new Randoms();

    public WordEmbeddings() {
    }

    public WordEmbeddings(Alphabet a, int numColumns, int windowSize) {
        this.vocabulary = a;
        this.numWords = this.vocabulary.size();
        System.out.format("Vocab size: %d\n", this.numWords);
        this.numColumns = numColumns;
        this.stride = 2 * numColumns;
        this.weights = new double[this.numWords * this.stride];
        this.squaredGradientSums = new double[this.numWords * this.stride];
        int word = 0;
        while (word < this.numWords) {
            int col = 0;
            while (col < 2 * numColumns) {
                this.weights[word * this.stride + col] = (this.random.nextDouble() - 0.5) / (double)numColumns;
                ++col;
            }
            ++word;
        }
        this.wordCounts = new int[this.numWords];
        this.samplingDistribution = new double[this.numWords];
        this.samplingTable = new int[this.samplingTableSize];
        this.windowSize = windowSize;
        this.sigmoidCache = new double[this.sigmoidCacheSize + 1];
        int i = 0;
        while (i < this.sigmoidCacheSize) {
            double value = (double)i / (double)this.sigmoidCacheSize * (this.maxExpValue - this.minExpValue) + this.minExpValue;
            this.sigmoidCache[i] = 1.0 / (1.0 + Math.exp(-value));
            ++i;
        }
    }

    /*
     * Unable to fully structure code
     */
    public void countWords(InstanceList instances) {
        for (Instance instance : instances) {
            tokens = (FeatureSequence)instance.getData();
            length = tokens.getLength();
            position = 0;
            while (position < length) {
                v0 = type = tokens.getIndexAtPosition(position);
                this.wordCounts[v0] = this.wordCounts[v0] + 1;
                ++position;
            }
            this.totalWords += length;
        }
        normalizer = 1.0f / (float)this.totalWords;
        this.samplingDistribution[0] = Math.pow(normalizer * (double)this.wordCounts[0], 0.75);
        word = 1;
        while (word < this.numWords) {
            this.samplingDistribution[word] = this.samplingDistribution[word - 1] + Math.pow(normalizer * (double)this.wordCounts[word], 0.75);
            ++word;
        }
        this.samplingSum = this.samplingDistribution[this.numWords - 1];
        word = 0;
        i = 0;
        ** GOTO lbl28
        {
            ++word;
            do {
                if (this.samplingSum * (double)i / (double)this.samplingTableSize > this.samplingDistribution[word]) continue block3;
                this.samplingTable[i] = word;
                ++i;
lbl28:
                // 2 sources

            } while (i < this.samplingTableSize);
        }
        System.out.println("done counting");
    }

    public void train(InstanceList instances, int numThreads, int numSamples) {
        ExecutorService executor = Executors.newFixedThreadPool(numThreads);
        WordEmbeddingRunnable[] runnables = new WordEmbeddingRunnable[numThreads];
        int thread = 0;
        while (thread < numThreads) {
            runnables[thread] = new WordEmbeddingRunnable(this, instances, numSamples, numThreads, thread);
            executor.submit(runnables[thread]);
            ++thread;
        }
        long startTime = System.currentTimeMillis();
        double difference = 0.0;
        boolean finished = false;
        while (!finished) {
            try {
                Thread.sleep(5000L);
            }
            catch (InterruptedException interruptedException) {
                // empty catch block
            }
            int wordsSoFar = 0;
            int thread2 = 0;
            while (thread2 < numThreads) {
                wordsSoFar += runnables[thread2].wordsSoFar;
                System.out.format("%.3f ", runnables[thread2].getMeanError());
                ++thread2;
            }
            long runningMillis = System.currentTimeMillis() - startTime;
            System.out.format("%d\t%d\t%fk w/s %f loss %f avg\n", wordsSoFar, runningMillis, (double)wordsSoFar / (double)runningMillis, difference / 10000.0, this.averageAbsWeight());
            difference = 0.0;
            if (wordsSoFar > 5 * this.totalWords) {
                finished = true;
                int thread3 = 0;
                while (thread3 < numThreads) {
                    runnables[thread3].shouldRun = false;
                    ++thread3;
                }
            }
            if (this.queryWord == null) continue;
            this.findClosest(this.copy(this.queryWord));
        }
        executor.shutdownNow();
    }

    public void findClosest(double[] targetVector) {
        Object[] sortedWords = new IDSorter[this.numWords];
        double targetSquaredSum = 0.0;
        int col = 0;
        while (col < this.numColumns) {
            targetSquaredSum += targetVector[col] * targetVector[col];
            ++col;
        }
        double targetNormalizer = 1.0 / Math.sqrt(targetSquaredSum);
        System.out.println(targetSquaredSum);
        int word = 0;
        while (word < this.numWords) {
            double innerProduct = 0.0;
            double wordSquaredSum = 0.0;
            int col2 = 0;
            while (col2 < this.numColumns) {
                wordSquaredSum += this.weights[word * this.stride + col2] * this.weights[word * this.stride + col2];
                ++col2;
            }
            double wordNormalizer = 1.0 / Math.sqrt(wordSquaredSum);
            int col3 = 0;
            while (col3 < this.numColumns) {
                innerProduct += targetNormalizer * targetVector[col3] * wordNormalizer * this.weights[word * this.stride + col3];
                ++col3;
            }
            sortedWords[word] = new IDSorter(word, innerProduct);
            ++word;
        }
        Arrays.sort(sortedWords);
        int i = 0;
        while (i < 10) {
            System.out.format("%f\t%d\t%s\n", ((IDSorter)sortedWords[i]).getWeight(), ((IDSorter)sortedWords[i]).getID(), this.vocabulary.lookupObject(((IDSorter)sortedWords[i]).getID()));
            ++i;
        }
    }

    public double averageAbsWeight() {
        double sum = 0.0;
        int word = 0;
        while (word < this.numWords) {
            int col = 0;
            while (col < this.numColumns) {
                sum += Math.abs(this.weights[word * this.stride + col]);
                ++col;
            }
            ++word;
        }
        return sum / (double)(this.numWords * this.numColumns);
    }

    public void write(PrintWriter out) {
        int word = 0;
        while (word < this.numWords) {
            Formatter buffer = new Formatter();
            buffer.format("%s", this.vocabulary.lookupObject(word));
            int col = 0;
            while (col < this.numColumns) {
                buffer.format(" %.6f", this.weights[word * this.stride + col]);
                ++col;
            }
            out.println(buffer);
            ++word;
        }
    }

    public double[] copy(String word) {
        return this.copy(this.vocabulary.lookupIndex(word));
    }

    public double[] copy(int word) {
        double[] result = new double[this.numColumns];
        int col = 0;
        while (col < this.numColumns) {
            result[col] = this.weights[word * this.stride + col];
            ++col;
        }
        return result;
    }

    public double[] add(double[] result, String word) {
        return this.add(result, this.vocabulary.lookupIndex(word));
    }

    public double[] add(double[] result, int word) {
        int col = 0;
        while (col < this.numColumns) {
            int n = col;
            result[n] = result[n] + this.weights[word * this.stride + col];
            ++col;
        }
        return result;
    }

    public double[] subtract(double[] result, String word) {
        return this.subtract(result, this.vocabulary.lookupIndex(word));
    }

    public double[] subtract(double[] result, int word) {
        int col = 0;
        while (col < this.numColumns) {
            int n = col;
            result[n] = result[n] - this.weights[word * this.stride + col];
            ++col;
        }
        return result;
    }

    public static void main(String[] args) throws Exception {
        CommandOption.setSummary(WordEmbeddings.class, "Train continuous word embeddings using the skip-gram method with negative sampling.");
        CommandOption.process(WordEmbeddings.class, args);
        InstanceList instances = InstanceList.load(new File(WordEmbeddings.inputFile.value));
        WordEmbeddings matrix = new WordEmbeddings(instances.getDataAlphabet(), WordEmbeddings.numDimensions.value, WordEmbeddings.windowSizeOption.value);
        matrix.queryWord = WordEmbeddings.exampleWord.value;
        matrix.countWords(instances);
        matrix.train(instances, WordEmbeddings.numThreads.value, WordEmbeddings.numSamples.value);
        PrintWriter out = new PrintWriter(WordEmbeddings.outputFile.value);
        matrix.write(out);
        out.close();
    }
}

