/*
 * See BHTSne.java for copyright and license information!  
 */
package de.javagl.tsne;
/*
 * Copyright (c) 2009-2014, Peter Abeles. All Rights Reserved.
 *
 * This file is part of Efficient Java Matrix Library (EJML).
 * 
 * Adapted by 2014, Leif Jonsson, added pca method.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.ejml.data.DenseMatrix64F;
import org.ejml.factory.DecompositionFactory;
import org.ejml.interfaces.decomposition.SingularValueDecomposition;
import org.ejml.ops.CommonOps;
import org.ejml.ops.SingularOps;

/**
 * <p>
 * The following is a simple example of how to perform basic principal component
 * analysis in EJML.
 * </p>
 *
 * <p>
 * Principal Component Analysis (PCA) is typically used to develop a linear
 * model for a set of data (e.g. face images) which can then be used to test for
 * membership. PCA works by converting the set of data to a new basis that is a
 * subspace of the original set. The subspace is selected to maximize
 * information.
 * </p>
 * <p>
 * PCA is typically derived as an eigenvalue problem. However in this
 * implementation
 * {@link org.ejml.interfaces.decomposition.SingularValueDecomposition SVD} is
 * used instead because it will produce a more numerically stable solution.
 * Computation using EVD requires explicitly computing the variance of each
 * sample set. The variance is computed by squaring the residual, which can
 * cause loss of precision.
 * </p>
 *
 * <p>
 * Usage:<br>
 * 1) call setup()<br>
 * 2) For each sample (e.g. an image ) call addSample()<br>
 * 3) After all the samples have been added call computeBasis()<br>
 * 4) Call sampleToEigenSpace() , eigenToSampleSpace() , errorMembership() ,
 * response()
 * </p>
 *
 * @author Peter Abeles
 */
class PrincipalComponentAnalysis
{

    // principal component subspace is stored in the rows
    private DenseMatrix64F V_t;

    // how many principal components are used
    private int numComponents;

    // where the data is stored
    private DenseMatrix64F A = new DenseMatrix64F(1, 1);
    private int sampleIndex;

    // mean values of each element across all the samples

    private double mean[];

    public PrincipalComponentAnalysis()
    {
    }

    /**
     * Must be called before any other functions. Declares and sets up internal
     * data structures.
     *
     * @param numSamples Number of samples that will be processed.
     * @param sampleSize Number of elements in each sample.
     */
    private void setup(int numSamples, int sampleSize)
    {
        mean = new double[sampleSize];
        A.reshape(numSamples, sampleSize, false);
        sampleIndex = 0;
        numComponents = -1;
    }

    /**
     * Adds a new sample of the raw data to internal data structure for later
     * processing. All the samples must be added before computeBasis is called.
     *
     * @param sampleData Sample from original raw data.
     */
    private void addSample(double[] sampleData)
    {
        if (A.getNumCols() != sampleData.length)
            throw new IllegalArgumentException("Unexpected sample size");
        if (sampleIndex >= A.getNumRows())
            throw new IllegalArgumentException("Too many samples");

        for (int i = 0; i < sampleData.length; i++)
        {
            A.set(sampleIndex, i, sampleData[i]);
        }
        sampleIndex++;
    }

    /**
     * Computes a basis (the principal components) from the most dominant
     * eigenvectors.
     *
     * @param numComponents Number of vectors it will use to describe the data.
     *        Typically much smaller than the number of elements in the input
     *        vector.
     */
    private void computeBasis(int numComponents)
    {
        if (numComponents > A.getNumCols())
            throw new IllegalArgumentException(
                "More components requested that the data's length.");
        if (sampleIndex != A.getNumRows())
            throw new IllegalArgumentException(
                "Not all the data has been added");
        if (numComponents > sampleIndex)
            throw new IllegalArgumentException(
                "More data needed to compute the desired number of components");

        this.numComponents = numComponents;

        // compute the mean of all the samples
        for (int i = 0; i < A.getNumRows(); i++)
        {
            for (int j = 0; j < mean.length; j++)
            {
                mean[j] += A.get(i, j);
            }
        }
        for (int j = 0; j < mean.length; j++)
        {
            mean[j] /= A.getNumRows();
        }

        // subtract the mean from the original data
        for (int i = 0; i < A.getNumRows(); i++)
        {
            for (int j = 0; j < mean.length; j++)
            {
                A.set(i, j, A.get(i, j) - mean[j]);
            }
        }

        // Compute SVD and save time by not computing U
        SingularValueDecomposition<DenseMatrix64F> svd =
            DecompositionFactory.svd(A.numRows, A.numCols, false, true, false);
        if (!svd.decompose(A))
            throw new RuntimeException("SVD failed");

        V_t = svd.getV(null, true);
        DenseMatrix64F W = svd.getW(null);

        // Singular values are in an arbitrary order initially
        SingularOps.descendingOrder(null, false, W, V_t, true);

        // strip off unneeded components and find the basis
        V_t.reshape(numComponents, mean.length, true);
    }

    /**
     * Converts a vector from sample space into eigen space.
     *
     * @param sampleData Sample space data.
     * @return Eigen space projection.
     */
    private double[] sampleToEigenSpace(double[] sampleData)
    {
        if (sampleData.length != A.getNumCols())
            throw new IllegalArgumentException("Unexpected sample length");
        DenseMatrix64F mean = DenseMatrix64F.wrap(A.getNumCols(), 1, this.mean);

        DenseMatrix64F s =
            new DenseMatrix64F(A.getNumCols(), 1, true, sampleData);
        DenseMatrix64F r = new DenseMatrix64F(numComponents, 1);

        CommonOps.subtract(s, mean, s);

        CommonOps.mult(V_t, s, r);

        return r.data;
    }

    double[][] pca(double[][] matrix, int no_dims)
    {
        double[][] trafoed = new double[matrix.length][matrix[0].length];
        setup(matrix.length, matrix[0].length);
        for (int i = 0; i < matrix.length; i++)
        {
            addSample(matrix[i]);
        }
        computeBasis(no_dims);
        for (int i = 0; i < matrix.length; i++)
        {
            trafoed[i] = sampleToEigenSpace(matrix[i]);
            for (int j = 0; j < trafoed[i].length; j++)
            {
                trafoed[i][j] *= -1;
            }
        }
        return trafoed;
    }
}
