/*******************************************************************************
 * Copyright (c) 2010-2020 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 ******************************************************************************/

package smile.manifold

import smile.math.distance.Distance

/**
 * Isometric feature mapping (Isomap). Isomap is a widely used low-dimensional
 * embedding method in which geodesic distances on a weighted graph are
 * combined with classical multidimensional scaling (MDS). It computes a
 * quasi-isometric, low-dimensional embedding of a set of high-dimensional
 * data points, and is highly efficient and generally applicable to a broad
 * range of data sources and dimensionalities.
 *
 * Classical MDS builds the embedding from the pairwise distances between
 * data points, which are generally measured as straight-line Euclidean
 * distances. Isomap differs by using the geodesic distance induced by a
 * neighborhood graph, so that the manifold structure is reflected in the
 * resulting embedding. The geodesic distance is defined as the sum of edge
 * weights along the shortest path between two nodes. The top n eigenvectors
 * of the geodesic distance matrix represent the coordinates in the new
 * n-dimensional Euclidean space.
 *
 * The connectivity of each data point in the neighborhood graph is defined
 * as its nearest k Euclidean neighbors in the high-dimensional space. This
 * step is vulnerable to "short-circuit errors" if k is too large with
 * respect to the manifold structure, or if noise in the data moves the
 * points slightly off the manifold. Even a single short-circuit error
 * can alter many entries in the geodesic distance matrix, which in turn
 * can lead to a drastically different (and incorrect) low-dimensional
 * embedding. Conversely, if k is too small, the neighborhood graph may
 * become too sparse to approximate geodesic paths accurately.
 *
 * When [CIsomap] is true, the C-Isomap variant is used. It magnifies the
 * regions of high density and shrinks the regions of low density of data
 * points in the manifold. Edge weights that are maximized in MDS are
 * modified, with everything else remaining unaffected.
 *
 * ### References
 *  - J. B. Tenenbaum, V. de Silva and J. C. Langford. A Global Geometric Framework for Nonlinear Dimensionality Reduction. Science 290(5500):2319-2323, 2000.
 *
 * @param data the data set.
 * @param k k-nearest neighbor.
 * @param d the dimension of the manifold. Defaults to 2.
 * @param CIsomap C-Isomap algorithm if true (the default), otherwise standard algorithm.
 * @return the [IsoMap] result produced by `IsoMap.of`.
 */
fun isomap(data: Array<DoubleArray>, k: Int, d: Int = 2, CIsomap: Boolean = true): IsoMap =
    IsoMap.of(data, k, d, CIsomap)

/**
 * Locally Linear Embedding (LLE). LLE has several advantages over Isomap,
 * including faster optimization when implemented to take advantage of sparse
 * matrix algorithms, and better results with many problems.
 *
 * The algorithm proceeds in three steps:
 *
 *  - find a set of the nearest neighbors of each point;
 *  - compute a set of weights for each point that best describe the point
 *    as a linear combination of its neighbors;
 *  - use an eigenvector-based optimization technique to find the
 *    low-dimensional embedding of points, such that each point is still
 *    described with the same linear combination of its neighbors.
 *
 * LLE tends to handle non-uniform sample densities poorly because there is
 * no fixed unit to prevent the weights from drifting as various regions
 * differ in sample densities.
 *
 * ### References
 *  - Sam T. Roweis and Lawrence K. Saul. Nonlinear Dimensionality Reduction by Locally Linear Embedding. Science 290(5500):2323-2326, 2000.
 *
 * @param data the data set.
 * @param k k-nearest neighbor.
 * @param d the dimension of the manifold. Defaults to 2.
 * @return the [LLE] result produced by `LLE.of`.
 */
fun lle(data: Array<DoubleArray>, k: Int, d: Int = 2): LLE =
    LLE.of(data, k, d)

/**
 * Laplacian Eigenmap. Using the notion of the Laplacian of the nearest
 * neighbor adjacency graph, Laplacian Eigenmap computes a low-dimensional
 * representation of the data set that optimally preserves local neighborhood
 * information in a certain sense. The representation map generated by the
 * algorithm may be viewed as a discrete approximation to a continuous map
 * that naturally arises from the geometry of the manifold.
 *
 * Because the algorithm is locality preserving, it is relatively insensitive
 * to outliers and noise. It is also not prone to "short circuiting", as only
 * the local distances are used.
 *
 * ### References
 *  - Mikhail Belkin and Partha Niyogi. Laplacian Eigenmaps and Spectral Techniques for Embedding and Clustering. NIPS, 2001.
 *
 * @param data the data set.
 * @param k k-nearest neighbor.
 * @param d the dimension of the manifold. Defaults to 2.
 * @param t the smooth/width parameter of the heat kernel `exp(-||x-y||^2 / t)`.
 *          A non-positive value (such as the default -1.0) means discrete weights.
 * @return the [LaplacianEigenmap] result produced by `LaplacianEigenmap.of`.
 */
fun laplacian(data: Array<DoubleArray>, k: Int, d: Int = 2, t: Double = -1.0): LaplacianEigenmap =
    LaplacianEigenmap.of(data, k, d, t)

/**
 * t-distributed stochastic neighbor embedding (t-SNE). t-SNE is a nonlinear
 * dimensionality reduction technique that is particularly well suited
 * for embedding high-dimensional data into a space of two or three
 * dimensions, which can then be visualized in a scatter plot. Each
 * high-dimensional object is modeled by a two- or three-dimensional point
 * in such a way that similar objects are modeled by nearby points and
 * dissimilar objects are modeled by distant points.
 *
 * ### References
 *  - L.J.P. van der Maaten. Accelerating t-SNE using Tree-Based Algorithms. Journal of Machine Learning Research 15(Oct):3221-3245, 2014.
 *  - L.J.P. van der Maaten and G.E. Hinton. Visualizing Non-Metric Similarities in Multiple Maps. Machine Learning 87(1):33-55, 2012.
 *  - L.J.P. van der Maaten. Learning a Parametric Embedding by Preserving Local Structure. In Proceedings of the Twelfth International Conference on Artificial Intelligence & Statistics (AI-STATS), JMLR W&CP 5:384-391, 2009.
 *  - L.J.P. van der Maaten and G.E. Hinton. Visualizing High-Dimensional Data Using t-SNE. Journal of Machine Learning Research 9(Nov):2579-2605, 2008.
 *
 * @param X input data. If X is a square matrix, it is assumed to be the
 *          squared distance/dissimilarity matrix.
 * @param d the dimension of the manifold. Defaults to 2.
 * @param perplexity the perplexity of the conditional distribution. Defaults to 20.0.
 * @param eta the learning rate. Defaults to 200.0.
 * @param iterations the number of iterations. Defaults to 1000.
 * @return the [TSNE] object constructed from the given arguments.
 */
fun tsne(X: Array<DoubleArray>, d: Int = 2, perplexity: Double = 20.0, eta: Double = 200.0, iterations: Int = 1000): TSNE =
    TSNE(X, d, perplexity, eta, iterations)

/**
 * Uniform Manifold Approximation and Projection (UMAP).
 *
 * UMAP is a dimension reduction technique that can be used for visualization
 * similarly to t-SNE, but also for general non-linear dimension reduction.
 * The algorithm is founded on three assumptions about the data:
 *
 *  - The data is uniformly distributed on a Riemannian manifold;
 *  - The Riemannian metric is locally constant (or can be approximated as such);
 *  - The manifold is locally connected.
 *
 * From these assumptions it is possible to model the manifold with a fuzzy
 * topological structure. The embedding is found by searching for a
 * low-dimensional projection of the data that has the closest possible
 * equivalent fuzzy topological structure.
 *
 * @param data               the input data.
 * @param k                  k-nearest neighbors. Larger values result in more global views
 *                           of the manifold, while smaller values result in more local data
 *                           being preserved. Generally in the range 2 to 100. Default 15.
 * @param d                  The target embedding dimensions. Defaults to 2 to provide easy
 *                           visualization, but can reasonably be set to any integer value
 *                           in the range 2 to 100.
 * @param iterations         The number of iterations to optimize the
 *                           low-dimensional representation. Larger values result in more
 *                           accurate embedding. Must be at least 10 to take effect: any
 *                           smaller value (including the default 0) is replaced by 200
 *                           when the data has more than 10000 samples, and by 500
 *                           otherwise.
 * @param learningRate       The initial learning rate for the embedding optimization,
 *                           default 1.0.
 * @param minDist            The desired separation between close points in the embedding
 *                           space. Smaller values will result in a more clustered/clumped
 *                           embedding where nearby points on the manifold are drawn closer
 *                           together, while larger values will result in a more even
 *                           dispersal of points. The value should be no greater than,
 *                           and chosen relative to, the spread value, which determines the
 *                           scale at which embedded points will be spread out. Default 0.1.
 * @param spread             The effective scale of embedded points. In combination with
 *                           minDist, this determines how clustered/clumped the embedded
 *                           points are. Default 1.0.
 * @param negativeSamples    The number of negative samples to select per positive sample
 *                           in the optimization process. Increasing this value will result
 *                           in greater repulsive force being applied, greater optimization
 *                           cost, but slightly more accuracy. Default 5.
 * @param repulsionStrength  Weighting applied to negative samples in low-dimensional
 *                           embedding optimization. Values higher than one will result in
 *                           greater weight being given to negative samples. Default 1.0.
 * @return the [UMAP] result produced by `UMAP.of`.
 */
fun umap(data: Array<DoubleArray>, k: Int = 15, d: Int = 2, iterations: Int = 0,
         learningRate: Double = 1.0, minDist: Double = 0.1, spread: Double = 1.0, negativeSamples: Int = 5,
         repulsionStrength: Double = 1.0): UMAP {
    // Honor the caller's iteration count only when it is at least 10;
    // otherwise choose one from the number of samples.
    val epochs = when {
        iterations >= 10 -> iterations
        data.size > 10000 -> 200
        else -> 500
    }
    return UMAP.of(data, k, d, epochs, learningRate, minDist, spread, negativeSamples, repulsionStrength)
}

/**
 * Uniform Manifold Approximation and Projection (UMAP) with a user-supplied
 * distance measure.
 *
 * UMAP is a dimension reduction technique that can be used for visualization
 * similarly to t-SNE, but also for general non-linear dimension reduction.
 * The algorithm is founded on three assumptions about the data:
 *
 *  - The data is uniformly distributed on a Riemannian manifold;
 *  - The Riemannian metric is locally constant (or can be approximated as such);
 *  - The manifold is locally connected.
 *
 * From these assumptions it is possible to model the manifold with a fuzzy
 * topological structure. The embedding is found by searching for a
 * low-dimensional projection of the data that has the closest possible
 * equivalent fuzzy topological structure.
 *
 * @param data               the input data.
 * @param distance           the distance measure.
 * @param k                  k-nearest neighbors. Larger values result in more global views
 *                           of the manifold, while smaller values result in more local data
 *                           being preserved. Generally in the range 2 to 100. Default 15.
 * @param d                  The target embedding dimensions. Defaults to 2 to provide easy
 *                           visualization, but can reasonably be set to any integer value
 *                           in the range 2 to 100.
 * @param iterations         The number of iterations to optimize the
 *                           low-dimensional representation. Larger values result in more
 *                           accurate embedding. Must be at least 10 to take effect: any
 *                           smaller value (including the default 0) is replaced by 200
 *                           when the data has more than 10000 samples, and by 500
 *                           otherwise.
 * @param learningRate       The initial learning rate for the embedding optimization,
 *                           default 1.0.
 * @param minDist            The desired separation between close points in the embedding
 *                           space. Smaller values will result in a more clustered/clumped
 *                           embedding where nearby points on the manifold are drawn closer
 *                           together, while larger values will result in a more even
 *                           dispersal of points. The value should be no greater than,
 *                           and chosen relative to, the spread value, which determines the
 *                           scale at which embedded points will be spread out. Default 0.1.
 * @param spread             The effective scale of embedded points. In combination with
 *                           minDist, this determines how clustered/clumped the embedded
 *                           points are. Default 1.0.
 * @param negativeSamples    The number of negative samples to select per positive sample
 *                           in the optimization process. Increasing this value will result
 *                           in greater repulsive force being applied, greater optimization
 *                           cost, but slightly more accuracy. Default 5.
 * @param repulsionStrength  Weighting applied to negative samples in low-dimensional
 *                           embedding optimization. Values higher than one will result in
 *                           greater weight being given to negative samples. Default 1.0.
 * @return the [UMAP] result produced by `UMAP.of`.
 */
fun <T> umap(data: Array<T>, distance: Distance<T>, k: Int = 15, d: Int = 2, iterations: Int = 0,
             learningRate: Double = 1.0, minDist: Double = 0.1, spread: Double = 1.0, negativeSamples: Int = 5,
             repulsionStrength: Double = 1.0): UMAP {
    // Honor the caller's iteration count only when it is at least 10;
    // otherwise choose one from the number of samples.
    val epochs = when {
        iterations >= 10 -> iterations
        data.size > 10000 -> 200
        else -> 500
    }
    return UMAP.of(data, distance, k, d, epochs, learningRate, minDist, spread, negativeSamples, repulsionStrength)
}
