//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.1
 *  @date    Mon Mar 25 14:58:13 EDT 2013
 *  @see     LICENSE (MIT style license file).
 *  @see     http://www.predict.ws/H_principle/SvanteHarald.htm
 *  @see     http://folk.uio.no/henninri/pca_module/pca_nipals.pdf
 */

package scalation.analytics

import util.control.Breaks.{breakable, break}

import scalation.linalgebra.{Eigenvalue, Eigenvector}
import scalation.linalgebra.{MatrixD, VectorD}
import scalation.linalgebra.MatrixD.outer
import scalation.util.Error

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PrincipalComponents` class performs the Principal Component Analysis (PCA)
 *  on data matrix 'x'.  It can be used to reduce the dimensionality of the data.
 *  First find the Principal Components (PCs) by calling 'findPCs' and then call
 *  'reduce' to reduce the data (i.e., reduce matrix 'x' to a lower dimensionality
 *  matrix).
 *  @param x  the data matrix to reduce, stored column-wise
 */
class PrincipalComponents (x: MatrixD) extends Reducer with Error
{
    if (! x.isRectangular) flaw ("constructor", "x matrix must be rectangular")

    private val MAX_ITER = 200                    // the maximum number of iterations
    private val EPSILON  = 1E-8                   // a value close to zero
    private val M        = x.dim1                 // the number of data points (rows)
    private val MR       = M.toDouble             // M as a real number
    private val N        = x.dim2                 // the number of variables (columns)
    private val mu       = meanCenter ()          // the mean vector
    private val cov      = computeCov ()          // the covariance matrix

    private val eigenVal = (new Eigenvalue (cov)).getE         // the eigenvalues of cov
    private val eigenVec = computeEigenVectors (eigenVal)      // the eigenvectors for cov

    private var featureMat: MatrixD = null        // the feature matrix
    private var reducedMat: MatrixD = null        // the reduced matrix

    println ("mu            = " + mu)
    println ("zero-meaned x = " + x)
    println ("cov           = " + cov)
    println ("eigenVal      = " + eigenVal)
    println ("eigenVec      = " + eigenVec)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Center the data about the means (i.e., subtract the means) and return
     *  the mean vector (i.e., the mean for each variable/dimension).
     */
    def meanCenter (): VectorD =
    {
        val mu = new VectorD (N)                                 // the mean vector
        for (j <- 0 until N) mu(j) = x.col(j).sum / MR           // compute means
        for (i <- 0 until M; j <- 0 until N) x(i, j) -= mu(j)    // subtract the means
        mu
    } // meanCenter

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Assuming mean centered data, compute the covariance matrix.
     */
    def computeCov (): MatrixD =
    {
        val cov = new MatrixD (N, N)                             // the covariance matrix
        for (i <- 0 until N; j <- i until N) {
            cov(i, j) = (x.col(i) dot x.col(j)) / (MR - 1.0)
            if (i != j) cov(j, i) = cov(i, j)                    // cov is symmetric
        } // for
        cov
    } // computeCov

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the unit eigenvectors for the covariance matrix.
     *  @param eVal  the vector of eigenvalues for the covariance matrix
     */
    def computeEigenVectors (eVal: VectorD): MatrixD =
    {
        val eigenVec = (new Eigenvector (cov, eVal)).getV        // the eigenvectors for cov
        for (j <- 0 until N) eigenVec.setCol (j, eigenVec.col(j).normalizeU)   // want unit vectors
        eigenVec
    } // computeEigenVectors
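
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** A sketch of a helper (not in the original class): the fraction of total
     *  variance captured by the 'k' largest eigenvalues, a common heuristic for
     *  choosing 'k' before calling 'findPCs'.  Like 'findPCs', it assumes the
     *  eigenvalues are ordered largest first.
     *  @param k  the number of Principal Components (PCs) under consideration
     */
    def varianceExplained (k: Int): Double =
    {
        var topK = 0.0                            // sum of the k largest eigenvalues
        for (j <- 0 until k) topK += eigenVal(j)
        topK / eigenVal.sum                       // fraction of the total variance
    } // varianceExplained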
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Find the Principal Components/Features, the eigenvectors with the 'k'
     *  highest eigenvalues (assumes the eigenvalues/eigenvectors are ordered
     *  largest first).
     *  @param k  the number of Principal Components (PCs) to find
     */
    def findPCs (k: Int): MatrixD =
    {
        featureMat = eigenVec(0 until N, 0 until k)              // store in feature matrix
        featureMat
    } // findPCs

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Multiply the zero-mean data matrix by the feature matrix to reduce
     *  dimensionality.
     */
    def reduce (): MatrixD =
    {
        reducedMat = x * featureMat                              // store in reduced matrix
        reducedMat
    } // reduce

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Approximately recover the original data by multiplying the reduced matrix
     *  by the inverse (via transpose) of the feature matrix and then adding back
     *  the means.
     */
    def recover (): MatrixD = reducedMat * featureMat.t + mu

} // PrincipalComponents class

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PrincipalComponentsTest` object is used to test the `PrincipalComponents` class.
 *  @see http://www.ce.yildiz.edu.tr/personal/songul/file/1097/principal_components.pdf
 */
object PrincipalComponentsTest extends App
{
    val x = new MatrixD ((10, 2), 2.5, 2.4,      // the data matrix
                                  0.5, 0.7,
                                  2.2, 2.9,
                                  1.9, 2.2,
                                  3.1, 3.0,
                                  2.3, 2.7,
                                  2.0, 1.6,
                                  1.0, 1.1,
                                  1.5, 1.6,
                                  1.1, 0.9)

    val pca = new PrincipalComponents (x)        // perform Principal Component Analysis (PCA)
    println ("pc          = " + pca.findPCs (1)) // find the first Principal Component (PC)
    println ("reduced x   = " + pca.reduce)      // the reduced data (lower dimensionality)
    println ("recovered x = " + pca.recover)     // the approximately recovered data

} // PrincipalComponentsTest

//------------------------------------------------------------------------------

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Solve for the i-th Principal Component 'PC_i', returning the score and
     *  loading vectors 't' and 'p' (the iterative NIPALS algorithm, see the
     *  @see links above).  Note: 'e' is the residual matrix, initially the
     *  mean-centered data matrix.
     *  @param i  the i-th Principal Component PC_i
     *
    def solve (i: Int): Tuple2 [VectorD, VectorD] =
    {
        var t  = x.col (i)           // the scores for PC_i, initialized to i-th column of x
        var p  = new VectorD (1)     // the loadings for PC_i
        var tt = new VectorD (1)     // saved value for t (the old t)
        var dt = new VectorD (1)     // the difference between t and tt

        breakable { for (k <- 1 to MAX_ITER) {
            p  = (e * t) / t.normSq  // project e onto t to find the corresponding loading p
            p  = p / p.norm          // normalize loading vector p to length 1
            tt = t                   // save the old value of t
            t  = (e * p) / p.normSq  // project e onto p to find corresponding score vector t
            dt = t - tt              // calculate dt as the difference between previous and current scores
            if (k > 1 && dt.norm < EPSILON) break   // break once the scores have converged
        }} // for
        e = e - outer (t, p)         // remove the estimated PC_i component from e
        (t, p)
    } // solve
     */
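
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PrincipalComponentsTest2` object is a usage sketch (not part of the
 *  original file): it reruns the data set above and uses the hypothetical
 *  'varianceExplained' helper added to the class to show how much variance
 *  each choice of 'k' would retain, before reducing with the first PC.
 */
object PrincipalComponentsTest2 extends App
{
    val x = new MatrixD ((10, 2), 2.5, 2.4,      // the data matrix
                                  0.5, 0.7,
                                  2.2, 2.9,
                                  1.9, 2.2,
                                  3.1, 3.0,
                                  2.3, 2.7,
                                  2.0, 1.6,
                                  1.0, 1.1,
                                  1.5, 1.6,
                                  1.1, 0.9)

    val pca = new PrincipalComponents (x)        // perform Principal Component Analysis (PCA)
    for (k <- 1 to 2) {                          // fraction of variance retained by k PC(s)
        println ("variance explained (k = " + k + ") = " + pca.varianceExplained (k))
    } // for
    pca.findPCs (1)                              // keep only the first PC
    println ("reduced x   = " + pca.reduce)      // 10-by-1 reduced data
    println ("recovered x = " + pca.recover)     // approximate reconstruction from 1 PC

} // PrincipalComponentsTest2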