//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.3
 *  @date    Mon Mar 25 14:58:13 EDT 2013
 *  @see     LICENSE (MIT style license file).
 *  @see     www.predict.ws/H_principle/SvanteHarald.htm
 *  @see     folk.uio.no/henninri/pca_module/pca_nipals.pdf
 */

package scalation.analytics

import scala.util.control.Breaks.{breakable, break}

import scalation.linalgebra.{Eigenvalue, Eigenvector, MatrixD, VectoD, VectorD}
import scalation.linalgebra.MatrixD.outer
import scalation.stat.StatVector.{center, mean}
import scalation.util.Error

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PrincipalComponents` class performs the Principal Component Analysis 'PCA'
 *  on data matrix 'x'.  It can be used to reduce the dimensionality of the data.
 *  First find the Principal Components 'PC's by calling 'findPCs' and then call
 *  'reduce' to reduce the data (i.e., reduce matrix 'x' to a lower dimensionality
 *  matrix).
 *  @param x  the data matrix to reduce, stored column-wise
 */
class PrincipalComponents (x: MatrixD)
      extends Reducer with Error
{
    if (! x.isRectangular) flaw ("constructor", "x matrix must be rectangular")

    private val DEBUG    = true                              // debug flag
    private val MAX_ITER = 200                               // the maximum number of iterations
    private val EPSILON  = 1E-8                              // a value close to zero

    private val M  = x.dim1                                  // the number of data points (rows)
    private val MR = M.toDouble                              // M as a real number
    private val N  = x.dim2                                  // the number of variables (columns)

    private val mu  = mean (x)                               // the mean vector (column means)
    private val xc  = center (x, mu)                         // center x (shift mean to zero)
    private val cov = (xc mdot xc) / (MR - 1.0)              // the covariance matrix (from mean zero data)

    private val eigenVal = (new Eigenvalue (cov)).getE ()    // the eigenvalues of cov
    private val eigenVec = computeEigenVectors (eigenVal)    // the eigenvectors for cov

    private var featureMat: MatrixD = null                   // the feature matrix
    private var reducedMat: MatrixD = null                   // the reduced matrix

    if (DEBUG) {
        println ("-" * 60)
        println ("mu       = " + mu)
        println ("-" * 60)
        println ("centered x = " + xc)
        println ("-" * 60)
        println ("cov      = " + cov)
        println ("-" * 60)
        println ("eigenVal = " + eigenVal)
        println ("-" * 60)
        println ("eigenVec = " + eigenVec)
        println ("-" * 60)
    } // if

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the unit eigenvectors for the covariance matrix.
     *  @param eVal  the vector of eigenvalues for the covariance matrix
     */
    private def computeEigenVectors (eVal: VectorD): MatrixD =
    {
        val eigenVec = (new Eigenvector (cov, eVal)).getV                      // the eigenvectors for cov
        for (j <- 0 until N) eigenVec.setCol (j, eigenVec.col(j).normalizeU)   // want unit eigenvectors
        eigenVec
    } // computeEigenVectors

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Find the Principal Components/Features, the eigenvectors with the 'k'
     *  highest eigenvalues.
     *  @param k  the number of Principal Components 'PC's to find
     */
    def findPCs (k: Int): MatrixD =
    {
        featureMat = eigenVec(0 until N, 0 until k)          // store in feature matrix
        featureMat
    } // findPCs
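
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the proportion of the total variance captured by the first 'k'
     *  Principal Components, a common aid for choosing 'k'.  NOTE: this is an
     *  illustrative sketch added for exposition, not part of the original API;
     *  it assumes the eigenvalues in 'eigenVal' are sorted in descending order.
     *  @param k  the number of Principal Components 'PC's to consider
     */
    def varianceExplained (k: Int): Double =
    {
        var varK = 0.0
        for (j <- 0 until k) varK += eigenVal(j)             // variance of the first k PCs
        varK / eigenVal.sum                                  // fraction of the total variance
    } // varianceExplained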

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Multiply the zero mean data matrix by the feature matrix to reduce
     *  dimensionality.
     */
    def reduce (): MatrixD =
    {
        reducedMat = xc * featureMat                         // store in reduced matrix
        reducedMat
    } // reduce

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Approximately recover the original data by multiplying the reduced matrix
     *  by the inverse (via transpose) of the feature matrix and then adding back
     *  the means.
     */
    def recover: MatrixD = reducedMat * featureMat.t + mu

} // PrincipalComponents class


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PrincipalComponentsTest` object is used to test the `PrincipalComponents` class.
 *  @see www.ce.yildiz.edu.tr/personal/songul/file/1097/principal_components.pdf
 *  > run-main scalation.analytics.PrincipalComponentsTest
 */
object PrincipalComponentsTest extends App
{
    val x = new MatrixD ((10, 2), 2.5, 2.4,                  // the data matrix
                                  0.5, 0.7,
                                  2.2, 2.9,
                                  1.9, 2.2,
                                  3.1, 3.0,
                                  2.3, 2.7,
                                  2.0, 1.6,
                                  1.0, 1.1,
                                  1.5, 1.6,
                                  1.1, 0.9)

    val pca = new PrincipalComponents (x)                    // perform Principal Component Analysis (PCA)
    println ("-" * 60)
    println ("pc          = " + pca.findPCs (1))             // find the first Principal Component
    println ("-" * 60)
    println ("reduced x   = " + pca.reduce)                  // the reduced data (lower dimensionality)
    println ("-" * 60)
    println ("recovered x = " + pca.recover)                 // the approximately recovered data
    println ("-" * 60)

} // PrincipalComponentsTest object


//------------------------------------------------------------------------------

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Solve for the i-th Principal Component 'PC_i', returning the score and
     *  loading vectors 't' and 'p' (one step of the NIPALS algorithm).  Here
     *  'e' denotes the residual data matrix, initially the centered data,
     *  maintained outside this commented-out sketch.
     *  @param i  the i-th Principal Component 'PC_i'
     */
//  def solve (i: Int): Tuple2 [VectorD, VectorD] =
//  {
//      var t  = x.col (i)          // the scores for PC_i, initialized to i-th column of x
//      var p  = new VectorD (1)    // the loadings for PC_i
//      var tt = new VectorD (1)    // saved value for t (the old t)
//      var dt = new VectorD (1)    // the difference between t and tt
//
//      breakable { for (k <- 1 to MAX_ITER) {
//          p  = (e * t) / t.normSq                 // project x onto t to find the corresponding loading p
//          p  = p / p.norm                         // normalize loading vector p to length 1
//          tt = t                                  // save the old value of t
//          t  = (e * p) / p.normSq                 // project x onto p to find corresponding score vector t
//          dt = t - tt                             // difference between previous and current scores
//          if (k > 1 && dt.norm < EPSILON) break   // check for convergence (small change in scores)
//      }} // for
//      e = e - outer (t, p)                        // remove the estimated PC_i component from e
//      (t, p)
//  } // solve
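

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `PrincipalComponentsTest2` object checks the round-trip property of PCA:
 *  when all 'N' Principal Components are kept, 'reduce' followed by 'recover'
 *  should reproduce the original data matrix up to floating point error.
 *  NOTE: this is an illustrative sketch added for exposition, not part of the
 *  original test suite; it uses only the public API exercised above.
 *  > run-main scalation.analytics.PrincipalComponentsTest2
 */
object PrincipalComponentsTest2 extends App
{
    val x = new MatrixD ((4, 2), 1.0, 2.0,                   // a small data matrix
                                 2.0, 4.1,
                                 3.0, 6.2,
                                 4.0, 7.9)

    val pca = new PrincipalComponents (x)                    // perform PCA on x
    pca.findPCs (2)                                          // keep all 2 PCs => lossless
    pca.reduce ()                                            // rotate data into the PC basis
    println ("-" * 60)
    println ("x           = " + x)
    println ("recovered x = " + pca.recover)                 // should closely match x
    println ("-" * 60)

} // PrincipalComponentsTest2 object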