//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.0
 *  @date    Wed Aug 26 18:41:26 EDT 2009
 *  @see     LICENSE (MIT style license file).
 */

package scalation.stat

import math.sqrt

import scalation.linalgebra.VectorD
import scalation.math.DoubleWithExp._
import scalation.random.Quantile.studentTInv

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The StatVector class provides methods for computing common statistics on a
 *  data vector.  Both maximum likelihood (the default) and unbiased estimators
 *  are supported.  Unbiased should only be used on sample (not population) data.
 *  @param dim       the dimension/size of the vector
 *  @param unbiased  whether the estimators are restricted to be unbiased.
 */
class StatVector (dim: Int, private var unbiased: Boolean = false)
      extends VectorD (dim)
{
    /** The number of samples
     */
    private val n = dim.toDouble

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Construct a StatVector from an Array.
     *  @param u  the array to initialize StatVector
    def this (u: Array [Double])
    {
        this (u.length)
        setAll (u)
    } // constructor
     */

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Construct a StatVector from two or more values (repeated values Double*).
     *  @param u0  the first value
     *  @param u1  the second value
     *  @param u   the rest of the values (zero or more additional values)
     */
    def this (u0: Double, u1: Double, u: Double*)
    {
        this (u.length + 2)
        this(0) = u0; this(1) = u1
        for (i <- 2 until dim) this(i) = u(i - 2)
    } // constructor

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Construct a StatVector from a VectorN [Double], i.e., VectorD.
     *  @param u  the vector to initialize StatVector
     */
    def this (u: VectorD)
    {
        this (u.dim)
        setAll (u())
    } // constructor

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Toggle/flip the bias flag for the estimators.
     */
    def toggleBias () { unbiased = ! unbiased }

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** The denominator for sample variance and covariance is one less for
     *  unbiased (n-1) vs. maximum likelihood (n) estimators.  Also use n for
     *  population statistics.
     */
    private def den: Double = if (unbiased) n - 1. else n

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Get the number of samples.
     */
    def num: Int = dim

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the mean of this vector.
     */
    def mean: Double = sum / n

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the median (middle value) of this vector.  For odd size, it is
     *  the middle element, for even, it is the larger of the two middle elements.
     *  FIX:  need a more efficient algorithm
     */
    def median: Double = { sort (); v(dim / 2) }

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the variance of this vector.
     */
    def variance: Double = (normSq - sum * sum / n) / den 

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the covariance of this vector with vector y.
     *  @param y  the other vector
     */
    def cov (y: VectorD): Double = ((this dot y) - sum * y.sum / n) / den

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the k-lag auto-covariance of this vector.
     *  @param k  the lag parameter
     */
    def acov (k: Int = 1): Double =
    {
        val mu  = mean
        val num = dim - k
        var sum = 0.
        for (i <- 0 until num) sum += (v(i) - mu) * (v(i+k) - mu)
        sum / num
    } // acorr

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute Pearson's correlation of this vector with vector y.
     *  @param y  the other vector
     */
    def corr (y: StatVector): Double = cov (y) / sqrt (variance * y.variance)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the k-lag auto-correlation of this vector.
     *  @param k  the lag parameter
     */
    def acorr (k: Int = 1): Double = acov (k) / variance

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the standard deviation of this vector.
     */
    def stddev: Double = sqrt (variance)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the mean square (ms) of this vector.
     */
    def ms: Double = normSq / n

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the root mean square (rms) of this vector.
     */
    def rms: Double = sqrt (ms)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the skewness of this vector.  Negative skewness indicates the
     *  distribution is elongated on the left, zero skewness indicates it is
     *  symmetric, and positive skewness indicates it is elongated on the right.
     *  @see http://www.mathworks.com/help/stats/skewness.html
     */
    def skew: Double =
    {
        val s = (this - mean).sum~^3 / (n * stddev~^3)
        if (unbiased) s * sqrt (n * (n-1.)) / (n-2.) else s
    } // skew

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the confidence interval half-width for the given confidence level.
     *  @param p  the confidence level
     */
    def interval (p: Double = .95): Double =
    {
        val df = dim - 1              // degrees of freedom
        if (df < 1) flaw ("interval", "must have at least 2 observations")
        val pp = 1 - (1 - p) / 2.     // e.g., .95 --> .975 (two tails)
        val t  = studentTInv (pp, df)
        t * stddev / sqrt (dim.toDouble)
    } // interval

} // StatVector class


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Object to test the StatVector class.
 */
object StatVectorTest extends App
{
    val x = new StatVector (1., 2., 3., 4., 5., 6.)
    val y = new StatVector (2., 3., 4., 5., 6., 7.)

    println ("x           = " + x)
    println ("y           = " + y)
    println ("x.min       = " + x.min ())       // minimum (from VectorD)
    println ("x.max       = " + x.max ())       // maximum (from VectorD)
    println ("x.mean      = " + x.mean)         // mean
    println ("x.median    = " + x.median)       // median
    println ("x.variance  = " + x.variance)     // variance
    println ("x.cov (y)   = " + x.cov (y))      // covariance
    println ("x.corr (y)  = " + x.corr (y))     // correlation
    println ("x.acorr (1) = " + x.acorr (1))    // auto-correlation
    println ("x.stddev    = " + x.stddev)       // standard deviation
    println ("x.ms        = " + x.ms)           // mean square
    println ("x.rms       = " + x.rms)          // root mean square
    println ("x.skew      = " + x.skew)         // skewness
    println ("x.interval  = " + x.interval ())  // confidence interval

} // StatVectorTest object