//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Sun Jan  4 23:09:27 EST 2015
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model: ANalysis Of VAriance (ANOVA) with one categorical variable
 */

package scalation.analytics

import scala.collection.mutable.Set

import scalation.linalgebra.{VectoD, VectorD, VectoI, VectorI}
import scalation.plot.Plot
import scalation.stat.Statistic
import scalation.util.{banner, Error}

import RegTechnique._

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ANOVA1` class supports one-way ANalysis Of VAriance (ANOVA), i.e.,
 *  it allows only one binary/categorical treatment variable.  It is framed using
 *  General Linear Model 'GLM' notation and supports the use of one
 *  binary/categorical treatment variable 't'.  This is done by introducing
 *  dummy variables 'd_j' to distinguish the treatment level.  The problem is
 *  again to fit the parameter vector 'b' in the following equation
 *  <p>
 *      y  =  b dot x + e  =  b_0 + b_1 * d_1 + b_2 * d_2 + ... + b_k * d_k + e
 *  <p>
 *  where 'e' represents the residuals (the part not explained by the model).
 *  Use Least-Squares (minimizing the residuals) to solve for the parameter vector 'b'
 *  using the Normal Equations:
 *  <p>
 *      x.t * x * b  =  x.t * y
 *      b  =  fac.solve (.)
 *  <p>
 *  @see `ANCOVA` for models with multiple variables 
 *  @see psych.colorado.edu/~carey/Courses/PSYC5741/handouts/GLM%20Theory.pdf
 *  @param t          the treatment/categorical variable vector
 *  @param y          the response/output vector
 *  @param fname_     the feature/variable names
 *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
 */
class ANOVA1 (t: VectoI, y: VectoD, fname_ : Strings = null, technique: RegTechnique = QR)
      extends Regression (VectorD.one (t.dim) +^: Variable.dummyVars (t), y, fname_, null, technique)
      with ExpandableVariable
{
    if (y.dim != t.dim) flaw ("constructor", "dimensions of t and y are incompatible")

    // Keep the pair reported by 'Variable.get_shift_tmax' at construction time so that
    // 'expand' hands the same values to 'Variable.dummyVar' on every call.
    // NOTE(review): presumably these were set by the 'Variable.dummyVars (t)' call in the
    // superclass constructor arguments above -- confirm against 'Variable'.
    private val (shift, tmax) = Variable.get_shift_tmax

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Expand the vector 't' into a vector of terms/columns: a leading 1.0
     *  followed by the result of 'Variable.dummyVar' for the treatment level.
     *  Only the first element 't(0)' is read; it is truncated to an 'Int'.
     *  @param t     the vector whose first element is the treatment level to expand
     *  @param nCat  the number of categorical variables (not used by this method)
     */
    def expand (t: VectoD, nCat: Int = 1): VectoD =
    {
        val level   = t(0).toInt                                      // treatment level as an integer
        val dummies = Variable.dummyVar (level, shift, tmax)          // dummy-variable part
        VectorD (1.0) ++ dummies                                      // prepend the 1.0 term
    } // expand

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given the vector 'zt', first expand it via 'expand' and then pass the
     *  expanded vector to 'predict' to obtain the response value.
     *  @param zt  the unexpanded vector holding the categorical value
     */
    def predict_ex (zt: VectoD): Double =
    {
        val xe = expand (zt)                                          // expanded form of zt
        predict (xe)
    } // predict_ex

} // ANOVA1 class


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ANOVA1Test` object tests the `ANOVA1` class using the following
 *  regression equation.
 *  <p>
 *      y  =  b dot x  =  b_0 + b_1*d_1 + b_2*d_2
 *  <p>
 *  > runMain scalation.analytics.ANOVA1Test
 */
object ANOVA1Test extends App
{
//  val t  = VectorI (0, 0, 0, 1, 1, 1, 2, 2, 2)                 // treatment level data
    val t  = VectorI (1, 1, 1, 2, 2, 2, 3, 3, 3)                 // treatment level data
    val y  = VectorD (755.0, 865.0, 815.0,
                      442.0, 420.0, 401.0,
                      282.0, 250.0, 227.0)                       // response vector

    println (s"t = $t")
    println (s"y = $y")

    // build the model from the treatment levels and responses, then train, evaluate and report
    banner ("ANOVA1 Model")
    val ano = new ANOVA1 (t, y)
    println (s"x = ${ano.getX}")
    ano.train ().eval ()
    println (ano.report)

    banner ("Make Predictions")
    val z  = VectorD (2)                                          // new instance
    // The expanded vector is the leading 1.0 followed by the dummy variables, which are
    // 0/1 indicators, so the raw level value 2.0 can never appear in it.
    // NOTE(review): level 2 is expected to map to dummies (0, 1) -- confirm against 'Variable.dummyVar'.
    val ze = VectorD (1.0, 0.0, 1)                                // expanded vector
    assert (ze == ano.expand (z))

    // predicting from the pre-expanded vector and from the raw instance should agree
    println (s"predict ($ze)   = ${ano.predict (ze)}")
    println (s"predict_ex ($z) = ${ano.predict_ex (z)}")

    // predict the response for every treatment level in the data and plot against the actual values
    val yp = new VectorD (y.dim)
    for (i <- yp.range) yp(i) = ano.predict (ano.expand (VectorD (t(i))))
    println (s" y = $y \n yp = $yp")
    new Plot (t.toDouble, y, yp, "ANOVA1", true)

} // ANOVA1Test object