//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Sun Jan  4 23:09:27 EST 2015
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model: ANalysis of COVAriance (ANCOVA) with 1 categorical variable
 */

package scalation.analytics

import scala.collection.mutable.{Map, Set}

import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD, VectoI, VectorI}
import scalation.util.{banner, Error}

import RegTechnique._
import Variable.dummyVar

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ANCOVA1` class supports ANalysis of COVAriance (ANCOVA) with a single
 *  categorical variable.  A categorical treatment variable 't' is added to a
 *  multiple linear regression by appending dummy variables 'dj' (produced by
 *  `Variable.dummyVars`) to the continuous data matrix, so that the treatment
 *  levels can be distinguished.  The task is then to fit the parameter vector 'b'
 *  in the augmented regression equation
 *  <p>
 *      y  =  b dot x + e  =  b0  +  b_1   * x_1  +  b_2   * x_2  +  ... b_k * x_k
 *                                +  b_k+1 * d_1  +  b_k+2 * d_2  +  ... b_k+l * d_l + e
 *  <p>
 *  where 'e' represents the residuals (the part not explained by the model).
 *  Least-Squares (minimizing the residuals) is used to solve for the parameter
 *  vector 'b' via the Normal Equations:
 *  <p>
 *      x.t * x * b  =  x.t * y
 *      b  =  fac.solve (.)
 *  <p>
 *  't' holds categorical values/levels, e.g., treatment levels (0, ... 't.max ()')
 *  @see see.stanford.edu/materials/lsoeldsee263/05-ls.pdf
 *  @param x_         the data/input matrix of continuous variables
 *  @param t          the treatment/categorical variable vector
 *  @param y          the response/output vector
 *  @param fname_     the feature/variable names
 *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
 */
class ANCOVA1 (x_ : MatriD, t: VectoI, y: VectoD, fname_ : Strings = null, technique: RegTechnique = QR)
      extends Regression (x_ ++^ Variable.dummyVars (t), y, fname_, null, technique)
      with ExpandableVariable
{
    // one treatment level is required per response value
    if (t.dim != y.dim) flaw ("constructor", "dimensions of t and y are incompatible")

    // remember the shift and tmax reported by `Variable` so that `expand` can pass
    // them to `Variable.dummyVar` (presumably the values set while building the
    // dummy columns in the superclass constructor call -- confirm in `Variable`)
    private val (shift, tmax) = Variable.get_shift_tmax

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Expand the vector 'zt' by replacing its trailing categorical value with
     *  the corresponding dummy variables.
     *  @param zt    the vector whose last element is the categorical value
     *  @param nCat  the number of categorical variables (not used: locked at one)
     */
    def expand (zt: VectoD, nCat: Int = 1): VectoD =
    {
        val last       = zt.dim - 1                               // index of the categorical value
        val continuous = zt.slice (0, last)                       // everything before it
        val level      = zt(last).toInt                           // categorical value as a level
        continuous ++ Variable.dummyVar (level, shift, tmax)
    } // expand

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the response for the unexpanded vector 'zt' by first expanding it
     *  with `expand` and then calling `predict` on the result.
     *  @param zt  the vector whose last element is the categorical value
     */
    def predict_ex (zt: VectoD): Double =
    {
        val ze: VectoD = expand (zt)                              // continuous values + dummy variables
        predict (ze)
    } // predict_ex

} // ANCOVA1 class


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ANCOVA1` companion object provides factory functions.
 */
object ANCOVA1 extends Error
{
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create an `ANCOVA1` model from a combined data matrix 'xt'.
     *  The result type is declared explicitly: with procedure syntax (no '=')
     *  the method would have result type `Unit` and the new model would be discarded.
     *  @param xt         the data/input matrix of continuous variables and a
     *                        treatment/categorical variable in the last column
     *  @param y          the response/output vector
     *  @param fname      the feature/variable names
     *  @param technique  the technique used to solve for b in x.t*x*b = x.t*y
     *  @return           the newly constructed `ANCOVA1` model
     */
    def apply (xt: MatriD, y: VectoD, fname: Strings = null, technique: RegTechnique = QR): ANCOVA1 =
    {
        // split 'xt' into the continuous part 'x' and the categorical column 't'
        // (presumably the last column, per the parameter contract -- see `pullResponse`)
        val (x, t) = pullResponse (xt)
        new ANCOVA1 (x, t.toInt, y, fname, technique)             // levels must be integer-valued
    } // apply

} // ANCOVA1 object


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ANCOVA1Test` object tests the `ANCOVA1` class using the following
 *  regression equation.
 *  <p>
 *      y  =  b dot x  =  b_0 + b_1*x_1 + b_2*x_2 + b_3*d_1 + b_4*d_2
 *  <p>
 *  It builds the model from a small data set, prints the fit report and then
 *  compares `predict` on a hand-expanded vector with `predict_ex` on the
 *  unexpanded one.
 *  > runMain scalation.analytics.ANCOVA1Test
 */
object ANCOVA1Test extends App
{
    // 6 data points, one per row: constant term, x_1 coordinate, x_2 coordinate
    val x = new MatrixD ((6, 3), 1.0, 36.0,  66.0,                 // 6-by-3 matrix
                                 1.0, 37.0,  68.0,
                                 1.0, 47.0,  64.0,
                                 1.0, 32.0,  53.0,
                                 1.0, 42.0,  83.0,
                                 1.0,  1.0, 101.0)
    val t  = VectorI (0, 0, 1, 1, 2, 2)                             // treatment level of each data point
    val y  = VectorD (745.0, 895.0, 442.0, 440.0, 643.0, 1598.0)    // response of each data point

    println (s"x = $x")
    println (s"t = $t")
    println (s"y = $y")

    banner ("ANCOVA1 Model")
    val anc = new ANCOVA1 (x, t, y)
    println (s"xe = ${anc.getX}")                                  // data matrix including dummy variables
    anc.train ().eval ()
    println (anc.report)

    banner ("Make Predictions")
    val z  = VectorD (1.0, 20.0, 80.0, 1.0)                        // new instance, categorical value last
    val ze = VectorD (1.0, 20.0, 80.0, 2.0, 1.0)                   // expansion of z expected by the check below
    assert (ze == anc.expand (z))

    println (s"predict ($ze)   = ${anc.predict (ze)}")
    println (s"predict_ex ($z) = ${anc.predict_ex (z)}")

} // ANCOVA1Test object