//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller, Michael E. Cotterell
 *  @version 2.0
 *  @date    Sun Jan 11 19:05:20 EST 2015
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    Model: Exponential Regression
 */

// FIX: needs improved optimization: try IRWLS

package scalation
package modeling

import scala.math.exp

import scalation.mathstat._
import scalation.optimization.quasi_newton.BFGS

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ExpRegression` class supports exponential regression.  In this case,
 *  x is multi-dimensional [1, x_1, ... x_k].  Fit the parameter vector b in the
 *  exponential regression equation
 *      log (mu (x))  =  b dot x  =  b_0 + b_1 * x_1 + ... b_k * x_k
 *  @see www.stat.uni-muenchen.de/~leiten/Lehre/Material/GLM_0708/chapterGLM.pdf 
 *  @param x       the data/input matrix
 *  @param y       the response/output vector
 *  @param fname_  the feature/variable names (defaults to null)
 *  @param hparam  the hyper-parameters (currently none)
 *  @param nonneg  whether to check that responses are nonnegative (defaults to true)
 */
class ExpRegression (x: MatrixD, y: VectorD, fname_ : Array [String] = null,
                     hparam: HyperParameter = null, nonneg: Boolean = true)
      extends Predictor (x, y, fname_, hparam)
         with Fit (dfm = x.dim2 - 1, df = x.dim - x.dim2):

    if nonneg && ! y.isNonnegative then flaw ("init", "response vector y must be nonnegative")

    private val debug      = debugf ("ExpRegression", true)              // debug function
    private val flaw       = flawf ("ExpRegression")                     // flaw function
    private var n_dev      = -1.0                                        // null dev: -LL, for null model (intercept only)
//  private var r_dev      = -1.0                                        // residual dev: -LL, for full model
//  private var pseudo_rSq = -1.0                                        // McFaffen's pseudo R-squared

    modelName = "ExpRegression"

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** For a given parameter vector b, compute -2 * Log-Likelihood (-2LL).
     *  -2LL is the standard measure that follows a Chi-Square distribution. 
     *  @see www.stat.cmu.edu/~cshalizi/350/lectures/26/lecture-26.pdf
     *  @see www.statisticalhorizons.com/wp-content/uploads/Allison.StatComp.pdf
     *  @param b  the parameters to fit
     */
    def ll (b: VectorD): Double = 
        var sum = 0.0
        for i <- y.indices do
            val bx = b dot x(i)
            sum += -bx - y(i) / exp (bx)
        end for
        -2.0 * sum
    end ll

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** For a given parameter vector b, compute -2 * Log-Likelihood (-2LL) for
     *  the null model (the one that does not consider the effects of x(i)).
     *  @param b  the parameters to fit
     */
    def ll_null (b: VectorD): Double =
        var sum = 0.0
        for i <- y.indices do
            val bx = b(0)                                                // only use intercept
            sum += -bx - y(i) / exp (bx)
        end for
        -2.0 * sum
    end ll_null

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train the predictor by fitting the parameter vector (b-vector) in the
     *  exponential regression equation.
     *  @param x_  the training/full data/input matrix
     *  @param y_  the training/full response/output vector
     */
    def train (x_ : MatrixD = x, y_ : VectorD = y): Unit =
        // FIX - currently only works for x_ = x and y_ = y
        train_null ()
        val b0   = new VectorD (x_.dim2)                                 // use b_0 = 0 for starting guess for parameters
        val bfgs = new BFGS (ll)                                         // minimizer for -2LL
        b        = bfgs.solve (b0)._2                                    // find optimal solution for parameters

//      e = y_ / (x_ * b)                                                // residual/error vector e
        e = y_ - (x_ * b).map (exp)                                      // residual/error vector e
    end train

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** For the null model, train the classifier by fitting the parameter vector
     *  (b-vector) in the logistic regression equation using maximum likelihood.
     *  Do this by minimizing -2l.
     */
    def train_null (): Unit =
         val b0   = new VectorD (x.dim2)                                 // use b0 = 0 for starting guess for parameters
         val bfgs = new BFGS (ll_null)                                   // minimizer for -2l
         val b_n = bfgs.solve (b0)._2                                    // find optimal solution for parameters

         n_dev   = ll_null (b_n)                                         // measure of fitness for null model
    end train_null

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test a predictive model y_ = f(x_) + e and return its QoF vector.
     *  Testing may be be in-sample (on the training set) or out-of-sample
     *  (on the testing set) as determined by the parameters passed in.
     *  Note: must call train before test.
     *  @param x_  the testing/full data/input matrix (defaults to full x)
     *  @param y_  the testing/full response/output vector (defaults to full y)
     */
    def test (x_ : MatrixD = x, y_ : VectorD = y): (VectorD, VectorD) =
        val yp = predict (x_)                                            // make predictions
        (yp, diagnose (y_, yp))                                          // return predictions and QoF vector
    end test

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Produce a QoF summary for a model with diagnostics for each predictor 'x_j'
     *  and the overall Quality of Fit (QoF).
     *  @param x_      the testing/full data/input matrix
     *  @param fname_  the array of feature/variable names
     *  @param b_      the parameters/coefficients for the model
     *  @param vifs    the Variance Inflation Factors (VIFs)
     */
    override def summary (x_ : MatrixD = getX, fname_ : Array [String] = fname, b_ : VectorD = b,
                          vifs: VectorD = vif ()): String =
        super.summary (x_, fname_, b_, vifs)                             // summary from `Fit`
    end summary

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the value of y = f(z) by evaluating the formula y = exp (b dot z),
     *  e.g., exp (b_0, b_1, b_2) dot (1, z_1, z_2).
     *  @param z  the new vector to predict
     */
    override def predict (z: VectorD): Double = exp (b dot z)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build a sub-model that is restricted to the given columns of the data matrix.
     *  @param x_cols  the columns that the new model is restricted to
     */
    override def buildModel (x_cols: MatrixD): ExpRegression =
        debug ("buildModel", s"${x_cols.dim} by ${x_cols.dim2}")
        new ExpRegression (x_cols, y, null, hparam, nonneg)
    end buildModel

end ExpRegression


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ExpRegression` companion object provides factory methods for creating
 *  exponential regression models.
 */
object ExpRegression:

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create an `ExpRegression` object from a combined data-response matrix.
     *  The last column is assumed to be the response column.
     *  @param xy      the combined data-response matrix (predictors and response)
     *  @param fname   the feature/variable names (defaults to null)
     *  @param hparam  the hyper-parameters (currently nome)
     *  @param nonneg  whether to check that responses are nonnegative (defaults to true)
     *  @param col     the designated response column (defaults to the last column)
     */
    def apply (xy: MatrixD, fname: Array [String] = null,
               hparam: HyperParameter = null, nonneg: Boolean = true)
              (col: Int = xy.dim2 - 1): ExpRegression =
        new ExpRegression (xy.not(?, col), xy(?, col), fname, hparam, nonneg)
    end apply

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create an `ExpRegression` object from a data matrix and a response vector.
     *  This method provides data rescaling.
     *  @param x       the data/input m-by-n matrix
     *                     (augment with a first column of ones to include intercept in model)
     *  @param y       the response/output m-vector
     *  @param fname   the feature/variable names (defaults to null)
     *  @param hparam  the hyper-parameters (currently none)
     *  @param nonneg  whether to check that responses are nonnegative (defaults to true)
     */
    def rescale (x: MatrixD, y: VectorD, fname: Array [String] = null,
                 hparam: HyperParameter = null, nonneg: Boolean = true): ExpRegression =
        val xn = normalize ((x.mean, x.stdev)) (x)
        new ExpRegression (xn, y, fname, hparam, nonneg)
    end rescale

end ExpRegression


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `expRegressionTest` main function tests `ExpRegression` class using the following
 *  exponential regression problem.
 *  > runMain scalation.modeling.expRegressionTest
 */
@main def expRegressionTest: Unit =

    val x = MatrixD ((5, 3), 1.0, 36.0,  66.0,                           // 5-by-3 matrix
                             1.0, 37.0,  68.0,
                             1.0, 47.0,  64.0,
                             1.0, 32.0,  53.0,
                             1.0,  1.0, 101.0)
    val y = VectorD (745.0, 895.0, 442.0, 440.0, 1598.0)
    val z = VectorD (1.0, 20.0, 80.0)

    println (s"x = $x")

    val mod = new ExpRegression (x, y)                                   // create model with with interecept (else pass x)
    mod.trainNtest ()()                                                  // train and test the model
    println (mod.summary ())                                             // parameter/coefficient statistics

    val yp = mod.predict (x)
    println (s"y  = $y")
    println (s"yp = $yp")

    new Plot (null, y, yp, "expRegressionTest")

    val yp2 = mod.predict (z)
    println (s"predict ($z) = $yp2")

end expRegressionTest


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `expRegressionTest2` main function has a basic test for the `ExpRegression` class.
 *  > runMain scalation.modeling.expRegressionTest
 */
@main def expRegressionTest2 (): Unit =

    import scalation.random.{Uniform, Exponential, Random}

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test `ExpRegression` by simulating n-many observations.
     *  @param n  number of observations
     *  @param k  number of variables
     */
    def test (n: Int = 10000, k: Int = 5): Unit =
        val u = new Uniform (0, 1)                                       // uniform random
        val e = new Exponential (1)                                      // exponential error
        val r = new Random ()

        val x = new MatrixD (n, k)                                       // data matrix
        val y = new VectorD (x.dim)                                      // response vector
        val b = new VectorD (k)                                          // known coefficients

        for i <- b.indices do b(i) = 1 + r.gen * 6

        for i <- x.indices; j <- x.indices2 do x(i, j) = u.gen

        for i <- y.indices do y(i) = exp (x(i) dot b) * e.gen

        banner ("Simulated exp regression problem with $k vars")
        val mod = new ExpRegression (x, y)                                // create model with with interecept (else pass x)
        mod.trainNtest ()()                                               // train and test the model
        println (mod.summary ())                                          // parameter/coefficient statistics
    end test

    for k <- 1 to 10 do test (1000, k)

end expRegressionTest2