//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 2.0
 *  @date    Thu Mar 22 22:31:32 EDT 2018
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    Model Support: Quality of Fit (QoF) suitable for all models
 *
 *  @see facweb.cs.depaul.edu/sjost/csc423/documents/f-test-reg.htm
 *  @see avesbiodiv.mncn.csic.es/estadistica/ejemploaic.pdf
 *  @see en.wikipedia.org/wiki/Bayesian_information_criterion
 *  @see www.forecastpro.com/Trends/forecasting101August2011.html
 *  @see www.bpa.gov/-/media/Aep/energy-efficiency/evaluation-projects-studies/uncertainty-methods-comparisons-final.pdf
 */

package scalation
package modeling

import scala.collection.mutable.{LinkedHashMap, Map}
import scala.math.sqrt

import scalation.mathstat._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `FitM` trait provides methods to determine basic Quality of Fit (QoF) metrics/measures
 *  suitable for all models.  Note, to work with multiple types of models where degrees
 *  of freedom (df) may be hard to calculate, sde uses m-1 rather than df for sample estimates,
 *  while rmse uses a population formula (i.e., divide by m).  Therefore, in ScalaTion, sde
 *  will be slightly larger than rmse.
 */
trait FitM:

    // State shared with implementing classes.  Every measure starts at the sentinel -1
    // and is (re)assigned each time `diagnose` is called.

    protected var m: Int       = -1                          // number of instances (size of the response vector)

    protected var sse: Double  = -1.0                        // sum of squares for error (SSE or RSS)
    protected var ssr: Double  = -1.0                        // sum of squares regression/model (SSR)
    protected var sst: Double  = -1.0                        // sum of squares total (SST = SSR + SSE)
    protected var sde: Double  = -1.0                        // standard deviation of errors (`stdev` of the residual vector)
    protected var mse0: Double = -1.0                        // raw/MLE mean squared error (MSE0 = SSE / m)
    protected var rmse: Double = -1.0                        // root mean squared error (RMSE = sqrt of MSE0)
    protected var mae: Double  = -1.0                        // mean absolute error (MAE or MAD)
    protected var rSq: Double  = -1.0                        // coefficient of determination R^2 relative to the mean of y
    protected var rSq0: Double = -1.0                        // coefficient of determination R^2 relative to 0

    private val flaw = flawf ("FitM")                        // flaw (error reporting) function for this trait

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the sum of squares for error (sse) as last computed by `diagnose`.
     *  Holds the sentinel -1.0 until `diagnose` has been called.
     */
    def sse_ : Double = sse

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the coefficient of determination (R^2) as last computed by `diagnose`:
     *  `rSq_` is measured against the mean of y, `rSq0_` against 0.
     *  Both hold the sentinel -1.0 until `diagnose` has been called.
     */
    def rSq_ : Double  = rSq
    def rSq0_ : Double = rSq0

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the basic Quality of Fit (QoF) measures from the actual and predicted
     *  responses and store them in this trait's state (m, sse, ssr, sst, sde, mse0,
     *  rmse, mae, rSq, rSq0).  When weights are supplied, ssr is computed from the
     *  weighted deviations of the predictions and sst is derived as ssr + sse;
     *  otherwise sst is computed from y and ssr is derived as sst - sse.
     *  A flaw is reported when fewer than 2 responses are given or when the two
     *  vectors differ in size.
     *  @see `Regression_WLS`
     *  @param y   the actual response/output vector to use (test/full)
     *  @param yp  the predicted response/output vector (test/full)
     *  @param w   the weights on the instances (defaults to null, i.e., unweighted)
     *  @return    the residual/error vector y - yp (not the QoF measures themselves)
     */
    def diagnose (y: VectorD, yp: VectorD, w: VectorD = null): VectorD =
        m = y.dim
        if m < 2       then flaw ("diagnose", s"requires at least 2 responses to evaluate m = $m")
        if yp.dim != m then flaw ("diagnose", s"yp.dim = ${yp.dim} != y.dim = $m")

        val resid = y - yp                                   // residuals: actual minus predicted
        sse = resid.normSq

        if w != null then
            val ypBar = (w * yp / w.sum).sum                 // weighted mean of the predictions
            ssr = (w * (yp - ypBar)~^2).sum                  // weighted regression sum of squares
            sst = ssr + sse                                  // total obtained by adding the two parts
        else
            sst = (y - y.mean).normSq                        // total variation of y about its mean
            ssr = sst - sse                                  // model part obtained by subtraction
        end if

        // measures that depend only on the residuals and m
        sde  = resid.stdev
        mae  = resid.norm1 / m
        mse0 = sse / m
        rmse = sqrt (mse0)

        // R^2 against the mean (via sst) and against zero (via the squared norm of y)
        rSq  = 1 - sse / sst
        rSq0 = 1 - sse / y.normSq

        resid                                                // overriding versions may return QoF instead
    end diagnose

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the vector of Quality of Fit (QoF) measures (abstract: supplied by the
     *  implementing class, in the order of its labels).
     *  Note, rSq = 1 - sse / sst, so it is negative whenever sse > sst (the model
     *  introduces errors); otherwise it lies between 0 (weak) and 1 (strong).
     */
    def fit: VectorD

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the help string describing the Quality of Fit (QoF) measures
     *  (abstract: supplied by the implementing class to match its 'fitLabel').
     */
    def help: String

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Produce a QoF summary for a model, with diagnostics for each predictor 'x_j'
     *  and the overall Quality of Fit (abstract: supplied by the implementing class).
     *  @param x_     the testing/full data/input matrix
     *  @param fname  the array of feature/variable names
     *  @param b      the parameters/coefficients for the model
     *  @param vifs   the Variance Inflation Factors (VIFs) (defaults to null)
     */
    def summary (x_ : MatrixD, fname: Array [String], b: VectorD, vifs: VectorD = null): String

end FitM


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `FitM` object provides functions for making fit maps for QoF measures.
 */
object FitM:

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build an insertion-ordered map (a `LinkedHashMap`) from each QoF label to the
     *  formatted value at the same position in the vector.
     *  @param  ftVec  the vector of QoF values (indexed in step with the labels)
     *  @param  ftLab  the array of QoF labels
     */
    def fitMap (ftVec: VectorD, ftLab: Array [String]): Map [String, String] =
        val qof = LinkedHashMap [String, String] ()
        for (label, i) <- ftLab.zipWithIndex do qof += (label -> fmt(ftVec(i)))
        qof
    end fitMap

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build an insertion-ordered map (a `LinkedHashMap`) from each QoF label to the
     *  string form of the matrix element `ftMat(i)` at the same position, followed
     *  by a newline.
     *  @param  ftMat  the matrix of QoF values (indexed in step with the labels)
     *  @param  ftLab  the array of QoF labels
     */
    def fitMap (ftMat: MatrixD, ftLab: Array [String]): Map [String, String] =
        val qof = LinkedHashMap [String, String] ()
        for (label, i) <- ftLab.zipWithIndex do
            val text = ftMat(i).toString + "\n"
            qof += (label -> text)
        qof
    end fitMap

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Render the QoF measures as text: a leading newline followed by one
     *  tab-indented "label -> values" line per label.
     *  @param  ftMat  the matrix of QoF values (indexed in step with the labels)
     *  @param  ftLab  the array of QoF labels
     */
    def showFitMap (ftMat: MatrixD, ftLab: Array [String]): String =
        val rows = for i <- ftLab.indices yield s"\t\t${ftLab(i)} \t -> ${ftMat(i)} \n"
        rows.mkString ("\n", "", "")                         // start with a newline, no separator between rows
    end showFitMap

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Print the table of statistics for QoF measures: a banner, the column labels,
     *  then the rows enclosed between two rules.  The opening rule is printed only
     *  when there is at least one row; the closing rule is always printed.
     *  @param stats  the table of statistics for QoF measures
     */
    def showQofStatTable (stats: Array [Statistic]): Unit =
        val rule = "-" * 88                                  // horizontal rule
        banner ("showQofStatTable: Statistical Table for QoF")
        println (Statistic.labels)
        if stats.nonEmpty then println (rule)
        for stat <- stats do println (stat)
        println (rule)
    end showQofStatTable

end FitM