//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Sun Mar 11 15:12:46 EDT 2018
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model Support: Quality of Fit (QoF) and Confusion Matrix
 */

package scalation.analytics
package classifier

import scala.collection.mutable.{LinkedHashMap, Map}
import scala.Double.NaN

import scalation.linalgebra.{MatriI, MatrixI, VectoD, VectorD, VectoI, VectorI}
import scalation.math.{double_exp, noDouble}
import scalation.stat.Statistic
import scalation.util.{banner, Error}

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ConfusionFit` companion object records the indices and labels for the
 *  base Quality of Fit (QoF) measures for the classification techniques.
 */
object ConfusionFit
{
    val MIN_FOLDS = 3                                        // minimum number of folds for cross-validation

    // indices for Scalar Quality of Fit (QoF) measures - positions must agree with 'fitLabel'

    val index_p_rSq =  0                                     // index  0 - pseudo R-squared (Efron's or McFadden's)
    val index_sst   =  1                                     // index  1 - sum of squares total (ssr + sse)
    val index_sse   =  2                                     // index  2 - sum of squares for error
    val index_kappa =  3                                     // index  3 - Cohen's kappa
    val index_acc   =  4                                     // index  4 - accuracy

    val index_p_m   =  5                                     // index  5 - mean micro-precision
    val index_r_m   =  6                                     // index  6 - mean micro-recall
    val index_s_m   =  7                                     // index  7 - mean micro-specificity
    val index_f1_m  =  8                                     // index  8 - mean micro-F1-measure

    val index_p     =  9                                     // index  9 - precision (for k = 2)
    val index_r     = 10                                     // index 10 - recall/sensitivity (for k = 2)
    val index_s     = 11                                     // index 11 - specificity (for k = 2)
    val index_f1    = 12                                     // index 12 - F1-measure (for k = 2)

    // indices for Vector Quality of Fit (QoF) micro-measures - positions must agree with 'fitLabel_v'

    val index_p_v   =  0                                     // index  0 - micro-precision vector
    val index_r_v   =  1                                     // index  1 - micro-recall vector
    val index_s_v   =  2                                     // index  2 - micro-specificity vector
    val index_f1_v  =  3                                     // index  3 - micro-F1-measure vector

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the labels for the Scalar Quality of Fit (QoF) measures, ordered
     *  so that position 'i' matches the corresponding 'index_*' constant.
     */
    def fitLabel: Seq [String] = Seq ("p_rSq",              // index  0 - pseudo R-squared (Efron's or McFadden's)
                                      "sst",                // index  1 - sum of squares total (ssr + sse)
                                      "sse",                // index  2 - sum of squares for error
                                      "kappa",              // index  3 - Cohen's kappa
                                      "acc",                // index  4 - accuracy

                                      "p_m",                // index  5 - mean micro-precision
                                      "r_m",                // index  6 - mean micro-recall
                                      "s_m",                // index  7 - mean micro-specificity
                                      "f1_m",               // index  8 - mean micro-F1-measure

                                      "p",                  // index  9 - precision (for k = 2)
                                      "r",                  // index 10 - recall/sensitivity (for k = 2)
                                      "s",                  // index 11 - specificity (for k = 2)
                                      "f1")                 // index 12 - F1-measure (for k = 2)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the help string that describes the Quality of Fit (QoF) measures provided
     *  by the `ConfusionFit` class.  The QoF measures are divided into four groups:
     *  general, ordinary, micro (per class) vectors and means of the micro vectors.
     *  Ordinary are values of the last element in the micro vectors and can be
     *  interpreted as, say the precision for the last class value/label, e.g.,
     *  y = hasCancer in {no, yes}, is the precision of the 'yes' prediction
     *  and is most meaningful when the number of class values/labels (k) is 2.
     *  @see en.wikipedia.org/wiki/Precision_and_recall
     *  @see en.wikipedia.org/wiki/Cohen%27s_kappa
     */
    def help: String =
    {
"""
help: Quality of Fit (QoF) measures:
    p_rSq =  pseudo R-squared (Efron's or McFadden's)
    sst   =  sum of squares total (ssr + sse)
    sse   =  sum of squares for error
    kappa =  Cohen's kappa, adjusted accuracy that accounts for agreement by chance
    acc   =  accuracy, the fraction of predictions that are correct 

    p     =  precision, the fraction classified as true that are actually true
    r     =  recall/sensitivity, the fraction of the actually true that are classified as true
    s     =  specificity, the fraction of the actually false that are classified as false
    f1    =  F1-measure, harmonic mean of precision and recall

    p_v   =  micro-precision vector, precision for every class
    r_v   =  micro-recall vector, recall for every class
    s_v   =  micro-specificity vector, specificity for every class
    f1_v  =  micro-F1-measure vector, F1-measure for every class

    p_m   =  mean of the micro-precision vector
    r_m   =  mean of the micro-recall vector
    s_m   =  mean of the micro-specificity vector
    f1_m  =  mean of the micro-F1-measure vector
"""
    } // help

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the labels for the Vector Quality of Fit (QoF) micro-measures, ordered
     *  so that position 'i' matches the corresponding 'index_*_v' constant.
     */
    def fitLabel_v: Seq [String] = Seq ("p_v",              // index  0 - micro-precision vector
                                        "r_v",              // index  1 - micro-recall vector
                                        "s_v",              // index  2 - micro-specificity vector
                                        "f1_v")             // index  3 - micro-F1-measure vector

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test and report the confusion matrix and associated QoF measures: print the
     *  actual and predicted vectors, build a `ConfusionFit` and print its summary.
     *  @param y   the actual class values
     *  @param yp  the predicted class values
     *  @param k   the number of class labels {0, 1, ... , k-1}
     */
    def test (y: VectoI, yp: VectoI, k: Int = 2): Unit =
    {
        banner ("Actual Class Values/Labels")
        println ("y   = " + y)                                       // actual class values

        banner ("Predicted Class Values/Labels")
        println ("yp  = " + yp)                                      // predicted class values

        val cf = new ConfusionFit (y, k)                             // confusion fit
        cf.confusion (yp)                                            // called for its side effects: fills the matrix and
                                                                     // micro vectors that 'summary' reports

        banner ("Quality of Fit (QoF) measures")
        println (cf.summary ())
    } // test

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a table to store statistics for QoF measures, where each row corresponds
     *  to the statistics on a particular QoF measure, e.g., 'acc'.  One `Statistic`
     *  is created per label in 'fitLabel', in the same order.
     */
    def qofStatTable: Array [Statistic] =
    {
        fitLabel.map (lab => new Statistic (lab)).toArray          // one named collector per QoF label
    } // qofStatTable

} // ConfusionFit object 


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ConfusionFit` class provides functions for determining the confusion
 *  matrix as well as derived Quality of Fit (QoF) measures such as pseudo R-squared,
 *  sst, sse, accuracy, precision, recall, specificity and Cohen's kappa coefficient.
 *  @see `analytics.Fit`
 *------------------------------------------------------------------------------
 *  Must call the 'confusion' method before calling the other methods.
 *------------------------------------------------------------------------------
 *  @param y   the actual class labels
 *  @param k   the number of class values
 */
class ConfusionFit (y: VectoI, k: Int = 2)
      extends QoF with Error
{
    private val DEBUG = false                                   // debug flag
    private val cmat  = new MatrixI (k, k)                      // confusion matrix (rows: actual, columns: predicted)
    private val tcmat = new MatrixI (k, k)                      // total cumulative confusion matrix
    private val rsum  = new VectorI (k)                         // vector of row sums of cmat (actual class counts)
    private val csum  = new VectorI (k)                         // vector of column sums of cmat (predicted class counts)

    private val m     = y.dim                                   // size of the classification vector given at construction
    private val sst   = y.normSq - y.sum~^2 / m                 // sum of squares total (ssr + sse), always based on 'y'
    private var sse   = -1.0                                    // sum of squares for error (-1 until 'confusion' is called)
    private var p_rSq = -1.0                                    // pseudo R-squared (-1 until 'confusion' is called)

    private val pv    = new VectorD (cmat.dim1)                 // micro-precision vector
    private val rv    = new VectorD (cmat.dim1)                 // micro-recall vector
    private val sv    = new VectorD (cmat.dim1)                 // micro-specificity vector

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Clear the total cumulative confusion matrix.
     */
    def clearConfusion (): Unit = { tcmat.set (0) }

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return a copy of the total cumulative confusion matrix 'tcmat' and clear 'tcmat'.
     */
    def total_cmat (): MatriI = { val t = tcmat.copy (); tcmat.set (0); t }

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Not supported by `ConfusionFit`, which works on integer class vectors rather
     *  than real-valued responses; use the 'confusion' method instead.
     *  @param e   the m-dimensional error/residual vector (yy - yp)
     *  @param yy  the actual response vector to use (test/full)
     *  @param yp  the predicted response vector (test/full)
     *  @param w   the weights on the instances (defaults to null)
     *  @param ym  the mean of the actual response vector to use (test/full)
     *  @throws UnsupportedOperationException always
     */
    def diagnose (e: VectoD, yy: VectoD, yp: VectoD, w: VectoD = null, ym: Double = noDouble): Unit =
    {
        throw new UnsupportedOperationException ("ConfusionFit: uses integer vectors, see 'confusion'")
    } // diagnose

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compare the actual class 'yy' vector versus the predicted class 'yp' vector,
     *  returning the confusion matrix 'cmat', which for 'k = 2' is
     *  <p>
     *       yp  0   1
     *        ----------
     *  y  0  | tn  fp |
     *     1  | fn  tp |
     *        ----------
     *  <p>
     *  As side effects, 'sse', 'p_rSq', the row/column sums and the micro vectors
     *  are recomputed, and 'cmat' is added to the cumulative matrix 'tcmat'.
     *  Note: ScalaTion's confusion matrix is Actual × Predicted, but to swap the position of
     *  actual 'y' (rows) with predicted 'yp' (columns) simply use 'cmat.t', the transpose of 'cmat'.
     *  Note: 'sst' is computed once from the constructor's 'y', so 'p_rSq' relates the
     *  'sse' of 'yy' to the 'sst' of 'y' - TODO confirm this is intended when 'yy' is a test set.
     *  @see www.dataschool.io/simple-guide-to-confusion-matrix-terminology
     *  @param yp  the predicted class values/labels
     *  @param yy  the actual class values/labels for full (y) or test (y_e) dataset
     */
    def confusion (yp: VectoI, yy: VectoI = y): MatriI =
    {
        cmat.set (0)
        // 'flaw' comes from `Error`; whether it aborts is not visible here, so execution may continue
        if (yp.dim != yy.dim) flaw ("confusion", s"requires (yp.dim = ${yp.dim}) = (yy.dim = ${yy.dim})")
        val e = yy - yp                                          // error vector
        sse   = e.normSq
        p_rSq = pseudo_rSq
        for (i <- yy.range) cmat(yy(i), yp(i)) += 1              // increment count
        for (i <- cmat.range1) rsum(i) = cmat(i).sum             // compute row sum
        for (j <- cmat.range2) csum(j) = cmat.col(j).sum         // compute column sum
        tcmat += cmat
        p_r_s ()
        cmat
    } // confusion

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Contrast the actual class 'yy' vector versus the predicted class 'yp' vector
     *  by printing both.
     *  @param yp  the predicted class values/labels
     *  @param yy  the actual class values/labels for full (y) or test (y_e) dataset
     */
    def contrast (yp: VectoI, yy: VectoI = y): Unit =
    {
        println (s"actual    yy = $yy")
        println (s"predicted yp = $yp")
    } // contrast
 
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the micro-precision, micro-recall and micro-specificity vectors
     *  which have elements for each class i in {0, 1, ... k-1}.
     *--------------------------------------------------------------------------
     *  Precision is the fraction classified as true that are actually true.
     *  Recall (sensitivity) is the fraction of the actually true that are classified as true.
     *  Specificity is the fraction of the actually false that are classified as false.
     *--------------------------------------------------------------------------
     *  Note, for 'k = 2', ordinary precision 'p', recall 'r' and specificity 's' will
     *  correspond to the last elements in the 'pv', 'rv' and 'sv' micro vectors.
     *  A class with a zero denominator yields NaN, which 'mean' later skips.
     */
    def p_r_s (): Unit =
    {
        // use the number of instances actually counted in 'cmat' rather than 'y.dim',
        // so true negatives stay correct when 'confusion' was given a test set 'yy'
        val n = cmat.sum.toDouble

        for (i <- cmat.range1) {
            val tp: Double = cmat(i, i)                          // true  positives for class i: y = i, yp = i 
            val fp: Double = csum(i) - tp                        // false positives for class i: y ≠ i, yp = i
            val fn: Double = rsum(i) - tp                        // false negatives for class i: y = i, yp ≠ i
            val tn: Double = n - (tp + fp + fn)                  // true  negatives for class i: y ≠ i, yp ≠ i

            pv(i) = tp / (tp + fp)                               // micro-precision for class i
            rv(i) = tp / (tp + fn)                               // micro-recall for class i
            sv(i) = tn / (tn + fp)                               // micro-specificity for class i
        } // for
        if (DEBUG) println (s" pv = $pv,\n rv = $rv,\n sv = $sv")
    } // p_r_s

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the Efron's pseudo R-squared value from the current 'sse' and 'sst'.
     *  Override to McFadden's, etc.
     */
    def pseudo_rSq: Double =  1.0 - sse / sst

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the confusion matrix for 'k = 2' as a tuple (tn, fp, fn, tp).
     *  For any other 'k' a tuple of four NaN values is returned.
     *  @param con  the confusion matrix (defaults to cmat)
     */
    def tn_fp_fn_tp (con: MatriI = cmat): (Double, Double, Double, Double) =
    {
        if (k == 2) {
            (con(0, 0) /* tn */, con(0, 1) /* fp */,
             con(1, 0) /* fn */, con(1, 1) /* tp */)
        } else (NaN, NaN, NaN, NaN)
    } // tn_fp_fn_tp

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the accuracy of the classification, i.e., the fraction of correct
     *  classifications.  Note, the correct classifications 'tp_i' are in the main
     *  diagonal of the confusion matrix.
     */
    def accuracy: Double = cmat.trace / cmat.sum.toDouble

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the mean ignoring NaN (Not-a-Number).  If every element is NaN
     *  the result is 0.0 / 0, i.e., NaN.
     *  @param x  the vector whose mean is sought
     */
    private def mean (x: VectoD): Double =
    {
        var sum = 0.0
        var cnt = 0                                              // number of non-NaN elements
        for (i <- x.range if ! x(i).isNaN) { sum += x(i); cnt += 1 }
        sum / cnt
    } // mean

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the F1-measure, i.e., the harmonic mean of the precision and recall.
     *  @param p  the precision
     *  @param r  the recall
     */
    def f1_measure (p: Double, r: Double): Double = 2.0 * p * r / (p + r)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the micro-F1-measure vector, i.e., the harmonic mean of the precision
     *  and recall for each class.
     */
    def f1v: VectoD = (pv * rv * 2.0) / (pv + rv)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute Cohen's 'kappa' coefficient that measures agreement between
     *  actual and predicted classifications, '(po - pe) / (1 - pe)', where 'po' is
     *  the observed agreement (accuracy) and 'pe' the agreement expected by chance.
     *  The chance agreement uses the marginals of the current confusion matrix:
     *  row sums are the actual class counts and column sums the predicted class counts.
     *  @see en.wikipedia.org/wiki/Cohen%27s_kappa
     */
    def kappa: Double =
    {
        val n     = cmat.sum.toDouble                            // number of instances in the confusion matrix
        var agree = 0.0                                          // sum over classes of (actual count * predicted count)
        for (i <- cmat.range1) agree += rsum(i).toDouble * csum(i)   // in Double to avoid Int overflow
        val pe = agree / (n * n)                                 // probability of agreement by chance
        val po = accuracy                                        // observed agreement
        (po - pe) / (1.0 - pe)
    } // kappa

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the Quality of Fit (QoF) measures corresponding to the labels given
     *  above in the 'fitLabel' method.
     */
    def fit: VectoD =
    {
        val (p, r, s) = (pv.last, rv.last, sv.last)              // ordinary precision, recall and specificity

        VectorD (p_rSq, sst, sse, kappa, accuracy,               // common QoF measures
                 mean (pv), mean (rv), mean (sv), mean (f1v),    // means of precision, recall, specificity and F1
                 p, r, s, f1_measure (p, r))                     // most meaningful when k = 2
    } // fit

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the Quality of Fit (QoF) vector micro-measures, i.e., measures for
     *  each class, keyed by the labels from 'fitLabel_v' in insertion order.
     */
    def fitMicroMap: Map [String, VectoD] =
    {
        val lab = fitLabel_v
        LinkedHashMap (lab(0) -> pv,                             // micro-precision vector
                       lab(1) -> rv,                             // micro-recall vector
                       lab(2) -> sv,                             // micro-specificity vector
                       lab(3) -> f1v)                            // micro-F1 vector
    } // fitMicroMap

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the labels for the scalar Quality of Fit (QoF) measures. Override to
     *  add additional QoF measures.
     */
    def fitLabel: Seq [String] = ConfusionFit.fitLabel

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the help string that describes the Quality of Fit (QoF) measures
     *  provided by the `ConfusionFit` class.  Override to correspond to 'fitLabel'.
     */
    def help: String = ConfusionFit.help

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the labels for the vector Quality of Fit (QoF) micro-measures.
     *  Override to add additional QoF micro-measures.
     */
    def fitLabel_v: Seq [String] = ConfusionFit.fitLabel_v

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Produce a summary report with the parameters, the confusion matrix, the
     *  scalar QoF measures and the vector QoF micro-measures.
     *  @param b     the parameters of the model
     *  @param show  flag indicating whether to print the summary
     */
    def summary (b: VectoD = null, show: Boolean = false): String =
    {
        val fit1 = fitMap
        val fit2 = fitMicroMap

        val sb = new StringBuilder ("-" * 58 + "\nSUMMARY")
        sb.append ("\n" + "-" * 58)
        sb.append ("\nparameter = " + b)
        sb.append ("\n" + "-" * 58)
        sb.append ("\nConfusion Matrix = " + cmat)
        sb.append ("\n" + "-" * 58)
        sb.append ("\nScalar QoF Measures")
        sb.append ("\n" + "-" * 58)
        for ((name, value) <- fit1) sb.append (s"\n\t $name \t= $value")
        sb.append ("\n" + "-" * 58)
        sb.append ("\nVector QoF Micro-Measures")
        sb.append ("\n" + "-" * 58)
        for ((name, value) <- fit2) sb.append (s"\n\t $name \t= $value")
        sb.append ("\n" + "-" * 58)
        val sum = sb.mkString

        if (show) println (sum)
        sum
    } // summary

} // ConfusionFit class


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ConfusionFitTest` object is used to test the `ConfusionFit` class.
 *  > runMain scalation.analytics.classifier.ConfusionFitTest
 */
object ConfusionFitTest extends App
{
    val k  = 3                                                       // number of classes {0, 1, 2}
    val y  = VectorI (0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2)   // actual labels, five instances per class
    val yp = VectorI (0, 0, 0, 1, 2, 0, 0, 1, 1, 2, 0, 1, 1, 1, 2)   // predicted labels for the same instances

    // print both vectors followed by the QoF summary
    ConfusionFit.test (y, yp, k)

} // ConfusionFitTest object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ConfusionFitTest2` object is used to test the `ConfusionFit` class.
 *  @see www.quora.com/How-do-I-compute-precision-and-recall-values-for-a-dataset
 *  > runMain scalation.analytics.classifier.ConfusionFitTest2
 */
object ConfusionFitTest2 extends App
{
    val k  = 2                                                        // binary problem: classes {0, 1}
    val y  = VectorI (1, 1, 1, 1, 1, 0, 0, 0, 0, 0)                   // actual labels, five of each class
    val yp = VectorI (1, 0, 0, 1, 1, 1, 0, 0, 1, 1)                   // predicted labels for the same instances

    // print both vectors followed by the QoF summary
    ConfusionFit.test (y, yp, k)

} // ConfusionFitTest2 object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ConfusionFitTest3` object is used to test the `ConfusionFit` class.
 *  @see towardsdatascience.com/multi-class-metrics-made-simple-part-i-precision-and-recall-9250280bddc2
 *  Note: ScalaTion's confusion matrix is the transpose of the one on the Website
 *  > runMain scalation.analytics.classifier.ConfusionFitTest3
 */
object ConfusionFitTest3 extends App
{
    // 25 instances, one (y, yp) pair per matrix row: column 0 is actual, column 1 is predicted
    // class codes used below: 0 = cat, 1 = fish, 2 = hen
    val yyp = new MatrixI ((25, 2),
                           0, 0,   0, 0,   0, 0,   0, 0,   0, 1,   0, 2,     // 6 actual cats and their predictions

                           1, 0,   1, 0,   1, 0,   1, 0,   1, 0,   1, 0,     // 10 actual fish and their predictions
                           1, 1,   1, 1,   1, 2,   1, 2,

                           2, 0,   2, 0,   2, 0,   2, 2,   2, 2,   2, 2,     // 9 actual hens and their predictions
                           2, 2,   2, 2,   2, 2)

    val k  = 3                                                        // three classes: cat, fish, hen

    println (ConfusionFit.help)                                       // describe the QoF measures first
    ConfusionFit.test (yyp.col(0), yyp.col(1), k)                     // then report them for (actual, predicted)

} // ConfusionFitTest3 object