//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller, Hao Peng
 *  @version 2.0
 *  @date    Sat Sep  8 13:53:16 EDT 2012
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    Model: Integer-Based TAN Bayes Classifier
 *           Tree-Augmented Naive (TAN)
 *
 *  @see eric.univ-lyon2.fr/~ricco/tanagra/fichiers/en_Tanagra_TAN_Bayes_Classifier_Explained.pdf
 *
 *  for comparison @see www.bnlearn.com
 *  R-package bnlearn: bn.naive, naive.bayes, bn.tan, tree.bayes
 */

package scalation
package modeling
package classifying

import scala.runtime.ScalaRunTime.stringOf

import scalation.database.MaxSpanningTree
import scalation.mathstat._

import Classifier.{shift2zero, vc_fromData}
import Probability.plog

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `TANBayes` class implements an Integer-Based Tree-Augmented Naive (TAN)
 *  Bayes Classifier, a commonly used classifier for discrete input data.  The
 *  classifier is trained using a data matrix x and a classification vector y.
 *  Each data vector in the matrix is classified into one of k classes numbered
 *  0, ..., k-1.  Prior probabilities are calculated based on the population of
 *  each class in the training-set.  Relative posterior probabilities are computed
 *  by multiplying these by values computed using conditional probabilities
 *  stored in Conditional Probability Tables (CPTs).
 *------------------------------------------------------------------------------
 *  The classifier is TAN, allowing each feature xj to also utilize information
 *  from its x-parent.
 *  @param x       the input/data m-by-n matrix
 *  @param y       the class vector, where y(i) = class for row i of matrix x
 *  @param fname_  the names of the features/variables
 *  @param k       the number of classes
 *  @param cname_  the names of the classes
 *  @param vc      the value count (number of distinct values) for each feature
 *  @param hparam  the hyper-parameters
 */
class TANBayes (x: MatrixD, y: VectorI, fname_ : Array [String] = null, k: Int = 2,
                cname_ : Array [String] = Array ("No", "Yes"),
                private var vc: VectorI = null, hparam: HyperParameter = NaiveBayes.hp)
      extends Classifier (x, y, fname_, k, cname_, hparam)
         with BayesClassifier (k)
         with FitC (k):

    private val debug = debugf ("TANBayes", true)                    // debug function

    modelName = "TANBayes"                                           // name of the model

    if vc == null then
        shift2zero (x); vc = vc_fromData (x)                         // set value counts from data
    end if

    private val me   = hparam("me").toDouble                         // m-estimates (me == 0 => regular MLE estimates)
    private val me_v = NaiveBayes.me_vc (me, vc)                     // for Laplace smoothing: me / vc_j for all j

    private var nu_Xy: RTensorD     = null                           // Joint Frequency Tables (JFTs)
    private var nu_Xpy_ : RTensor4D = null                           // extended Joint Frequency Tables (JFTs)
//  private var p_Xy: Array [MatrixD] = null                         // Conditional Probability Tables (CPTs) one per feature
    private var p_Xpy: Array [Array [MatrixD]] = null                // extended Conditional Probability Tables (CPTs) one per feature
//  private var p_Xpy_ : RTensor4D = null                            // extended Conditional Probability Tables (CPTs) one per feature

    private var parent: VectorI = null                               // parent of each feature/variable
    private var vc_p: VectorI   = null                               // parent value count

    debug ("init", s"distinct value count vc = $vc")
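
    // Classification rule (added sketch, following the class documentation above):
    // for a new instance z and each class c, TAN forms the relative posterior
    //     P(y = c | z)  ∝  p_y(c) * Π_j P(z_j | z_p(j), c)
    // where p(j) = parent(j) is the x-parent of feature j; the root feature has
    // no x-parent (parent(j) = -1), so its factor reduces to P(z_j | c).
    // The product is implemented in predictI (and as a plog sum in lpredictI).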

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Find the best parent xpj for each feature xj based on the CMI matrix.
     *  For the Play Tennis example, parent = VectorI (-1, 0, 1, 0).
     *  @param x_  the training/full data/input matrix (defaults to full x)
     *  @param y_  the training/full response/output vector (defaults to full y)
     */
    def findParent (x_ : MatrixD, y_ : VectorI): Unit =
        val cmiMx = cmiMatrix (x_, vc, y_)                           // create Conditional Mutual Information matrix
        val stree = MaxSpanningTree (cmiMx)                          // find Maximum Spanning Tree
        parent    = stree.makeITree ()                               // get predecessor vector
        debug ("findParent", s"parent = $parent")
    end findParent

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Determine the value counts for parents.  When there is no parent (-1), set to 1.
     */
    def vc_parent (): Unit =
        vc_p = VectorI (for j <- x.indices2 yield { val pj = parent(j); if pj < 0 then 1 else vc(pj) })
        println (s"vc_p = $vc_p")
    end vc_parent

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train a classification model y_ = f(x_) + e where x_ is the data/input
     *  matrix and y_ is the response/output vector.  These arguments default
     *  to the full dataset x and y, but may be restricted to a training
     *  dataset.  Training involves estimating the model parameters or pmf.
     *  Train the classifier by computing the probabilities for y, and the
     *  conditional probabilities for each x_j.
     *  @param x_  the training/full data/input matrix (defaults to full x)
     *  @param y_  the training/full response/output vector (defaults to full y)
     */
    override def train (x_ : MatrixD = x, y_ : VectorI = y): Unit =
        super.train (x_, y_)                                         // set class frequencies nu_y and probabilities p_y
        findParent (x_, y_)                                          // find the best x-parent for each xj (parent)
        vc_parent ()                                                 // set the value counts for the parents (vc_p)
        nu_Xy   = RTensorD.freq (x_, vc, y_, k)                      // Joint Frequency Tables (JFTs)
        nu_Xpy_ = RTensor4D.freq (x_, vc, parent, vc_p, y_, k)       // extended Joint Frequency Tables (JFTs)
        val nu_Xpy = freq_Xpy (x_, y_)                               // extended Joint Frequency Tables (JFTs)
        p_Xpy = cProb_Xpy (x_, y_, nu_y, nu_Xy, nu_Xpy)              // extended Conditional Probability Tables (CPTs)
//      println (s"nu_Xpy  = ${stringOf (nu_Xpy)}")
//      println (s"nu_Xpy_ = $nu_Xpy_")
    end train

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the joint frequency of xj, xp and y for all xj in X, where p is the
     *  unique x-parent of feature xj.
     *  @param x_  the training/full data/input matrix (defaults to full x)
     *  @param y_  the training/full response/output vector (defaults to full y)
     */
    def freq_Xpy (x_ : MatrixD, y_ : VectorI): Array [Array [MatrixD]] =
        val nu_Xpy = Array.ofDim [Array [MatrixD]] (x_.dim2)
        for j <- x_.indices2 do
            nu_Xpy(j) = Array.ofDim [MatrixD] (vc(j))
            val p    = parent(j)                                     // parent of xj
            val vc_p = if p < 0 then 1 else vc(p)                    // parent value count
            for xj <- 0 until vc(j) do nu_Xpy(j)(xj) = new MatrixD (vc_p, k)
        end for
        for i <- x_.indices do updateFreq (x_, y_, i, nu_Xpy)
        nu_Xpy
    end freq_Xpy
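
    // Indexing convention (added note): cell nu_Xpy(j)(xj)(xp, c) counts the
    // training rows i with x_(i, j) = xj, x_(i, parent(j)) = xp and y_(i) = c;
    // for a parentless feature the xp dimension collapses to the single row 0.
    // The per-row increments are performed by updateFreq below.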

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Increment frequency counters used in freq_Xpy calculations based on the i-th
     *  row of the data matrix.
     *  @param x_      the integer-valued data vectors stored as rows of a matrix
     *  @param y_      the class vector, where y(i) = class for row i of the matrix x, x(i)
     *  @param i       the index for current data row
     *  @param nu_Xpy  the joint frequency of X, p and y for each combination of features xj
     */
    private def updateFreq (x_ : MatrixD, y_ : VectorI, i: Int,
                            nu_Xpy: Array [Array [MatrixD]]): Unit =
        val yi = y_(i)                                               // class value for i-th instance/datapoint
        for j <- x_.indices2 do                                      // for each feature/variable xj
            val p    = parent(j)                                     // parent of xj
            val x_ij = x_(i, j).toInt                                // i-th value for xj
            val x_ip = if p > -1 then x_(i, p).toInt else 0          // i-th value for xp (0 when xj has no parent)
            nu_Xpy(j)(x_ij)(x_ip, yi) += 1                           // increment frequency for xj, xp, yi
        end for
    end updateFreq

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the conditional probability of X given p and y for all xj in X,
     *  where p is the unique x-parent of feature xj.
     *  @param x_      the training/full data/input matrix (defaults to full x)
     *  @param y_      the training/full response/output vector (defaults to full y)
     *  @param nu_y    the class frequency of y
     *  @param nu_Xy   the joint frequency of X and y for all xj in X
     *  @param nu_Xpy  the joint frequency of X, p and y for all xj in X
     */
    def cProb_Xpy (x_ : MatrixD, y_ : VectorI, nu_y: VectorD, nu_Xy: RTensorD,
                   nu_Xpy: Array [Array [MatrixD]]): Array [Array [MatrixD]] =
        val p_Xpy = Array.ofDim [Array [MatrixD]] (x_.dim2)
        for j <- x_.indices2 do
            p_Xpy(j) = Array.ofDim [MatrixD] (vc(j))
            val p    = parent(j)                                     // parent of xj
            val vc_p = if p < 0 then 1 else vc(p)                    // parent value count
            for xj <- 0 until vc(j) do
                p_Xpy(j)(xj) = new MatrixD (vc_p, k)
                for xp <- 0 until vc_p do
                    val d: VectorD = if p > -1 then nu_Xy(p)(xp) else nu_y
                    p_Xpy(j)(xj)(xp) = (nu_Xpy(j)(xj)(xp) + me_v(j)) / (d + me)
        end for
        p_Xpy
    end cProb_Xpy
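
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Illustrative sketch (added example, not called by the model): the
     *  m-estimate computed in cProb_Xpy, written out for a single CPT cell.
     *  With me = 0 it reduces to the plain MLE ratio nu_jpy / nu_py; with
     *  me > 0 it applies Laplace-style smoothing via me_v(j) = me / vc(j).
     *  @param nu_jpy  the joint count for (xj = v, xp = u, y = c)
     *  @param nu_py   the count for the conditioning event (xp = u, y = c)
     *  @param j       the feature index (selects the smoothing term me_v(j))
     */
    private def smoothedEstimate (nu_jpy: Double, nu_py: Double, j: Int): Double =
        (nu_jpy + me_v(j)) / (nu_py + me)                            // estimate of P(xj = v | xp = u, y = c)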

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the predictive model y_ = f(x_) + e and return its predictions and QoF vector.
     *  Testing may be in-sample (on the full dataset) or out-of-sample (on the testing set)
     *  as determined by the parameters passed in.
     *  Note: must call train before test.
     *  @param x_  the testing/full data/input matrix (defaults to full x)
     *  @param y_  the testing/full response/output vector (defaults to full y)
     */
    def test (x_ : MatrixD = x, y_ : VectorI = y): (VectorI, VectorD) =
        val yp  = predictI (x_)                                      // predicted classes
        val qof = diagnose (y_.toDouble, yp.toDouble)                // diagnose from actual and predicted
//      debug ("test", s" yp = $yp \n qof = $qof")
        (yp, qof)
    end test

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the integer value of y = f(z) by computing the product of the class
     *  probabilities p_y and all the conditional probabilities P(X_j = z_j | X_p = z_p, y = c)
     *  and returning the class with the highest relative probability.
     *  Note, p_yz from `Classifier` holds the relative probabilities of y given z.
     *  @param z  the new vector to predict
     */
    override def predictI (z: VectorI): Int =
        p_yz = p_y.copy                                              // start with class (prior) probabilities
        for j <- z.indices do                                        // P(X_j = z_j | X_p = z_p, y = c)
            val p    = parent(j)                                     // parent of xj
            val ecpt = p_Xpy(j)                                      // get j-th extended CPT
            if p > -1 then                                           // xj has a parent
                p_yz *= ecpt (z(j))(z(p))                            // multiply in its v = (z(j), z(p)) row
            else                                                     // xj does not have a parent
                p_yz *= ecpt (z(j))(0)                               // multiply in its v = z(j) row
            end if
        end for
//      debug ("predictI", s"p_yz = $p_yz")
        p_yz.argmax ()                                               // return class with highest probability
    end predictI

    inline def predictI (z: VectorD): Int = predictI (z.toInt)

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the integer value of y = f(z) by computing the product of the class
     *  probabilities p_y and all the conditional probabilities P(X_j = z_j | X_p = z_p, y = c)
     *  and returning the class with the highest relative probability.
     *  This method adds "positive log probabilities" to avoid underflow.
     *  Note, p_yz from `Classifier` holds the relative probabilities of y given z.
     *  To recover a relative probability, compute 2^(-q) where q is a plog.
     *  @param z  the new vector to predict
     */
    override def lpredictI (z: VectorI): Int =
        p_yz = plog (p_y)                                            // start with class (prior) plogs
        for j <- z.indices do                                        // P(X_j = z_j | X_p = z_p, y = c)
            val p    = parent(j)                                     // parent of xj
            val ecpt = p_Xpy(j)                                      // get j-th extended CPT
            if p > -1 then                                           // xj has a parent
                p_yz += plog (ecpt (z(j))(z(p)))                     // add in its v = (z(j), z(p)) row
            else                                                     // xj does not have a parent
                p_yz += plog (ecpt (z(j))(0))                        // add in its v = z(j) row
            end if
        end for
        debug ("lpredictI", s"p_yz = $p_yz")
        p_yz.argmin ()                                               // return class with lowest positive log probability
    end lpredictI
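
    // Worked example (added note, assuming plog(p) = -log2(p), consistent with
    // the 2^(-q) recovery described above): if the relative probabilities for
    // k = 2 classes were (0.8, 0.2), their plogs would be about (0.32, 2.32),
    // and argmin selects class 0, the class with the highest probability.
    // Summing plogs replaces the product in predictI and avoids underflow when
    // many small conditional probabilities are multiplied together.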

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Print the extended Joint Frequency Tables (eJFTs) by iterating over
     *  the features/variables.
     */
    def printJFTs (): Unit =
        for j <- x.indices2 do
            val pj = parent(j)
            if pj < 0 then
                println (s"regular JFT for xj [ x$j, y ]: nu_Xy(j) = ${nu_Xy(j)}")            // regular JFT
            else
                println (s"extended JFT for xj [ x$j, x$pj, y ]: nu_Xpy(j) = ${nu_Xpy_(j)}")  // extended JFT
        end for
    end printJFTs

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Print the extended Conditional Probability Tables (eCPTs) by iterating over
     *  the features/variables.
     */
    def printCPTs (): Unit =
        for j <- x.indices2 do
            val pj = parent(j)
            if pj < 0 then
                println (s"regular CPT for xj: P(x$j | y) = ${stringOf (p_Xpy(j))}")
            else
                println (s"extended CPT for xj: P(x$j | x$pj, y) = ${stringOf (p_Xpy(j))}")
        end for
    end printCPTs

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Produce a QoF summary for a model with diagnostics for each predictor x_0, x_1,
     *  and the overall Quality of Fit (QoF).
     *  @param x_      the testing/full data/input matrix
     *  @param fname_  the array of feature/variable names
     *  @param b_      the parameters/coefficients for the model
     *  @param vifs    the Variance Inflation Factors (VIFs)
     */
    override def summary (x_ : MatrixD = null, fname_ : Array [String] = null,
                          b_ : VectorD = p_y, vifs: VectorD = null): String =
        super.summary (x_, fname_, b_, vifs)                         // summary from `Fit`
    end summary

end TANBayes


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** `TANBayes` is the companion object for the `TANBayes` class.
 */
object TANBayes:

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `TANBayes` object, passing x and y together in one matrix.
     *  @param xy      the combined data-response matrix
     *  @param fname   the names of the features/variables
     *  @param k       the number of classes
     *  @param cname   the names of the classes
     *  @param vc      the value count (number of distinct values) for each feature
     *  @param hparam  the hyper-parameters
     *  @param col     the designated response column (defaults to the last column)
     */
    def apply (xy: MatrixI, fname: Array [String] = null, k: Int = 2,
               cname: Array [String] = Array ("No", "Yes"), vc: VectorI = null,
               hparam: HyperParameter = NaiveBayes.hp)
              (col: Int = xy.dim2 - 1): TANBayes =
        val (x, y) = (xy.not(?, col), xy(?, col).toInt)              // data matrix, response vector
        new TANBayes (x, y, fname, k, cname, vc, hparam)
    end apply

end TANBayes


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `tANBayesTest` main function is used to test the `TANBayes` class on the
 *  Play Tennis example problem.
 *  > runMain scalation.modeling.classifying.tANBayesTest
 */
@main def tANBayesTest (): Unit =

    import Example_PlayTennis._

    banner ("Play Tennis Example")
    println (s"xy = $xy")                                            // combined data matrix [ x | y ]

    val mod = TANBayes (xy, fname)()                                 // create a classifier
    mod.trainNtest ()()                                              // train and test the classifier

    banner ("Joint Frequency Tables:")
    mod.printJFTs ()                                                 // print Joint Frequency Tables (JFTs)
    banner ("Conditional Probability Tables:")
    mod.printCPTs ()                                                 // print Conditional Probability Tables (CPTs)
    println (mod.summary ())                                         // summary statistics

    val z = VectorI (2, 2, 1, 1)                                     // new data vector to classify
    banner (s"Classify $z")
    println (s"Use mod to classify ($z)  = ${mod.classify (z)}")     // based on highest relative probability
    println (s"Use mod to lclassify ($z) = ${mod.lclassify (z)}")    // add positive log probabilities

    banner ("Validation")
    println ("mod test accu = " + mod.validate ()())                 // out-of-sample testing

/*  too few instances for cross-validation
    banner ("Cross-validation")
    FitM.showQofStatTable (mod.crossValidate ())                     // 5-fold cross-validation (14 instances typically too few)
*/

end tANBayesTest

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `tANBayesTest2` main function is used to test the `TANBayes` class.
 *  Classify whether a car is more likely to be stolen (1) or not (0).
 *  @see www.inf.u-szeged.hu/~ormandi/ai2/06-tANBayes-example.pdf
 *  > runMain scalation.modeling.classifying.tANBayesTest2
 */
@main def tANBayesTest2 (): Unit =

    // x0: Color:   Red (1), Yellow (0)
    // x1: Type:    SUV (1), Sports (0)
    // x2: Origin:  Domestic (1), Imported (0)
    // x3: Mpg:     High (1), Low (0)
    // features:             x0  x1  x2  x3
    val x = MatrixI ((10, 4), 1,  0,  1,  1,                         // data matrix
                              1,  0,  1,  0,
                              1,  0,  1,  1,
                              0,  0,  1,  1,
                              0,  0,  0,  1,
                              0,  1,  0,  0,
                              0,  1,  0,  0,
                              0,  1,  1,  1,
                              1,  1,  0,  0,
                              1,  0,  0,  0)

    val y     = VectorI (1, 0, 1, 0, 1, 0, 1, 0, 0, 1)               // classification vector: 0 (No), 1 (Yes)
    val fname = Array ("Color", "Type", "Origin", "Mpg")             // feature/variable names
    val cname = Array ("No", "Yes")                                  // class names

    banner ("Stolen Car Example")
    println (s"x = $x")

    val mod = new TANBayes (x, y, fname, 2, cname)                   // create the classifier
    mod.trainNtest ()()                                              // train and test the classifier
    mod.printCPTs ()                                                 // print the conditional probability tables
    println (mod.summary ())                                         // summary statistics

    val z1 = VectorI (1, 0, 1, 1)                                    // existing data vector to classify
    val z2 = VectorI (1, 1, 1, 0)                                    // new data vector to classify
    println (s"Use mod to classify ($z1) = ${mod.classify (z1)}")
    println (s"Use mod to classify ($z2) = ${mod.classify (z2)}")

end tANBayesTest2


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `tANBayesTest3` main function is used to test the `TANBayes` class.
 *  Given whether a person is Fast and/or Strong, classify them as making C = 1
 *  or not making C = 0 the football team.
 *  > runMain scalation.modeling.classifying.tANBayesTest3
 */
@main def tANBayesTest3 (): Unit =

    // x0: Fast
    // x1: Strong
    // y:  Classification (No/0, Yes/1)
    // features:              x0  x1  y
    val xy = MatrixI ((10, 3), 1,  1,  1,
                               1,  1,  1,
                               1,  0,  1,
                               1,  0,  1,
                               1,  0,  0,
                               0,  1,  0,
                               0,  1,  0,
                               0,  1,  1,
                               0,  0,  0,
                               0,  0,  0)

    val fname = Array ("Fast", "Strong")                             // feature names
    val cname = Array ("No", "Yes")                                  // class names

    banner ("Football Team Example")
    println (s"xy = $xy")

    val mod = TANBayes (xy, fname, 2, cname)()                       // create the classifier
    mod.trainNtest ()()                                              // train and test the classifier
    mod.printCPTs ()                                                 // print the conditional probability tables
    println (mod.summary ())                                         // summary statistics

    val z = VectorI (1, 0)                                           // new data vector to classify
    println (s"Use mod to classify ($z) = ${mod.classify (z)}")

end tANBayesTest3

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `tANBayesTest4` main function is used to test the `TANBayes` class.
 *  @see archive.ics.uci.edu/ml/datasets/Lenses
 *  @see docs.roguewave.com/imsl/java/7.3/manual/api/com/imsl/datamining/TANBayesClassifierEx2.html
 *  > runMain scalation.modeling.classifying.tANBayesTest4
 */
@main def tANBayesTest4 (): Unit =

    // y:  Classification: (1) hard contact lenses, (2) soft contact lenses, (3) no contact lenses
    // x0: Age of the patient: (1) young, (2) pre-presbyopic, (3) presbyopic
    // x1: Spectacle prescription: (1) myope, (2) hypermetrope
    // x2: Astigmatic: (1) no, (2) yes
    // x3: Tear production rate: (1) reduced, (2) normal
    // features:              x0  x1  x2  x3  y
    val xy = MatrixI ((24, 5), 1,  1,  1,  1,  3,                    // 1
                               1,  1,  1,  2,  2,                    // 2
                               1,  1,  2,  1,  3,                    // 3
                               1,  1,  2,  2,  1,                    // 4
                               1,  2,  1,  1,  3,                    // 5
                               1,  2,  1,  2,  2,                    // 6
                               1,  2,  2,  1,  3,                    // 7
                               1,  2,  2,  2,  1,                    // 8
                               2,  1,  1,  1,  3,                    // 9
                               2,  1,  1,  2,  2,                    // 10
                               2,  1,  2,  1,  3,                    // 11
                               2,  1,  2,  2,  1,                    // 12
                               2,  2,  1,  1,  3,                    // 13
                               2,  2,  1,  2,  2,                    // 14
                               2,  2,  2,  1,  3,                    // 15
                               2,  2,  2,  2,  3,                    // 16
                               3,  1,  1,  1,  3,                    // 17
                               3,  1,  1,  2,  3,                    // 18
                               3,  1,  2,  1,  3,                    // 19
                               3,  1,  2,  2,  1,                    // 20
                               3,  2,  1,  1,  3,                    // 21
                               3,  2,  1,  2,  2,                    // 22
                               3,  2,  2,  1,  3,                    // 23
                               3,  2,  2,  2,  3)                    // 24

    xy -= 1                                                          // shift values to start at 0

    val fname = Array ("Age", "Spectacle", "Astigmatic", "Tear")     // feature names
    val cname = Array ("Hard", "Soft", "Neither")                    // class names

    banner ("Contact Lenses Example")
    println (s"xy = $xy")

    val mod = TANBayes (xy, fname, 3, cname)()                       // create the classifier
    mod.trainNtest ()()                                              // train and test the classifier
    mod.printCPTs ()                                                 // print the conditional probability tables
    println (mod.summary ())                                         // summary statistics

    for i <- xy.indices do                                           // iterate over the data rows
        val z  = xy(i).not(4).toInt                                  // x-values
        val y  = xy(i, 4).toInt                                      // y-value
        val yp = mod.classify (z)                                    // y predicted
        println (s"Use mod : yp = classify ($z) = $yp,\t y = $y,\t ${cname(y)}")
    end for

end tANBayesTest4
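
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `tANBayesTest5` main function is an added illustrative sketch (not part
 *  of the original test suite): it re-classifies every training row of the
 *  Play Tennis dataset, mirroring the in-sample loop of `tANBayesTest4`, so the
 *  per-row predictions yp can be compared against the actual classes y.
 *  > runMain scalation.modeling.classifying.tANBayesTest5
 */
@main def tANBayesTest5 (): Unit =

    import Example_PlayTennis._

    banner ("Play Tennis Example: per-row in-sample classification (sketch)")

    val mod = TANBayes (xy, fname)()                                 // create a classifier (response in last column)
    mod.trainNtest ()()                                              // train and test the classifier

    val col = xy.dim2 - 1                                            // designated response column
    for i <- xy.indices do                                           // iterate over the data rows
        val z  = xy(i).not(col).toInt                                // x-values for row i
        val y  = xy(i, col).toInt                                    // actual class for row i
        val yp = mod.classify (z)                                    // predicted class
        println (s"row $i: classify ($z) = $yp, \t y = $y")
    end for

end tANBayesTest5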