//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller, Hao Peng
 *  @version 2.0
 *  @date    Sat Sep  8 13:53:16 EDT 2012
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    Model: Integer-Based Naive Bayes Classifier
 *
 *  @see eric.univ-lyon2.fr/~ricco/tanagra/fichiers/en_Tanagra_Naive_Bayes_Classifier_Explained.pdf
 */

package scalation
package modeling
package classifying

import scalation.mathstat._

import Classifier.{shift2zero, vc_fromData}
import Probability.plog

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NaiveBayes` class implements an Integer-Based Naive Bayes Classifier,
 *  a commonly used classifier for discrete input data.  The classifier is
 *  trained using a data matrix x and a classification vector y.  Each data
 *  vector in the matrix is classified into one of k classes numbered 0, ..., k-1.
 *  Prior probabilities are calculated based on the population of each class in
 *  the training-set.  Relative posterior probabilities are computed by multiplying
 *  these by values computed using the conditional probabilities stored in
 *  Conditional Probability Tables (CPTs).
 *------------------------------------------------------------------------------
 *  The classifier is naive, because it assumes variable/feature independence and
 *  therefore simply multiplies the conditional probabilities.
 *  @param x       the input/data m-by-n matrix with instances stored in rows
 *  @param y       the response/classification m-vector, where y_i = class for row i of matrix x
 *  @param fname_  the name for each feature/variable xj
 *  @param k       the number of classes
 *  @param cname_  the name for each class
 *  @param vc      the value count (number of distinct values) for each feature/variable xj
 *  @param hparam  the hyper-parameters
 */
class NaiveBayes (x: MatrixD, y: VectorI, fname_ : Array [String] = null,
                  k: Int = 2, cname_ : Array [String] = Array ("No", "Yes"),
                  private var vc: VectorI = null,
                  hparam: HyperParameter = NaiveBayes.hp)
      extends Classifier (x, y, fname_, k, cname_, hparam)
         with FitC (k):

    private val debug = debugf ("NaiveBayes", true)                      // debug function

    modelName = "NaiveBayes"                                             // name of the model

    if vc == null then
        shift2zero (x); vc = vc_fromData (x)                             // set value counts from data
    end if

    private val me   = hparam("me").toDouble                             // m-estimates (me == 0 => regular MLE estimates)
    private val me_v = NaiveBayes.me_vc (me, vc)                         // for Laplace smoothing: me / vc_j for all j

    private var p_Xy: RTensorD = null                                    // Conditional Probability Tables (CPTs), one per feature

    debug ("init", s"distinct value count vc = $vc")

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Get the Conditional Probability Tables (CPTs) stored in tensor p_Xy.
     *  Must call train first to get values for p_Xy.
     */
    def getCPTs: RTensorD = p_Xy
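
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Illustrative sketch, not part of the original API: the m-estimate (Laplace)
     *  smoothed conditional probability for a single CPT cell, written with plain
     *  doubles so the formula used below in cProb_Xy is easy to see.  The default
     *  counts are hypothetical.
     *      P(x_j = v | y = c) = (nu_Xy + me / vc_j) / (nu_y + me)
     *  @param nu_xv_c  hypothetical joint count of (x_j = v, y = c)
     *  @param nu_c     hypothetical count of class c
     *  @param vc_j     hypothetical value count for feature x_j
     */
    private def smoothedCellSketch (nu_xv_c: Double = 3.0, nu_c: Double = 9.0,
                                    vc_j: Double = 3.0): Double =
        (nu_xv_c + me / vc_j) / (nu_c + me)                              // smoothed P(x_j = v | y = c)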

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train a classification model y_ = f(x_) + e where x_ is the data/input
     *  matrix and y_ is the response/output vector.  These arguments default
     *  to the full dataset x and y, but may be restricted to a training dataset.
     *  Training involves estimating the model parameters or pmf.
     *  Train the classifier by computing the class probabilities for y, and
     *  the conditional probabilities for each x_j given y.
     *  @param x_  the training/full data/input matrix (defaults to full x)
     *  @param y_  the training/full response/output vector (defaults to full y)
     */
    override def train (x_ : MatrixD = x, y_ : VectorI = y): Unit =
        super.train (x_, y_)                                             // set class frequencies nu_y and probabilities p_y
        val nu_Xy = RTensorD.freq (x_, vc, y_, k)                        // Joint Frequency Tables (JFTs)
        p_Xy = cProb_Xy (x_, y_, nu_Xy)                                  // Conditional Probability Tables (CPTs)
    end train

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the conditional probability of X given y for each feature xj.
     *  @param x_     the integer-valued data vectors stored as rows of a matrix
     *  @param y_     the class vector, where y_(i) = class for row i of the matrix x_
     *  @param nu_Xy  the joint frequency of X and y for each feature xj and class value
     */
    def cProb_Xy (x_ : MatrixD, y_ : VectorI, nu_Xy: RTensorD): RTensorD =
        val pXy = new RTensorD (x_.dim2, vc, k)
        for j <- x_.indices2; xj <- 0 until vc(j) do
            pXy(j, xj) = (nu_Xy(j, xj) + me_v(j)) / (nu_y + me)          // Conditional Probability Tables (CPTs)
        end for
        pXy
    end cProb_Xy

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the predictive model y_ = f(x_) + e and return its predictions and QoF vector.
     *  Testing may be in-sample (on the full dataset) or out-of-sample (on the testing set)
     *  as determined by the parameters passed in.
     *  Note: must call train before test.
     *  @param x_  the testing/full data/input matrix (defaults to full x)
     *  @param y_  the testing/full response/output vector (defaults to full y)
     */
    def test (x_ : MatrixD = x, y_ : VectorI = y): (VectorI, VectorD) =
        val yp  = predictI (x_)                                          // predicted classes
        val qof = diagnose (y_.toDouble, yp.toDouble)                    // diagnose from actual and predicted
//      debug ("test", s" yp = $yp \n qof = $qof")
        (yp, qof)
    end test

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the integer value of y = f(z) by computing the product of the class
     *  probabilities p_y and all the conditional probabilities P(X_j = z_j | y = c)
     *  and returning the class with the highest relative probability.
     *  Note, p_yz from `Classifier` holds the relative probabilities of y given z.
     *  @param z  the new vector to predict
     */
    override def predictI (z: VectorI): Int =
        p_yz = p_y.copy                                                  // start with class (prior) probabilities
        for j <- z.indices do p_yz *= p_Xy(j, z(j))                      // multiply P(X_j = z_j | y = c)
        p_yz.argmax ()                                                   // return class with highest probability
    end predictI

    inline def predictI (z: VectorD): Int = predictI (z.toInt)
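
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Illustrative sketch, not part of the original API: the "positive log
     *  probability" (plog) idea used by lpredictI below, shown for two hypothetical
     *  probability factors.  Multiplying many small probabilities risks underflow,
     *  so the positive logs q = -log2(p) are added instead and the smallest total
     *  wins; 2^(-q) recovers the product of the probabilities.
     *  @param p1  a hypothetical probability factor
     *  @param p2  a hypothetical probability factor
     */
    private def plogSketch (p1: Double = 0.3, p2: Double = 0.02): Double =
        val q = -(math.log (p1) + math.log (p2)) / math.log (2.0)        // sum of positive base-2 logs
        math.pow (2.0, -q)                                               // recovers p1 * p2 (up to rounding)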

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the integer value of y = f(z) by computing the product of the class
     *  probabilities p_y and all the conditional probabilities P(X_j = z_j | y = c)
     *  and returning the class with the highest relative probability.
     *  This method adds "positive log probabilities" to avoid underflow.
     *  Note, p_yz from `Classifier` holds the relative probabilities of y given z.
     *  To recover the relative probability, compute 2^(-q) where q is a plog.
     *  @param z  the new vector to predict
     */
    override def lpredictI (z: VectorI): Int =
        p_yz = plog (p_y)                                                // start with class (prior) probabilities
        for j <- z.indices do p_yz += plog (p_Xy(j, z(j)))               // add plog P(X_j = z_j | y = c)
        p_yz.argmin ()                                                   // return class with lowest positive log probability
    end lpredictI

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Produce a QoF summary for a model with diagnostics for each predictor x_0, x_1, ...
     *  and the overall Quality of Fit (QoF).
     *  @param x_      the testing/full data/input matrix
     *  @param fname_  the array of feature/variable names
     *  @param b_      the parameters/coefficients for the model
     *  @param vifs    the Variance Inflation Factors (VIFs)
     */
    override def summary (x_ : MatrixD = null, fname_ : Array [String] = null,
                          b_ : VectorD = p_y, vifs: VectorD = null): String =
        super.summary (x_, fname_, b_, vifs)                             // summary from `Fit`
    end summary

end NaiveBayes


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** `NaiveBayes` is the companion object for the `NaiveBayes` class.
 */
object NaiveBayes:

    val hp = new HyperParameter ()
    hp += ("me", 0.1, 0.1)                                               // Laplace smoothing

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the contribution to the fake instances for each feature xj
     *  based on its value count.  Used for Laplace smoothing.
     *  @param me  the number/fraction of fake instances used for Laplace smoothing
     *  @param vc  the value count vector
     */
    def me_vc (me: Double, vc: VectorI): VectorD = VectorD (vc.map (me / _))   // me / vc_j for all j

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `NaiveBayes` object, passing x and y together in one matrix.
     *  @param xy      the combined data-response matrix
     *  @param fname   the names of the features/variables
     *  @param k       the number of classes
     *  @param cname   the names of the classes
     *  @param vc      the value count (number of distinct values) for each feature
     *  @param hparam  the hyper-parameters
     *  @param col     the designated response column (defaults to the last column)
     */
    def apply (xy: MatrixI, fname: Array [String] = null, k: Int = 2,
               cname: Array [String] = Array ("No", "Yes"), vc: VectorI = null,
               hparam: HyperParameter = NaiveBayes.hp)
              (col: Int = xy.dim2 - 1): NaiveBayes =
        val (x, y) = (xy.not(?, col), xy(?, col).toInt)                  // data matrix, response vector
        new NaiveBayes (x, y, fname, k, cname, vc, hparam)
    end apply

end NaiveBayes
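
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `naiveBayesSketch` main function is an illustrative sketch, not part of the
 *  original test suite.  For a hypothetical two-feature, two-class dataset it
 *  computes, with plain Scala arrays and without Laplace smoothing, the relative
 *  posterior probabilities described in the `NaiveBayes` class documentation:
 *  prior(c) * P(x_0 = z_0 | c) * P(x_1 = z_1 | c), picking the largest.
 *  > runMain scalation.modeling.classifying.naiveBayesSketch
 */
@main def naiveBayesSketch (): Unit =

    // hypothetical data: features x0, x1 in {0, 1}, class y in {0, 1}
    val x = Array (Array (1, 1), Array (1, 0), Array (0, 1), Array (0, 0),
                   Array (1, 1), Array (0, 1))
    val y = Array (1, 1, 0, 0, 1, 0)
    val k = 2                                                            // number of classes
    val m = y.length                                                     // number of instances

    val z = Array (1, 0)                                                 // hypothetical vector to classify

    val post = Array.ofDim [Double] (k)                                  // relative posterior per class
    for c <- 0 until k do
        val idx   = (0 until m).filter (i => y(i) == c)                  // instances in class c
        val prior = idx.size.toDouble / m                                // prior P(y = c)
        var p     = prior
        for j <- z.indices do
            val cnt = idx.count (i => x(i)(j) == z(j))                   // frequency of (x_j = z_j, y = c)
            p *= cnt.toDouble / idx.size                                 // times P(x_j = z_j | y = c)
        end for
        post(c) = p
    end for

    println (s"relative posteriors = ${post.mkString (", ")}")
    println (s"predicted class     = ${post.indices.maxBy (i => post(i))}")

end naiveBayesSketch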

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `naiveBayesTest` main function is used to test the `NaiveBayes` class.
 *  > runMain scalation.modeling.classifying.naiveBayesTest
 */
@main def naiveBayesTest (): Unit =

    import Example_PlayTennis._

    banner ("Play Tennis Example")
    println (s"xy = $xy")                                                // combined data matrix [ x | y ]

    val mod = NaiveBayes (xy, fname)()                                   // create a classifier
    mod.trainNtest ()()                                                  // train and test the classifier
    println ("CPTs = " + mod.getCPTs)                                    // print the conditional probability tables
    println (mod.summary ())                                             // summary statistics

    val z = VectorI (2, 2, 1, 1)                                         // new data vector to classify
    banner (s"Classify $z")
    println (s"Use mod to classify ($z)  = ${mod.classify (z)}")         // based on highest relative probability
    println (s"Use mod to lclassify ($z) = ${mod.lclassify (z)}")        // add positive log probabilities

    banner ("Validation")
    println ("mod test accu = " + mod.validate ()())                     // out-of-sample testing

/*  Not enough instances for cross-validation
    banner ("Cross-validation")
    FitM.showQofStatTable (mod.crossValidate ())                         // 5-fold cross-validation (14 instances typically too few)
*/

end naiveBayesTest


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `naiveBayesTest2` main function is used to test the `NaiveBayes` class.
 *  Classify whether a car is more likely to be stolen (1) or not (0).
 *  @see www.inf.u-szeged.hu/~ormandi/ai2/06-naiveBayes-example.pdf
 *  > runMain scalation.modeling.classifying.naiveBayesTest2
 */
@main def naiveBayesTest2 (): Unit =

    // x0: Color:   Red (1), Yellow (0)
    // x1: Type:    SUV (1), Sports (0)
    // x2: Origin:  Domestic (1), Imported (0)
    // x3: Mpg:     High (1), Low (0)
    // features:               x0  x1  x2  x3
    val x = MatrixI ((10, 4),  1,  0,  1,  1,                            // data matrix
                               1,  0,  1,  0,
                               1,  0,  1,  1,
                               0,  0,  1,  1,
                               0,  0,  0,  1,
                               0,  1,  0,  0,
                               0,  1,  0,  0,
                               0,  1,  1,  1,
                               1,  1,  0,  0,
                               1,  0,  0,  0)

    val y = VectorI (1, 0, 1, 0, 1, 0, 1, 0, 0, 1)                       // classification vector: 0 (No), 1 (Yes)
    val fname = Array ("Color", "Type", "Origin", "Mpg")                 // feature/variable names
    val cname = Array ("No", "Yes")                                      // class names

    banner ("Stolen Car Example")
    println (s"x = $x")

    val mod = new NaiveBayes (x, y, fname, 2, cname)                     // create the classifier
    mod.trainNtest ()()                                                  // train and test the classifier
    println ("CPTs = " + mod.getCPTs)                                    // print the conditional probability tables
    println (mod.summary ())                                             // summary statistics

    val z1 = VectorI (1, 0, 1, 1)                                        // existing data vector to classify
    val z2 = VectorI (1, 1, 1, 0)                                        // new data vector to classify
    println (s"Use mod to classify ($z1) = ${mod.classify (z1)}")
    println (s"Use mod to classify ($z2) = ${mod.classify (z2)}")

end naiveBayesTest2
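
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `valueCountSketch` main function is an illustrative sketch, not part of the
 *  original test suite.  On hypothetical data and with plain Scala arrays, it shows
 *  roughly what the `NaiveBayes` constructor arranges when vc is not given (via
 *  shift2zero and vc_fromData): shift each column so its values start at 0 and take
 *  the number of distinct values per column as that feature's value count.
 *  > runMain scalation.modeling.classifying.valueCountSketch
 */
@main def valueCountSketch (): Unit =

    val x = Array (Array (1, 3), Array (2, 1), Array (3, 2), Array (1, 1))   // hypothetical data, instances in rows

    val shifted = x(0).indices.map { j =>
        val col = x.map (_(j))                                           // column j
        val lo  = col.min
        col.map (_ - lo)                                                 // shift column j to start at 0
    }
    val vc = shifted.map (_.distinct.length)                             // value count per feature

    println (s"shifted columns = ${shifted.map (_.mkString (" ")).mkString (" | ")}")
    println (s"value counts vc = ${vc.mkString (", ")}")

end valueCountSketch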

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `naiveBayesTest3` main function is used to test the `NaiveBayes` class.
 *  Given whether a person is Fast and/or Strong, classify them as making (C = 1)
 *  or not making (C = 0) the football team.
 *  > runMain scalation.modeling.classifying.naiveBayesTest3
 */
@main def naiveBayesTest3 (): Unit =

    // x0: Fast
    // x1: Strong
    // y:  Classification (No/0, Yes/1)
    // features:                 x0  x1   y
    val xy = MatrixI ((10, 3),   1,  1,  1,
                                 1,  1,  1,
                                 1,  0,  1,
                                 1,  0,  1,
                                 1,  0,  0,
                                 0,  1,  0,
                                 0,  1,  0,
                                 0,  1,  1,
                                 0,  0,  0,
                                 0,  0,  0)

    val fname = Array ("Fast", "Strong")                                 // feature names
    val cname = Array ("No", "Yes")                                      // class names

    banner ("Football Team Example")
    println (s"xy = $xy")

    val mod = NaiveBayes (xy, fname, 2, cname)()                         // create the classifier
    mod.trainNtest ()()                                                  // train and test the classifier
    println ("CPTs = " + mod.getCPTs)                                    // print the conditional probability tables
    println (mod.summary ())                                             // summary statistics

    val z = VectorI (1, 0)                                               // new data vector to classify
    println (s"Use mod to classify ($z) = ${mod.classify (z)}")

end naiveBayesTest3


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `naiveBayesTest4` main function is used to test the `NaiveBayes` class.
 *  @see archive.ics.uci.edu/ml/datasets/Lenses
 *  @see docs.roguewave.com/imsl/java/7.3/manual/api/com/imsl/datamining/NaiveBayesClassifierEx2.html
 *  > runMain scalation.modeling.classifying.naiveBayesTest4
 */
@main def naiveBayesTest4 (): Unit =

    // y:  Classification: (1) hard contact lenses, (2) soft contact lenses, (3) no contact lenses
    // x0: Age of the patient: (1) young, (2) pre-presbyopic, (3) presbyopic
    // x1: Spectacle prescription: (1) myope, (2) hypermetrope
    // x2: Astigmatic: (1) no, (2) yes
    // x3: Tear production rate: (1) reduced, (2) normal
    // features:                 x0  x1  x2  x3   y
    val xy = MatrixI ((24, 5),   1,  1,  1,  1,  3,                      // 1
                                 1,  1,  1,  2,  2,                      // 2
                                 1,  1,  2,  1,  3,                      // 3
                                 1,  1,  2,  2,  1,                      // 4
                                 1,  2,  1,  1,  3,                      // 5
                                 1,  2,  1,  2,  2,                      // 6
                                 1,  2,  2,  1,  3,                      // 7
                                 1,  2,  2,  2,  1,                      // 8
                                 2,  1,  1,  1,  3,                      // 9
                                 2,  1,  1,  2,  2,                      // 10
                                 2,  1,  2,  1,  3,                      // 11
                                 2,  1,  2,  2,  1,                      // 12
                                 2,  2,  1,  1,  3,                      // 13
                                 2,  2,  1,  2,  2,                      // 14
                                 2,  2,  2,  1,  3,                      // 15
                                 2,  2,  2,  2,  3,                      // 16
                                 3,  1,  1,  1,  3,                      // 17
                                 3,  1,  1,  2,  3,                      // 18
                                 3,  1,  2,  1,  3,                      // 19
                                 3,  1,  2,  2,  1,                      // 20
                                 3,  2,  1,  1,  3,                      // 21
                                 3,  2,  1,  2,  2,                      // 22
                                 3,  2,  2,  1,  3,                      // 23
                                 3,  2,  2,  2,  3)                      // 24

    xy -= 1                                                              // shift values to start at 0

    val fname = Array ("Age", "Spectacle", "Astigmatic", "Tear")         // feature names
    val cname = Array ("Hard", "Soft", "Neither")                        // class names

    banner ("Contact Lenses Example")
    println (s"xy = $xy")

    val mod = NaiveBayes (xy, fname, 3, cname)()                         // create the classifier
    mod.trainNtest ()()                                                  // train and test the classifier
    println ("CPTs = " + mod.getCPTs)                                    // print the conditional probability tables
    println (mod.summary ())                                             // summary statistics

    for i <- xy.indices do
        val z  = xy(i).not(4).toInt                                      // x-values
        val y  = xy(i, 4).toInt                                          // y-value
        val yp = mod.classify (z)                                        // y predicted
        println (s"Use mod : yp = classify ($z) = $yp,\t y = $y,\t ${cname(y)}")
    end for

end naiveBayesTest4
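
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `qofSketch` main function is an illustrative sketch, not part of the
 *  original test suite.  For hypothetical actual and predicted class vectors it
 *  computes, with plain Scala arrays, the accuracy and a k-by-k confusion matrix,
 *  examples of the Quality of Fit (QoF) measures summarized after test/diagnose.
 *  > runMain scalation.modeling.classifying.qofSketch
 */
@main def qofSketch (): Unit =

    val y  = Array (1, 0, 1, 0, 1, 0, 1, 0, 0, 1)                        // hypothetical actual classes
    val yp = Array (1, 0, 1, 1, 1, 0, 0, 0, 0, 1)                        // hypothetical predicted classes
    val k  = 2                                                           // number of classes

    val acc  = y.indices.count (i => y(i) == yp(i)).toDouble / y.length  // fraction predicted correctly
    val conf = Array.ofDim [Int] (k, k)
    for i <- y.indices do conf(y(i))(yp(i)) += 1                         // row = actual, column = predicted

    println (s"accuracy         = $acc")
    println (s"confusion matrix = ${conf.map (_.mkString (" ")).mkString (" | ")}")

end qofSketch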