//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  Kevin Warrick, John Miller
 *  @version 1.1
 *  @date    Wed Jan  9 15:07:13 EST 2013
 *  @see     LICENSE (MIT style license file).
 */

package scalation.analytics

import math.{ceil, floor}
import scala.collection.mutable.HashMap

import scalation.linalgebra.VectorD
import scalation.linalgebra_gen.Matrices.MatrixI
import scalation.linalgebra_gen.VectorN
import scalation.linalgebra_gen.Vectors.VectorI
import scalation.util.Error

import Probability.entropy

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `DecisionTreeID3` class implements a Decision Tree classifier using the
 *  ID3 algorithm.  The classifier is trained using a data matrix 'x' and a
 *  classification vector 'y'.  Each data vector in the matrix is classified into
 *  one of 'k' classes numbered '0, ..., k-1'.  Each column in the matrix represents
 *  a feature (e.g., Humidity).  The 'vc' array gives the number of distinct values
 *  per feature (e.g., 2 for Humidity).
 *  @param x   the data vectors stored as rows of a matrix
 *  @param y   the class array, where y_i = class for row i of the matrix x
 *  @param fn  the names for all features/variables
 *  @param k   the number of classes
 *  @param cn  the names for all classes
 *  @param vc  the value count array indicating number of distinct values per feature
 */
class DecisionTreeID3 (x: MatrixI, y: VectorI, fn: Array [String], k: Int, cn: Array [String],
                       private var vc: VectorI = null)
      extends ClassifierInt (x, y, fn, k, cn)
{
    abstract class Node
    case class FeatureNode (f: Int, branches: HashMap [Int, Node]) extends Node
    case class LeafNode (y: Int) extends Node
    type Path = List [Tuple2 [Int, Int]]

    private val DEBUG  = false                       // debug flag
    private val y_prob = new VectorD (k)             // probability that class c occurs

    if (vc == null) vc = vc_default                  // set value count (vc) to default for binary data (2)
    for (i <- 0 until m) y_prob(y(i)) += 1
    y_prob /= md

    private val entropy_0 = entropy (y_prob)         // the initial entropy
    println ("the initial entropy entropy_0 = " + entropy_0)

    private var tree: Node = null                    // the decision tree built by train

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a feature column (e.g., 2 (Humidity)) and a value (e.g., 1 (High)),
     *  use the frequency of occurrence of the value for each classification
     *  (e.g., 0 (no), 1 (yes)) to estimate k probabilities.  Also, determine
     *  the fraction of training cases where the feature has this value
     *  (e.g., fraction where Humidity is High = 7/14).
     *  @param dset   the list of data set tuples to consider (e.g., (feature value, classification))
     *  @param value  one of the possible values for this feature (e.g., 1 (High))
     */
    def frequency (dset: Array [Tuple2 [Int, Int]], value: Int): Tuple2 [Double, VectorD] =
    {
        val prob  = new VectorD (k)       // probability vector for a given feature and value
        var count = 0.0
        for ((i, j) <- dset if i == value) {
            count   += 1.0
            prob(j) += 1
        } // for
        if (count > 0.0) prob /= count    // normalize, guarding against division by zero
        (count / dset.size, prob)         // return the fraction and the probability vector
    } // frequency
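    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // Worked example (illustration only, not part of the API), assuming
    // `Probability.entropy` computes base-2 entropy as is standard for ID3:
    //     entropy (p) = -SUM_c p_c log2 (p_c)
    // For the tennis data in `DecisionTreeID3Test` below, 9 of the 14 cases are
    // classified Yes and 5 No, so
    //     entropy_0 = -(9/14) log2 (9/14) - (5/14) log2 (5/14) ~= 0.940
    // This is the baseline against which the `gain` method measures each feature.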
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Extract column f from the data matrix, keeping only the rows that lie
     *  on the given path, and pair each remaining value with its classification.
     *  @param f     the feature to consider (e.g., 2 (Humidity))
     *  @param path  the path leading to this node ((feature, value), ...)
     */
    def dataset (f: Int, path: Path): Array [Tuple2 [Int, Int]] =
    {
        val col = x.col(f).apply.zipWithIndex
        col.filter (t => path.forall (tt => x(t._2, tt._1) == tt._2)).map (t => (t._1, y(t._2)))
    } // dataset

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Find the most frequent classification.
     *  @param a  array of discrete classifications
     */
    def mode (a: Array [Int]): Int =
    {
        a.groupBy (i => i).map (t => (t._1, t._2.size)).maxBy (_._2)._1
    } // mode

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the information gain due to using the values of a feature/attribute
     *  to distinguish the training cases (e.g., how well does Humidity with its
     *  values Normal and High indicate whether one will play tennis).
     *  @param f     the feature to consider (e.g., 2 (Humidity))
     *  @param path  the path leading to this node ((feature, value), ...)
     */
    def gain (f: Int, path: Path): Double =
    {
        val dset = dataset (f, path)               // extract values from column f in data matrix x
//      val vals = dset.map (_._1).toSet.size
        val vals = vc(f)                           // number of distinct values for feature f
        var sum  = 0.0
        for (i <- 0 until vals) {
            val (coun_fi, prob_fi) = frequency (dset, i)
            val entr_fi = entropy (prob_fi)        // entropy for feature f, value i
            println ("gain from feature " + f + " for value " + i + " is " + entr_fi)
            sum += coun_fi * entr_fi
        } // for
        val igain = entropy_0 - sum                // the drop in entropy
        println ("entropy = " + sum + ": overall gain from feature " + f + " is " + igain)
        igain                                      // return the information gain
    } // gain

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train the decision tree.
     */
    def train ()
    {
        tree = buildTree (List [Tuple2 [Int, Int]] ())
        println (tree)
    } // train

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Extend the tree given a path, e.g., ((outlook, sunny), ...).
     *  @param path  an existing path in the tree ((feature, value), ...)
     */
    def buildTree (path: Path): Node =
    {
        val features = ((0 until x.dim2) diff path.map (_._1))

        var opt = (features(0), gain (features(0), path))      // start with the first unused feature
        for (f <- features) {
            val fgain = gain (f, path)
            println ("for feature " + f + " the gain is " + fgain)
            if (fgain > opt._2) opt = (f, fgain)
        } // for
        println ("optimal feature is " + opt._1 + " with a gain of " + opt._2)

        val f    = opt._1
        val node = new FeatureNode (f, new HashMap [Int, Node])
        for (b <- 0 until vc(f)) {                             // build subtree or leaf for each branch value
            val dset = dataset (f, (f, b) :: path)
            if (dset.size == 0) {
                node.branches += b -> new LeafNode (mode (y()))                 // no cases: fall back to overall majority class
            } else if (features.size == 1 || dset.map (_._2).toSet.size == 1) {
                node.branches += b -> new LeafNode (mode (dset.map (_._2)))     // base case: out of features or pure node
            } else {
                node.branches += b -> buildTree ((f, b) :: path)
            } // if
        } // for
        node
    } // buildTree

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a data vector z, classify it returning the class number (0, ..., k-1)
     *  by following a decision path from the root to a leaf.
     *  @param z  the data vector to classify
     */
    def classify (z: VectorI): Tuple2 [Int, String] =
    {
        var n = tree
        for (i <- 0 until z.dim) {
            n match {
            case FeatureNode (f, branches) => n = branches (z(f))
            case LeafNode (cls)            => return (cls, cn(cls))
            } // match
        } // for
        (-1, "?")                                  // should not be reached for a well-formed tree
    } // classify

} // DecisionTreeID3 class
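//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
// Worked example of `gain` (hand computation for illustration, not part of the
// API):  for the tennis data in `DecisionTreeID3Test` below, the Outlook feature
// (column 0) splits the 14 cases into Rain (3 yes, 2 no), Overcast (4 yes, 0 no)
// and Sunny (2 yes, 3 no).  Assuming base-2 entropy,
//     gain (Outlook) = 0.940 - [5/14 * 0.971 + 4/14 * 0.0 + 5/14 * 0.971] ~= 0.246
// which makes Outlook the best feature to split on at the root.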
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** `DecisionTreeID3` is the companion object for the `DecisionTreeID3` class.
 */
object DecisionTreeID3
{
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a `DecisionTreeID3` object, passing 'x' and 'y' together in one table.
     *  @param xy  the data vectors along with their classifications stored as rows of a matrix
     *  @param fn  the names for all features/variables
     *  @param k   the number of classes
     *  @param cn  the names for all classes
     *  @param vc  the value count array indicating number of distinct values per feature
     */
    def apply (xy: MatrixI, fn: Array [String], k: Int, cn: Array [String], vc: VectorI = null) =
    {
        new DecisionTreeID3 (xy(0 until xy.dim1, 0 until xy.dim2-1), xy.col(xy.dim2-1),
                             fn, k, cn, vc)
    } // apply

} // DecisionTreeID3 object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `DecisionTreeID3Test` object is used to test the `DecisionTreeID3` class.
 *  Ex: Classify (No/Yes) whether a person will play tennis based on the measured
 *  features.
 *  @see http://www.cise.ufl.edu/~ddd/cap6635/Fall-97/Short-papers/2.htm
 */
object DecisionTreeID3Test extends App
{
    // training-set -----------------------------------------------------------
    // Outlook:     Rain (0),   Overcast (1), Sunny (2)
    // Temperature: Cold (0),   Mild (1),     Hot (2)
    // Humidity:    Normal (0), High (1)
    // Wind:        Weak (0),   Strong (1)
    // features:                    Outlook Temp Humidity Wind
    val x  = new MatrixI ((14, 4),  2, 2, 1, 0,       // day  1 - data matrix
                                    2, 2, 1, 1,       // day  2
                                    1, 2, 1, 0,       // day  3
                                    0, 1, 1, 0,       // day  4
                                    0, 0, 0, 0,       // day  5
                                    0, 0, 0, 1,       // day  6
                                    1, 0, 0, 1,       // day  7
                                    2, 1, 1, 0,       // day  8
                                    2, 0, 0, 0,       // day  9
                                    0, 1, 0, 0,       // day 10
                                    2, 1, 0, 1,       // day 11
                                    1, 1, 1, 1,       // day 12
                                    1, 2, 0, 0,       // day 13
                                    0, 1, 1, 1)       // day 14
    // day:             1  2  3  4  5  6  7  8  9 10 11 12 13 14
    val y  = VectorN (0, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0)   // classification vector: 0 (No), 1 (Yes)
    val vc = VectorN (3, 3, 2, 2)                                 // distinct values for each feature
    val fn = Array ("Outlook", "Temp", "Humidity", "Wind")
    val cn = Array ("No", "Yes")                                  // class names, needed by classify
    println ("x  = " + x)
    println ("y  = " + y)
    println ("vc = " + vc)
    println ("---------------------------------------------------------------")

    // train the classifier ---------------------------------------------------
    val id3 = new DecisionTreeID3 (x, y, fn, 2, cn, vc)           // create the classifier
    id3.train ()
    val z = VectorN (2, 2, 1, 1)                                  // new data vector to classify
    println ("classify (" + z + ") = " + id3.classify (z))

} // DecisionTreeID3Test object
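//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `DecisionTreeID3Test2` object is a minimal sketch (added for illustration,
 *  not part of the original test suite) showing the companion object's `apply`
 *  factory method, which takes the data and classifications combined into one
 *  table 'xy' whose last column holds the class.  It reuses the tennis data
 *  from `DecisionTreeID3Test`.
 */
object DecisionTreeID3Test2 extends App
{
    // features:                    Outlook Temp Humidity Wind | class
    val xy = new MatrixI ((14, 5),  2, 2, 1, 0,  0,    // day  1
                                    2, 2, 1, 1,  0,    // day  2
                                    1, 2, 1, 0,  1,    // day  3
                                    0, 1, 1, 0,  1,    // day  4
                                    0, 0, 0, 0,  1,    // day  5
                                    0, 0, 0, 1,  0,    // day  6
                                    1, 0, 0, 1,  1,    // day  7
                                    2, 1, 1, 0,  0,    // day  8
                                    2, 0, 0, 0,  1,    // day  9
                                    0, 1, 0, 0,  1,    // day 10
                                    2, 1, 0, 1,  1,    // day 11
                                    1, 1, 1, 1,  1,    // day 12
                                    1, 2, 0, 0,  1,    // day 13
                                    0, 1, 1, 1,  0)    // day 14
    val fn = Array ("Outlook", "Temp", "Humidity", "Wind")
    val cn = Array ("No", "Yes")
    val vc = VectorN (3, 3, 2, 2)

    val id3 = DecisionTreeID3 (xy, fn, 2, cn, vc)      // factory call: splits xy into x and y
    id3.train ()
    val z = VectorN (2, 2, 1, 1)                       // new data vector to classify
    println ("classify (" + z + ") = " + id3.classify (z))

} // DecisionTreeID3Test2 object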