//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  Dong Yu Yu, John Miller
 *  @version 1.4
 *  @date    Fri Jan  5 16:54:27 EST 2018
 *  @see     LICENSE (MIT style license file).
 */

package scalation.analytics.classifier

import scala.util.Random

import scalation.linalgebra.{MatrixD,VectorI,VectorD,VectoD}
import scalation.random.RandomVecI
import scalation.util.Error

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RandomForest` class is an ensemble classifier made up of 'nF' `DecisionTreeC45`
 *  trees.  Randomness enters in two places: (1) each tree is built from its own
 *  sub-sample of the rows, drawn with replacement and of size 'bR' times the number
 *  of samples, and (2) each tree's 'train' call is handed its own randomly selected
 *  vector of 'fS' distinct feature indices.  A vector is classified by letting every
 *  tree vote and returning the class with the most votes.
 *  All random draws are derived from the seed 's', so a given seed reproduces the
 *  same forest.
 *  @param x   the features part of samples
 *  @param y   the class labels of samples
 *  @param nF  the number of trees
 *  @param bR  bagging ratio (the portion of samples used in building trees)
 *  @param fS  the number of features used in building trees
 *  @param k   the number of classes in samples
 *  @param s   seed for randomness
 *  @param fn  feature names (array of string)
 *  @param cn  class names (array of string)
 */
class RandomForest (x: MatrixD, y: VectorI, nF: Int, bR: Double, fS: Int, k: Int, s: Int, var fn: Array [String],
                    var cn: Array [String])
        extends ClassifierReal (x, y, fn, k , cn) with Error
{
    // Report bad arguments first, so the diagnostic is emitted before any
    // allocation below that depends on them (e.g., the array of 'nF' trees).
    if (nF <= 0) flaw ("constructor", "RF number of tree restrcited to be positive integer ")
    if (bR < 0 || bR > 1) flaw ("constructor", "RF bagging ratio restricted to 0 thru 1")
    if (fS < 0 || fS > x.dim2) flaw ("constructor", "RF feature size restricted to 0 thru number of features")

    private val DEBUG      = false
    private val RF_STREAMS = 1000                                  // random streams are drawn from 0 until RF_STREAMS
    private val xy         = x.:^+(y.toDouble)                     // features with the class label as the last column
    private val random     = new Random (s)                        // seeded source for all stream choices
    private val forest     = Array.ofDim [DecisionTreeC45] (nF)    // the trees, filled in by 'train'

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Draw the next random stream number in the range 0 until 'RF_STREAMS'.
     *  Using the bounded 'nextInt' keeps the result non-negative (taking 'abs' of
     *  an arbitrary `Int` does not, since 'Int.MinValue.abs' is still negative).
     */
    private def nextStream (): Int = random.nextInt (RF_STREAMS)

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a 'subSample' (size = baggingRatio * orginal sample size) from the samples,
     *  returning the 'subSample'.  Rows are drawn with replacement ('unique = false'),
     *  so the same row may occur more than once.  Each returned row carries the
     *  class label in its last column.
     */
    def createSubsample (): MatrixD =
    {
        val sampleSize = (xy.dim1 * bR).toInt
        val rvv        = RandomVecI (min = 0, max = xy.dim1-1, dim = sampleSize, unique = false,
                                     stream = nextStream ())
        val index      = rvv.igen
        val subSample  = new MatrixD (sampleSize, xy.dim2)
        for (i <- 0 until sampleSize) subSample.set (i, xy(index(i)))
        subSample
    } // createSubsample

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Select 'subFeatures' for input of building Trees, return the 'subFeatures'
     *  and the selected features 'index'.  The 'fS' column indices are distinct
     *  ('unique = true') and are drawn on a fresh random stream for each call,
     *  so that successive calls (one per tree) yield different selections.
     *  @param subSample  the sub-sample to select from
     */
    def selectSubFeatures (subSample: MatrixD): (MatrixD, VectorI) =
    {
        val rvv         = RandomVecI (min = 0, max = subSample.dim2-1, dim = fS, unique = true,
                                      stream = nextStream ())
        val index       = rvv.igen
        val subFeatures = subSample.selectCols (index.toArray)
        (subFeatures, index)
    } // selectSubFeatures

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build the trees of Forest by first selecting the subSamples, then decide
     *  which features used in spliting, then build trees.
     *  Note, the two parameters are not used by this implementation.
     *  @param testStart  the beginning of test region (inclusive).
     *  @param testEnd    the end of test region (exclusive).
     */
    def train (testStart:Int, testEnd:Int)
    {
        println ("=== Start Training ===")
        val isC      = Array.fill (x.dim2)(true)                   // every feature is flagged as continuous
        val vc       = VectorI.fill (x.dim2)(2)
        val featCols = Range (0, xy.dim2-1).toArray                // all columns except the trailing label column
        for (i <- 0 until nF) {
            val sample = createSubsample ()
            val feature      = sample.selectCols (featCols)
            val selectTarget = sample.col (sample.dim2-1).toInt
            forest(i) = new DecisionTreeC45 (feature, selectTarget, fn, isCont = isC, k = k, cn = cn, vc = vc)
            forest(i).train (selectSubFeatures (x)._2)             // only the selected feature indices are passed on
        } // for
        println ("=== Training Completed ===")
    } // train

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Classify the vector 'z' by voting within randomized trees, returning the class index,
     *  class name and the probability (not used in Random Forest, always -1.0)
     *  The forest must have been built by 'train' beforehand.
     *  @param z  the vector to be classified
     */
    def classify (z: VectoD): (Int, String, Double) =
    {
        val votes = new VectorI (k)                                // votes(c) = number of trees voting for class c
        for (tree <- forest) votes(tree.classify (z)._1) += 1
        val best = votes.argmax ()
        (best, cn(best), -1.0)
    } // classify

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Reset the frequency and probability tables (not used here).
     */
    def reset() {}

} // RandomForest class


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RandomForestTest` object is intended to test the `RandomForest` class
 *  on a simple case that does not require a file to be read.
 *  Note, it is currently a placeholder: the body is empty, so running it does nothing.
 *  > runMain scalation.analytics.classifier.RandomForestTest
 */
object RandomForestTest extends App
{
    // FIX - add simple test case (small in-memory data, no file needed)

} // RandomForestTest object


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RandomForestTest2` object is used to test the `RandomForest` class.
 *  It loads the white WineQuality dataset from a csv file, builds a forest of
 *  three trees on all of the rows and prints the accuracy reported by 'test'
 *  over those same rows.
 *  > runMain scalation.analytics.classifier.RandomForestTest2
 */
object RandomForestTest2 extends App
{
    val file        = BASE_DIR + "winequality-white.csv"
    val numbClasses = 7

    println ("RandomForestTest2: Loading WineQuality Dataset")
    val data     = MatrixD (file)
    val labelCol = data.dim2 - 1                                   // the last column holds the class label
    val target   = data.col (labelCol).-=(3)                       // shift the labels down by 3

    val fn = Array.tabulate (labelCol)(i => s"feature$i")
    val cn = Array.tabulate (numbClasses)(i => s"class$i")

    val features = data.selectCols ((0 until labelCol).toArray)    // everything but the label column
    val rF = new RandomForest (features, target.toInt, nF = 3, bR = 0.7, fS = 7,
                               k = numbClasses, s = 223, fn = fn, cn = cn)
    rF.train ()

    val accuracy = rF.test (0, data.dim1)
    println (s"Accuracy = ${accuracy}")

} // RandomForestTest2 object


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RandomForestTest3` object is used to test the `RandomForest` class.
 *  It builds forests of 1, 2, ..., 'maxTrees' trees on the white WineQuality
 *  dataset and prints the accuracy reported by 'test' for each forest size.
 *  > runMain scalation.analytics.classifier.RandomForestTest3
 */
object RandomForestTest3 extends App
{
    val file        = BASE_DIR + "winequality-white.csv"
    val numbClasses = 7
    val maxTrees    = 3

    println ("RandomForestTest3: Loading WineQuality Dataset")
    val data     = MatrixD (file)
    val labelCol = data.dim2 - 1                                   // the last column holds the class label
    val target   = data.col (labelCol).-=(3)                       // shift the labels down by 3

    val fn = Array.tabulate (labelCol)(i => s"feature$i")
    val cn = Array.tabulate (numbClasses)(i => s"class$i")

    (1 to maxTrees).foreach { numTrees =>
        val features = data.selectCols ((0 until labelCol).toArray)
        val rF = new RandomForest (features, target.toInt, nF = numTrees, bR = 0.7, fS = 7,
                                   k = numbClasses, s = 223, fn = fn, cn = cn)
        rF.train ()
        val accuracy = rF.test (0, data.dim1)
        println (s"Number of Tree = $numTrees, Accuracy = ${accuracy}")
    } // foreach

} // RandomForestTest3 object


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RandomForestTest4` object is used to test the `RandomForest` class.
 *  It tests RF using unseen data: a random 80% of the rows of the white
 *  WineQuality dataset are used to build the forest, and the remaining rows
 *  are classified to measure the testing accuracy.
 *  > runMain scalation.analytics.classifier.RandomForestTest4
 */
object RandomForestTest4 extends App
{
    // Load the dataset and shift the class labels (last column) down by 3
    println("Loading WineQuality Dataset")
    val file   = BASE_DIR+"winequality-white.csv"
    val data   = MatrixD (file)
    val target = data.col (data.dim2-1).-=(3)                      // regularize the class labels
    data.setCol (data.dim2-1, target)

    // Divide the samples into a training set (80%) and a testing set (the rest)
    val trainSize  = (data.dim1 * 0.8).toInt
    val rvv        = RandomVecI (min = 0, max = data.dim1-1, dim = trainSize, unique = true, stream = 223)
    val subSample  = new MatrixD (trainSize, data.dim2)
    val elseSample = new MatrixD (data.dim1-trainSize, data.dim2)
    val index      = rvv.igen
    val trainIdx: Array [Int] = index.toArray
    val trainRows  = trainIdx.toSet                                // set lookup instead of a linear scan per row
    var trainCount = 0
    var elseCount  = 0
    for (i <- 0 until data.dim1) {
        if (trainRows contains i) {
            subSample.set (trainCount, data(i))
            trainCount += 1
        } else {
            elseSample.set (elseCount, data(i))
            elseCount += 1
        } // if
    } // for
    val elseFeature = elseSample.selectCols (Range (0, elseSample.dim2-1).toArray)
    val elseTarget  = elseSample.col (elseSample.dim2-1)

    // Build the forest from the training set
    val ran         = 3                                            // seed for the forest
    val numbClasses = 7
    val fn = Array.tabulate (data.dim2-1)(i => s"feature$i")
    val cn = Array.tabulate (numbClasses)(i => s"class$i")
    val rF = new RandomForest (subSample.selectCols (Range (0, data.dim2-1).toArray),
                               subSample.col (subSample.dim2-1).toInt,
                               nF = 5, bR = 0.64, fS = 7, k = numbClasses, s = ran, fn = fn, cn = cn)
    rF.train ()

    // Classify each held-out row once and count the correct predictions
    val accurateCount = (0 until elseFeature.dim1).count { i =>
        rF.classify (elseFeature(i))._1 == elseTarget(i)
    }.toDouble
    val accuracy = accurateCount / elseFeature.dim1
    println(s"Testing Accuracy =$accuracy")

} // RandomForestTest4 object