//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.3
 *  @date    Sun Sep 23 21:14:14 EDT 2012
 *  @see     LICENSE (MIT style license file).
 */

package scalation.analytics.classifier

import scalation.linalgebra.{VectoD, VectoI, VectorI}
import scalation.random.PermutedVecI
import scalation.random.RNGStream.ranStream

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `Classifier` trait provides a common framework for several classifiers.
 *  A classifier is for bounded responses.  When the number of distinct responses
 *  cannot be bounded by some integer 'k', a predictor should be used.
 */
trait Classifier
{
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a set of data vectors and their classifications, build a classifier.
     *  @param testStart  the beginning of test region (inclusive).
     *  @param testEnd    the end of test region (exclusive).
     */
    def train (testStart: Int, testEnd: Int)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a set of data vectors and their classifications, build a classifier.
     *  @param itest  the indices of the instances considered as testing data
     */
    def train (itest: IndexedSeq [Int]) {}

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a set of data vectors and their classifications, build a classifier.
     */
    def train () { train (0, 0) }

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new discrete data vector z, determine which class it belongs to,
     *  returning the best class, its name and its relative probability.
     *  @param z  the vector to classify
     */
    def classify (z: VectoI): (Int, String, Double)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new continuous data vector z, determine which class it belongs to,
     *  returning the best class, its name and its relative probability.
     *  @param z  the vector to classify
     */
    def classify (z: VectoD): (Int, String, Double)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the quality of the training with a test-set and return the fraction
     *  of correct classifications.
     *  @param testStart  the beginning of test region (inclusive).
     *  @param testEnd    the end of test region (exclusive).
     */
    def test (testStart: Int, testEnd: Int): Double

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the quality of the training with a test-set and return the fraction
     *  of correct classifications.
     *  @param itest  the indices of the instances considered test data
     */
    def test (itest: VectorI): Double = 0.0

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /**  Reset the frequency and probability tables.
     */
    def reset ()

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the accuracy of the classified results by cross-validation, returning
     *  the accuracy.  The "test data" starts at 'testStart' and ends at 'testEnd',
     *  the rest of the data is "training data'.
     *  @param nx  the number of crosses and cross-validations (defaults to 5x).
     */
    def crossValidate (nx: Int = 10): Double =
    {
        val testSize = size / nx
        var sum      = 0.0

        for (i <- 0 until nx) {
            val testStart = i * testSize
            val testEnd   = testStart + testSize
            train (testStart, testEnd)
            sum += test (testStart, testEnd)
        } // for

        val avg = sum / nx.toDouble
//      println ("Average accuracy = " + avg)
//      println ("------------------------------------------------------------")
        avg
    } // crossValidate

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the accuracy of the classified results by cross-validation, returning
     *  the accuracy. This version of cross-validation relies on "subtracting"
     *  frequencies from the previously stored global data to achieve efficiency.
     *  @param nx  number of crosses and cross-validations (defaults to 10x).
     */
    def crossValidateRand (nx: Int = 10): Double =
    {
        var sum         = 0.0
        val permutedVec = PermutedVecI (VectorI.range(0, size), ranStream)
        val randOrder   = permutedVec.igen
        val itestA      = randOrder.split (nx)

        for (itest <- itestA) {
            train (itest())
            sum += test (itest)
        } // for

        sum / nx.toDouble
    } // crossValidateRand

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the size of the feature set.
     */
    def size: Int
    
} // Classifier trait