//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller, Naman Fatehpuria
 *  @version 1.1
 *  @date    Mon Mar  3 14:39:17 EST 2014
 *  @see     LICENSE (MIT style license file).
 *  @see     http://fbim.fh-regensburg.de/~saj39122/Diplomarbeiten/Miklos/Papers/Keerthi%20improvement%20on%20SMO.pdf
 */

package scalation.analytics

import java.io.{BufferedWriter, File, FileWriter}

import collection.mutable.Set
import io.Source
import math.abs
import util.control.Breaks

import scalation.linalgebra.{MatrixD, VectorD}
import scalation.linalgebra_gen.VectorN
import scalation.linalgebra_gen.Vectors.VectorI
import scalation.util.Error

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `SupportVectorMachine` class is a translation of Pseudo-Code from a
 *  modified SMO (Modification 2) found at the above URL's into Scala and includes
 *  a few simplifications (e.g., currently only works for linear kernels, dense
 *  data and binary classification).
 *  @param x  the training data points stored as rows in a matrix
 *  @param y  the classification of the data points stored in a vector
 */
class SupportVectorMachine (x: MatrixD, y: VectorI)
      extends Classifier with Error
{
    type Pair = Tuple2 [Double, Double]
    
    private val DEBUG   = true               // debug flag
    private val EPSILON = 1E-3               // a number close to zero
    private val TOL     = 1E-3               // tolerance level
    private val C       = 0.05               // crossing penalty
    private val m       = x.dim1             // number of rows (data points)
    private val n       = x.dim2             // number of columns (variables)
    private val alp     = new VectorD (m)    // alpha (Langrange multipliers)
    private val fCache  = new VectorD (m)    // errors for all non-bound examples
    private val w       = new VectorD (n)    // weights
    
    private var I_0     = Set [Int] ()       // {i: 0 < alp(i) < C}
    private var I_1     = Set [Int] ()       // {i: y(i) =  1, alp(i) = 0}
    private var I_2     = Set [Int] ()       // {i: y(i) = -1, alp(i) = C}
    private var I_3     = Set [Int] ()       // {i: y(i) =  1, alp(i) = C}
    private var I_4     = Set [Int] ()       // {i: y(i) = -1, alp(i) = 0}
    
    private var b       =  0.0               // threshold
    private var b_Low   =  1.0               // lower threshold
    private var b_Up    = -1.0               // upper threshold
    private var i_Low   = -1                 // index for b_Low
    private var i_Up    = -1                 // index for b_Up
    
    private var al1     = 0.0                // old Langrange multiplier 1
    private var a1      = 0.0                // new Langrange multiplier 1
    private var al2     = 0.0                // old Langrange multiplier 2
    private var a2      = 0.0                // new Langrange multiplier 2

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train uses SMO (Sequential Minimum Optimization) algorithm to solves the 
     *  optimization problem for the weight vector 'w' and the threshold 'b' for 
     *  the model '(w dot z) - b'.
     */
    def train ()
    {
        // Fill Set I_1 and I_4, as initially alp[i] = 0
        // Initialize i_Up to any index of class +1 
        // Initialize i_Low to any index of class -1
        for (i <- 0 until m) {
            if (y(i) == 1) {
                I_1 += i; i_Up = i
            } else {
                I_4 += i; i_Low = i
            } // if
        } // for
        
        fCache (i_Low) =  1
        fCache (i_Up)  = -1
      
        var nChanged = 0
        var checkAll = true
        
        val loop = new Breaks
        
        while (nChanged > 0 || checkAll) {
            nChanged = 0
            if (checkAll) for (k <- 0 until m if checkExample (k)) nChanged += 1
            else {
                var success = true
                nChanged = 0
                while ( ( b_Up < b_Low - 2 * TOL ) && success ) {
                    success = takeStep (i_Up, i_Low)
                    if (success) nChanged += 1
                } // while
            } // if  
            if (checkAll) checkAll = false
            else if (nChanged == 0 ) checkAll = true
        } // while
        
        b = (b_Low + b_Up) / 2.0          
    } // train
    
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new continuous data vector z, determine which class it belongs to.
     *  Classify returns 1 meaning 'z' belongs to the positive class, while 
     *  -1 means it belongs to the negative class.
     *  @param z  the vector to classify
     */
    def classify (z: VectorD): Tuple2 [Int, String] =
    {
        if ((w dot z) >= b) (1, "+") else (-1, "-")
    } // classify
    
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new discrete data vector z, determine which class it belongs to.
     *  Classify returns 1 meaning 'z' belongs to the positive class, while 
     *  -1 means it belongs to the negative class.
     *  @param z  the vector to classify
     */
    def classify (z: VectorI): Tuple2 [Int, String] =
    {
        val zd = new VectorD (z.dim)
        for (j <- 0 until z.dim) zd(j) = z(j).toDouble
        classify (zd)
    } // classify
    
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the value using the learned function (assumes linear, dense).
     *  @param k  the row index into the data matrix
     */
    private def func (k: Int): Double = w dot x(k)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the value using the kernel function (assumes linear, dense).
     *  @param i1  the first row index into the data matrix
     *  @param i2  the second row index into the data matrix
     */
    private def kernel_func (i1: Int, i2: Int): Double = x(i1) dot x(i2)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Optimize by replacing old values of Lagrange multipliers al1, al2 with
     *  new values a1 and a2.
     *  @param i1  the index for the first Lagrange multipliers (alpha) 
     *  @param i2  the index for the second Lagrange multipliers (alpha)
     */
    private def takeStep (i1: Int, i2: Int): Boolean =
    {
        if (DEBUG) println ("takeStep (" + i1 + ", " + i2 + ")")
        if (i1 == i2) { println ("takeStep: skip if i1 == i2"); return false }
        
        al1 = alp (i1); al2 = alp (i2)
        var y1 = y (i1); var y2 = y (i2)
        var F1 = fCache (i1); var F2 = fCache (i2) 
        var s  = y1 * y2
        
        val (l, h) = computeLH (y1, y2)
        if (l == h) { println ("takeStep: skip if l == h"); return false }
        
        // compute eta
        val k11 = kernel_func (i1, i1)
        val k12 = kernel_func (i1, i2)
        val k22 = kernel_func (i2, i2)
        val eta = 2.0 * k12 - (k11 + k22)

        if (eta < 0.0) {
            a2 = al2 - y2 * (F1 - F2) / eta
            if (a2 < l) a2 = l else if (a2 > h) a2 = h
        } else {
            val c1 = eta / 2.0
            val c2 = y2 * (F1 - F2) - eta * al2
            val lObj = c1 * l * l + c2 * l
            val hObj = c1 * h * h + c2 * h
            a2 = if (lObj > hObj + EPSILON) l else if (lObj < hObj - EPSILON) h else al2
        } // if
        
        if (abs (a2 - al2) < EPSILON * (a2 + al2 + EPSILON)) {
            println ("takeStep: skip if a2 = " + a2 + " ~=  al2 = " + al2)      // almost no change
            return false
        } // if
        
        a1 = al1 + s * (al2 - a2)
        if (a1 < 0.0) {
            a2 += s * a1; a1 = 0
        } else if (a1 > C) {
            val t = a1 - C; a2 += s * t; a1 = C
        } // if
        
        update (i1, i2, y1, y2)                // weights and fCache
        alp (i1) = a1; alp (i2) = a2           // store a1, a2 in alp array
        
        // Compute update F values for i1 and i2.
        fCache(i1) = F1 + y1 * (a1 - al1) * k11 + y2 * (a2 - al2) * k12
        fCache(i2) = F2 + y1 * (a1 - al1) * k12 + y2 * (a2 - al2) * k22
        
        // Update I_0, I_1, I_2, I_3, I_4 for i1
        if (a1 > 0 && a1 < C)       I_0 += i1 else I_0 -= i1
        if (y(i1) == 1 && a1 == 0)  I_1 += i1 else I_1 -= i1
        if (y(i1) == -1 && a1 == C) I_2 += i1 else I_2 -= i1
        if (y(i1) == 1 && a1 == C)  I_3 += i1 else I_3 -= i1
        if (y(i1) == -1 && a1 == 0) I_4 += i1 else I_4 -= i1
        
        // Update I_0, I_1, I_2, I_3, I_4 for i2
        if (a2 > 0 && a2 < C)       I_0 += i2 else I_0 -= i2
        if (y(i2) == 1 && a2 == 0)  I_1 += i2 else I_1 -= i2
        if (y(i2) == -1 && a2 == C) I_2 += i2 else I_2 -= i2
        if (y(i2) == 1 && a2 == C)  I_3 += i2 else I_3 -= i2
        if (y(i2) == -1 && a2 == 0) I_4 += i2 else I_4 -= i2
        
        // Compute (i_Low, b_Low) and (i_Up, b_Up), using i1, i2 and indices in I_0
        var I = I_0
        I += (i1, i2)
        for (i <- I) {
            if (fCache (i) < b_Up) {
                b_Up = fCache (i)
                i_Up = i
            } // if
            if (fCache (i) > b_Low) {
                b_Low = fCache (i)
                i_Low = i
            } // if
        } // for
        true
    } // takeStep
    
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute 'l' and 'h'.
     *  @param y1  the first target value
     *  @param y2  the second target value
     */
    private def computeLH (y1: Int, y2: Int): Pair =
    {
        if (y1 == y2) {
            val gamma = al1 + al2
            return if (gamma > C) (gamma - C, C) else (0.0, gamma)
        } else {
            val gamma = al1 - al2
            return if (gamma > 0.0) (0.0, C - gamma) else (-gamma, C )
        } // if
    } // computeLH
    
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Update weights 'w' and error cache 'fCache'.
     *  @param i1  the index for the first Lagrange multipliers (alpha) 
     *  @param i2  the index for the second Lagrange multipliers (alpha)
     *  @param y1  the first target value
     *  @param y2  the second target value
     */
    def update (i1: Int, i2: Int, y1: Int, y2: Int)
    {
        val t1 = y1 * (a1 - al1)
        val t2 = y2 * (a2 - al2)
        
        // Update weight vector to reflect change in a1 and a2
        for (j <- 0 until n) w(j) += x(i1, j) * t1 + x(i2, j) * t2

        // Update fCache(i) for i in I_0 using new Lagrange multipliers (a1 & a2)
        for (i <- I_0 if i != i1 && i != i2) { 
            fCache(i) += t1 * kernel_func (i1, i) + t2 * kernel_func (i2, i)
//          fCache(i) += (y1 * a1 * kernel_func (i1, i)) - y(i) + (y2 * a2 * kernel_func (i2, i)) - y(i)
        } // for
    } // update
    
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Using the index i2 from training data, update (b_Low, i_Low) or (b_Up, i_Up)
     *  Then check for optimality using current b_Low and b_Up and, if
     *  violated, find an index i1 to do joint optimization with i2.
     *  @param i2 second data point for optimization
     */
    private def checkExample (i2: Int): Boolean =
    {
        var i1 = -1
        var F2 = 0.0
        al2 = alp (i2)
        
        if (I_0 contains i2) F2 = fCache (i2)
        else {
            F2 = func (i2) - y (i2)
            fCache (i2) = F2

            // Update (b_Low, i_Low) or (b_Up, i_Up) using (F2, i2)
            if (((I_1 contains i2) || (I_2 contains i2)) && (F2 < b_Up)) {
                b_Up = F2
                i_Up = i2
            } else if (((I_3 contains i2) || (I_4 contains i2)) && (F2 > b_Low)) {
                b_Low = F2
                i_Low = i2
            } // if
        } // if
        
        // Check optimality using current b_Low and b_Up and, if
        // violated, find an index i1 to do joint optimization with i2.
        var optimality = true
        
        if ((I_0 contains i2) || (I_1 contains i2) || (I_2 contains i2)) {
            if (b_Low - F2 > 2 * TOL) {
                optimality = false
                i1 = i_Low
            } // if
        } // if

        if ((I_0 contains i2) || (I_3 contains i2) || (I_4 contains i2)) {
            if(F2 - b_Up > 2 * TOL) {
                optimality = false
                i1 = i_Up
            } // if
        } // if
        
        if (optimality) return false
        
        // For i2 in I_0 choose the better i1.
        if (I_0 contains i2) {
            if(b_Low - F2 > F2 - b_Up) i1 = i_Low
            else i1 = i_Up
        } // if
        
        takeStep(i1, i2)
    } // checkExample
    
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Convert svm to a string showing (w, b).
     */
    override def toString = "(w, b) = " + (w, b)

} // SupportVectorMachine


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `SupportVectorMachineTest` is used to test the `SupportVectorMachine` class.
 */
object SupportVectorMachineTest extends App
{
    // Test 1
    val x = new MatrixD ((4, 2), 1.0, 2.0,    // 4 data points
                                 2.0, 1.0,
                                 2.0, 3.0,
                                 3.0, 2.0)
    val y = VectorN (-1, -1, 1, 1)            // classification of points
    val z = VectorD (4.0, 3.0)
    
    val svm   = new SupportVectorMachine (x, y)               // create optimizer
    svm.train ()
    println (svm)
    println ("classify (" + z + ") = " + svm.classify (z))

} // SupportVectorMachineTest object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `SupportVectorMachineTest2` is used to test the `SupportVectorMachine` class.
 */
object SupportVectorMachineTest2 extends App
{
    // FIX - put in a larger test example

} // SupportVectorMachineTest2 object