//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller, Naman Fatehpuria
 *  @version 2.0
 *  @date    Mon Mar 3 14:39:17 EST 2014
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    Model: Support Vector Machine
 *
 *  @see fbim.fh-regensburg.de/~saj39122/Diplomarbeiten/Miklos/Papers/Keerthi%20improvement%20on%20SMO.pdf
 *  @see www.microsoft.com/en-us/research/wp-content/uploads/2016/02/tr-98-14.pdf
 *  @see repository.ias.ac.in/127671/1/smo_mod.pdf
 */

// U N D E R   D E V E L O P M E N T

package scalation
package modeling
package classifying

import scala.collection.mutable.Set
import scala.math.abs

import scalation.mathstat._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `SupportVectorMachine` class is a translation of the pseudo-code for
 *  modified SMO (Modification 2) found at the above URLs into Scala and includes
 *  a few simplifications (e.g., it currently only works for linear kernels, dense
 *  data and binary classification).
 *  @param x       the input/data matrix with points stored as rows
 *  @param y       the classification of the data points stored in a vector
 *  @param fname_  the feature/variable names
 *  @param cname_  the class names
 *  @param hparam  the hyper-parameters
 */
class SupportVectorMachine (x: MatrixD, y: VectorI, fname_ : Array [String] = null,
                            cname_ : Array [String] = Array ("-", "+"),
                            hparam: HyperParameter = Classifier.hp)
      extends Classifier (x, y, fname_, 2, cname_, hparam)
         with FitC ():

    type Pair = (Double, Double)

    private val debug   = debugf ("SupportVectorMachine", false)  // debug flag
    private val EPSILON = 1E-3                                    // a number close to zero
    private val TOL     = 1E-3                                    // tolerance level
    private val C       = 0.05                                    // crossing penalty

    private var alp: VectorD    = null                            // alpha (Lagrange multipliers)
    private var fCache: VectorD = null                            // errors for all non-bound examples
    private var w: VectorD      = null                            // weights

    private val I_0 = Set [Int] ()                                // {i: 0 < alp(i) < C}
    private val I_1 = Set [Int] ()                                // {i: y(i) =  1, alp(i) = 0}
    private val I_2 = Set [Int] ()                                // {i: y(i) = -1, alp(i) = C}
    private val I_3 = Set [Int] ()                                // {i: y(i) =  1, alp(i) = C}
    private val I_4 = Set [Int] ()                                // {i: y(i) = -1, alp(i) = 0}

    private var b     =  0.0                                      // threshold/offset
    private var b_Low =  1.0                                      // lower threshold
    private var b_Up  = -1.0                                      // upper threshold
    private var i_Low = -1                                        // index for b_Low
    private var i_Up  = -1                                        // index for b_Up

    private var al1 = 0.0                                         // old Lagrange multiplier 1
    private var a1  = 0.0                                         // new Lagrange multiplier 1
    private var al2 = 0.0                                         // old Lagrange multiplier 2
    private var a2  = 0.0                                         // new Lagrange multiplier 2

    modelName = "SupportVectorMachine"                            // the name of the model

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the vector of model parameter values [ w | b ].
     */
    override def parameter: VectorD = w :+ b
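
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // A minimal usage sketch (illustrative only, following the API exercised by
    // the tests at the bottom of this file):
    //     val mod = new SupportVectorMachine (x, y)                // y holds -1/+1 labels
    //     mod.train ()                                             // run SMO to find w and b
    //     println (mod.parameter)                                  // learned [ w | b ]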

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Train a classification model y_ = f(x_) + e where x_ is the data/input
     *  matrix and y_ is the response/output vector.  These arguments default
     *  to the full dataset x and y, but may be restricted to a training set.
     *  Train uses the SMO (Sequential Minimal Optimization) algorithm to solve
     *  the optimization problem for the weight vector w and the threshold b for
     *  the model (w dot z) - b.
     *  Fill sets I_1 and I_4, as initially alp(i) = 0.
     *  Initialize i_Up to any index of class +1 and i_Low to any index of class -1.
     *  @param x_  the training/full data/input matrix (defaults to full x)
     *  @param y_  the training/full response/output vector (defaults to full y)
     */
    override def train (x_ : MatrixD = x, y_ : VectorI = y): Unit =
//      super.train (x_, y_)
        alp    = new VectorD (x_.dim)                             // alpha (Lagrange multipliers)
        fCache = new VectorD (x_.dim)                             // errors for all non-bound examples
        w      = new VectorD (x_.dim2)                            // weights

        for i <- y_.indices do
            if y_(i) == 1 then
                I_1 += i; i_Up = i
            else
                I_4 += i; i_Low = i
            end if
        end for

        fCache(i_Low) =  1
        fCache(i_Up)  = -1

        var nChanged = 0
        var checkAll = true

        while nChanged > 0 || checkAll do
            nChanged = 0
            if checkAll then
                for k <- x_.indices if checkExample (k) do nChanged += 1
            else
                var success = true
                while (b_Up < b_Low - 2 * TOL) && success do
                    success = takeStep (i_Up, i_Low)
                    if success then nChanged += 1
                end while
            end if
            if checkAll then checkAll = false
            else if nChanged == 0 then checkAll = true
        end while

        b = (b_Low + b_Up) / 2.0
        debug ("train", s"w = $w, b = $b")
    end train

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the predictive model y_ = f(x_) + e and return its predictions and QoF vector.
     *  Testing may be in-sample (on the full dataset) or out-of-sample (on the testing set)
     *  as determined by the parameters passed in.
     *  Note: must call train before test.
     *  @param x_  the testing/full data/input matrix (defaults to full x)
     *  @param y_  the testing/full response/output vector (defaults to full y)
     */
    def test (x_ : MatrixD = x, y_ : VectorI = y): (VectorI, VectorD) =
        val yp  = predictI (x_)                                   // predicted classes
        val qof = diagnose (y_, yp)                               // diagnose from actual and predicted
        debug ("test", s" yp = $yp \n qof = $qof")
        (yp, qof)
    end test

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Diagnose the health of the model by computing the Quality of Fit (QoF) measures,
     *  from the error/residual vector and the predicted & actual responses.
     *  Requires the actual and predicted responses to be non-negative integers.
     *  This override maps -1 to 0.
     *  @param y_  the actual response/output vector to use (test/full)
     *  @param yp  the predicted response/output vector (test/full)
     */
    override def diagnose (y_ : VectorI, yp: VectorI): VectorD =
        super.diagnose (y_.map2 (-1, 0), yp.map2 (-1, 0))         // compute basic QoF from `FitM`
    end diagnose

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new continuous data vector z, determine which class it belongs to.
     *  Classify returns 1 meaning z belongs to the positive (+) class, while
     *  -1 means it belongs to the negative (-) class.
     *  @param z  the vector to classify
     */
    override def predictI (z: VectorD): Int = if (w dot z) >= b then 1 else -1
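
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // Worked illustration of the decision rule above (hypothetical values, not
    // taken from an actual training run): with w = VectorD (0.5, 0.5) and b = 2.5,
    // z = VectorD (4.0, 3.0) gives (w dot z) = 3.5 >= 2.5, so predictI (z) = 1 (+).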

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Produce a QoF summary for a model with diagnostics for each predictor x_0, x_1,
     *  and the overall Quality of Fit (QoF).
     *  @param x_      the testing/full data/input matrix
     *  @param fname_  the array of feature/variable names
     *  @param b_      the parameters/coefficients for the model
     *  @param vifs    the Variance Inflation Factors (VIFs)
     */
    override def summary (x_ : MatrixD = null, fname_ : Array [String] = null,
                          b_ : VectorD = p_y, vifs: VectorD = null): String =
        super.summary (x_, fname_, b_, vifs)                      // summary from `Fit`
    end summary

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the value using the learned function (assumes linear, dense).
     *  @param k  the row index into the data matrix
     */
    private def func (k: Int): Double = w dot x(k)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the value using the kernel function (assumes linear, dense).
     *  @param i1  the first row index into the data matrix
     *  @param i2  the second row index into the data matrix
     */
    private def kernel_func (i1: Int, i2: Int): Double = x(i1) dot x(i2)
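
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // Nonlinearity would enter through kernel_func.  A hypothetical alternative
    // (a sketch only; rbf_kernel and gamma are not part of this class):
    //     import scala.math.exp
    //     private def rbf_kernel (i1: Int, i2: Int, gamma: Double = 1.0): Double =
    //         exp (-gamma * (x(i1) - x(i2)).normSq)               // Gaussian/RBF kernel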

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Optimize by replacing old values of Lagrange multipliers al1, al2 with
     *  new values a1 and a2.
     *  @param i1  the index for the first Lagrange multiplier (alpha)
     *  @param i2  the index for the second Lagrange multiplier (alpha)
     */
    private def takeStep (i1: Int, i2: Int): Boolean =
        debug ("takeStep", s"(i1, i2) = ($i1, $i2)")
        if i1 == i2 then { debug ("takeStep", "skip if i1 == i2"); return false }

        al1 = alp(i1); al2 = alp(i2)
        val y1 = y(i1); val y2 = y(i2)
        val F1 = fCache(i1); val F2 = fCache(i2)
        val s  = y1 * y2

        val (l, h) = computeLH (y1, y2)
        if l == h then { debug ("takeStep", "skip if l == h"); return false }

        // compute eta
        val k11 = kernel_func (i1, i1)
        val k12 = kernel_func (i1, i2)
        val k22 = kernel_func (i2, i2)
        val eta = 2.0 * k12 - (k11 + k22)

        if eta < 0.0 then
            a2 = al2 - y2 * (F1 - F2) / eta
            if a2 < l then a2 = l else if a2 > h then a2 = h
        else
            val c1   = eta / 2.0
            val c2   = y2 * (F1 - F2) - eta * al2
            val lObj = c1 * l * l + c2 * l
            val hObj = c1 * h * h + c2 * h
            a2 = if lObj > hObj + EPSILON then l
                 else if lObj < hObj - EPSILON then h
                 else al2
        end if

        if abs (a2 - al2) < EPSILON * (a2 + al2 + EPSILON) then
            debug ("takeStep", s"skip if a2 = $a2 ~= al2 = $al2") // almost no change
            return false
        end if

        a1 = al1 + s * (al2 - a2)
        if a1 < 0.0 then
            a2 += s * a1; a1 = 0
        else if a1 > C then
            val t = a1 - C; a2 += s * t; a1 = C
        end if

        update (i1, i2, y1, y2)                                   // weights and fCache
        alp(i1) = a1; alp(i2) = a2                                // store a1, a2 in alp array

        // Compute updated F values for i1 and i2
        fCache(i1) = F1 + y1 * (a1 - al1) * k11 + y2 * (a2 - al2) * k12
        fCache(i2) = F2 + y1 * (a1 - al1) * k12 + y2 * (a2 - al2) * k22

        // Update I_0, I_1, I_2, I_3, I_4 for i1
        if a1 > 0 && a1 < C       then I_0 += i1 else I_0 -= i1
        if y(i1) ==  1 && a1 == 0 then I_1 += i1 else I_1 -= i1
        if y(i1) == -1 && a1 == C then I_2 += i1 else I_2 -= i1
        if y(i1) ==  1 && a1 == C then I_3 += i1 else I_3 -= i1
        if y(i1) == -1 && a1 == 0 then I_4 += i1 else I_4 -= i1

        // Update I_0, I_1, I_2, I_3, I_4 for i2
        if a2 > 0 && a2 < C       then I_0 += i2 else I_0 -= i2
        if y(i2) ==  1 && a2 == 0 then I_1 += i2 else I_1 -= i2
        if y(i2) == -1 && a2 == C then I_2 += i2 else I_2 -= i2
        if y(i2) ==  1 && a2 == C then I_3 += i2 else I_3 -= i2
        if y(i2) == -1 && a2 == 0 then I_4 += i2 else I_4 -= i2

        // Compute (i_Low, b_Low) and (i_Up, b_Up), using i1, i2 and indices in I_0
        val I = I_0 ++ Set (i1, i2)                               // union into a new set (do not mutate I_0)
        for i <- I do
            if fCache(i) < b_Up then
                b_Up = fCache(i)
                i_Up = i
            end if
            if fCache(i) > b_Low then
                b_Low = fCache(i)
                i_Low = i
            end if
        end for
        true
    end takeStep

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the lower and upper bounds (l, h) for clipping a2.
     *  @param y1  the first target value
     *  @param y2  the second target value
     */
    private def computeLH (y1: Int, y2: Int): Pair =
        if y1 == y2 then
            val gamma = al1 + al2
            if gamma > C then (gamma - C, C) else (0.0, gamma)
        else
            val gamma = al1 - al2
            if gamma > 0.0 then (0.0, C - gamma) else (-gamma, C)
        end if
    end computeLH

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Update weights w and error cache fCache.
     *  @param i1  the index for the first Lagrange multiplier (alpha)
     *  @param i2  the index for the second Lagrange multiplier (alpha)
     *  @param y1  the first target value
     *  @param y2  the second target value
     */
    private def update (i1: Int, i2: Int, y1: Int, y2: Int): Unit =
        val t1 = y1 * (a1 - al1)
        val t2 = y2 * (a2 - al2)

        // Update weight vector to reflect the change in a1 and a2
        for j <- x.indices2 do w(j) += x(i1, j) * t1 + x(i2, j) * t2

        // Update fCache(i) for i in I_0 using the new Lagrange multipliers (a1 & a2)
        for i <- I_0 if i != i1 && i != i2 do
            fCache(i) += t1 * kernel_func (i1, i) + t2 * kernel_func (i2, i)
//          fCache(i) += (y1 * a1 * kernel_func (i1, i)) - y(i) + (y2 * a2 * kernel_func (i2, i)) - y(i)
        end for
        debug ("update", s"w = $w")
    end update
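
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // Worked example for computeLH (hypothetical multiplier values): with C = 0.05,
    // y1 == y2, al1 = 0.03 and al2 = 0.04, gamma = 0.07 > C, so (l, h) = (0.02, 0.05),
    // i.e., a2 is clipped to keep a1 + a2 = gamma feasible with 0 <= a1, a2 <= C.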

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Using the index i2 from the training data, update (b_Low, i_Low) or (b_Up, i_Up).
     *  Then check for optimality using the current b_Low and b_Up and, if
     *  violated, find an index i1 to do joint optimization with i2.
     *  @param i2  the second data point for optimization
     */
    private def checkExample (i2: Int): Boolean =
        var i1 = -1
        var F2 = 0.0
        al2 = alp(i2)

        if I_0 contains i2 then
            F2 = fCache(i2)
        else
            F2 = func (i2) - y(i2)
            fCache(i2) = F2
        end if

        // Update (b_Low, i_Low) or (b_Up, i_Up) using (F2, i2)
        if ((I_1 contains i2) || (I_2 contains i2)) && (F2 < b_Up) then
            b_Up = F2
            i_Up = i2
        else if ((I_3 contains i2) || (I_4 contains i2)) && (F2 > b_Low) then
            b_Low = F2
            i_Low = i2
        end if

        // Check optimality using the current b_Low and b_Up and, if
        // violated, find an index i1 to do joint optimization with i2
        var optimality = true
        if (I_0 contains i2) || (I_1 contains i2) || (I_2 contains i2) then
            if b_Low - F2 > 2 * TOL then
                optimality = false
                i1 = i_Low
            end if
        end if
        if (I_0 contains i2) || (I_3 contains i2) || (I_4 contains i2) then
            if F2 - b_Up > 2 * TOL then
                optimality = false
                i1 = i_Up
            end if
        end if
        if optimality then return false

        // For i2 in I_0 choose the better i1
        if I_0 contains i2 then
            i1 = if b_Low - F2 > F2 - b_Up then i_Low else i_Up
        end if
        takeStep (i1, i2)
    end checkExample

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Convert the SVM to a string showing (w, b).
     */
    override def toString: String = "(w, b) = " + (w, b)

end SupportVectorMachine


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `SupportVectorMachine` companion object provides a factory method.
 */
object SupportVectorMachine:

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a support vector machine for the given combined matrix where the
     *  column col is the response/classification vector.
     *  @param xy      the combined data matrix (features and response)
     *  @param fname   the names for all features/variables
     *  @param cname   the names for all classes
     *  @param hparam  the hyper-parameters
     *  @param col     the designated response column (defaults to the last column)
     */
    def apply (xy: MatrixD, fname: Array [String] = null,
               cname: Array [String] = Array ("No", "Yes"),
               hparam: HyperParameter = Classifier.hp)
              (col: Int = xy.dim2 - 1): SupportVectorMachine =
        val (x, y) = (xy.not(?, col), xy(?, col).toInt)           // data matrix, response vector
        new SupportVectorMachine (x, y.map2 (0, -1), fname, cname, hparam)
    end apply

end SupportVectorMachine


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `supportVectorMachineTest` main function tests the `SupportVectorMachine` class.
 *  > runMain scalation.modeling.classifying.supportVectorMachineTest
 */
@main def supportVectorMachineTest (): Unit =

    // Test 1
    val x = MatrixD ((4, 2), 1.0, 2.0,                            // 4 data points
                             2.0, 1.0,
                             2.0, 3.0,
                             3.0, 2.0)
    val y = VectorI (-1, -1, 1, 1)                                // classification of points
    val z = VectorD (4.0, 3.0)

    val mod = new SupportVectorMachine (x, y)                     // create optimizer
    mod.trainNtest ()()
    println (mod.summary ())
    println (mod.predict (z))

end supportVectorMachineTest


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `supportVectorMachineTest2` main function tests the `SupportVectorMachine` class.
 *  > runMain scalation.modeling.classifying.supportVectorMachineTest2
 */
@main def supportVectorMachineTest2 (): Unit =

    val xy = MatrixD ((8, 3), 2, 2, -1,                           // 8 data points
                              4, 2, -1,
                              2, 4, -1,
                              2, 6,  1,
                              4, 4,  1,
                              6, 2,  1,
                              6, 4,  1,
                              4, 6,  1)

    val (x, y) = (xy.not(?, 2), xy(?, 2).toInt)

    val mod = new SupportVectorMachine (x, y)                     // create optimizer
    mod.trainNtest ()()
    println (mod.summary ())

end supportVectorMachineTest2
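

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `supportVectorMachineTest3` main function exercises the companion object's
 *  factory method on the same 8 points as test2, with the response stored as 0/1
 *  in the last column (the factory maps 0 to -1 via map2).  A sketch added for
 *  illustration only; it assumes the factory's default response-column convention.
 *  > runMain scalation.modeling.classifying.supportVectorMachineTest3
 */
@main def supportVectorMachineTest3 (): Unit =

    val xy = MatrixD ((8, 3), 2, 2, 0,                            // 8 data points
                              4, 2, 0,
                              2, 4, 0,
                              2, 6, 1,
                              4, 4, 1,
                              6, 2, 1,
                              6, 4, 1,
                              4, 6, 1)

    val mod = SupportVectorMachine (xy)()                         // factory: last column is the response
    mod.trainNtest ()()
    println (mod.summary ())

end supportVectorMachineTest3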