//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.3
 *  @date    Fri Sep 30 13:37:32 EDT 2011
 *  @see     LICENSE (MIT style license file).
 *------------------------------------------------------------------------------
 *  @see The Superlinear Convergence of a Modified BFGS-Type Method for Unconstrained Optimization
 *  @see On the Robustness of Conjugate-Gradient Methods and Quasi-Newton Methods
 *  @see Limited Memory BFGS for Nonsmooth Optimization
 *  @see http://en.wikipedia.org/wiki/BFGS_method
 *  @see http://www.personal.psu.edu/cxg286/Math555.pdf
 *  @see http://people.orie.cornell.edu/aslewis/publications/bfgs_inexactLS.pdf
 *  @see http://people.orie.cornell.edu/aslewis/publications/bfgs_exactLS.pdf
 */

package scalation.minima

import scala.math.{abs, max, pow}

import scalation.calculus.Differential.{FunctionV2S, gradient, gradientD}
import scalation.linalgebra.{MatrixD, VectorD}
import scalation.linalgebra.MatrixD.{eye, outer}
import scalation.math.ExtremeD.MAX_VALUE
import scalation.util.Error

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuasiNewton` class implements the Broyden–Fletcher–Goldfarb–Shanno (BFGS)
 *  Quasi-Newton Algorithm for solving Non-Linear Programming (NLP) problems.
 *  BFGS determines a search direction by deflecting the steepest descent direction
 *  vector (opposite the gradient) by multiplying it by a matrix that approximates
 *  the inverse Hessian.  Note, this implementation may be set up to work with the
 *  matrix 'b' (approximate Hessian) or directly with the 'binv' matrix (the inverse
 *  of 'b').
 *
 *      minimize    f(x)
 *      subject to  g(x) <= 0    [ optionally g(x) == 0 ]
 *
 *  @param f        the objective function to be minimized
 *  @param g        the constraint function to be satisfied, if any
 *  @param ineq     whether the constraint is treated as inequality (default) or equality
 *  @param exactLS  whether to use exact (e.g., `GoldenLS`) or inexact (e.g., `WolfeLS`) Line Search
 */
class QuasiNewton (f: FunctionV2S, g: FunctionV2S = null,
                   ineq: Boolean = true, exactLS: Boolean = false)
      extends Minimizer with Error
{
    private val DEBUG  = true                    // the debug flag
    private val WEIGHT = 1000.0                  // weight on penalty for constraint violation

    private var df: Array [FunctionV2S] = null   // array of partials
    private var b: MatrixD    = null             // approx. Hessian matrix (use b or binv)
    private var binv: MatrixD = null             // inverse of approx. Hessian matrix
    private var bfgs          = true             // use BFGS (true) or Gradient Descent (false)

    type Pair = Tuple2 [VectorD, VectorD]        // pair of vectors

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Use the Gradient Descent algorithm rather than the default BFGS algorithm.
     */
    def setSteepest () { bfgs = false }

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Update the 'b' matrix, whose inverse is used to deflect -gradient to a
     *  better direction than steepest descent (-gradient).
     *  @param s  the step vector (next point - current point)
     *  @param y  the difference in the gradients (next - current)
     */
//  def updateB (s: VectorD, y: VectorD)
//  {
//      var sy = s dot y                         // dot product of s and y
//      if (abs (sy) < TOL) sy = TOL
//      val sb = s * b
//      b += outer (y, y) / sy - outer (sb, sb) / (sb dot s)
//  } // updateB
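
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // Note on the two update forms (transcribed from the code above and below):
    // the commented-out 'updateB' applies the standard BFGS update to the
    // approximate Hessian 'b',
    //     b    += (y y^t) / (s . y) - (b s)(b s)^t / (s . b s)
    // while 'updateBinv' applies the equivalent Sherman–Morrison form directly
    // to the inverse 'binv',
    //     binv += ((s . y) + (y . binv y)) (s s^t) / (s . y)^2
    //           - (binv y s^t + s (binv y)^t) / (s . y)
    // so only one of the two matrices needs to be maintained per iteration.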

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Update the 'binv' matrix, which is used to deflect -gradient to a better
     *  search direction than steepest descent (-gradient).
     *  Compute the 'binv' matrix directly using the Sherman–Morrison formula.
     *  @see http://en.wikipedia.org/wiki/BFGS_method
     *  @param s  the step vector (next point - current point)
     *  @param y  the difference in the gradients (next - current)
     */
    def updateBinv (s: VectorD, y: VectorD)
    {
        var sy = s dot y                         // dot product of s and y
        if (abs (sy) < TOL) sy = TOL
        val binvy = binv * y
        binv += (outer (s, s) * (sy + (binvy dot y))) / (sy * sy) -
                (outer (binvy, s) + outer (s, binvy)) / sy
    } // updateBinv

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Set the partial derivative functions.  If these functions are available,
     *  they are more efficient and more accurate than estimating the values
     *  using difference quotients (the default approach).
     *  @param partials  the array of partial derivative functions
     */
    def setDerivatives (partials: Array [FunctionV2S])
    {
        if (g != null) flaw ("setDerivatives", "only works for unconstrained problems")
        df = partials                            // use given functions for partial derivatives
    } // setDerivatives

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** The objective function f plus a weighted penalty based on the constraint
     *  function g.
     *  @param x  the coordinate values of the current point
     */
    override def fg (x: VectorD): Double =
    {
        val f_x = f(x)
        if (g == null) {                         // unconstrained
            f_x
        } else {                                 // constrained, g(x) <= 0
            val penalty = if (ineq) max (g(x), 0.0) else abs (g(x))
            f_x + abs (f_x) * WEIGHT * penalty * penalty
        } // if
    } // fg
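
    // For example, with the default WEIGHT = 1000, a point that violates an
    // inequality constraint by g(x) = 0.1 is penalized by |f(x)| * 1000 * 0.1^2
    // = 10 |f(x)|, strongly discouraging the line search from stepping far
    // outside the feasible region.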

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Perform an exact 'GoldenSectionLS' or inexact 'WolfeLS' Line Search.
     *  Search in direction 'dir', returning the distance 'z' to move in that direction.
     *  Defaults to the inexact `WolfeLS` Line Search.
     *  @param x     the current point
     *  @param dir   the direction to move in
     *  @param step  the initial step size
     */
    def lineSearch (x: VectorD, dir: VectorD, step: Double = STEP): Double =
    {
        def f_1D (z: Double): Double = fg(x + dir * z)       // create a 1D function
        val ls = if (exactLS) new GoldenSectionLS (f_1D)     // Golden Section Line Search
                 else new WolfeLS (f_1D)                     // Wolfe line search (c1 = .0001, c2 = .9)
        ls.search (step)                                     // perform a Line Search
    } // lineSearch

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Solve the following Non-Linear Programming (NLP) problem using BFGS:
     *  min { f(x) | g(x) <= 0 }.  To use explicit functions for gradient,
     *  replace 'gradient (fg, x._1 + s)' with 'gradientD (df, x._1 + s)'.
     *  @param x0     the starting point
     *  @param step   the initial step size
     *  @param toler  the tolerance
     */
    def solve (x0: VectorD, step: Double = STEP, toler: Double = TOL): VectorD =
    {
        if (DEBUG) println ("QuasiNewton.solve: starting at x0 = " + x0)

        var x:  Pair = (x0, gradient (fg, x0))               // current (point, gradient)
        var xx: Pair = (null, null)                          // next (point, gradient)
        var s:  VectorD = null                               // step vector
        var dir = -x._2                                      // initial direction is -gradient

        binv = eye (x0.dim)                                  // inverse of approx. Hessian matrix
//      b = eye (x0.dim)                                     // approx. Hessian matrix (either use b or binv)

        if (DEBUG) println ("solve: ||gradient||^2 = " + x._2.normSq)

        for (k <- 1 to MAX_ITER if x._2.normSq > TOL) {
            s  = dir * lineSearch (x._1, dir)                // update step vector
            xx = (x._1 + s, gradient (fg, x._1 + s))         // compute the next point
            if (bfgs) {
                dir = -(binv * xx._2)                        // next search direction using BFGS and 'binv'
                updateBinv (s, xx._2 - x._2)                 // update the deflection matrix 'binv'
//              dir = -(b.inverse * xx._2)                   // next search direction using BFGS and 'b'
//              updateB (s, xx._2 - x._2)                    // update the deflection matrix 'b'
            } else {
                dir = -xx._2                                 // next search direction using Gradient Descent
            } // if
            if (DEBUG) println ("solve: (k = " + k + ") move from " + x._1 + " to " + xx._1
                              + " where fg(xx._1) = " + fg(xx._1))

            for (x_i <- xx._1 if abs (x_i) > MAX_VALUE) return x._1
            x = xx                                           // make the next point the current point
        } // for
        x._1                                                 // return the current point
    } // solve

} // QuasiNewton class


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuasiNewtonTest` object is used to test the `QuasiNewton` class.
 *  > run-main scalation.minima.QuasiNewtonTest
 */
object QuasiNewtonTest extends App
{
    def x0 = new VectorD (2)

    println ("\nMinimize: (x_0 - 3)^2 + (x_1 - 4)^2 + 1")
    def f (x: VectorD): Double = (x(0) - 3.0) * (x(0) - 3.0) + (x(1) - 4.0) * (x(1) - 4.0) + 1.0
    var solver = new QuasiNewton (f)
    var x = solver.solve (x0)
    println ("optimal solution x = " + x + " with an objective value f(x) = " + f(x))

    println ("\nMinimize: x_0^4 + (x_0 - 3)^2 + (x_1 - 4)^2 + 1")
    def g (x: VectorD): Double = pow (x(0), 4.0) + (x(0) - 3.0) * (x(0) - 3.0) + (x(1) - 4.0) * (x(1) - 4.0) + 1.0
    solver = new QuasiNewton (g)
    x = solver.solve (x0)
    println ("optimal solution x = " + x + " with an objective value g(x) = " + g(x))

} // QuasiNewtonTest object
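
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `QuasiNewtonTest2` object is a minimal sketch (not part of the original
 *  test suite) showing how the optional constraint function 'g' may be supplied,
 *  turning min f(x) subject to g(x) <= 0 into the penalized problem handled by 'fg'.
 *  The object name and the linear constraint below are illustrative assumptions.
 *  > run-main scalation.minima.QuasiNewtonTest2
 */
object QuasiNewtonTest2 extends App
{
    def x0 = new VectorD (2)

    println ("\nMinimize: (x_0 - 3)^2 + (x_1 - 4)^2 + 1 subject to x_0 + x_1 <= 5")
    def f (x: VectorD): Double = (x(0) - 3.0) * (x(0) - 3.0) + (x(1) - 4.0) * (x(1) - 4.0) + 1.0
    def g (x: VectorD): Double = x(0) + x(1) - 5.0           // illustrative constraint: g(x) <= 0

    val solver = new QuasiNewton (f, g)                      // inequality constraint by default
    val x = solver.solve (x0)
    println ("solution x = " + x + " with objective value f(x) = " + f(x) + " and g(x) = " + g(x))

} // QuasiNewtonTest2 object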