//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 2.0
 *  @date    Mon Jul 12 16:13:47 EDT 2021
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    More-Thuente Line Search Algorithm
 *
 *  @see     www.ii.uib.no/~lennart/drgrad/More1994.pdf
 */

package scalation
package optimization

import scala.math.{abs, max, min, sqrt}

import scalation.mathstat.VectorD

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Return true if variables x and y have different signs.
 *  in C: #define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
 *  @param x  the first variable (double)
 *  @param y  the second variable (double)
 */
inline def fsigndiff (x: Double, y: Double): Boolean = x * (y / abs (y)) < 0.0
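//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Return true if the sufficient decrease (Armijo) condition, i.e., Wolfe condition 1,
 *  holds:  f(y) <= f(x) + c1 * a * (g(x) dot p).  This is an illustrative sketch added
 *  for documentation; the inherited `WolfeConditions` class is assumed to provide the
 *  actual test (wolfe1) used by `MoreThuenteLS` below.
 *  @param fx   the functional value at the current point x
 *  @param fy   the functional value at the candidate point y = x + p * a
 *  @param a    the step length (alpha)
 *  @param gxp  the directional derivative g(x) dot p at x
 *  @param c1   the sufficient decrease constant
 */
def wolfe1_sketch (fx: Double, fy: Double, a: Double, gxp: Double, c1: Double = 0.0001): Boolean =
    fy <= fx + c1 * a * gxp

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Return true if the strong curvature condition, i.e., Wolfe condition 2, holds:
 *  |g(y) dot p| <= c2 * |g(x) dot p|.  Also an illustrative sketch added for
 *  documentation (see wolfe1_sketch above).
 *  @param gyp  the directional derivative g(y) dot p at the candidate point y
 *  @param gxp  the directional derivative g(x) dot p at x
 *  @param c2   the curvature constant
 */
def wolfe2_sketch (gyp: Double, gxp: Double, c2: Double = 0.9): Boolean =
    abs (gyp) <= c2 * abs (gxp)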
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `MoreThuenteLS` class performs an inexact line search using the More-Thuente
 *  algorithm, which maintains a shrinking interval of uncertainty and uses interpolation
 *  (see update_trial_interval) to find a step length satisfying both Wolfe conditions.
 *  @param f        the objective/loss function to minimize (vector-to-scalar)
 *  @param g        the gradient of the objective/loss function (vector-to-vector)
 *  @param c1       constant for sufficient decrease (Wolfe condition 1: .0001 to .001)
 *  @param c2       constant for curvature/slope constraint (Wolfe condition 2: .9 to .8)
 *  @param minStep  the minimum allowed step length
 *  @param maxStep  the maximum allowed step length
 *  @param xtol     relative tolerance on the width of the interval of uncertainty
 *  @param ftol     tolerance used in the sufficient decrease (function value) test
 *  @param gtol     tolerance used in the directional derivative (curvature) test
 */
class MoreThuenteLS (f: FunctionV2S, g: FunctionV2V, c1: Double = 0.0001, c2: Double = 0.9,
                     minStep: Double = 0.01, maxStep: Double = 100.0,
                     xtol: Double = 0.0001, ftol: Double = 0.0001, gtol: Double = 0.0001)
      extends WolfeConditions (f, g, c1, c2):

    private val debug = debugf ("MoreThuenteLS", true)           // debug function
    private val flaw  = flawf ("MoreThuenteLS")                  // flaw function

    private val MAX_IT = 100                                     // maximum number of iterations (assumed limit)
    private var brackt = false                                   // not bracketed yet
    private var stmin  = 0.0                                     // minimum step length
    private var stmax  = Double.PositiveInfinity                 // maximum step length

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Set the minimum and maximum steps to correspond to the present interval of uncertainty.
     *  @param brackt  whether the minimizer has been bracketed
     *  @param stx     the step at the best point obtained so far
     *  @param sty     the step at the other endpoint of the interval
     *  @param step    the current step
     */
    inline def setInterval (brackt: Boolean, stx: Double, sty: Double, step: Double): Unit =
        if brackt then
            stmin = min (stx, sty)
            stmax = max (stx, sty)
        else
            stmin = stx
            stmax = step + 4.0 * (step - stx)
        end if
    end setInterval

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Perform an inexact Line Search (LS) on the function f to find an approximate
     *  local minimum from the point x, moving distance a (alpha) in the search direction
     *  p, such that both Wolfe Conditions are satisfied, returning the displacement a and
     *  the new point y = x + p * a.
     *  @param x     the current point
     *  @param fx    the functional value at x, f(x)
     *  @param p     the current search direction
     *  @param step  the initial step length
     */
    def lsearch (x: VectorD, fx: Double, p: VectorD, step: Double = 1.0): (Double, VectorD) =
        val gx  = g(x)                                           // gradient at x
        val gxp = gx dot p                                       // initial gradient in search direction
        var stage1 = true                                        // in stage 1

        if step <= 0.0 then flaw ("lsearch", "step size must be strictly positive")
        if gxp > 0 then flaw ("lsearch", "p must be a descent direction")

        // Initialize local variables (following the More-Thuente formulation).
        val finit  = fx                                          // initial functional value
        val dginit = gxp                                         // initial directional derivative
        val dgtest = ftol * dginit                               // sufficient decrease threshold slope
        var width      = maxStep - minStep                       // width of the interval of uncertainty
        var prev_width = 2.0 * width                             // previous width

        // stx, fx_, dgx hold the step, function value and directional derivative at the best step;
        // sty, fy_, dgy hold the same quantities at the other endpoint of the interval of uncertainty.
        var (stx, fx_, dgx) = (0.0, finit, dginit)
        var (sty, fy_, dgy) = (0.0, finit, dginit)

        var a      = step                                        // the current step (alpha)
        var y      = x                                           // the current point x + p * a
        var brackt = false                                       // has the minimizer been bracketed?
        var uinfo  = 0                                           // status from update_trial_interval

        var (go, it) = (true, 0)
        cfor (go && it < MAX_IT, it += 1) {

            setInterval (brackt, stx, sty, a)                    // set stmin/stmax to the present interval

            // Clip the step a to be in the range [minStep, maxStep].
            if a < minStep then a = minStep
            if a > maxStep then a = maxStep

            // If an unusual termination is to occur, then let step be the lowest point obtained so far.
            if (brackt && ((a <= stmin || stmax <= a) || it >= MAX_IT || uinfo != 0)) ||
               (brackt && (stmax - stmin <= xtol * stmax)) then
                a = stx
            end if

            y = x + p * a                                        // new point: x + search dir * step
            val fy  = f(y)                                       // new functional value
            val gy  = g(y)                                       // new gradient
            val gyp = gy dot p                                   // dotted with search direction p
            val wolf1  = wolfe1 (fx, fy, a, gxp)                 // is Wolfe Condition 1 satisfied?
            val ftest1 = finit + a * dgtest                      // sufficient decrease test value

            // Test for errors and convergence.
            if brackt && ((a <= stmin || stmax <= a) || uinfo != 0) then
                flaw ("lsearch", "rounding errors prevent further progress"); go = false
            if a == maxStep && fy <= ftest1 && gyp <= dgtest then
                flaw ("lsearch", "the step size is at its maximum value"); go = false
            if a == minStep && (ftest1 < fy || dgtest <= gyp) then
                flaw ("lsearch", "the step size is at its minimum value"); go = false
            if brackt && stmax - stmin <= xtol * stmax then
                flaw ("lsearch", "width of the interval of uncertainty is too small"); go = false
            if fy <= ftest1 && abs (gyp) <= gtol * (-dginit) then
                println ("lsearch: done as the sufficient decrease cond. and the directional deriv. cond. hold")
                go = false

            // In the first stage we seek a step for which the modified
            // function has a nonpositive value and nonnegative derivative.
            if stage1 && fy <= ftest1 && min (ftol, gtol) * dginit <= gyp then stage1 = false

            /***********
            // A modified function is used to predict the step only if we have not obtained a step
            // for which the modified function has a nonpositive function value and nonnegative derivative,
            // and if a lower function value has been obtained but the decrease is not sufficient.
            if stage1 && ftest1 < fy && fy <= fx_ then
                // define the modified function and derivative values
                val fm   = fy - a * dgtest
                val fxm  = fx_ - stx * dgtest
                val fym  = fy_ - sty * dgtest
                val gm   = gyp - dgtest
                val gxpm = dgx - dgtest
                val gypm = dgy - dgtest

                // Call update_trial_interval to update the interval of uncertainty and to compute the new step.
                uinfo = update_trial_interval (stx, fxm, gxpm, sty, fym, gypm, a, fm, gm, stmin, stmax, brackt)

                // Reset the function and gradient values for f.
                fx_ = fxm + stx * dgtest
                fy_ = fym + sty * dgtest
                dgx = dgxm + dgtest
                dgy = dgym + dgtest
            else
                // Call update_trial_interval to update the interval of uncertainty and to compute the new step.
                uinfo = update_trial_interval (stx, fx_, dgx, sty, fy_, dgy, a, fy, gyp, stmin, stmax, brackt)
            end if
            ***********/

            // Call update_trial_interval to update the interval of uncertainty and to compute the new step.
            // Note: currently only the status code comes back; the refined trial step is not yet propagated.
            uinfo = update_trial_interval (stx, fx_, dgx, sty, fy_, dgy, a, fy, gyp, stmin, stmax, brackt)

            // Force a sufficient decrease in the size of the interval of uncertainty.
            if brackt then
                if 0.66 * prev_width <= abs (sty - stx) then a = stx + 0.5 * (sty - stx)
                prev_width = width
                width = abs (sty - stx)
            end if
        } // cfor

        (a, y)                                                   // return the step length and the new point
    end lsearch

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Improve the current point x by moving in the search direction s.
     *  This overload matches the calling convention used elsewhere in this package
     *  (e.g., BFGS_LS.line_search_backtracking) and delegates to the vector-based lsearch.
     *  @param n      the dimension of the search space
     *  @param x_     the current location/point vector
     *  @param f_     the value of the objective function
     *  @param g_     the gradient vector
     *  @param s      the search direction
     *  @param step_  the current step size
     *  @param xp_    the previous location/point
     *  @param gp     the previous gradient vector
     *  @param wp     not used by this algorithm (use default)
     */
    def lsearch (n: Int, x_ : VectorD, f_ : Double, g_ : VectorD, s: VectorD, step_ : Double,
                 xp_ : VectorD, gp: VectorD, wp: VectorD = null): (Double, Double, Int) =
        debug ("lsearch", s"linesearch from x_ = $x_")
        val (a, y) = lsearch (x_, f_, s, step_)                  // delegate to the vector-based overload
        (a, f(y), 0)                                             // assumed return convention: (step, f at new point, status code)
    end lsearch

end MoreThuenteLS
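//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `wolfeSketchTest` main function is an illustrative example (an added sketch,
 *  not part of the original test suite).  It checks the Wolfe condition predicates
 *  sketched above for several step lengths along the steepest descent direction of
 *  f(x) = (x_0 - 3)^2 + (x_1 - 4)^2 + 1, i.e., the kind of acceptable step that a
 *  More-Thuente style line search is designed to locate.
 *  > runMain scalation.optimization.wolfeSketchTest
 */
@main def wolfeSketchTest (): Unit =

    def ff (x: VectorD): Double  = (x(0) - 3) * (x(0) - 3) + (x(1) - 4) * (x(1) - 4) + 1.0
    def gf (x: VectorD): VectorD = VectorD (2*x(0) - 6, 2*x(1) - 8)

    val x   = VectorD (0, 0)                                     // current point
    val p   = VectorD (6, 8)                                     // steepest descent direction -gf(x)
    val fx  = ff (x)                                             // functional value at x
    val gxp = gf (x) dot p                                       // directional derivative at x (negative)

    for a <- Seq (0.1, 0.25, 0.5, 0.75, 1.0) do
        val y   = x + p * a                                      // candidate point
        val gyp = gf (y) dot p                                   // directional derivative at y
        val w1  = wolfe1_sketch (fx, ff (y), a, gxp)             // sufficient decrease?
        val w2  = wolfe2_sketch (gyp, gxp)                       // strong curvature?
        println (s"a = $a: wolfe1 = $w1, wolfe2 = $w2")
    end for

end wolfeSketchTest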
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Update a safeguarded trial value and interval for the line search.
 *------------------------------------------------------------------------------
 *  The parameter x represents the step with the least function value and the
 *  parameter t represents the current (trial) step.  This function assumes that
 *  the derivative at x is negative in the direction of the step, i.e., the
 *  function decreases from x along the step.  If brackt is true, the minimizer
 *  has been bracketed in an interval of uncertainty with endpoints x and y.
 *------------------------------------------------------------------------------
 *  @see Jorge J. More and David J. Thuente.  Line search algorithms with
 *       guaranteed sufficient decrease.  ACM Transactions on Mathematical
 *       Software (TOMS), Vol 20, No 3, pp. 286-307, 1994.
 *------------------------------------------------------------------------------
 *  @param x_       the value of one endpoint (the best step so far)
 *  @param fx_      the value of f(x)
 *  @param dx_      the value of f'(x)
 *  @param y_       the value of the other endpoint
 *  @param fy_      the value of f(y)
 *  @param dy_      the value of f'(y)
 *  @param t_       the trial value t
 *  @param ft       the value of f(t)
 *  @param dt       the value of f'(t)
 *  @param tmin     the minimum value for the trial value t
 *  @param tmax     the maximum value for the trial value t
 *  @param brackt_  whether the trial value is bracketed
 *  @return the status value (zero indicates a normal termination)
 */
def update_trial_interval (x_ : Double, fx_ : Double, dx_ : Double,
                           y_ : Double, fy_ : Double, dy_ : Double,
                           t_ : Double, ft: Double, dt: Double,
                           tmin: Double, tmax: Double, brackt_ : Boolean): Int =
    var x  = x_
    var fx = fx_
    var dx = dx_
    var y  = y_
    var fy = fy_
    var dy = dy_
    var t  = t_
    var brackt = brackt_

    var bound = false
    val dsign = fsigndiff (dt, dx)                               // do f'(t) and f'(x) have different signs?
    var mc    = 0.0                                              // minimizer of an interpolated cubic
    var mq    = 0.0                                              // minimizer of an interpolated quadratic
    var newt  = 0.0                                              // new trial value

    // Check the input parameters for errors.
    if brackt then
        if t <= min (x, y) || max (x, y) <= t then
            return LBFGSERR_OUTOFINTERVAL.code                   // the trial value t is out of the interval
        if 0.0 <= dx * (t - x) then
            return LBFGSERR_INCREASEGRADIENT.code                // the function must decrease from x
        if tmax < tmin then
            return LBFGSERR_INCORRECT_TMINMAX.code               // incorrect tmin and tmax specified
    end if

    // Trial value selection.
    if fx < ft then
        /* Case 1: a higher function value.  The minimum is bracketed.
           If the cubic minimizer is closer to x than the quadratic one,
           the cubic one is taken, else the average of the minimizers is taken. */
        brackt = true
        bound  = true
        mc = cubic_minimizer (mc, x, fx, dx, t, ft, dt)
        mq = quad_minimizer (mq, x, fx, dx, t, ft)
        newt = if abs (mc - x) < abs (mq - x) then mc else mc + 0.5 * (mq - mc)
    else if dsign then
        /* Case 2: a lower function value and derivatives of opposite sign.
           The minimum is bracketed.  If the cubic minimizer is closer to x than
           the quadratic (secant) one, the cubic one is taken, else the quadratic
           one is taken. */
        brackt = true
        bound  = false
        mc = cubic_minimizer (mc, x, fx, dx, t, ft, dt)
        mq = quad_minimizer2 (mq, x, dx, t, dt)
        newt = if abs (mc - t) > abs (mq - t) then mc else mq
    else if abs (dt) < abs (dx) then
        /* Case 3: a lower function value, derivatives of the same sign, and the magnitude
           of the derivative decreases.  The cubic minimizer is only used if the cubic
           tends to infinity in the direction of the minimizer or if the minimum of the
           cubic is beyond t.  Otherwise the cubic minimizer is defined to be either tmin
           or tmax.  The quadratic (secant) minimizer is also computed and if the minimum
           is bracketed, then the minimizer closest to x is taken, else the one farthest
           away is taken. */
        bound = true
        mc = cubic_minimizer2 (mc, x, fx, dx, t, ft, dt, tmin, tmax)
        mq = quad_minimizer2 (mq, x, dx, t, dt)
        if brackt then
            newt = if abs (t - mc) < abs (t - mq) then mc else mq
        else
            newt = if abs (t - mc) > abs (t - mq) then mc else mq
        end if
    else
        /* Case 4: a lower function value, derivatives of the same sign, and the magnitude
           of the derivative does not decrease.  If the minimum is not bracketed, the step
           is either tmin or tmax, else the cubic minimizer is taken. */
        bound = false
        newt = if brackt then cubic_minimizer (newt, t, ft, dt, y, fy, dy)
               else if x < t then tmax
               else tmin
    end if

    /* Update the interval of uncertainty.  This update does not depend on the new step
       or the case analysis above.
       - Case a: if f(x) < f(t),                     x <- x, y <- t.
       - Case b: if f(t) <= f(x) && f'(t)*f'(x) > 0, x <- t, y <- y.
       - Case c: if f(t) <= f(x) && f'(t)*f'(x) < 0, x <- t, y <- x. */
    if fx < ft then                                              // Case a
        y = t; fy = ft; dy = dt
    else
        if dsign then                                            // Case c
            y = x; fy = fx; dy = dx
        end if
        x = t; fx = ft; dx = dt                                  // Cases b and c
    end if

    // Clip the new trial value in [tmin, tmax].
    if tmax < newt then newt = tmax
    if newt < tmin then newt = tmin

    // Redefine the new trial value if it is close to the upper bound of the interval.
    if brackt && bound then
        mq = x + 0.66 * (y - x)
        if x < y then
            if mq < newt then newt = mq
        else
            if newt < mq then newt = mq
        end if
    end if

    // Record the new trial value.  Note: x, fx, dx, y, fy, dy, t and brackt are local copies
    // here (the C original updates them through pointers), so only the status is returned.
    t = newt
    0                                                            // zero => normal termination
end update_trial_interval
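//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Illustrative sketches (added for reference) of the interpolation helpers that
 *  `update_trial_interval` relies on.  The actual cubic_minimizer, quad_minimizer,
 *  quad_minimizer2 and cubic_minimizer2 functions are assumed to be provided elsewhere
 *  in this package (their call sites above pass the result variable as the first
 *  argument, a carry-over from the C macros; these sketches drop that argument).
 *  They follow the CUBIC_MINIMIZER, QUAD_MINIMIZER and QUAD_MINIMIZER2 formulas in
 *  the liblbfgs C implementation of More-Thuente; the safeguarded cubic_minimizer2
 *  variant is not sketched.
 *  @param u  one endpoint, with function value fu and derivative du
 *  @param v  the other endpoint, with function value fv and derivative dv
 */
def cubic_minimizer_sketch (u: Double, fu: Double, du: Double,
                            v: Double, fv: Double, dv: Double): Double =
    val d     = v - u
    val theta = (fu - fv) * 3.0 / d + du + dv
    val s     = max (abs (theta), max (abs (du), abs (dv)))
    val a     = theta / s
    var gamma = s * sqrt (a * a - (du / s) * (dv / s))           // NaN if the cubic has no interior minimizer
    if v < u then gamma = -gamma
    val p = gamma - du + theta
    val q = gamma - du + gamma + dv
    u + (p / q) * d                                              // minimizer of the interpolating cubic

def quad_minimizer_sketch (u: Double, fu: Double, du: Double, v: Double, fv: Double): Double =
    val a = v - u
    u + du / ((fu - fv) / a + du) / 2.0 * a                      // minimizer of the interpolating quadratic

def quad_minimizer2_sketch (u: Double, du: Double, v: Double, dv: Double): Double =
    val a = u - v
    v + dv / (dv - du) * a                                       // secant step using the two derivatives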
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Return the partial 1-norm of vector x over the index range [start, n).
 *  @param x      the vector whose 1-norm is sought
 *  @param start  the start index (inclusive)
 *  @param n      the end index (exclusive)
 */
def owlqn_x1norm (x: VectorD, start: Int, n: Int): Double =
    var norm = 0.0
    for i <- start until n do norm += abs (x(i))
    norm
end owlqn_x1norm

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Project the direction vector d onto the orthant given by the sign vector,
 *  zeroing any component whose sign disagrees (used by the OWL-QN method).
 *  @param d      the direction vector (updated in place)
 *  @param sign   the sign vector defining the orthant
 *  @param start  the start index (inclusive)
 *  @param end_   the end index (exclusive)
 */
def owlqn_project (d: VectorD, sign: VectorD, start: Int, end_ : Int): Unit =
    for i <- start until end_ do
        if d(i) * sign(i) <= 0.0 then d(i) = 0.0
    end for
end owlqn_project
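//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `owlqn_projectTest` main function is a small illustrative test (an added sketch)
 *  for the OWL-QN helpers defined above:  owlqn_x1norm computes a partial 1-norm and
 *  owlqn_project zeros the components of a direction vector that leave the orthant
 *  given by the sign vector.
 *  > runMain scalation.optimization.owlqn_projectTest
 */
@main def owlqn_projectTest (): Unit =

    val x    = VectorD (1.0, -2.0, 3.0, -4.0)                    // a sample point
    val sign = VectorD (1.0, -1.0, 1.0, -1.0)                    // its orthant (sign of each component)
    val d    = VectorD (0.5, 1.0, -2.0, -0.5)                    // a candidate search direction

    println (s"owlqn_x1norm (x, 0, ${x.dim}) = ${owlqn_x1norm (x, 0, x.dim)}")    // expect 10.0
    owlqn_project (d, sign, 0, d.dim)                            // zero components leaving the orthant
    println (s"projected d = $d")                                // expect (0.5, 0.0, 0.0, -0.5)

end owlqn_projectTest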
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `bFGS_LSTest` main function tests the `BFGS_LS` object.
 *  > runMain scalation.optimization.bFGS_LSTest
 */
@main def bFGS_LSTest (): Unit =

    println ("\nMinimize: (x_0 - 3)^2 + (x_1 - 4)^2 + 1")

    def ff (x: VectorD): Double  = (x(0) - 3) * (x(0) - 3) + (x(1) - 4) * (x(1) - 4) + 1.0
    def gf (x: VectorD): VectorD = VectorD (2*x(0) - 6, 2*x(1) - 8)

    BFGS_LS.set_ff (ff)
    BFGS_LS.set_gf (gf)

    val n    = 2                                                 // the dimension of the search space
    val x    = VectorD (0, 0)                                    // the current location/point vector
    val f    = 26.0                                              // the objective function value f(x)
    val g    = VectorD (-6, -8)                                  // the gradient vector at x
    val s    = VectorD (6, 8)                                    // the search direction (e.g., opposite g)
    val step = 0.2                                               // the initial step size
    val xp   = VectorD (0, 0)                                    // the previous location/point vector
    val gp   = VectorD (0, 0)                                    // the previous gradient vector

    val code = BFGS_LS.line_search_backtracking (n, x, f, g, s, step, xp, gp)

    println (s"optimal solution x = $x with an objective value ff(x) = ${ff(x)}, with status code $code")

end bFGS_LSTest
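//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `backtrackingSketchTest` main function is an illustrative sketch (added; it is
 *  not the actual BFGS_LS.line_search_backtracking implementation) of a basic
 *  backtracking (Armijo) line search, shown to clarify what the test above exercises:
 *  repeatedly shrink the step until the sufficient decrease condition holds.
 *  > runMain scalation.optimization.backtrackingSketchTest
 */
@main def backtrackingSketchTest (): Unit =

    def ff (x: VectorD): Double  = (x(0) - 3) * (x(0) - 3) + (x(1) - 4) * (x(1) - 4) + 1.0
    def gf (x: VectorD): VectorD = VectorD (2*x(0) - 6, 2*x(1) - 8)

    val c1     = 0.0001                                          // sufficient decrease constant
    val shrink = 0.5                                             // step reduction factor
    val x      = VectorD (0, 0)                                  // starting point
    val s      = VectorD (6, 8)                                  // search direction (negative gradient at x)
    val gxs    = gf (x) dot s                                    // directional derivative at x

    var a  = 1.6                                                 // initial (deliberately too large) step
    var it = 0
    while ff (x + s * a) > ff (x) + c1 * a * gxs && it < 20 do
        a *= shrink                                              // backtrack: shrink the step
        it += 1
    end while

    println (s"backtracking accepted step a = $a after $it reduction(s), f = ${ff (x + s * a)}")

end backtrackingSketchTest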