//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 2.0
 *  @date    Mon Jul 12 16:13:47 EDT 2021
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    More-Thuente Line Search Algorithm
 *
 *  @see     www.ii.uib.no/~lennart/drgrad/More1994.pdf
 */

package scalation
package optimization

import scala.math.{abs, max, min, sqrt}

import scalation.mathstat.VectorD

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Return true if variables x and y have different signs.
 *  in C: #define fsigndiff(x, y) (*(x) * (*(y) / fabs(*(y))) < 0.)
 *  @param x  the first variable (double)
 *  @param y  the second variable (double)
 */
inline def fsigndiff (x: Double, y: Double): Boolean = x * (y / abs (y)) < 0.0
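//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Return true if the sufficient decrease (Armijo) condition, i.e., Wolfe condition 1,
 *  holds:  f(y) <= f(x) + c1 * a * (g(x) dot p).  This is an illustrative sketch added
 *  for documentation; the inherited `WolfeConditions` class is assumed to provide the
 *  actual test (wolfe1) used by `MoreThuenteLS` below.
 *  @param fx   the functional value at the current point x
 *  @param fy   the functional value at the candidate point y = x + p * a
 *  @param a    the step length (alpha)
 *  @param gxp  the directional derivative g(x) dot p at x
 *  @param c1   the sufficient decrease constant
 */
def wolfe1_sketch (fx: Double, fy: Double, a: Double, gxp: Double, c1: Double = 0.0001): Boolean =
    fy <= fx + c1 * a * gxp

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Return true if the strong curvature condition, i.e., Wolfe condition 2, holds:
 *  |g(y) dot p| <= c2 * |g(x) dot p|.  Also an illustrative sketch added for
 *  documentation (see wolfe1_sketch above).
 *  @param gyp  the directional derivative g(y) dot p at the candidate point y
 *  @param gxp  the directional derivative g(x) dot p at x
 *  @param c2   the curvature constant
 */
def wolfe2_sketch (gyp: Double, gxp: Double, c2: Double = 0.9): Boolean =
    abs (gyp) <= c2 * abs (gxp)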
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `MoreThuenteLS` class performs an inexact line search using the More-Thuente
 *  algorithm, which maintains a shrinking interval of uncertainty and uses interpolation
 *  (see update_trial_interval) to find a step length satisfying both Wolfe conditions.
 *  @param f        the objective/loss function to minimize (vector-to-scalar)
 *  @param g        the gradient of the objective/loss function (vector-to-vector)
 *  @param c1       constant for sufficient decrease (Wolfe condition 1: .0001 to .001)
 *  @param c2       constant for curvature/slope constraint (Wolfe condition 2: .9 to .8)
 *  @param minStep  the minimum allowed step length
 *  @param maxStep  the maximum allowed step length
 *  @param xtol     relative tolerance on the width of the interval of uncertainty
 *  @param ftol     tolerance used in the sufficient decrease (function value) test
 *  @param gtol     tolerance used in the directional derivative (curvature) test
 */
class MoreThuenteLS (f: FunctionV2S, g: FunctionV2V, c1: Double = 0.0001, c2: Double = 0.9,
                     minStep: Double = 0.01, maxStep: Double = 100.0,
                     xtol: Double = 0.0001, ftol: Double = 0.0001, gtol: Double = 0.0001)
      extends WolfeConditions (f, g, c1, c2):

    private val debug = debugf ("MoreThuenteLS", true)           // debug function
    private val flaw  = flawf ("MoreThuenteLS")                  // flaw function

    private val MAX_IT = 100                                     // maximum number of iterations (assumed limit)
    private var brackt = false                                   // not bracketed yet
    private var stmin  = 0.0                                     // minimum step length
    private var stmax  = Double.PositiveInfinity                 // maximum step length

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Set the minimum and maximum steps to correspond to the present interval of uncertainty.
     *  @param brackt  whether the minimizer has been bracketed
     *  @param stx     the step at the best point obtained so far
     *  @param sty     the step at the other endpoint of the interval
     *  @param step    the current step
     */
    inline def setInterval (brackt: Boolean, stx: Double, sty: Double, step: Double): Unit =
        if brackt then
            stmin = min (stx, sty)
            stmax = max (stx, sty)
        else
            stmin = stx
            stmax = step + 4.0 * (step - stx)
        end if
    end setInterval

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Perform an inexact Line Search (LS) on the function f to find an approximate
     *  local minimum from the point x, moving distance a (alpha) in the search direction
     *  p, such that both Wolfe Conditions are satisfied, returning the displacement a and
     *  the new point y = x + p * a.
     *  @param x     the current point
     *  @param fx    the functional value at x, f(x)
     *  @param p     the current search direction
     *  @param step  the initial step length
     */
    def lsearch (x: VectorD, fx: Double, p: VectorD, step: Double = 1.0): (Double, VectorD) =
        val gx  = g(x)                                           // gradient at x
        val gxp = gx dot p                                       // initial gradient in search direction
        var stage1 = true                                        // in stage 1

        if step <= 0.0 then flaw ("lsearch", "step size must be strictly positive")
        if gxp > 0 then flaw ("lsearch", "p must be a descent direction")

        // Initialize local variables (following the More-Thuente formulation).
        val finit  = fx                                          // initial functional value
        val dginit = gxp                                         // initial directional derivative
        val dgtest = ftol * dginit                               // sufficient decrease threshold slope
        var width      = maxStep - minStep                       // width of the interval of uncertainty
        var prev_width = 2.0 * width                             // previous width

        // stx, fx_, dgx hold the step, function value and directional derivative at the best step;
        // sty, fy_, dgy hold the same quantities at the other endpoint of the interval of uncertainty.
        var (stx, fx_, dgx) = (0.0, finit, dginit)
        var (sty, fy_, dgy) = (0.0, finit, dginit)

        var a      = step                                        // the current step (alpha)
        var y      = x                                           // the current point x + p * a
        var brackt = false                                       // has the minimizer been bracketed?
        var uinfo  = 0                                           // status from update_trial_interval

        var (go, it) = (true, 0)
        cfor (go && it < MAX_IT, it += 1) {

            setInterval (brackt, stx, sty, a)                    // set stmin/stmax to the present interval

            // Clip the step a to be in the range [minStep, maxStep].
            if a < minStep then a = minStep
            if a > maxStep then a = maxStep

            // If an unusual termination is to occur, then let step be the lowest point obtained so far.
            if (brackt && ((a <= stmin || stmax <= a) || it >= MAX_IT || uinfo != 0)) ||
               (brackt && (stmax - stmin <= xtol * stmax)) then
                a = stx
            end if

            y = x + p * a                                        // new point: x + search dir * step
            val fy  = f(y)                                       // new functional value
            val gy  = g(y)                                       // new gradient
            val gyp = gy dot p                                   // dotted with search direction p
            val wolf1  = wolfe1 (fx, fy, a, gxp)                 // is Wolfe Condition 1 satisfied?
            val ftest1 = finit + a * dgtest                      // sufficient decrease test value

            // Test for errors and convergence.
            if brackt && ((a <= stmin || stmax <= a) || uinfo != 0) then
                flaw ("lsearch", "rounding errors prevent further progress"); go = false
            if a == maxStep && fy <= ftest1 && gyp <= dgtest then
                flaw ("lsearch", "the step size is at its maximum value"); go = false
            if a == minStep && (ftest1 < fy || dgtest <= gyp) then
                flaw ("lsearch", "the step size is at its minimum value"); go = false
            if brackt && stmax - stmin <= xtol * stmax then
                flaw ("lsearch", "width of the interval of uncertainty is too small"); go = false
            if fy <= ftest1 && abs (gyp) <= gtol * (-dginit) then
                println ("lsearch: done as the sufficient decrease cond. and the directional deriv. cond. hold")
                go = false

            // In the first stage we seek a step for which the modified
            // function has a nonpositive value and nonnegative derivative.
            if stage1 && fy <= ftest1 && min (ftol, gtol) * dginit <= gyp then stage1 = false

            /***********
            // A modified function is used to predict the step only if we have not obtained a step
            // for which the modified function has a nonpositive function value and nonnegative derivative,
            // and if a lower function value has been obtained but the decrease is not sufficient.
            if stage1 && ftest1 < fy && fy <= fx_ then
                // define the modified function and derivative values
                val fm   = fy - a * dgtest
                val fxm  = fx_ - stx * dgtest
                val fym  = fy_ - sty * dgtest
                val gm   = gyp - dgtest
                val gxpm = dgx - dgtest
                val gypm = dgy - dgtest

                // Call update_trial_interval to update the interval of uncertainty and to compute the new step.
                uinfo = update_trial_interval (stx, fxm, gxpm, sty, fym, gypm, a, fm, gm, stmin, stmax, brackt)

                // Reset the function and gradient values for f.
                fx_ = fxm + stx * dgtest
                fy_ = fym + sty * dgtest
                dgx = dgxm + dgtest
                dgy = dgym + dgtest
            else
                // Call update_trial_interval to update the interval of uncertainty and to compute the new step.
                uinfo = update_trial_interval (stx, fx_, dgx, sty, fy_, dgy, a, fy, gyp, stmin, stmax, brackt)
            end if
            ***********/

            // Call update_trial_interval to update the interval of uncertainty and to compute the new step.
            // Note: currently only the status code comes back; the refined trial step is not yet propagated.
            uinfo = update_trial_interval (stx, fx_, dgx, sty, fy_, dgy, a, fy, gyp, stmin, stmax, brackt)

            // Force a sufficient decrease in the size of the interval of uncertainty.
            if brackt then
                if 0.66 * prev_width <= abs (sty - stx) then a = stx + 0.5 * (sty - stx)
                prev_width = width
                width = abs (sty - stx)
            end if
        } // cfor

        (a, y)                                                   // return the step length and the new point
    end lsearch

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Improve the current point x by moving in the search direction s.
     *  This overload matches the calling convention used elsewhere in this package
     *  (e.g., BFGS_LS.line_search_backtracking) and delegates to the vector-based lsearch.
     *  @param n      the dimension of the search space
     *  @param x_     the current location/point vector
     *  @param f_     the value of the objective function
     *  @param g_     the gradient vector
     *  @param s      the search direction
     *  @param step_  the current step size
     *  @param xp_    the previous location/point
     *  @param gp     the previous gradient vector
     *  @param wp     not used by this algorithm (use default)
     */
    def lsearch (n: Int, x_ : VectorD, f_ : Double, g_ : VectorD, s: VectorD, step_ : Double,
                 xp_ : VectorD, gp: VectorD, wp: VectorD = null): (Double, Double, Int) =
        debug ("lsearch", s"linesearch from x_ = $x_")
        val (a, y) = lsearch (x_, f_, s, step_)                  // delegate to the vector-based overload
        (a, f(y), 0)                                             // assumed return convention: (step, f at new point, status code)
    end lsearch

end MoreThuenteLS
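//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `wolfeSketchTest` main function is an illustrative example (an added sketch,
 *  not part of the original test suite).  It checks the Wolfe condition predicates
 *  sketched above for several step lengths along the steepest descent direction of
 *  f(x) = (x_0 - 3)^2 + (x_1 - 4)^2 + 1, i.e., the kind of acceptable step that a
 *  More-Thuente style line search is designed to locate.
 *  > runMain scalation.optimization.wolfeSketchTest
 */
@main def wolfeSketchTest (): Unit =

    def ff (x: VectorD): Double  = (x(0) - 3) * (x(0) - 3) + (x(1) - 4) * (x(1) - 4) + 1.0
    def gf (x: VectorD): VectorD = VectorD (2*x(0) - 6, 2*x(1) - 8)

    val x   = VectorD (0, 0)                                     // current point
    val p   = VectorD (6, 8)                                     // steepest descent direction -gf(x)
    val fx  = ff (x)                                             // functional value at x
    val gxp = gf (x) dot p                                       // directional derivative at x (negative)

    for a <- Seq (0.1, 0.25, 0.5, 0.75, 1.0) do
        val y   = x + p * a                                      // candidate point
        val gyp = gf (y) dot p                                   // directional derivative at y
        val w1  = wolfe1_sketch (fx, ff (y), a, gxp)             // sufficient decrease?
        val w2  = wolfe2_sketch (gyp, gxp)                       // strong curvature?
        println (s"a = $a: wolfe1 = $w1, wolfe2 = $w2")
    end for

end wolfeSketchTest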
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Update a safeguarded trial value and interval for the line search.
 *------------------------------------------------------------------------------
 *  The parameter x represents the step with the least function value and the
 *  parameter t represents the current (trial) step.  This function assumes that
 *  the derivative at x is negative in the direction of the step, i.e., the
 *  function decreases from x along the step.  If brackt is true, the minimizer
 *  has been bracketed in an interval of uncertainty with endpoints x and y.
 *------------------------------------------------------------------------------
 *  @see Jorge J. More and David J. Thuente.  Line search algorithms with
 *       guaranteed sufficient decrease.  ACM Transactions on Mathematical
 *       Software (TOMS), Vol 20, No 3, pp. 286-307, 1994.
 *------------------------------------------------------------------------------
 *  @param x_       the value of one endpoint (the best step so far)
 *  @param fx_      the value of f(x)
 *  @param dx_      the value of f'(x)
 *  @param y_       the value of the other endpoint
 *  @param fy_      the value of f(y)
 *  @param dy_      the value of f'(y)
 *  @param t_       the trial value t
 *  @param ft       the value of f(t)
 *  @param dt       the value of f'(t)
 *  @param tmin     the minimum value for the trial value t
 *  @param tmax     the maximum value for the trial value t
 *  @param brackt_  whether the trial value is bracketed
 *  @return the status value (zero indicates a normal termination)
 */
def update_trial_interval (x_ : Double, fx_ : Double, dx_ : Double,
                           y_ : Double, fy_ : Double, dy_ : Double,
                           t_ : Double, ft: Double, dt: Double,
                           tmin: Double, tmax: Double, brackt_ : Boolean): Int =
    var x  = x_
    var fx = fx_
    var dx = dx_
    var y  = y_
    var fy = fy_
    var dy = dy_
    var t  = t_
    var brackt = brackt_

    var bound = false
    val dsign = fsigndiff (dt, dx)                               // do f'(t) and f'(x) have different signs?
    var mc    = 0.0                                              // minimizer of an interpolated cubic
    var mq    = 0.0                                              // minimizer of an interpolated quadratic
    var newt  = 0.0                                              // new trial value

    // Check the input parameters for errors.
    if brackt then
        if t <= min (x, y) || max (x, y) <= t then
            return LBFGSERR_OUTOFINTERVAL.code                   // the trial value t is out of the interval
        if 0.0 <= dx * (t - x) then
            return LBFGSERR_INCREASEGRADIENT.code                // the function must decrease from x
        if tmax < tmin then
            return LBFGSERR_INCORRECT_TMINMAX.code               // incorrect tmin and tmax specified
    end if

    // Trial value selection.
    if fx < ft then
        /* Case 1: a higher function value.  The minimum is bracketed.
           If the cubic minimizer is closer to x than the quadratic one,
           the cubic one is taken, else the average of the minimizers is taken. */
        brackt = true
        bound  = true
        mc = cubic_minimizer (mc, x, fx, dx, t, ft, dt)
        mq = quad_minimizer (mq, x, fx, dx, t, ft)
        newt = if abs (mc - x) < abs (mq - x) then mc else mc + 0.5 * (mq - mc)
    else if dsign then
        /* Case 2: a lower function value and derivatives of opposite sign.
           The minimum is bracketed.  If the cubic minimizer is closer to x than
           the quadratic (secant) one, the cubic one is taken, else the quadratic
           one is taken. */
        brackt = true
        bound  = false
        mc = cubic_minimizer (mc, x, fx, dx, t, ft, dt)
        mq = quad_minimizer2 (mq, x, dx, t, dt)
        newt = if abs (mc - t) > abs (mq - t) then mc else mq
    else if abs (dt) < abs (dx) then
        /* Case 3: a lower function value, derivatives of the same sign, and the magnitude
           of the derivative decreases.  The cubic minimizer is only used if the cubic
           tends to infinity in the direction of the minimizer or if the minimum of the
           cubic is beyond t.  Otherwise the cubic minimizer is defined to be either tmin
           or tmax.  The quadratic (secant) minimizer is also computed and if the minimum
           is bracketed, then the minimizer closest to x is taken, else the one farthest
           away is taken. */
        bound = true
        mc = cubic_minimizer2 (mc, x, fx, dx, t, ft, dt, tmin, tmax)
        mq = quad_minimizer2 (mq, x, dx, t, dt)
        if brackt then
            newt = if abs (t - mc) < abs (t - mq) then mc else mq
        else
            newt = if abs (t - mc) > abs (t - mq) then mc else mq
        end if
    else
        /* Case 4: a lower function value, derivatives of the same sign, and the magnitude
           of the derivative does not decrease.  If the minimum is not bracketed, the step
           is either tmin or tmax, else the cubic minimizer is taken. */
        bound = false
        newt = if brackt then cubic_minimizer (newt, t, ft, dt, y, fy, dy)
               else if x < t then tmax
               else tmin
    end if

    /* Update the interval of uncertainty.  This update does not depend on the new step
       or the case analysis above.
       - Case a: if f(x) < f(t),                     x <- x, y <- t.
       - Case b: if f(t) <= f(x) && f'(t)*f'(x) > 0, x <- t, y <- y.
       - Case c: if f(t) <= f(x) && f'(t)*f'(x) < 0, x <- t, y <- x. */
    if fx < ft then                                              // Case a
        y = t; fy = ft; dy = dt
    else
        if dsign then                                            // Case c
            y = x; fy = fx; dy = dx
        end if
        x = t; fx = ft; dx = dt                                  // Cases b and c
    end if

    // Clip the new trial value in [tmin, tmax].
    if tmax < newt then newt = tmax
    if newt < tmin then newt = tmin

    // Redefine the new trial value if it is close to the upper bound of the interval.
    if brackt && bound then
        mq = x + 0.66 * (y - x)
        if x < y then
            if mq < newt then newt = mq
        else
            if newt < mq then newt = mq
        end if
    end if

    // Record the new trial value.  Note: x, fx, dx, y, fy, dy, t and brackt are local copies
    // here (the C original updates them through pointers), so only the status is returned.
    t = newt
    0                                                            // zero => normal termination
end update_trial_interval
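//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Illustrative sketches (added for reference) of the interpolation helpers that
 *  `update_trial_interval` relies on.  The actual cubic_minimizer, quad_minimizer,
 *  quad_minimizer2 and cubic_minimizer2 functions are assumed to be provided elsewhere
 *  in this package (their call sites above pass the result variable as the first
 *  argument, a carry-over from the C macros; these sketches drop that argument).
 *  They follow the CUBIC_MINIMIZER, QUAD_MINIMIZER and QUAD_MINIMIZER2 formulas in
 *  the liblbfgs C implementation of More-Thuente; the safeguarded cubic_minimizer2
 *  variant is not sketched.
 *  @param u  one endpoint, with function value fu and derivative du
 *  @param v  the other endpoint, with function value fv and derivative dv
 */
def cubic_minimizer_sketch (u: Double, fu: Double, du: Double,
                            v: Double, fv: Double, dv: Double): Double =
    val d     = v - u
    val theta = (fu - fv) * 3.0 / d + du + dv
    val s     = max (abs (theta), max (abs (du), abs (dv)))
    val a     = theta / s
    var gamma = s * sqrt (a * a - (du / s) * (dv / s))           // NaN if the cubic has no interior minimizer
    if v < u then gamma = -gamma
    val p = gamma - du + theta
    val q = gamma - du + gamma + dv
    u + (p / q) * d                                              // minimizer of the interpolating cubic

def quad_minimizer_sketch (u: Double, fu: Double, du: Double, v: Double, fv: Double): Double =
    val a = v - u
    u + du / ((fu - fv) / a + du) / 2.0 * a                      // minimizer of the interpolating quadratic

def quad_minimizer2_sketch (u: Double, du: Double, v: Double, dv: Double): Double =
    val a = u - v
    v + dv / (dv - du) * a                                       // secant step using the two derivatives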
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Return the partial 1-norm of vector x over the index range [start, n).
 *  @param x      the vector whose 1-norm is sought
 *  @param start  the start index (inclusive)
 *  @param n      the end index (exclusive)
 */
def owlqn_x1norm (x: VectorD, start: Int, n: Int): Double =
    var norm = 0.0
    for i <- start until n do norm += abs (x(i))
    norm
end owlqn_x1norm

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Project the direction vector d onto the orthant given by the sign vector,
 *  zeroing any component whose sign disagrees (used by the OWL-QN method).
 *  @param d      the direction vector (updated in place)
 *  @param sign   the sign vector defining the orthant
 *  @param start  the start index (inclusive)
 *  @param end_   the end index (exclusive)
 */
def owlqn_project (d: VectorD, sign: VectorD, start: Int, end_ : Int): Unit =
    for i <- start until end_ do
        if d(i) * sign(i) <= 0.0 then d(i) = 0.0
    end for
end owlqn_project
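//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `owlqn_projectTest` main function is a small illustrative test (an added sketch)
 *  for the OWL-QN helpers defined above:  owlqn_x1norm computes a partial 1-norm and
 *  owlqn_project zeros the components of a direction vector that leave the orthant
 *  given by the sign vector.
 *  > runMain scalation.optimization.owlqn_projectTest
 */
@main def owlqn_projectTest (): Unit =

    val x    = VectorD (1.0, -2.0, 3.0, -4.0)                    // a sample point
    val sign = VectorD (1.0, -1.0, 1.0, -1.0)                    // its orthant (sign of each component)
    val d    = VectorD (0.5, 1.0, -2.0, -0.5)                    // a candidate search direction

    println (s"owlqn_x1norm (x, 0, ${x.dim}) = ${owlqn_x1norm (x, 0, x.dim)}")    // expect 10.0
    owlqn_project (d, sign, 0, d.dim)                            // zero components leaving the orthant
    println (s"projected d = $d")                                // expect (0.5, 0.0, 0.0, -0.5)

end owlqn_projectTest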
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `bFGS_LSTest` main function tests the `BFGS_LS` object.
 *  > runMain scalation.optimization.bFGS_LSTest
 */
@main def bFGS_LSTest (): Unit =

    println ("\nMinimize: (x_0 - 3)^2 + (x_1 - 4)^2 + 1")

    def ff (x: VectorD): Double  = (x(0) - 3) * (x(0) - 3) + (x(1) - 4) * (x(1) - 4) + 1.0
    def gf (x: VectorD): VectorD = VectorD (2*x(0) - 6, 2*x(1) - 8)

    BFGS_LS.set_ff (ff)
    BFGS_LS.set_gf (gf)

    val n    = 2                                                 // the dimension of the search space
    val x    = VectorD (0, 0)                                    // the current location/point vector
    val f    = 26.0                                              // the objective function value f(x)
    val g    = VectorD (-6, -8)                                  // the gradient vector at x
    val s    = VectorD (6, 8)                                    // the search direction (e.g., opposite g)
    val step = 0.2                                               // the initial step size
    val xp   = VectorD (0, 0)                                    // the previous location/point vector
    val gp   = VectorD (0, 0)                                    // the previous gradient vector

    val code = BFGS_LS.line_search_backtracking (n, x, f, g, s, step, xp, gp)

    println (s"optimal solution x = $x with an objective value ff(x) = ${ff(x)}, with status code $code")

end bFGS_LSTest
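//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `backtrackingSketchTest` main function is an illustrative sketch (added; it is
 *  not the actual BFGS_LS.line_search_backtracking implementation) of a basic
 *  backtracking (Armijo) line search, shown to clarify what the test above exercises:
 *  repeatedly shrink the step until the sufficient decrease condition holds.
 *  > runMain scalation.optimization.backtrackingSketchTest
 */
@main def backtrackingSketchTest (): Unit =

    def ff (x: VectorD): Double  = (x(0) - 3) * (x(0) - 3) + (x(1) - 4) * (x(1) - 4) + 1.0
    def gf (x: VectorD): VectorD = VectorD (2*x(0) - 6, 2*x(1) - 8)

    val c1     = 0.0001                                          // sufficient decrease constant
    val shrink = 0.5                                             // step reduction factor
    val x      = VectorD (0, 0)                                  // starting point
    val s      = VectorD (6, 8)                                  // search direction (negative gradient at x)
    val gxs    = gf (x) dot s                                    // directional derivative at x

    var a  = 1.6                                                 // initial (deliberately too large) step
    var it = 0
    while ff (x + s * a) > ff (x) + c1 * a * gxs && it < 20 do
        a *= shrink                                              // backtrack: shrink the step
        it += 1
    end while

    println (s"backtracking accepted step a = $a after $it reduction(s), f = ${ff (x + s * a)}")

end backtrackingSketchTest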