//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 2.0
 *  @date    Thu Jun 29 20:43:44 EDT 2023
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    Wolfe Conditions for Line Search Algorithms
 *
 *  @see reference.wolfram.com/language/tutorial/UnconstrainedOptimizationStepControl.html
 *  @see pages.cs.wisc.edu/~ferris/cs730/chap3.pdf
 *  @see Limited memory BFGS for Nonsmooth Optimization
 */

package scalation
package optimization

import scala.math.abs

import scalation.calculus.Differential.∇
import scalation.mathstat._

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `WolfeConditions` class specifies conditions for inexact line search
 *  algorithms to find an acceptable/near-minimal point along a given search
 *  direction p that exhibits
 *  (1) SDC: sufficient decrease (f(x) is enough less than f(0)) and
 *  (2) CC:  the slope at x is less steep than the slope at 0.
 *  That is, the line search looks for a value for x satisfying the two Wolfe conditions.
 *
 *      f(x) <= f(0) + c1 * f'(0) * x        Wolfe condition 1 (Armijo condition)
 *      f'(x) >= c2 * f'(0)                  Wolfe condition 2 (weak version, more robust)
 *      |f'(x)| <= c2 * |f'(0)|              Wolfe condition 2 (strong version)
 *
 *  Note: the c1 and c2 defaults below are intended for Quasi-Newton methods
 *  such as BFGS or L-BFGS.
 *
 *  @param f   the objective/loss function to minimize (vector-to-scalar)
 *  @param g   the gradient of the objective/loss function (vector-to-vector)
 *  @param c1  constant for sufficient decrease (Wolfe condition 1: .0001 to .001)
 *  @param c2  constant for curvature/slope constraint (Wolfe condition 2: .8 to .9)
 */
class WolfeConditions (f: FunctionV2S, var g: FunctionV2V,
                       c1: Double = 0.0001, c2: Double = 0.9):

    if g == null then g = (x: VectorD) => ∇ (f)(x)           // no gradient function given => use numerical gradient

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return whether Wolfe condition 1, the Sufficient Decrease Condition (SDC), is satisfied.
     *  @param fx   the functional value of the original point
     *  @param fy   the functional value of the new point y = x + p * a
     *  @param a    the displacement in the search direction
     *  @param gxp  the dot product of the gradient vector g(x) and the search vector p
     */
    inline def wolfe1 (fx: Double, fy: Double, a: Double, gxp: Double): Boolean =
        fy <= fx + c1 * a * gxp

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return whether weak Wolfe condition 2, the Curvature Condition (CC), is satisfied.
     *  @param p    the search direction vector
     *  @param gy   the gradient at new point y = x + p * a
     *  @param gxp  the dot product of the gradient vector g(x) and the search vector p
     */
    inline def wolfe2 (p: VectorD, gy: VectorD, gxp: Double): Boolean =
        (gy dot p) >= c2 * gxp
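    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Sketch of a hypothetical convenience method (not part of the original API):
     *  return whether both weak Wolfe conditions (SDC and CC) hold at the new point
     *  y = x + p * a, combining wolfe1 and wolfe2 as an inexact line search would.
     *  @param fx   the functional value of the original point
     *  @param fy   the functional value of the new point y = x + p * a
     *  @param a    the displacement in the search direction
     *  @param p    the search direction vector
     *  @param gy   the gradient at new point y = x + p * a
     *  @param gxp  the dot product of the gradient vector g(x) and the search vector p
     */
    inline def bothWolfe (fx: Double, fy: Double, a: Double,
                          p: VectorD, gy: VectorD, gxp: Double): Boolean =
        wolfe1 (fx, fy, a, gxp) && wolfe2 (p, gy, gxp)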
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return whether strong Wolfe condition 2, the Curvature Condition (CC), is satisfied.
     *  @param p    the search direction vector
     *  @param gy   the gradient at new point y = x + p * a
     *  @param gxp  the dot product of the gradient vector g(x) and the search vector p
     */
    inline def wolfe2s (p: VectorD, gy: VectorD, gxp: Double): Boolean =
        abs (gy dot p) <= c2 * abs (gxp)

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Perform an inexact Line Search (LS) on the function f to find an approximate
     *  local minimum from the point x moving distance a (alpha) in the search direction
     *  p, which satisfies both Wolfe Conditions, returning the displacement a and the
     *  new point y = x + p * a.
     *  @param x     the current point
     *  @param fx    the functional value at x, f(x)
     *  @param p     the current search direction
     *  @param step  the initial step length
     */
    def lsearch (x: VectorD, fx: Double, p: VectorD, step: Double = 1.0): (Double, VectorD) =
        ???                                                  // not implemented here; one possible approach is sketched below the test

end WolfeConditions


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `wolfeConditionsTest` main function is used to test the `WolfeConditions` class.
 *  > runMain scalation.optimization.wolfeConditionsTest
 */
@main def wolfeConditionsTest (): Unit =

    val aa = new VectorD (60)
    val ff = new VectorD (60)
    val ww = new VectorD (60)

    val x = VectorD (0.0, 0.0)                               // zero vector, the origin
    val p = VectorD (1.0, 1.0)                               // direction to search in

    banner ("Problem: (x_0 - 2)^2 + (x_1 - 3)^2 + 1")

    def f (x: VectorD): Double  = (x(0) - 2.0) * (x(0) - 2.0) + (x(1) - 3.0) * (x(1) - 3.0) + 1.0
    def g (x: VectorD): VectorD = VectorD (2.0 * (x(0) - 2.0), 2.0 * (x(1) - 3.0))

    val wls = new WolfeConditions (f, g)                     // the class under test
    val fx  = f(x)
    val gx  = g(x)
    val gxp = gx dot p
    println (s"x = $x, fx = $fx, gx = $gx")

    for i <- 0 until 60 do
        val a  = 0.1 * i
        val y  = x + p * a
        val fy = f(y)
        val gy = g(y)
        val wolf1 = wls.wolfe1 (fx, fy, a, gxp)
        val wolf2 = wls.wolfe2 (p, gy, gxp)
        println (s"a = $a, y = $y, fy = $fy, wolf1 = $wolf1, wolf2 = $wolf2")
        aa(i) = a
        ff(i) = fy
        ww(i) = 2.0 + is (wolf1) + 2.0 * is (wolf2)          // encode which conditions hold (2 to 5)
    end for
    new Plot (aa, ff, ww, "fy vs. a")

end wolfeConditionsTest
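
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `wolfeLSearchSketch` function is a minimal sketch of how `lsearch` could be
 *  implemented (an assumption for illustration, not the original ScalaTion
 *  implementation).  It bisects on the step length in the style of Lewis-Overton:
 *  if SDC fails, the step is too long, so shrink the upper bound; if CC fails, the
 *  step is too short, so raise the lower bound; stop when both weak Wolfe conditions
 *  hold.  It assumes p is a descent direction, i.e., g(x) dot p < 0.
 *  @param wc    the Wolfe conditions to satisfy
 *  @param f     the objective/loss function to minimize
 *  @param g     the gradient of the objective/loss function
 *  @param x     the current point
 *  @param p     the current search direction
 *  @param step  the initial step length
 */
def wolfeLSearchSketch (wc: WolfeConditions, f: FunctionV2S, g: FunctionV2V,
                        x: VectorD, p: VectorD, step: Double = 1.0): (Double, VectorD) =
    val fx  = f(x)                                           // functional value at x
    val gxp = g(x) dot p                                     // directional derivative at x (assumed < 0)
    var lo  = 0.0                                            // lower bound on the step length
    var hi  = Double.PositiveInfinity                        // upper bound on the step length
    var a   = step                                           // current trial step length
    var y   = x + p * a                                      // current trial point
    var go  = true
    var it  = 0
    while go && it < 50 do                                   // cap the number of bisection steps
        if ! wc.wolfe1 (fx, f(y), a, gxp) then hi = a        // SDC fails => step too long
        else if ! wc.wolfe2 (p, g(y), gxp) then lo = a       // CC fails  => step too short
        else go = false                                      // both conditions satisfied
        if go then
            a = if hi < Double.PositiveInfinity then (lo + hi) / 2.0
                else 2.0 * a                                 // no upper bound yet => expand
            y = x + p * a
        it += 1
    end while
    (a, y)                                                   // displacement and new point
end wolfeLSearchSketch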