//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 2.0
 *  @date    Mon Jun 19 01:30:58 EDT 2023
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    Modified Wolfe Line Search Optimizer
 *
 *  @see     arch.library.northwestern.edu/concern/generic_works/jm214p59n?locale=en
 */

package scalation
package optimization

import scala.math.abs

import scalation.calculus.Differential.∇
import scalation.mathstat._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `WolfeLS3` class performs an inexact line search on f to find a point x that satisfies
 *  (1) SDC: sufficient decrease (f(x) is sufficiently less than f(0)) and
 *  (2) CC:  the slope at x is less steep than the slope at 0.
 *  That is, the line search looks for a value of x satisfying the two Wolfe conditions.
 *
 *      f(x) <= f(0) + c1 * f'(0) * x        Wolfe condition 1 (Armijo condition)
 *      |f'(x)| <= |c2 * f'(0)|              Wolfe condition 2 (strong version)
 *      f'(x) >= c2 * f'(0)                  Wolfe condition 2 (weak version, more robust)
 *
 *  It then uses bisection (or interpolative search) to find an approximate local minimal point.
 *  Currently, the strong version is not supported.
 *  Note: the c1 and c2 defaults below are intended for Quasi-Newton methods such as BFGS or L-BFGS.
 *  @param f   the objective/loss function to minimize
 *  @param g   the gradient of the objective/loss function
 *  @param c1  constant for sufficient decrease (Wolfe condition 1: 0.0001 to 0.001)
 *  @param c2  constant for curvature/slope constraint (Wolfe condition 2: 0.8 to 0.9)
 *  @param c3  constant for the noise control condition
 *  @param eg  estimate of the gradient noise
 */
class WolfeLS3 (f: FunctionV2S, var g: FunctionV2V,
                c1: Double = 0.0001, c2: Double = 0.9,
                c3: Double = 0.0, eg: Double = 0.0):

    private val debug  = debugf ("WolfeLS3", true)                 // debug function
    private val MAX_IT = 30                                        // maximum number of iterations

    if g == null then g = (x: VectorD) => ∇ (f)(x)                 // no function for gradient => use numerical

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return whether Wolfe condition 1, the Sufficient Decrease Condition (SDC), is satisfied.
     *  @param fx   the functional value of the original point
     *  @param fy   the functional value of the new point y = x + p * a
     *  @param a    the displacement in the search direction
     *  @param gxp  the dot product of the gradient vector g(x) and the search vector p
     */
    inline def wolfe1 (fx: Double, fy: Double, a: Double, gxp: Double): Boolean =
        fy <= fx + c1 * a * gxp

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return whether Wolfe condition 2, the Curvature Condition (CC), is satisfied.
     *  @param p    the search direction vector
     *  @param gy   the gradient at new point y = x + p * a
     *  @param gxp  the dot product of the gradient vector g(x) and the search vector p
     */
    inline def wolfe2 (p: VectorD, gy: VectorD, gxp: Double): Boolean =
        (gy dot p) >= c2 * gxp
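
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // Worked example (illustrative only): for f(x) = (x - 4)^2 + 1 along p = (1)
    // from x = (0), f(0) = 17 and g(0) dot p = -8.  Trying a = 1 gives y = (1)
    // with f(y) = 10 and g(y) dot p = -6, so with the defaults c1 = 0.0001, c2 = 0.9:
    //     SDC: 10 <= 17 + 0.0001 * 1 * (-8) = 16.9992    holds (wolfe1 is true)
    //     CC:  -6 >= 0.9 * (-8) = -7.2                    holds (wolfe2 is true)
    // hence a = 1 would be accepted.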

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return whether the noise control condition is satisfied.
     *  @param p   the search direction
     *  @param gx  the gradient at original point x
     *  @param gy  the gradient at new point y = x + p * a
     */
    inline def noiseControl (p: VectorD, gx: VectorD, gy: VectorD): Boolean =
        if c3 > 0.0 then abs ((gy - gx) dot p) < 2 * (1 + c3) * eg * p.norm
        else false

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Two-Phase Armijo-Wolfe Line Search and Lengthening: Initial Phase, Algorithm 3.2 in
     *  @see arch.library.northwestern.edu/concern/generic_works/jm214p59n?locale=en
     *  @param x     the current point
     *  @param p     the current search direction
     *  @param step  the initial step size
     */
    def lsearch (x: VectorD, p: VectorD, step: Double = 1.0): (Double, Double) =
        var split = true                                           // whether to do the split phase
        val (fx, gx) = (f(x), g(x))                                // function and gradient at x: f(x), g(x)
        val gxp = gx dot p                                         // gradient dot search direction
        debug ("lsearch", s"x = $x, fx = $fx, gx = $gx, p = $p, gxp = $gxp")

        var a = step                                               // start with a = initial step size
        var l = 0.0                                                // lower bound
        var u = Double.MaxValue                                    // upper bound

        var (go, it) = (true, 0)
        cfor (go && it < MAX_IT, it += 1) {
            val y = x + p * a                                      // new point
            val (fy, gy) = (f(y), g(y))                            // function and gradient at y: f(y), g(y)
            val wolf1 = wolfe1 (fx, fy, a, gxp)                    // Wolfe condition 1: SDC
            val wolf2 = wolfe2 (p, gy, gxp)                        // Wolfe condition 2: CC
            debug ("lsearch", s"(it = $it) before a = $a, y = $y, fy = $fy, gy = $gy, wolf1 = $wolf1, wolf2 = $wolf2")

            if ! wolf1 then                                        // Wolfe condition 1: SDC fails
                u = a
                a = (l + u) / 2.0                                  // backtrack
            else if noiseControl (p, gx, gy) then                  // noise control condition
                go = false
            else if ! wolf2 then                                   // Wolfe condition 2: CC fails
                l = a
                a = if u == Double.MaxValue then 2.0 * a else (l + u) / 2.0
            else
                split = false
                go = false
            end if
            debug ("lsearch", s"(it = $it) after a = $a in [$l, $u]")
        } // cfor

        val b = a
        if split then splitPhase (x, fx, gx, p, gxp, a, b) else (a, b)
    end lsearch

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Two-Phase Armijo-Wolfe Line Search and Lengthening: Split Phase, Algorithm 3.3 in
     *  @see arch.library.northwestern.edu/concern/generic_works/jm214p59n?locale=en
     *  @param x    the initial point
     *  @param fx   the functional value at x
     *  @param gx   the gradient at x
     *  @param p    the search direction
     *  @param gxp  the dot product of g(x) and p
     *  @param a_   the initial step length
     *  @param b_   the initial lengthening parameter
     */
    def splitPhase (x: VectorD, fx: Double, gx: VectorD, p: VectorD, gxp: Double,
                    a_ : Double, b_ : Double): (Double, Double) =
        var (a, b) = (a_, b_)

        while
            val y  = x + p * a                                     // new point based on a
            val fy = f(y)                                          // functional value
            debug ("splitPhase", s"a = $a, y = $y, fy = $fy")
            ! wolfe1 (fx, fy, a, gxp)                              // Wolfe condition 1: SDC fails
        do
            a /= 10.0                                              // backtrack

        while
            val y = x + p * b                                      // new point based on b
            noiseControl (p, gx, g(y))                             // noise control condition
        do
            b *= 2                                                 // lengthen the step

        (a, b)
    end splitPhase

end WolfeLS3
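
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `wolfeLS3Demo` main function sketches how `lsearch` is typically consumed by a
 *  descent method: each iteration picks a direction and asks the line search for a step
 *  length satisfying the Wolfe conditions.  This demo is illustrative only (it is not
 *  part of the original test suite) and uses plain steepest descent rather than BFGS.
 *  > runMain scalation.optimization.wolfeLS3Demo
 */
@main def wolfeLS3Demo (): Unit =

    def f (x: VectorD): Double  = (x(0) - 2.0) ~^ 2 + (x(1) - 3.0) ~^ 2 + 1.0
    def g (x: VectorD): VectorD = VectorD (2.0 * (x(0) - 2.0), 2.0 * (x(1) - 3.0))

    val wls = new WolfeLS3 (f, g)
    var x   = VectorD (0.0, 0.0)                                   // starting point
    for it <- 1 to 5 do
        val p = g(x) * -1.0                                        // steepest-descent direction -g(x)
        val (a, _) = wls.lsearch (x, p)                            // Wolfe-satisfying step length
        x = x + p * a                                              // move to the new point
        println (s"it = $it: x = $x, f(x) = ${f(x)}")
    end for

end wolfeLS3Demo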

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `wolfeLS3Test` main function is used to test the `WolfeLS3` class on scalar functions.
 *  > runMain scalation.optimization.wolfeLS3Test
 */
@main def wolfeLS3Test (): Unit =

    val z0 = VectorD (0.0)                                         // zero vector, the origin
    val p  = VectorD (1.0)                                         // direction to search in

    def f (x: VectorD): Double = (x(0) - 4.0) * (x(0) - 4.0) + 1.0           // no expansion phase
//  def f (x: VectorD): Double = (x(0) - 40.0) * (x(0) - 40.0) + 1.0         // requires expansion phase
    def g (x: VectorD): VectorD = VectorD (2.0 * (x(0) - 4.0))

    val wls = new WolfeLS3 (f, g)
    val (a, b) = wls.lsearch (z0, p)
    val x = z0 + p * a

    println ("\nProblem 1: (x - 4)^2 + 1")
    println (s"start z0 = $z0, search direction = $p")
    println (s"displacement a = $a")
    println (s"lengthening b = $b")
    println (s"optimal x solution = $x")
    println (s"optimal f solution = ${f(x)}")

end wolfeLS3Test

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `wolfeLS3Test2` main function is used to test the `WolfeLS3` class on scalar functions.
 *  > runMain scalation.optimization.wolfeLS3Test2
 */
@main def wolfeLS3Test2 (): Unit =

    val z0 = VectorD (0.0, 0.0)                                    // zero vector, the origin
    val p  = VectorD (1.0, 1.0)                                    // direction to search in

    def f (x: VectorD): Double = (x(0) - 2.0) * (x(0) - 2.0) + (x(1) - 3.0) * (x(1) - 3.0) + 1.0
    def g (x: VectorD): VectorD = VectorD (2.0 * (x(0) - 2.0), 2.0 * (x(1) - 3.0))

    val yy = VectorD (0.5, 0.5)
    banner (s"yy = $yy, fyy = ${f(yy)}, gyy = ${g(yy)}")

    val wls = new WolfeLS3 (f, g)
    val (a, b) = wls.lsearch (z0, p)
    val x = z0 + p * a

    println ("\nProblem 1: (x_0 - 2)^2 + (x_1 - 3)^2 + 1")
    println (s"start z0 = $z0, search direction = $p")
    println (s"displacement a = $a")
    println (s"lengthening b = $b")
    println (s"optimal x solution = $x")
    println (s"optimal f solution = ${f(x)}")

/*
    def f2 (x: VectorD): Double = x(0)/4 + 5*x(0)~^2 + x(0)~^4 - 9*x(0)~^2*x(1) + 3*x(1)~^2 + 2*x(1)~^4
    def g2 (x: VectorD): VectorD = VectorD (0.25 + 10 * x(0) + 4.0 * x(0)~^3 - 18.0 * x(0) * x(1),
                                            -9.0 * x(0)~^2 + 6.0 * x(1) + 8.0 * x(1)~^3)

    val wls2 = new WolfeLS3 (f2, g2)
    val (a2, b2) = wls2.lsearch (z0, p)
    val x2 = z0 + p * a2

    // @see http://math.fullerton.edu/mathews/n2003/gradientsearch/GradientSearchMod/Links/GradientSearchMod_lnk_5.html
    println ("\nProblem 4: x_0/4 + 5x_0^2 + x_0^4 - 9x_0^2 x_1 + 3x_1^2 + 2x_1^4")
    println (s"start z0 = $z0, search direction = $p")
    println (s"displacement a = $a2")
    println (s"lengthening b = $b2")
    println (s"optimal x solution = $x2")
    println (s"optimal f solution = ${f2(x2)}")
*/

end wolfeLS3Test2
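
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `wolfeLS3NoiseDemo` main function sketches the noise control path, which none of
 *  the other tests exercise.  It is illustrative only: the c3 and eg values below are
 *  made-up noise parameters, not tuned recommendations.
 *  > runMain scalation.optimization.wolfeLS3NoiseDemo
 */
@main def wolfeLS3NoiseDemo (): Unit =

    def f (x: VectorD): Double  = (x(0) - 2.0) ~^ 2 + (x(1) - 3.0) ~^ 2 + 1.0
    def g (x: VectorD): VectorD = VectorD (2.0 * (x(0) - 2.0), 2.0 * (x(1) - 3.0))

    val wls = new WolfeLS3 (f, g, c3 = 0.5, eg = 0.01)             // made-up noise parameters
    val z0  = VectorD (0.0, 0.0)
    val p   = VectorD (1.0, 1.0)

    val (a, b) = wls.lsearch (z0, p)
    println (s"displacement a = $a, lengthening b = $b, x = ${z0 + p * a}")

end wolfeLS3NoiseDemo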

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `wolfeLS3Test3` main function is used to test the `WolfeLS3` class on vector functions.
 *  This test uses the Rosenbrock function.
 *  @see https://mikl.dk/post/2019-wolfe-conditions/
 *  > runMain scalation.optimization.wolfeLS3Test3
 */
@main def wolfeLS3Test3 (): Unit =

    def f (x: VectorD): Double = (1.0 - x(0))~^2 + 100.0 * (x(1) - x(0)~^2)~^2
    def g (x: VectorD): VectorD = VectorD (-2.0 * (1 - x(0)) - 400.0 * x(0) * (x(1) - x(0)~^2),
                                           200 * (x(1) - x(0)~^2))

    val z0 = VectorD (-2.2, 3)
    val p  = VectorD (1625.6, 368)                                 // -gradient at z0

    val wls = new WolfeLS3 (f, g)
    val (a, b) = wls.lsearch (z0, p)
    val x = z0 + p * a

    println ("\nProblem 1: (1 - x_0)^2 + 100 (x_1 - x_0^2)^2")
    println (s"start z0 = $z0, search direction = $p")
    println (s"displacement a = $a")
    println (s"lengthening b = $b")
    println (s"optimal x solution = $x")
    println (s"optimal f solution = ${f(x)}")

end wolfeLS3Test3

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `wolfeLS3Test4` main function is used to test the `WolfeLS3` class on scalar functions.
 *  It sweeps the step length a and plots which of the Wolfe conditions hold at each a.
 *  > runMain scalation.optimization.wolfeLS3Test4
 */
@main def wolfeLS3Test4 (): Unit =

    val aa = new VectorD (60)
    val ff = new VectorD (60)
    val ww = new VectorD (60)

    val x = VectorD (0.0, 0.0)                                     // zero vector, the origin
    val p = VectorD (1.0, 1.0)                                     // direction to search in

    banner ("Problem: (x_0 - 2)^2 + (x_1 - 3)^2 + 1")
    def f (x: VectorD): Double = (x(0) - 2.0) * (x(0) - 2.0) + (x(1) - 3.0) * (x(1) - 3.0) + 1.0
    def g (x: VectorD): VectorD = VectorD (2.0 * (x(0) - 2.0), 2.0 * (x(1) - 3.0))

    val wls = new WolfeLS3 (f, g)
    val fx  = f(x)
    val gx  = g(x)
    val gxp = gx dot p
    println (s"x = $x, fx = $fx, gx = $gx")

    for i <- 0 until 60 do
        val a  = 0.1 * i
        val y  = x + p * a
        val fy = f(y)
        val gy = g(y)
        val wolf1 = wls.wolfe1 (fx, fy, a, gxp)
        val wolf2 = wls.wolfe2 (p, gy, gxp)
        println (s"a = $a, y = $y, fy = $fy, wolf1 = $wolf1, wolf2 = $wolf2")
        aa(i) = a
        ff(i) = fy
        ww(i) = 2.0 + is (wolf1) + 2.0 * is (wolf2)
    end for

    new Plot (aa, ff, ww, "fy vs. a")

end wolfeLS3Test4
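
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `wolfeLS3Test5` main function exercises the lengthening (expansion) behavior by
 *  placing the minimum far from the origin, as suggested by the commented-out variant
 *  in `wolfeLS3Test`.  This test is an illustrative addition, not part of the original
 *  suite: with f(x) = (x - 40)^2 + 1 the initial step a = 1 fails the curvature
 *  condition, so the search must repeatedly double a before it can accept a step.
 *  > runMain scalation.optimization.wolfeLS3Test5
 */
@main def wolfeLS3Test5 (): Unit =

    val z0 = VectorD (0.0)                                         // zero vector, the origin
    val p  = VectorD (1.0)                                         // direction to search in

    def f (x: VectorD): Double  = (x(0) - 40.0) ~^ 2 + 1.0         // far minimum => step must lengthen
    def g (x: VectorD): VectorD = VectorD (2.0 * (x(0) - 40.0))

    val wls = new WolfeLS3 (f, g)
    val (a, b) = wls.lsearch (z0, p)
    val x = z0 + p * a

    println ("\nProblem: (x - 40)^2 + 1")
    println (s"displacement a = $a")
    println (s"lengthening b = $b")
    println (s"x solution = $x, f solution = ${f(x)}")

end wolfeLS3Test5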