//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 2.0
 *  @date    Wed Jun 7 17:54:59 EDT 2023
 *  @see     LICENSE (MIT style license file).
 *
 *  @note    BFGS, Quasi-Newton Method to Find Minima for Functions of Vectors
 *
 *  @see web.stanford.edu/class/cme304/docs/newton-type-methods.pdf
 *  @see `BFGS` for similar code that uses line search
 */

package scalation
package optimization
package quasi_newton

import scalation.calculus.Differential.∇
import scalation.mathstat._

import QNewton.aHi_inc

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `BFGS_NoLS` class is used to find optima for functions of vectors.
 *  The solve method finds local optima using a Quasi-Newton method that uses
 *  the BFGS update to approximate the inverse Hessian.
 *
 *      min f(x)    where f: R^n -> R
 *
 *  @see `BFGS` for a version that uses a different line search.
 *  @param f      the vector-to-scalar function to find optima of
 *  @param useLS  whether to use Line Search (LS)
 */
class BFGS_NoLS (f: FunctionV2S, useLS: Boolean = false) extends Minimize:

    private val debug    = debugf ("BFGS_NoLS", true)            // debug function
    private val gradDesc = false                                 // true  => Gradient Descent
                                                                 // false => BFGS_NoLS Method

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Solve for an optima by finding a local optima close to the starting point/guess x0.
     *  This version numerically approximates the first derivatives.
     *  @param x0  the starting point/guess
     *  @param α   the current learning rate
     */
    def solve (x0: VectorD, α: Double = eta): FuncVec =
        val wls  = new WolfeLS2 (f, null)                        // Wolfe Line Search
        val x    = x0                                            // current point
        var f_x  = f(x)                                          // function value at x
        var df_x = ∇ (f)(x)                                      // compute gradient, numerically
        val aHi  = MatrixD.eye (x.dim, x.dim)                    // approximate Hessian inverse
                                                                 // start with identity matrix
        var it = 1                                               // iteration counter
        cfor (it <= MAX_IT && df_x.norm > EPS, it += 1) {
            debug ("solve", s"it = $it: f($x) = $f_x, df_x = $df_x, aHi = $aHi")

            val d = if gradDesc then df_x                        // direction = gradient
                    else aHi * df_x                              // use approximate Hessian inverse (aHi)
            val s = if useLS then d * -wls.lsearch (x, -d)._1    // step vector using line search
                    else d * -α                                  // step vector using learning rate
            x += s                                               // new point: add step

            val df_x_ = df_x                                     // save previous gradient
            df_x  = ∇ (f)(x)                                     // compute new gradient, numerically
            aHi  += aHi_inc (aHi, s, df_x - df_x_)               // update approximate Hessian inverse (aHi)
            f_x   = f(x)                                         // functional value
        } // cfor

        println (s"optimal solution x = $x, f = ${f(x)}")
        (f_x, x)
    end solve
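    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Reference sketch (added for illustration; not called by solve or solve2):
     *  the textbook BFGS inverse-Hessian update that `QNewton.aHi_inc` is assumed
     *  to compute.  Given step s = x_new - x_old and gradient change
     *  y = ∇f(x_new) - ∇f(x_old), the increment added to aHi is
     *      ((s⋅y + y⋅(aHi y)) / (s⋅y)^2) (s s^t) - (aHi y s^t + s (aHi y)^t) / (s⋅y)
     *  The local `outer` helper and this method are hypothetical names added here;
     *  the outer products are built element-wise to stay within basic
     *  `MatrixD`/`VectorD` operations.  The library's own `aHi_inc` is authoritative.
     *  @param aHi  the current approximate inverse Hessian
     *  @param s    the step vector x_new - x_old
     *  @param y    the gradient change ∇f(x_new) - ∇f(x_old)
     */
    private def aHi_incSketch (aHi: MatrixD, s: VectorD, y: VectorD): MatrixD =
        def outer (a: VectorD, b: VectorD): MatrixD =            // outer product a b^t
            val m = new MatrixD (a.dim, b.dim)
            for i <- 0 until a.dim; j <- 0 until b.dim do m(i, j) = a(i) * b(j)
            m
        end outer
        val sy = s dot y                                         // curvature s⋅y (assumed positive)
        val hy = aHi * y                                         // aHi * y
        outer (s, s) * ((sy + (y dot hy)) / (sy * sy)) -
        (outer (hy, s) + outer (s, hy)) * (1.0 / sy)
    end aHi_incSketch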
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Solve for an optima by finding a local optima close to the starting point/guess x0.
     *  This version uses explicit functions for the gradient (partial derivatives).
     *  @param x0    the starting point/guess
     *  @param grad  the gradient as explicit functions for partials
     *  @param α     the current learning rate
     */
    def solve2 (x0: VectorD, grad: FunctionV2V, α: Double = eta): FuncVec =
        val wls  = new WolfeLS2 (f, grad)                        // Wolfe Line Search
        val x    = x0                                            // current point
        var f_x  = f(x)                                          // function value at x
        var df_x = grad (x)                                      // compute gradient by function evaluation
        val aHi  = MatrixD.eye (x.dim, x.dim)                    // approximate Hessian inverse
                                                                 // start with identity matrix
        var it = 1                                               // iteration counter
        cfor (it <= MAX_IT && df_x.norm > EPS, it += 1) {
            debug ("solve2", s"it = $it: f($x) = $f_x, df_x = $df_x, aHi = $aHi")

            val d = if gradDesc then df_x                        // direction = gradient
                    else aHi * df_x                              // use approximate Hessian inverse (aHi)
            val s = if useLS then d * -wls.lsearch (x, -d)._1    // step vector using line search
                    else d * -α                                  // step vector using learning rate
            x += s                                               // update new x

            val df_x_ = df_x                                     // save previous gradient
            df_x  = grad (x)                                     // compute new gradient by function evaluation
            aHi  += aHi_inc (aHi, s, df_x - df_x_)               // update approximate Hessian inverse (aHi)
            f_x   = f(x)                                         // functional value
        } // cfor

        println (s"optimal solution x = $x, f = ${f(x)}")
        (f_x, x)
    end solve2

end BFGS_NoLS


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `bFGS_NoLSTest` main function is used to test the `BFGS_NoLS` class.
 *  This test numerically approximates the derivatives to find minima.
 *      f(x) = (x_0 - 3)^2 + (x_1 - 4)^2 + 1
 *  > runMain scalation.optimization.quasi_newton.bFGS_NoLSTest
 */
@main def bFGS_NoLSTest (): Unit =

    val eta = 0.5                                                // learning rate (may need adjustment)
    val n   = 2                                                  // dimension of the search space
    val x0  = new VectorD (n)                                    // starting point (0, 0)

    banner ("Minimize: (x_0 - 3)^2 + (x_1 - 4)^2 + 1")
    def f (x: VectorD): Double = (x(0) - 3.0)~^2 + (x(1) - 4.0)~^2 + 1.0

    val optimizer = new BFGS_NoLS (f)                            // use learning rate - needs a good eta
//  val optimizer = new BFGS_NoLS (f, true)                      // use Line Search
    val opt = optimizer.solve (x0, eta)
    println (s"][ optimal solution (f(x), x) = $opt")

end bFGS_NoLSTest


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `bFGS_NoLSTest2` main function is used to test the `BFGS_NoLS` class.
 *  This tests the use of functions for partial derivatives to find minima.
 *      f(x) = (x_0 - 3)^2 + (x_1 - 4)^2 + 1
 *  > runMain scalation.optimization.quasi_newton.bFGS_NoLSTest2
 */
@main def bFGS_NoLSTest2 (): Unit =

    val eta = 0.5                                                // learning rate (may need adjustment)
    val n   = 2                                                  // dimension of the search space
    val x0  = new VectorD (n)                                    // starting point (0, 0)

    banner ("Minimize: (x_0 - 3)^2 + (x_1 - 4)^2 + 1")
    def f (x: VectorD): Double = (x(0) - 3)~^2 + (x(1) - 4)~^2 + 1
    def grad (x: VectorD): VectorD = VectorD (2 * x(0) - 6, 2 * x(1) - 8)

    val optimizer = new BFGS_NoLS (f)                            // use learning rate - needs a good eta
//  val optimizer = new BFGS_NoLS (f, true)                      // use Line Search
    val opt = optimizer.solve2 (x0, grad, eta)
    println (s"][ optimal solution (f(x), x) = $opt")

end bFGS_NoLSTest2
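
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Illustrative sketch (added; not part of the original test suite): run the
 *  quadratic from `bFGS_NoLSTest` both with a fixed learning rate and with the
 *  Wolfe line search (useLS = true) to compare the two step-size strategies.
 *  The analytic minimum is at x = (3, 4) with f = 1, which both runs should approach.
 *  The name `bFGS_NoLSCompare` is added here for illustration only.
 *  > runMain scalation.optimization.quasi_newton.bFGS_NoLSCompare
 */
@main def bFGS_NoLSCompare (): Unit =

    val eta = 0.5                                                // learning rate (may need adjustment)

    def f (x: VectorD): Double = (x(0) - 3.0)~^2 + (x(1) - 4.0)~^2 + 1.0

    banner ("Compare: fixed learning rate vs. Wolfe line search")
    val optLR = new BFGS_NoLS (f).solve (new VectorD (2), eta)        // fixed learning rate, start (0, 0)
    val optLS = new BFGS_NoLS (f, true).solve (new VectorD (2), eta)  // Wolfe line search, start (0, 0)
    println (s"][ learning rate: (f(x), x) = $optLR")
    println (s"][ line search:   (f(x), x) = $optLS")

end bFGS_NoLSCompare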
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `bFGS_NoLSTest3` main function is used to test the `BFGS_NoLS` class.
 *  This test uses the Rosenbrock function.
 *      f(x) = (1 - x_0)^2 + 100 (x_1 - x_0^2)^2
 *  > runMain scalation.optimization.quasi_newton.bFGS_NoLSTest3
 */
@main def bFGS_NoLSTest3 (): Unit =

    val eta = 0.5                                                // learning rate (may need adjustment)
    val n   = 2                                                  // dimension of the search space
    val x0  = new VectorD (n)                                    // starting point (0, 0)

    banner ("Minimize: (1 - x_0)^2 + 100 (x_1 - x_0^2)^2")
    def f (x: VectorD): Double = (1.0 - x(0))~^2 + 100.0 * (x(1) - x(0)~^2)~^2
    def grad (x: VectorD): VectorD = VectorD (-2.0 * (1 - x(0)) - 400.0 * x(0) * (x(1) - x(0)~^2),
                                              200.0 * (x(1) - x(0)~^2))

    val optimizer = new BFGS_NoLS (f)                            // use learning rate - needs a good eta
//  val optimizer = new BFGS_NoLS (f, true)                      // use Line Search
//  val opt = optimizer.solve (x0, eta)                          // use numerical partials
    val opt = optimizer.solve2 (x0, grad, eta)                   // use functions for partials
    println (s"][ optimal solution (f(x), x) = $opt")

end bFGS_NoLSTest3


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `bFGS_NoLSTest4` main function is used to test the `BFGS_NoLS` class on
 *      f(x) = 1/x_0 + x_0^4 + (x_0 - 3)^2 + (x_1 - 4)^2 + 1
 *  > runMain scalation.optimization.quasi_newton.bFGS_NoLSTest4
 */
@main def bFGS_NoLSTest4 (): Unit =

    val eta = 0.5                                                // learning rate (may need adjustment)
    val x0  = VectorD (0.1, 0.0)                                 // starting point (0.1, 0)

    banner ("Minimize: 1/x_0 + x_0^4 + (x_0 - 3)^2 + (x_1 - 4)^2 + 1")
    def f (x: VectorD): Double = 1/x(0) + x(0)~^4 + (x(0) - 3.0)~^2 + (x(1) - 4.0)~^2 + 1.0
    def grad (x: VectorD): VectorD = VectorD (-(x(0)~^(-2)) + 4.0 * x(0)~^3 + 2 * x(0) - 6,
                                              2 * x(1) - 8)

//  val optimizer = new BFGS_NoLS (f)                            // use learning rate - needs a good eta
    val optimizer = new BFGS_NoLS (f, true)                      // use Line Search
//  val opt = optimizer.solve (x0, eta)                          // use numerical partials
    val opt = optimizer.solve2 (x0, grad, eta)                   // use functions for partials
    println (s"][ optimal solution (f(x), x) = $opt")

//  opt = optimizer.resolve (n)                                  // try multiple starting points
//  println (s"][ optimal solution (f(x), x) = $opt")

end bFGS_NoLSTest4
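
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** Illustrative sketch (added; not part of the original test suite): cross-check
 *  the hand-coded partials used in `bFGS_NoLSTest4` against the numerical
 *  gradient operator ∇ from `scalation.calculus.Differential` at a sample point.
 *  A small difference norm suggests the analytic gradient passed to solve2 is
 *  consistent with the objective.  The name `bFGS_NoLSGradCheck` is added here
 *  for illustration only.
 *  > runMain scalation.optimization.quasi_newton.bFGS_NoLSGradCheck
 */
@main def bFGS_NoLSGradCheck (): Unit =

    def f (x: VectorD): Double = 1/x(0) + x(0)~^4 + (x(0) - 3.0)~^2 + (x(1) - 4.0)~^2 + 1.0
    def grad (x: VectorD): VectorD = VectorD (-(x(0)~^(-2)) + 4.0 * x(0)~^3 + 2 * x(0) - 6,
                                              2 * x(1) - 8)

    val x    = VectorD (0.5, 1.0)                                // sample point with x_0 != 0
    val gNum = ∇ (f)(x)                                          // numerically approximated gradient
    val gAna = grad (x)                                          // hand-coded analytic gradient
    banner ("Gradient check for the bFGS_NoLSTest4 objective")
    println (s"numerical = $gNum, analytic = $gAna, diff norm = ${(gNum - gAna).norm}")

end bFGS_NoLSGradCheck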