//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author Hao Peng * @version 2.0 * @date Fri Oct 7 12:27:00 EDT 2017 * @see LICENSE (MIT style license file). * * @note Limited memory BFGS with Bounds (L-BFGS-B) * *------------------------------------------------------------------------------ * Limited memory Broyden–Fletcher–Goldfarb–Shanno (BFGS) for Bound constrained * optimization (L-BFGS-B) algorithm. Originally proposed by Byrd et. al in 1995. * See the first two links for the original paper and authors' software (written * in Fortran) distribution site, respectively. This implementation is translated * from a C++ implementation found in the last link. * * @see www.ece.northwestern.edu/~nocedal/PSfiles/limited.ps.gz * @see users.iems.northwestern.edu/~nocedal/lbfgsb.html * @see github.com/PatWie/CppNumericalSolvers/blob/master/include/cppoptlib/solver/lbfgsbsolver.h */ package scalation package optimization package quasi_newton import scala.collection.mutable.ArrayBuffer import scala.math.{abs, max, min} import scala.util.control.Breaks.{break, breakable} import scalation.calculus.Differential.∇ import scalation.mathstat._ import MatrixD.eye //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `LBFGS_B` the class implements the Limited memory Broyden–Fletcher– * Goldfarb–Shanno for Bound constrained optimization (L-BFGS-B) * Quasi-Newton Algorithm for solving Non-Linear Programming (NLP) problems. * L-BFGS-B determines a search direction by deflecting the steepest descent direction * vector (opposite the gradient) by * multiplying it by a matrix that approximates * the inverse Hessian. Furthermore, only a few vectors represent the approximation * of the Hessian Matrix (limited memory). The parameters estimated are also bounded * within user specified lower and upper bounds. * * minimize f(x) * subject to g(x) <= 0 [ optionally g(x) == 0 ] * * @param f the objective function to be minimized * @param g the constraint function to be satisfied, if any * @param ineq whether the constraint is treated as inequality (default) or equality * @param exactLS whether to use exact (e.g., `GoldenLS`) * or inexact (e.g., `WolfeLS`) Line Search * @param l vector of lower bounds for all input parameters * @param u vector of upper bounds for all input parameters */ class LBFGS_B (f: FunctionV2S, g: FunctionV2S = null, ineq: Boolean = true, exactLS: Boolean = false, private var l: VectorD = null, private var u: VectorD = null) extends Minimizer: private val debug = debugf ("L-BFGS", true) // the debug flag private val WEIGHT = 1000.0 // weight on penalty for constraint violation private var ww, mm: MatrixD = null // workspace matrices private var theta = 0.0 // a scaling parameter private var dim = 0 // dimension of the input vector private var hs = 5 // history size, number of historical vectors to store //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Sort pairs (k, v) according to v into ascending order. * @param v ArrayBuffer of Tuple2 to be sorted by the 2nd element */ private def sortIndices (v: ArrayBuffer [(Int, Double)]): VectorI = val sv = v.sortBy (_._2) val idx = new VectorI (sv.length) for i <- idx.indices do idx(i) = sv(i)._1 idx end sortIndices //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Algorithm CP: Computation of the Generalized Cauchy Point. See page 8 of * @see www.ece.northwestern.edu/~nocedal/PSfiles/limited.ps.gvz * @param x the parameter vector * @param gr the gradient vector */ private def getGCP (x: VectorD, gr: VectorD): (VectorD, VectorD) = debug ("getGCP", s"x = $x, gr = $gr") val setOfT = new ArrayBuffer [(Int, Double)] () val d = -gr for j <- 0 until dim do if gr(j) == 0 then setOfT.append ((j, MAX_VALUE)) else val tmp = if gr(j) < 0 then (x(j) - u(j)) / gr(j) else (x(j) - l(j)) / gr(j) setOfT.append ((j, tmp)) if tmp == 0 then d(j) = 0 end if end for val sortedIndices = sortIndices (setOfT) val xCauchy = x.copy val p = ww.transpose * d val c = new VectorD (ww.dim2) var fPrime = -d dot d var fDoublePrime = max (-theta * fPrime - (p dot mm * p), EPSILON) val f_dp_orig = fDoublePrime var dt_min = -fPrime / fDoublePrime var t_old = 0.0 var i = 0 breakable { for j <- 0 until dim do i = j if setOfT (sortedIndices(j))._2 > 0 then break () end for } // breakable var b = sortedIndices (i) var t = setOfT (b)._2 var dt = t while dt_min >= dt && i < dim do xCauchy(b) = if d(b) > 0 then u(b) else if d(b) < 0 then l(b) else xCauchy(b) val zb = xCauchy(b) - x(b) c += p * dt val wbt = ww(b) fPrime += dt * fDoublePrime + gr(b) * gr(b) + theta * gr(b) * zb - gr(b) * (wbt dot mm * c) fDoublePrime += -theta * gr(b) * gr(b) - 2.0 * (gr(b) * (wbt dot mm * p)) - gr(b) * gr(b) * (wbt dot mm * wbt) fDoublePrime = max (EPSILON * f_dp_orig, fDoublePrime) p += wbt * gr(b) d(b) = 0 dt_min = -fPrime / fDoublePrime t_old = t i += 1 if i < dim then b = sortedIndices (i) t = setOfT (b)._2 dt = t - t_old end if end while dt_min = max (dt_min, 0.0) t_old += dt_min for ii <- i until xCauchy.dim do val si = sortedIndices (ii) xCauchy(si) = x(si) + t_old * d(si) end for c += p * dt_min (xCauchy, c) end getGCP //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Find the alpha* parameter, a positive scalar. See Equation 5.8 on page 11 of * @see www.ece.northwestern.edu/~nocedal/PSfiles/limited.ps.gvz * @param x_cp vector of cauchy point * @param du vector containing intermediate results used to find alpha* * @param freeVar an ArrayBuffer storing the indices of free variable */ private def findAlpha (x_cp: VectorD, du: VectorD, freeVar: ArrayBuffer [Int]): Double = debug ("findAlpha", s"x_cp = $x_cp, du = $du, freeVar = $freeVar") var alphastar = 1.0 val n = freeVar.size assert (du.dim == n) for i <- 0 until n do val fi = freeVar(i) alphastar = if du(i) > 0 then min (alphastar, (u(fi) - x_cp(fi)) / du(i)) else min (alphastar, (l(fi) - x_cp(fi)) / du(i)) end for alphastar end findAlpha //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Minimization of the subspace of free variables. See Section 5 on page 9 of * @see www.ece.northwestern.edu/~nocedal/PSfiles/limited.ps.gvz * @param x the parameter vector * @param gr the gradient vector * @param xCauchy the vector of cauchy points * @param c vector obtained from getGCP used to initialize the subspace * minimization process */ private def subspaceMinimize (x: VectorD, gr: VectorD, xCauchy: VectorD, c: VectorD): VectorD = debug ("subspaceMinimize", s"x = $x, gr = $gr, xCauchy = $xCauchy, c = $c") val thetaInverse = 1.0 / theta val freeVarIdx = new ArrayBuffer [Int] () for i <- xCauchy.indices if xCauchy(i) != u(i) && xCauchy(i) != l(i) do freeVarIdx.append (i) val freeVarCount = freeVarIdx.size val wwzz = new MatrixD (ww.dim2, freeVarCount) for i <- 0 until freeVarCount do wwzz(?, i) = ww(freeVarIdx(i)) val rr = (gr + (xCauchy - x) * theta - ww * (mm * c)) val r = new VectorD (freeVarCount) for i <- 0 until freeVarCount do r(i) = rr(freeVarIdx(i)) var v = mm * (wwzz * r) var nn = wwzz * wwzz.transpose * thetaInverse nn = eye (nn.dim, nn.dim) - mm * nn val lu = new Fac_LU (nn) lu.factor () v = lu.solve (v) val du = r * -thetaInverse - wwzz.transpose * v * thetaInverse * thetaInverse val alpha_star = findAlpha (xCauchy, du, freeVarIdx) val dStar = du * alpha_star val subspaceMin = xCauchy.copy for i <- 0 until freeVarCount do subspaceMin (freeVarIdx(i)) += dStar(i) subspaceMin end subspaceMinimize //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Obtain the mean gradient norm squared * @param x the parameter vector * @param gr the gradient vector */ private def getMgn (x: VectorD, gr: VectorD): Double = val x_gr = x - gr val checkLower = VectorD (for i <- l.indices yield max (x_gr(i) , l(i))) val checkUpper = VectorD (for i <- u.indices yield min (checkLower(i), u(i))) val mgn = (checkUpper - x).normSq / dim mgn end getMgn //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Force the values within 'v' to stay within the pre-defined bounds. * @param v the Vector containing values to be adjusted */ private def forceBounds (v: VectorD): Unit = for i <- v.indices do if v(i) > u(i) then v(i) = u(i) else if v(i) < l(i) then v(i) = l(i) end for end forceBounds //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Modify the number of historical vectors to store. * @param hs_ the new history size */ def setHistorySize (hs_ : Int): Unit = { hs = hs_ } //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The objective function f plus a weighted penalty based on the constraint * function g. * @param x the coordinate values of the current point */ override def fg (x: VectorD): Double = val f_x = f(x) if g == null then // unconstrained f_x else // constrained, g(x) <= 0 val penalty = if ineq then max (g(x), 0.0) else abs (g(x)) f_x + abs (f_x) * WEIGHT * penalty * penalty end if end fg //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Perform an exact `GoldenSectionLS` or inexact `WolfeLS` Line Search. * Search in direction dir, returning the distance z to move in that direction. * @param x the current point * @param dir the direction to move in * @param step the initial step size */ def lineSearch (x: VectorD, dir: VectorD, step: Double = STEP): Double = debug ("linesearch", s"x = $x, dir = $dir, step = $step") def f_1D (z: Double): Double = fg(x + dir * z) // create a 1D function val ls = if exactLS then new GoldenSectionLS (f_1D ) // Golden Section Line Search else new WolfeLS (f_1D) // Wolfe line search ((c1 = .0001, c2 = .9) ls.search (step) // perform a Line Search end lineSearch //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Solve the following Non-Linear Programming (NLP) problem using L-BFGS_B: * min { f(x) | g(x) <= 0 }. * @param x0 the starting point * @param alphaInit the initial step size * @param toler the tolerance */ def solve (x0: VectorD, alphaInit : Double = STEP, toler: Double = TOL): FuncVec = debug ("solve", s"x0 = $x0, alphaInit = $alphaInit, toler = $toler") var best = (MAX_VALUE, VectorD.nullv) dim = x0.size theta = 1.0 if l == null then l = VectorD.fill (dim)(NEGATIVE_INFINITY) if u == null then u = VectorD.fill (dim)(POSITIVE_INFINITY) ww = new MatrixD (dim, 0) mm = new MatrixD (0, 0) val yHistory = new ArrayBuffer [VectorD] () val sHistory = new ArrayBuffer [VectorD] () var yHistoryMx: MatrixD = null var sHistoryMx: MatrixD = null var (x, gr) = (x0, ∇ (fg)(x0)) var fv = fg(x) var mgn = 0.0 var count = 0 val countMax = 10 breakable { for k <- 1 to MAX_IT do banner (s"solve: iteration $k: f(x) = $fv, x = $x") val f_old = fv val x_old = x val g_old = gr val mgn_old = mgn // STEP 2: compute the cauchy point val (xCauchy, c) = getGCP (x, gr) forceBounds (xCauchy) // STEP 3: compute a search direction d_k by the primal method for the sub-problem val subspaceMin = subspaceMinimize (x, gr, xCauchy, c) forceBounds (subspaceMin) // STEP 4: perform linesearch val rate = lineSearch (x, subspaceMin-x, alphaInit) // STEP 5: compute gradient x = x - (x - subspaceMin) * rate // update current guess and function information forceBounds (x) fv = fg(x) if blown ((fv, x)) then { best = better ((f_old, x_old), best); break () } gr = ∇ (fg)(x) mgn = getMgn (x, gr) if mgn < toler || count > countMax then { best = better ((fv, x), best); break () } if abs (mgn - mgn_old) < toler then count += 1 val newY = gr - g_old // prepare for next iteration val newS = x - x_old // STEP 6 val test = abs (newS dot newY) if test > EPSILON * newY.normSq then if yHistory.size >= hs then { yHistory.remove (0); sHistory.remove (0) } yHistory append newY sHistory append newS theta = (newY dot newY) / (newY dot newS) // STEP 7 yHistoryMx = MatrixD (yHistory).transpose sHistoryMx = MatrixD (sHistory).transpose ww = yHistoryMx ++^ (sHistoryMx * theta) val aa = sHistoryMx.transpose * yHistoryMx val ll = aa.lower ll(?, ?) = 0.0 // set ll's diagonal to 0 val dd = new MatrixD (aa.dim, aa.dim2) dd.setDiag (-aa(?)) // set dd diangonal to aa's val mm2 = (dd ++^ ll.transpose) ++ (ll ++^ (sHistoryMx.transpose * sHistoryMx * theta)) mm = Fac_LU.inverse (mm2)() end if debug ("solve", s"(k = $k) move from $x_old to $x where fg(x) = $fv") best = better ((fv, x), best) if abs (f_old - fv) < toler then break () // successive function values too similar end for } // breakable banner (s"solve: optimal solution = $best") best end solve end LBFGS_B //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `lBFGS_BTest` main function is used to test the `LBFGS_B` class. * f(x) = (x_0 - 3)^2 + (x_1 - 4)^2 + 1 * > runMain scalation.optimization.quasi_newton.lBFGS_BTest */ @main def lBFGS_BTest (): Unit = val n = 2 val x0 = new VectorD (n) def f (x: VectorD): Double = (x(0) - 3)~^2 + (x(1) - 4)~^2 + 1 banner ("Minimize (no bounds): (x_0 - 3)^2 + (x_1 - 4)^2 + 1") var optimizer = new LBFGS_B (f) var opt = optimizer.solve (x0) println (s"o][ ptimal solution (x, f(x)) = $opt") banner ("Minimize (bounds): (x_0 - 3)^2 + (x_1 - 4)^2 + 1") val l = VectorD.fill (x0.dim)(3.5) val u = VectorD.fill (x0.dim)(5.0) optimizer = new LBFGS_B (f, l = l, u = u) opt = optimizer.solve (x0) println (s"][ optimal solution (x, f(x)) = $opt") end lBFGS_BTest //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `lBFGS_BTest2` main function is used to test the `LBFGS_B` class. * f(x) = x_0^4 + (x_0 - 3)^2 + (x_1 - 4)^2 + 1 * > runMain scalation.optimization.quasi_newton.lBFGS_BTest2 */ @main def lBFGS_BTest2 (): Unit = val n = 2 val x0 = new VectorD (n) def f (x: VectorD): Double = x(0)~^4 + (x(0) - 3)~^2 + (x(1) - 4)~^2 + 1 banner ("Minimize (no bounds): x_0^4 + (x_0 - 3)^2 + (x_1 - 4)^2 + 1") var optimizer = new LBFGS_B (f) var opt = optimizer.solve (x0) println (s"][ optimal solution (x, f(x)) = $opt") banner ("Minimize (bounds): x_0^4 + (x_0 - 3)^2 + (x_1 - 4)^2 + 1") val l = VectorD.fill (x0.dim)(3.5) val u = VectorD.fill (x0.dim)(5.0) optimizer = new LBFGS_B (f, l = l, u = u) opt = optimizer.solve (x0) println (s"][ optimal solution (x, f(x)) = $opt") end lBFGS_BTest2 //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `lBFGS_BTest3` main function is used to test the `LBFGS_B` class. * f(x) = 1/x_0 + x_0^4 + (x_0 - 3)^2 + (x_1 - 4)^2 + 1 * > runMain scalation.optimization.quasi_newton.lBFGS_BTest3 */ @main def lBFGS_BTest3 (): Unit = val n = 2 val x0 = VectorD (0.1, 0.0) def f (x: VectorD): Double = 1/x(0) + x(0)~^4 + (x(0) - 3)~^2 + (x(1) - 4)~^2 + 1 banner ("Minimize (no bounds): 1/x_0 + x_0^4 + (x_0 - 3)^2 + (x_1 - 4)^2 + 1") var optimizer = new LBFGS_B (f) var opt = optimizer.solve (x0) println (s"][ optimal solution (x, f(x)) = $opt") opt = optimizer.resolve (n) println (s"][ optimal solution (x, f(x)) = $opt") banner ("Minimize (bounds): 1/x_0 + x_0^4 + (x_0 - 3)^2 + (x_1 - 4)^2 + 1") val l = VectorD.fill (x0.dim)(3.5) val u = VectorD.fill (x0.dim)(5.0) optimizer = new LBFGS_B (f, l = l, u = u) opt = optimizer.solve (x0) println (s"][ optimal solution (x, f(x)) = $opt") end lBFGS_BTest3