//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author André Filipe Caldas Laranjeira * @version 2.0 * @note Fri Sep 8 14:20:11 EDT 2023 * @see LICENSE (MIT style license file). * * @note MoreThuente Line Search Implementation used by the L-BFGS Algorithm * * This Scala implementation was made based on the C implementation of the same * algorithm found in the link below. * * @see github.com/chokkan/liblbfgs */ package scalation package optimization package quasi_newton import scala.math.{abs, max, min, sqrt} import scalation.mathstat.VectorD //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `LBFGSMoreThuente` object implements the MoreThuente line search * algorithm for use in the native implementation of L-BFGS. */ object LBFGSMoreThuente extends LBFGSLineSearch: //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `LBFGSUpdateTriIntervalReturn` type is a union type representing * the return value for the `update_trial_interval` method implemented by * this line search algorithm. * * A successful execution should return `LineSearchTriInterval`, while * an execution with errors should return a `LBFGSReturnCode` error code. * Returning a `LBFGSReturnCode` success code triggers undefined * behavior. */ private type LBFGSUpdateTriIntervalReturn = LineSearchTriInterval | LBFGSReturnCode //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: override def lineSearch (n: Int, x: VectorD, f: Double, g: VectorD, s: VectorD, stp: Double, cd: LBFGSCallbackData, params: LBFGSLineSearchPrms, orthantWise: Option[OrthantWisePrms] = None): LBFGSLineSearchReturn = var count = 0 var errorCode: Option [LBFGSReturnCode] = None var dg = 0.0 var fxm, dgxm, fym, dgym, fm, dgm = 0.0 var ftest1 = 0.0 var stmin, stmax = 0.0 var xNew = x var gNew = g var fNew = f var stpNew = stp /* Check the input parameters for errors. */ if stp <= 0 then return LBFGSLineSearchFailure (LBFGSReturnCode.InvalidPrms, LBFGSLineSearchIncomplete (xNew, fNew)) /* Compute the initial gradient in the search direction. */ val dginit = g dot s /* Make sure that s points to a descent direction. */ if 0 < dginit then return LBFGSLineSearchFailure (LBFGSReturnCode.IncreaseGradient, LBFGSLineSearchIncomplete (xNew, fNew)) // Initialize local variables var brackt = false var stage1 = true val dgtest = params.ftol * dginit var width = params.maxStep - params.minStep var prev_width = 2.0 * width /* The variables stx, fx, dgx contain the values of the step, function, and directional derivative at the best step. The variables sty, fy, dgy contain the value of the step, function, and derivative at the other endpoint of the interval of uncertainty. The variables stp, f, dg contain the values of the step, function, and derivative at the current step. */ var stx, sty = 0.0 var fx, fy = f var dgx, dgy = dginit while true do // Set the minimum and maximum steps to correspond to the present interval of uncertainty. if brackt then stmin = min(stx, sty) stmax = max(stx, sty) else stmin = stx stmax = stpNew + 4.0 * (stpNew - stx) end if // Clip the step in the range of [stpmin, stpmax] if stpNew < params.minStep then stpNew = params.minStep if params.maxStep < stpNew then stpNew = params.maxStep // If an unusual termination is to occur then let stp be the lowest point obtained so far. if (brackt && ((stpNew <= stmin || stmax <= stpNew) || params.maxLineSearch <= count + 1 || errorCode.nonEmpty)) || (brackt && (stmax - stmin <= params.xtol * stmax)) then stpNew = stx end if // Compute the current value of xNew: xNew <- x + (*stp) * s. xNew = x + (s * stpNew) // Evaluate the function and gradient values val evaluationResults = cd.evaluationLogic.evaluate (cd.instance, xNew, cd.n, stpNew) fNew = evaluationResults.objFunctionValue gNew = evaluationResults.gradientVector dg = gNew dot s ftest1 = f + stpNew * dgtest count += 1 // Test for errors and convergence if brackt && ((stpNew <= stmin || stmax <= stpNew) || errorCode.nonEmpty) then // Rounding errors prevent further progress return LBFGSLineSearchFailure (LBFGSReturnCode.RoundingError, LBFGSLineSearchIncomplete (xNew, fNew)) if stpNew == params.maxStep && fNew <= ftest1 && dg <= dgtest then // The step is the maximum value return LBFGSLineSearchFailure (LBFGSReturnCode.MaximumStep, LBFGSLineSearchIncomplete (xNew, fNew)) if stpNew == params.minStep && (ftest1 < fNew || dgtest <= dg) then // The step is the minimum value return LBFGSLineSearchFailure (LBFGSReturnCode.MinimumStep, LBFGSLineSearchIncomplete (xNew, fNew)) if brackt && (stmax - stmin) <= params.xtol * stmax then // Relative width of the interval of uncertainty is at most xtol return LBFGSLineSearchFailure (LBFGSReturnCode.WidthTooSmall, LBFGSLineSearchIncomplete (xNew, fNew)) if params.maxLineSearch <= count then // Maximum number of iteration return LBFGSLineSearchFailure (LBFGSReturnCode.MaximumLineSearch, LBFGSLineSearchIncomplete (xNew, fNew)) if fNew <= ftest1 && abs(dg) <= params.gtol * (-dginit) then // The sufficient decrease condition and the directional derivative condition hold return LBFGSLineSearchStep (xNew, gNew, fNew, stpNew, count) /* In the first stage we seek a step for which the modified function has a non-positive value and non-negative derivative. */ if stage1 && fNew <= ftest1 && min(params.ftol, params.gtol) * dginit <= dg then stage1 = false /* A modified function is used to predict the step only if we have not obtained a step for which the modified function has a non-positive function value and non-negative derivative, and if a lower function value has been obtained but the decrease is not sufficient. */ if stage1 && ftest1 < fNew && fNew <= fx then // Define the modified function and derivative values fm = fNew - stpNew * dgtest fxm = fx - stx * dgtest fym = fy - sty * dgtest dgm = dg - dgtest dgxm = dgx - dgtest dgym = dgy - dgtest // Call update_trial_interval() to update the interval of uncertainty and to compute the new step. update_trial_interval ( stx, fxm, dgxm, sty, fym, dgym, stpNew, fm, dgm, stmin, stmax, brackt) match case trialInterval: LineSearchTriInterval => stx = trialInterval.x fxm = trialInterval.fx dgxm = trialInterval.dx sty = trialInterval.y fym = trialInterval.fy dgym = trialInterval.dy stpNew = trialInterval.t brackt = trialInterval.brackt case returnCode: LBFGSReturnCode => errorCode = Some(returnCode) // Reset the function and gradient values for f fx = fxm + stx * dgtest fy = fym + sty * dgtest dgx = dgxm + dgtest dgy = dgym + dgtest else // Call update_trial_interval() to update the interval of uncertainty and to compute the new step. update_trial_interval (stx, fx, dgx, sty, fy, dgy, stpNew, fNew, dg, stmin, stmax, brackt) match case trialInterval: LineSearchTriInterval => stx = trialInterval.x fx = trialInterval.fx dgx = trialInterval.dx sty = trialInterval.y fy = trialInterval.fy dgy = trialInterval.dy stpNew = trialInterval.t brackt = trialInterval.brackt case returnCode: LBFGSReturnCode => errorCode = Some(returnCode) end if // Force a sufficient decrease in the interval of uncertainty. if brackt then if 0.66 * prev_width <= abs(sty - stx) then stpNew = stx + 0.5 * (sty - stx) prev_width = width width = abs(sty - stx) end if end while LBFGSLineSearchFailure (LBFGSReturnCode.LogicError, LBFGSLineSearchIncomplete (xNew, fNew)) end lineSearch //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: private def update_trial_interval (x: Double, fx: Double, dx: Double, y: Double, fy: Double, dy: Double, t: Double, ft: Double, dt: Double, tmin: Double, tmax: Double, brackt: Boolean): LBFGSUpdateTriIntervalReturn = var newBrackt: Boolean = brackt var newx = x var newfx = fx var newdx = dx var newy = y var newfy = fy var newdy = dy var bound: Boolean = false val dsign: Boolean = (dt * (dx/abs(dx))) < 0 var mc: Double = 0.0 // minimizer of an interpolated cubic var mq: Double = 0.0 // minimizer of an interpolated quadratic var newt: Double = 0.0 // new trial value // Check the input parameters for errors if brackt then if t <= min(x, y) || max(x, y) <= t then return LBFGSReturnCode.OutOfInterval // trival value t is out of the interval if 0.0 <= dx * (t - x) then return LBFGSReturnCode.IncreaseGradient // function must decrease from x if tmax < tmin then return LBFGSReturnCode.IncorrectTMinMax // incorrect tmin and tmax specified end if // Trial value selection. if fx < ft then /* Case 1: a higher function value. The minimum is brackt. If the cubic minimizer is closer to x than the quadratic one, the cubic one is taken, else the average of the minimizers is taken. */ newBrackt = true bound = true mc = cubicMinimizer(x, fx, dx, t, ft, dt) mq = quadMinimizer(x, fx, dx, t, ft) newt = if abs(mc - x) < abs(mq - x) then mc else mc + 0.5 * (mq - mc) else if dsign then /* Case 2: a lower function value and derivatives of opposite sign. The minimum is brackt. If the cubic minimizer is closer to x than the quadratic (secant) one, the cubic one is taken, else the quadratic one is taken. */ newBrackt = true bound = false mc = cubicMinimizer(x, fx, dx, t, ft, dt) mq = quadMinimizer2(x, dx, t, dt) newt = if abs(mc - t) > abs(mq - t) then mc else mq else if abs(dt) < abs(dx) then /* Case 3: a lower function value, derivatives of the same sign, and the magnitude of the derivative decreases. The cubic minimizer is only used if the cubic tends to infinity in the direction of the minimizer or if the minimum of the cubic is beyond t. Otherwise the cubic minimizer is defined to be either tmin or tmax. The quadratic (secant) minimizer is also computed and if the minimum is brackt then the the minimizer closest to x is taken, else the one farthest away is taken. */ bound = true mc = cubicMinimizer2(x, fx, dx, t, ft, dt, tmin, tmax) mq = quadMinimizer2(x, dx, t, dt) if brackt then newt = if abs(t - mc) < abs(t - mq) then mc else mq else newt = if abs(t - mc) > abs(t - mq) then mc else mq end if else /* Case 4: a lower function value, derivatives of the same sign, and the magnitude of the derivative does not decrease. If the minimum is not brackt, the step is either tmin or tmax, else the cubic minimizer is taken. */ bound = false newt = if brackt then cubicMinimizer(t, ft, dt, y, fy, dy) else if x < t then tmax else tmin end if /* Update the interval of uncertainty. This update does not depend on the new step or the case analysis above. - Case a: if f(x) < f(t), x <- x, y <- t. - Case b: if f(t) <= f(x) && f'(t)*f'(x) > 0, x <- t, y <- y. - Case c: if f(t) <= f(x) && f'(t)*f'(x) < 0, x <- t, y <- x. */ if fx < ft then // Case a newy = t newfy = ft newdy = dt else if dsign then // Case c newy = x newfy = fx newdy = dx newx = t // Cases b and c newfx = ft newdx = dt end if // Clip the new trial value in [tmin, tmax] if tmax < newt then newt = tmax if newt < tmin then newt = tmin // Redefine the new trial value if it is close to the upper bound of the interval if newBrackt && bound then mq = newx + 0.66 * (newy - newx) if newx < newy then if mq < newt then newt = mq else if newt < mq then newt = mq end if // Return the new trial value LineSearchTriInterval (newx, newfx, newdx, newy, newfy, newdy, newt, newBrackt) end update_trial_interval //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Find a minimizer of an interpolated cubic function. * @param u The value of one point, u. * @param fu The value of f(u). * @param du The value of f'(u). * @param v The value of another point, v. * @param fv The value of f(v). * @param dv The value of f'(v). * @return Double The minimizer of the interpolated cubic. */ private inline def cubicMinimizer (u: Double, fu: Double, du: Double, v: Double, fv: Double, dv: Double): Double = val d = v - u val theta = (fu - fv) * 3/d + du + dv var p = abs(theta) var q = abs(du) var r = abs(dv) val s = max(p, max(q, r)) val a = theta / s var gamma = s * sqrt(a~^2 - (du/s) * (dv/ s)) if v < u then gamma = -gamma p = gamma - du + theta q = gamma - du + gamma + dv r = p / q u + r * d end cubicMinimizer //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Find a minimizer of an interpolated cubic function. * @param u The value of one point, u. * @param fu The value of f(u). * @param du The value of f'(u). * @param v The value of another point, v. * @param fv The value of f(v). * @param dv The value of f'(v). * @param xmin The minimum value. * @param xmax The maximum value. * @return Double The minimizer of the interpolated cubic. */ private inline def cubicMinimizer2 (u: Double, fu: Double, du: Double, v: Double, fv: Double, dv: Double, xmin: Double, xmax: Double): Double = val d = v - u val theta = (fu - fv) * 3/d + du + dv var p = abs(theta) var q = abs(du) var r = abs(dv) val s = max(p, max(q, r)) val a = theta / s var gamma = s * sqrt(max(0, a~^2 - (du/s) * (dv/ s))) if u < v then gamma = -gamma p = gamma - dv + theta q = gamma - dv + gamma + du r = p / q if r < 0 && gamma != 0 then v - r * d else if d > 0 then xmax else xmin end cubicMinimizer2 //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Find a minimizer of an interpolated quadratic function. * @param u The value of one point, u. * @param fu The value of f(u). * @param du The value of f'(u). * @param v The value of another point, v. * @param fv The value of f(v). * @return Double The minimizer of the interpolated quadratic. */ private inline def quadMinimizer (u: Double, fu: Double, du: Double, v: Double, fv: Double): Double = val a = v - u u + du / ((fu - fv)/a + du)/2 * a end quadMinimizer //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Find a minimizer of an interpolated quadratic function. * @param u The value of one point, u. * @param du The value of f'(u). * @param v The value of another point, v. * @param dv The value of f'(v). * @return Double The minimizer of the interpolated quadratic. */ private inline def quadMinimizer2 (u: Double, du: Double, v: Double, dv: Double): Double = val a = u - v v + dv/(dv - du) * a end quadMinimizer2 end LBFGSMoreThuente