//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Sun Dec 28 12:00:07 EST 2014
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model Support: Activation Functions
 */

package scalation.analytics

import scala.math.{exp, log, max, tanh}

import scalation.linalgebra.{FunctionM_2M, FunctionV_2V, matrixize, vectorize}
import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD}
import scalation.math.FunctionS2S
import scalation.plot.Plot

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `AFF` class holds an Activation Function Family (AFF).
 *  @param f       the activation function itself
 *  @param fV      the vector version of the activation function
 *  @param fM      the matrix version of the activation function
 *  @param dV      the vector version of the activation function derivative
 *  @param dM      the matrix version of the activation function derivative
 *  @param bounds  the (lower, upper) bounds on the range of the activation function
 */
case class AFF (f: FunctionS2S, fV: FunctionV_2V, fM: FunctionM_2M,
                dV: FunctionV_2V, dM: FunctionM_2M, bounds: PairD = null)

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ActivationFun` object contains common Activation functions and provides
 *  both scalar and vector versions.
 *  @see en.wikipedia.org/wiki/Activation_function
 *  Convention: fun    activation function (e.g., sigmoid)
 *              funV   vector version of activation function (e.g., sigmoidV)
 *              funM   matrix version of activation function (e.g., sigmoidM)
 *              funDV  vector version of derivative (e.g., sigmoidDV)
 *              funDM  matrix version of derivative (e.g., sigmoidDM)
 *----------------------------------------------------------------------------------
 *  Supports: id, reLU, lreLU, eLU, tanh, sigmoid, gaussian, softmax
 *  Related functions: logistic, logit
 */
object ActivationFun
{
    //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // id: Identity functions

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the value of the Identity 'id' function at scalar 't'.
     *  @param t  the id function argument
     */
    def id (t: Double): Double = t

    def idV (tt: VectoD): VectoD = tt                            // vector version
    def idM (tt: MatriD): MatriD = tt                            // matrix version

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the derivative vector for 'id' function at vector 'yp' where
     *  'yp' is pre-computed by 'yp = idV (tt)'.
     *  @param yp  the derivative function vector argument
     */
    def idDV (yp: VectoD): VectoD = VectorD.one (yp.dim)

    val idDM: FunctionM_2M = matrixize (idDV _)                  // matrix version

    val f_id = AFF (id, idV, idM, idDV, idDM)                    // id family
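    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Usage sketch (illustrative values only): an `AFF` bundles the scalar, vector
     *  and matrix forms of an activation function with the vector and matrix forms
     *  of its derivative, so callers can write, e.g.,
     *      val tt = VectorD (-1.0, 0.0, 2.0)
     *      val yp = f_id.fV (tt)                    // activations: (-1.0, 0.0, 2.0)
     *      val dp = f_id.dV (yp)                    // derivatives: ( 1.0, 1.0, 1.0)
     *  Every 'funDV'/'funDM' below expects the pre-computed activation 'yp', not the
     *  raw input 'tt' (except the Gaussian derivatives, which need both).
     */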
    //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // reLU: Rectified Linear Unit functions

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the value of the Rectified Linear Unit 'reLU' function at scalar 't'.
     *  @param t  the reLU function argument
     */
    def reLU (t: Double): Double = max (0.0, t)

    val reLUV: FunctionV_2V = vectorize (reLU _)                 // vector version
    val reLUM: FunctionM_2M = matrixize (reLUV)                  // matrix version

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the derivative vector for 'reLU' function at vector 'yp' where
     *  'yp' is pre-computed by 'yp = reLUV (tt)'.
     *  @param yp  the derivative function vector argument
     */
    def reLUDV (yp: VectoD): VectoD = yp.map (y => if (y > 0.0) 1.0 else 0.0)

    val reLUDM: FunctionM_2M = matrixize (reLUDV _)              // matrix version

    val f_reLU = AFF (reLU, reLUV, reLUM, reLUDV, reLUDM)        // reLU family

    //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // lreLU: Leaky Rectified Linear Unit functions

//  private var a = 0.01             // the lreLU alpha parameter (0, 1] indicating how leaky the function is
    private var a = 0.3              // the lreLU alpha parameter (0, 1]: default value used in Keras
                                     // @see keras.io/layers/advanced-activations

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Set the lreLU 'a' (alpha) parameter for the Leaky Rectified Linear Unit functions.
     *  @param a_  the lreLU alpha parameter (0, 1] indicating how leaky the function is
     */
    def setA (a_ : Double)
    {
        if (a_ > 1.0) println ("setA: the lreLU 'a' (alpha) parameter cannot be greater than 1")
        else a = a_
    } // setA

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the value of the Leaky Rectified Linear Unit 'lreLU' function at scalar 't'.
     *  @param t  the lreLU function argument
     */
    def lreLU (t: Double): Double = max (a * t, t)

    val lreLUV: FunctionV_2V = vectorize (lreLU _)               // vector version
    val lreLUM: FunctionM_2M = matrixize (lreLUV)                // matrix version

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the derivative vector for 'lreLU' function at vector 'yp' where
     *  'yp' is pre-computed by 'yp = lreLUV (tt)'.
     *  @param yp  the derivative function vector argument
     */
    def lreLUDV (yp: VectoD): VectoD = yp.map (y => if (y >= 0.0) 1.0 else a)

    val lreLUDM: FunctionM_2M = matrixize (lreLUDV _)            // matrix version

    val f_lreLU = AFF (lreLU, lreLUV, lreLUM, lreLUDV, lreLUDM)  // lreLU family

    //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // eLU: Exponential Linear Unit functions
    // @see arxiv.org/pdf/1511.07289.pdf

    private var a2 = 1.0             // the eLU alpha parameter (0, infinity) indicating how leaky the function is

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Set the eLU 'a2' (alpha) parameter for the Exponential Linear Unit functions.
     *  @param a_  the eLU alpha parameter (0, infinity) indicating how leaky the function is
     */
    def setA2 (a_ : Double) { a2 = a_ } // setA2

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the value of the Exponential Linear Unit 'eLU' function at scalar 't'.
     *  @param t  the eLU function argument
     */
    def eLU (t: Double): Double = if (t > 0.0) t else a2 * (exp (t) - 1)

    val eLUV: FunctionV_2V = vectorize (eLU _)                   // vector version
    val eLUM: FunctionM_2M = matrixize (eLUV)                    // matrix version

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the derivative vector for 'eLU' function at vector 'yp' where
     *  'yp' is pre-computed by 'yp = eLUV (tt)'.
     *  @param yp  the derivative function vector argument
     */
    def eLUDV (yp: VectoD): VectoD = yp.map (y => if (y > 0.0) 1.0 else y + a2)

    val eLUDM: FunctionM_2M = matrixize (eLUDV _)                // matrix version

    val f_eLU = AFF (eLU, eLUV, eLUM, eLUDV, eLUDM)              // eLU family
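    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Worked check for 'eLUDV' (illustrative sketch): for t <= 0 the activation is
     *  yp = a2 * (exp (t) - 1), so the derivative a2 * exp (t) can be recovered from
     *  'yp' alone as 'yp + a2'.  For example, with a2 = 1 and t = 0:
     *      yp = 1 * (exp (0) - 1) = 0    and    yp + a2 = 1 = exp (0)
     *  which matches the slope 1 of the identity branch used for t > 0.
     */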
    //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // tanh: Hyperbolic Tangent functions

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /*  Compute the value of the 'tanh' function at scalar 't'.
     *  @param t  the tanh function argument
     */
    // @see scala.math.tanh

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the vector of values of the 'tanh' function applied to vector 'tt'.
     *  @param tt  the tanh function vector argument
     */
    def tanhV (tt: VectoD): VectoD = tt.map (t => tanh (t))

    val tanhM: FunctionM_2M = matrixize (tanhV _)                // matrix version

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the derivative vector for 'tanh' function at vector 'yp' where
     *  'yp' is pre-computed by 'yp = tanhV (tt)'.
     *  @param yp  the derivative function vector argument
     */
    def tanhDV (yp: VectoD): VectoD = VectorD.one (yp.dim) - yp~^2

    val tanhDM: FunctionM_2M = matrixize (tanhDV _)              // matrix version

    val f_tanh = AFF (tanh, tanhV, tanhM, tanhDV, tanhDM, (-1, 1))   // tanh family

    //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // sigmoid: Sigmoid functions

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the value of the Sigmoid function at 't'.  This is a special case of
     *  the logistic function, where 'a = 1', 'b = 1' and 'c = 1'.  It is also referred
     *  to as the standard logistic function, and it is the inverse of the logit function.
     *  @param t  the sigmoid function argument
     */
    def sigmoid (t: Double): Double = 1.0 / (1.0 + exp (-t))

    val sigmoidV: FunctionV_2V = vectorize (sigmoid _)           // vector version
    val sigmoidM: FunctionM_2M = matrixize (sigmoidV)            // matrix version

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the derivative vector for 'sigmoid' function at vector 'yp' where
     *  'yp' is pre-computed by 'yp = sigmoidV (tt)'.
     *  @param yp  the derivative function vector argument
     */
    def sigmoidDV (yp: VectoD): VectoD = yp * (VectorD.one (yp.dim) - yp)

    val sigmoidDM: FunctionM_2M = matrixize (sigmoidDV _)        // matrix version

    val f_sigmoid = AFF (sigmoid, sigmoidV, sigmoidM, sigmoidDV, sigmoidDM, (0, 1))   // sigmoid family
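    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Worked check for 'tanhDV' and 'sigmoidDV' (illustrative sketch): both derivatives
     *  can be expressed in terms of the pre-computed activation 'yp':
     *      d/dt tanh (t)    = 1 - tanh (t)^2                  = 1 - yp^2
     *      d/dt sigmoid (t) = sigmoid (t) * (1 - sigmoid (t)) = yp * (1 - yp)
     *  For example, at t = 0: tanh (0) = 0 gives derivative 1 - 0 = 1, while
     *  sigmoid (0) = 0.5 gives derivative 0.5 * 0.5 = 0.25.
     */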
    //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // gaussian: Gaussian functions

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the value of the Gaussian function at scalar 't'.
     *  @param t  the Gaussian function argument
     */
    def gaussian (t: Double): Double = exp (-t * t)

    val gaussianV: FunctionV_2V = vectorize (gaussian _)         // vector version
    val gaussianM: FunctionM_2M = matrixize (gaussianV)          // matrix version

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the derivative vector for the Gaussian function at vector 'yp' where
     *  'yp' is pre-computed by 'yp = gaussianV (tt)'.
     *  @param yp  the derivative function vector argument
     *  @param tt  the domain value for the function
     */
    def gaussianDV (yp: VectoD, tt: VectoD): VectoD = tt * yp * -2.0

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the derivative matrix for the Gaussian function at matrix 'yp' where
     *  'yp' is pre-computed by 'yp = gaussianM (tt)'.
     *  @param yp  the derivative function matrix argument
     *  @param tt  the domain value for the function
     */
    def gaussianDM (yp: MatriD, tt: MatriD): MatriD =
    {
        MatrixD (for (i <- yp.range1) yield tt(i) * yp(i) * -2.0)
    } // gaussianDM

//  val f_gaussian = AFF (gaussian, gaussianV, gaussianM, gaussianDV, gaussianDM)   // gaussian family (disabled: its derivatives also require 'tt')

    //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // softmax: Softmax functions - FIX

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the vector of values of the Softmax function applied to vector 'tt'.
     *  @see https://en.wikipedia.org/wiki/Softmax_function
     *  Note, scalar function version 'softmax' is not needed.
     *  @param tt  the softmax function vector argument
     */
    def softmaxV (tt: VectoD): VectoD =
    {
        val et  = tt.map (exp (_))
        val sum = et.sum
        et.map (_ / sum)
    } // softmaxV

    val softmaxM: FunctionM_2M = matrixize (softmaxV _)          // matrix version

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the derivative vector for the Softmax function at vector 'yp' where
     *  'yp' is pre-computed by 'yp = softmaxV (tt)'.
     *  FIX: currently returns null since the true derivative is a Jacobian matrix;
     *  see the commented-out 'softmaxDM' below.
     *  @param yp  the derivative function vector argument
     */
    def softmaxDV (yp: VectoD): VectoD = null

    val softmaxDM: FunctionM_2M = matrixize (softmaxDV _)        // matrix version

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the derivative matrix (Jacobian) for the Softmax function at vector 'yp'
     *  where 'yp' is pre-computed by 'yp = softmaxV (tt)'.
     *  @param yp  the derivative function vector argument
     *
    def softmaxDM (yp: VectoD): MatriD =
    {
        val z = new MatrixD (yp.dim, yp.dim)
        for (i <- yp.range; j <- yp.range) z(i, j) = if (i == j) yp(i) * (1.0 - yp(j))
                                                     else        -yp(i) * yp(j)
        z
    } // softmaxDM
     */

    val f_softmax = AFF (null, softmaxV, softmaxM, softmaxDV, softmaxDM)    // softmax family

    //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // logistic: Logistic functions - related function

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the value of the Logistic function at scalar 't'.
     *  With the default settings, it is identical to 'sigmoid'.
     *  Note, it is not typically used as an activation function.
     *  @see www.cs.xu.edu/math/math120/01f/logistic.pdf
     *  @param t  the logistic function argument
     *  @param a  the shift parameter (1 => mid at 0, <1 => mid shifts left, >1 => mid shifts right)
     *  @param b  the spread parameter (1 => sigmoid rate, <1 => slower than, >1 => faster than);
     *            although typically positive, a negative 'b' will cause the function to decrease
     *  @param c  the scale parameter (range is 0 to c)
     */
    def logistic (t: Double, a: Double = 1.0, b: Double = 1.0, c: Double = 1.0): Double =
    {
        c / (1.0 + a * exp (-b*t))
    } // logistic

    def logisticV (tt: VectoD, a: Double = 1.0, b: Double = 1.0, c: Double = 1.0): VectoD =
    {
        tt.map (t => c / (1.0 + a * exp (-b*t)))
    } // logisticV
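    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Worked check for 'logistic' (illustrative sketch): with the default parameters
     *  a = b = c = 1,
     *      logistic (t) = 1 / (1 + exp (-t)) = sigmoid (t),   e.g., logistic (0.0) = 0.5
     *  Increasing 'c' rescales the range to (0, c), 'a' shifts the midpoint and 'b'
     *  controls how quickly the function rises.
     */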
    //++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
    // logit: Logit functions - related function

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the log of the odds (Logit) of an event occurring (e.g., success, 1).
     *  The inverse of the 'logit' function is the standard logistic function
     *  (sigmoid function).
     *  Note, it is not typically used as an activation function.
     *  @param p  the probability, a number between 0 and 1.
     */
    def logit (p: Double): Double = log (p / (1.0 - p))

    val logitV: FunctionV_2V = vectorize (logit _)               // vector version

} // ActivationFun object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ActivationFunTest` is used to test the `ActivationFun` object.
 *  > runMain scalation.analytics.ActivationFunTest
 */
object ActivationFunTest extends App
{
    import ActivationFun._

    val t = VectorD.range (-30, 30) / 6.0
    val p = VectorD.range (1, 59) / 60.0

    // Test the vector version of activation functions
    val ident  = idV (t);       new Plot (t, ident,  null, "t vs. ident")
    val reluf  = reLUV (t);     new Plot (t, reluf,  null, "t vs. reluf")
    val lreluf = lreLUV (t);    new Plot (t, lreluf, null, "t vs. lreluf")
    val eluf   = eLUV (t);      new Plot (t, eluf,   null, "t vs. eluf")
    val tanhh  = tanhV (t);     new Plot (t, tanhh,  null, "t vs. tanhh")
    val sigmo  = sigmoidV (t);  new Plot (t, sigmo,  null, "t vs. sigmo")
    val gauss  = gaussianV (t); new Plot (t, gauss,  null, "t vs. gauss")
    val softmo = softmaxV (t);  new Plot (t, softmo, null, "t vs. softmo")

    // Test the vector version of related functions
    val logit  = logitV (p);    new Plot (p, logit,  null, "p vs. logit")
    val logist = logisticV (t); new Plot (t, logist, null, "t vs. logist")

    // Test the vector version of activation function derivatives
    val identD = idDV (ident);          new Plot (t, identD, null, "t vs. identD")
    val relufD = reLUDV (reluf);        new Plot (t, relufD, null, "t vs. relufD")
    val lrlufD = lreLUDV (lreluf);      new Plot (t, lrlufD, null, "t vs. lrlufD")
    val elufD  = eLUDV (eluf);          new Plot (t, elufD,  null, "t vs. elufD")
    val tanhhD = tanhDV (tanhh);        new Plot (t, tanhhD, null, "t vs. tanhhD")
    val sigmoD = sigmoidDV (sigmo);     new Plot (t, sigmoD, null, "t vs. sigmoD")
    val gaussD = gaussianDV (gauss, t); new Plot (t, gaussD, null, "t vs. gaussD")
//  val softmD = softmaxDV (softmo);    new Plot (t, softmD, null, "t vs. softmD")

} // ActivationFunTest

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ActivationFunTest2` is used to test the `ActivationFun` object.
 *  @see en.wikipedia.org/wiki/Softmax_function
 *  > runMain scalation.analytics.ActivationFunTest2
 */
object ActivationFunTest2 extends App
{
    import ActivationFun.softmaxV

    val t = VectorD (1.0, 2.0, 3.0, 4.0, 1.0, 2.0, 3.0)
    println (s"softmaxV ($t) = \n ${softmaxV (t)}")

} // ActivationFunTest2
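
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ActivationFunTest3` object sketches a numerical cross-check of the analytic
 *  derivative vectors against central finite differences.  It is an illustrative
 *  example only: the object name and the step size 'h' are assumed choices, not part
 *  of the `ActivationFun` API.
 *  > runMain scalation.analytics.ActivationFunTest3
 */
object ActivationFunTest3 extends App
{
    import ActivationFun._

    val h = 1E-5                                         // finite difference step size (assumed)
    val t = VectorD.range (-30, 30) / 6.0

    // analytic derivatives computed from the pre-computed activations 'yp'
    val sigmoD = sigmoidDV (sigmoidV (t))
    val tanhhD = tanhDV (tanhV (t))

    // central finite difference approximations of the same derivatives
    val sigmoN = (sigmoidV (t + h) - sigmoidV (t - h)) / (2.0 * h)
    val tanhhN = (tanhV (t + h) - tanhV (t - h)) / (2.0 * h)

    new Plot (t, sigmoD, sigmoN, "t vs. sigmoid derivative (analytic, numeric)")
    new Plot (t, tanhhD, tanhhN, "t vs. tanh derivative (analytic, numeric)")

    println (s"total |analytic - numeric| for sigmoid = ${(sigmoD - sigmoN).map (x => math.abs (x)).sum}")
    println (s"total |analytic - numeric| for tanh    = ${(tanhhD - tanhhN).map (x => math.abs (x)).sum}")

} // ActivationFunTest3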