//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller, Vamsi Nadella
 *  @version 1.4
 *  @date    Sun Sep 23 21:14:14 EDT 2012
 *  @see     LICENSE (MIT style license file).
 */

package scalation.analytics.forecaster

import scala.collection.immutable.ListMap
import scala.math.sqrt

import scalation.linalgebra.{MatriD, VectoD, VectorD, VectoI}
import scalation.math.double_exp
import scalation.stat.Statistic

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `Forecaster` trait provides a common framework for several predictors.
 *  A predictor is for potentially unbounded responses (real or integer).
 *  When the number of distinct responses is bounded by some relatively small
 *  integer 'k', a classifier is likely more appropriate.
 *  Note, the 'train' method must be called first followed by 'eval'.
 *  All quality of fit fields hold -1.0 until 'diagnose' assigns them.
 */
trait Forecaster
{
    protected var sse  = -1.0                  // sum of squares error
    protected var ssr  = -1.0                  // sum of squares regression/model
    protected var sst  = -1.0                  // sum of squares total (ssr + sse)
    protected var mae  = -1.0                  // mean absolute error
    protected var mse  = -1.0                  // mean squared error
    protected var rmse = -1.0                  // root mean squared error
    protected var rSq  = -1.0                  // coefficient of determination (quality of fit)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a set of data vectors 'x's and their corresponding responses 'yy's,
     *  train the prediction function 'yy = f(x)' by fitting its parameters.
     *  The 'x' values must be provided by the implementing class.
     *  @param yy  the response vector
     *  @return  a `Forecaster` (the concrete value is chosen by the implementing class)
     */
    def train (yy: VectoD): Forecaster

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the error and useful diagnostics for the entire dataset.
     *  @param yy  the response vector
     */
    def eval (yy: VectoD): Unit

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the error and useful diagnostics for the test dataset.
     *  The default implementation does nothing.
     *  @param xx  the test data matrix
     *  @param yy  the test response vector
     *  FIX - implement in classes
     */
    def eval (xx: MatriD, yy: VectoD): Unit = {}

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute diagnostics for the predictor.  Override to add more diagnostics.
     *  Note, for 'mse' and 'rmse', 'sse' is divided by the number of instances
     *  'm' rather than the degrees of freedom.
     *  Note, no guard is applied when 'sst' is zero or 'yy' is empty, so the
     *  resulting 'rSq' (and other measures) may then be infinite or NaN.
     *  @see en.wikipedia.org/wiki/Mean_squared_error
     *  @param yy  the response vector
     *  @param ee  the error/residual vector
     */
    protected def diagnose (yy: VectoD, ee: VectoD): Unit =
    {
        val m = yy.dim                                 // number of instances
        sst   = (yy dot yy) - yy.sum~^2.0 / m          // sum of squares total
        sse   = ee dot ee                              // sum of squares error
        ssr   = sst - sse                              // sum of squares regression (not returned by fit)
        mse   = sse / m                                // raw mean square error
        rmse  = sqrt (mse)                             // root mean square error
        mae   = ee.norm1 / m                           // mean absolute error
        rSq   = ssr / sst                              // coefficient of determination R^2
    } // diagnose

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the quality of fit measures, in order: 'sst', 'sse', 'mse', 'rmse',
     *  'mae' and 'rSq' (the same order as 'fitLabels').
     *  Note, if 'sse > sst', the model introduces errors and the 'rSq' may be negative,
     *  otherwise, R^2 ('rSq') ranges from 0 (weak) to 1 (strong).
     *  Note that 'rSq' is the last measure, found at position 'index_rSq' (5).
     *  Override to add more quality of fit measures.
     */
    def fit: VectoD = VectorD (sst, sse, mse, rmse, mae, rSq)

    val index_rSq: Int = 5                             // index of rSq within 'fit'

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the labels for the fit.  Override when necessary.
     */
    def fitLabels: Seq [String] = Seq ("sst", "sse", "mse", "rmse", "mae", "rSq")

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new continuous data vector z, predict the y-value of f(z).
     *  @param z  the vector to use for prediction
     */
    def predict (z: VectoD): Double

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new discrete data vector z, predict the y-value of f(z).
     *  The vector is converted via 'toDouble' and passed to the continuous 'predict'.
     *  @param z  the vector to use for prediction
     */
    def predict (z: VectoI): Double = predict (z.toDouble)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Build a map of selected quality of fit measures/metrics.
     *  Each value is a string formatted to four decimal places; the entries are
     *  listed as "R-Squared", "SSE" and "RMSE".
     */
    def metrics: Map [String, Any] =
    {
        ListMap ("R-Squared" -> "%.4f".format (rSq),
                 "SSE"       -> "%.4f".format (sse),
                 "RMSE"      -> "%.4f".format (rmse))
    } // metrics

} // Forecaster trait