//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Sun Sep 23 21:14:14 EDT 2012
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model Framework: Base Trait for Predictors
 */

package scalation.analytics

import scala.collection.mutable.Set

import scalation.linalgebra.{MatriD, MatrixD, VectoD, VectorD, VectoI}
import scalation.plot.Plot
import scalation.stat.StatVector.corr
import scalation.util.banner

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `Predictor` trait provides a common framework for several types of predictors.
 *
 *      y  =  f(x; b) + e
 *
 *      y  =  response/output value
 *      x  =  predictor/input vector
 *      b  =  parameter vector
 *      e  =  residual/error value
 *
 *  A predictor is for potentially unbounded responses (real or integer).
 *  When the number of distinct responses is bounded by some relatively small
 *  integer 'k', a classifier is likely more appropriate.
 *  Note, the 'train' method (from `Model`) must be called first followed by 'eval'.
 */
trait Predictor
      extends Model
{
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the 'used' data matrix 'x'.  Mainly for derived classes where 'x' is expanded
     *  from the given columns in 'x_', e.g., `QuadRegression` adds squared columns.
     */
    def getX: MatriD

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the 'used' response vector 'y'.  Mainly for derived classes where 'y' is
     *  transformed, e.g., `TranRegression`, `Regression4TS`.
     */
    def getY: VectoD

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the vector of residuals/errors.
     */
    def residual: VectoD

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Analyze a dataset using this model using ordinary training with the
     *  'train' method.
     *  @param x_   the training/full data/input matrix (impl. classes should default x_ to x)
     *  @param y_   the training/full response/output vector (impl. classes should default y_ to y)
     *  @param x_e  the test/full data/input matrix (impl. classes should default x_e to x)
     *  @param y_e  the test/full response/output vector (impl. classes should default y_e to y)
     *  @return the analyzed predictor
     */
    def analyze (x_ : MatriD, y_ : VectoD, x_e: MatriD, y_e: VectoD): Predictor

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new continuous data/input vector 'z', predict the 'y'-value of 'f(z)'.
     *  @param z  the vector to use for prediction
     */
    def predict (z: VectoD): Double

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a new discrete data/input vector 'z', predict the 'y'-value of 'f(z)'.
     *  The integer vector is converted to a real-valued vector and passed on to
     *  the continuous 'predict' method.
     *  @param z  the vector to use for prediction
     */
    def predict (z: VectoI): Double = predict (z.toDouble)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Predict the value of 'y = f(z)' by evaluating the formula 'y = b dot z',
     *  for each row of matrix 'z'.
     *  @param z  the new matrix to predict (impl. classes should default z to x)
     */
    def predict (z: MatriD): VectoD

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Perform forward selection to add the most predictive variable to the existing
     *  model, returning the variable to add and the new model.
     *  May be called repeatedly.
     *  @see `Fit` for index of QoF measures.
     *  @param cols     the columns of matrix x included in the existing model
     *  @param index_q  index of Quality of Fit (QoF) to use for comparing quality
     */
    def forwardSel (cols: Set [Int], index_q: Int): (Int, Predictor)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the correlation matrix for the columns in data matrix 'xx'.
     *  The 'corr' function is applied to a `MatrixD`, so only that implementation
     *  of `MatriD` is currently supported.
     *  FIX - support the other `MatriD` implementations.
     *  @param xx  the data matrix whose correlation matrix is sought
     *  @throws ClassCastException if 'xx' is a `MatriD` other than a `MatrixD`
     */
    def corrMatrix (xx: MatriD): MatriD =
    {
        xx match {
        case dense: MatrixD => corr (dense)
        case _ =>
            // same exception type as the former blind cast, but with a message naming the culprit
            throw new ClassCastException (
                s"corrMatrix: requires a MatrixD, but was given ${xx.getClass.getName}")
        } // match
    } // corrMatrix

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Test the model on the full dataset (i.e., train and evaluate on full dataset).
     *  Prints a banner and the model's report and optionally plots the actual vs.
     *  predicted response.
     *  @param modelName  the name of the model being tested
     *  @param doPlot     whether to plot the actual vs. predicted response
     */
    def test (modelName: String, doPlot: Boolean = true): Unit =
    {
        banner (s"Test $modelName")
        val (x, y) = (getX, getY)                                     // get full data x and response y
        analyze (x, y, x, y)                                          // train and evaluate the model on full dataset
        println (report)                                              // print report about trained model

        if (doPlot) {
            val idx = VectorD.range (0, y.dim)                        // data instance index (for horizontal axis)
            val yp  = predict (x)                                     // predicted response
            new Plot (idx, y, yp, s"$modelName: y vs. yp", true)      // plot actual vs. predicted response
        } // if
    } // test

} // Predictor trait