//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  Santosh Uttam Bobade, Vinay Kumar Bingi, John Miller
 *  @version 1.5
 *  @date    Sat Jun 9 14:09:25 EDT 2018
 *  @see     LICENSE (MIT style license file).
 */

package scalation.columnar_db

import scalation.linalgebra.{Vec, VectorD, VectorI}
import scalation.math.noDouble
import scalation.random.Normal

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `Imputation` trait specifies an imputation operation to be defined by
 *  the objects implementing it, i.e.,
 *      `Interpolate`         - Uses a Linear Interpolation technique to impute the missing value
 *      `ImputeMean`          - Estimates the mean of the vector filtered from missing values
 *      `ImputeNormal`        - Estimates the mean and variance of the vector filtered from missing values
 *      `ImputeMovingAverage` - Imputes the missing value in the vector by taking the historical
 *                              Moving Average of the last 'n' seasonal values.
 */
trait Imputation
{
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Impute the missing value in the vector 'c'.
     *  @param c     the vector with the missing values
     *  @param args  an optional parameter which takes a variable no. of numeric values,
     *               args(0) - index at which the missing value needs to be imputed
     *               args(1) - distance around the index to consider when imputing from a subset of values
     *               args(2) - season 'period' to consider for taking the historical average
     *  @return      the value to use in place of the missing one
     */
    def impute (c: Vec, args: Int*): Double

} // Imputation trait


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `Interpolate` object imputes the missing value in the vector using Linear Interpolation
 *  based on the previous and next values.
 */
object Interpolate extends Imputation
{
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Impute the missing value in the vector using Linear Interpolation based on the
     *  previous and next values, i.e., return the average of up to 'd' values before
     *  index 'i' and up to 'd' non-missing values after it.
     *  Only the forward pass skips over missing values (those equal to `noDouble`);
     *  values before index 'i' are used as they are.
     *  The result is NaN when no neighboring value is available (count of zero).
     *  @param c     the vector with the missing values
     *  @param args  an optional parameter which takes a variable no. of numeric values,
     *               args(0) - index at which the missing value needs to be imputed (default 0)
     *               args(1) - no. of values around index 'i' to consider for interpolation (default 1)
     *  @return      the average of the neighboring values that were found
     */
    def impute (c: Vec, args: Int*): Double =
    {
        val n = args.length
        val i = if (n >= 1) args(0) else 0          // impute at index 0, by default
        val d = if (n >= 2) args(1) else 1          // interpolate using 1 prev. and next value, by default
        var sum   = 0.0
        var count = 0

        // backward pass: take the (up to) 'd' values preceding index 'i' that exist
        for (k <- 1 to d if i >= k) {
            sum   += Vec (c, i-k).asInstanceOf [Double]
            count += 1
        } // for

        // forward pass: take the (up to) 'd' non-missing values following index 'i'
        var j = 0                                   // count of missing value places to skip forward
        for (k <- 1 to d) {
            while (i+j+k < c.size && Vec (c, i+j+k).asInstanceOf [Double].equals (noDouble)) j += 1
            if (i+j+k < c.size) {
                sum   += Vec (c, i+j+k).asInstanceOf [Double]
                count += 1
            } // if
        } // for

        sum / count
    } // impute

} // Interpolate object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ImputeMean` object uses the mean of the vector filtered of missing values
 *  as estimates for missing values.
 */
object ImputeMean extends Imputation
{
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Impute the missing value in the vector using the filtered mean.
     *  FIX - filter out the missing values before taking the mean
     *  @param c     the vector filtered from the missing values
     *  @param args  an optional parameter which is not required for this imputation method
     *  @return      the mean of vector 'c' after conversion to doubles
     */
    def impute (c: Vec, args: Int*): Double =
    {
        val cd = Vec.toDouble (c)
        Vec.mean (cd).asInstanceOf [Double]
    } // impute

} // ImputeMean object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ImputeNormal` object uses a Normal distribution estimated from the filtered
 *  mean and variance as estimates for missing values.
 */
object ImputeNormal extends Imputation
{
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Estimate the mean and variance of the vector filtered from the missing values
     *  and return a value generated by `Normal (mean, variance)`.
     *  FIX - filter out the missing values before taking the mean and variance
     *  @param c     the vector filtered from the missing values
     *  @param args  an optional parameter which is not required for this imputation method
     *  @return      a value generated from the estimated Normal distribution
     */
    def impute (c: Vec, args: Int*): Double =
    {
        val cd       = Vec.toDouble (c)
        val mean     = Vec.mean (cd).asInstanceOf [Double]
        val variance = Vec.variance (cd).asInstanceOf [Double]
        val rn       = Normal (mean, variance)
        rn.gen
    } // impute

} // ImputeNormal object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ImputeMovingAverage` object imputes the missing value in the vector by taking
 *  the historical Moving Average of the last 'n' seasonal values.
 */
object ImputeMovingAverage extends Imputation
{
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Impute the missing value in the vector by taking the historical Moving Average of the
     *  last 'n' seasonal values, i.e., the values at indices 'i - period', 'i - 2*period', ...
     *  that do not fall before the start of the vector.
     *  The result is NaN when no such historical value exists (count of zero).
     *  @param c     the vector with the missing values
     *  @param args  an optional parameter which takes a variable no. of numeric values,
     *               args(0) - index at which the missing value needs to be imputed (default 0)
     *               args(1) - no. of values before index 'i' to consider for taking the
     *                         historical average (default 3)
     *               args(2) - seasonal 'period' value for the vector 'c' (default 1)
     *  @return      the average of the historical values that were found
     */
    def impute (c: Vec, args: Int*): Double =
    {
        val n      = args.length
        val i      = if (n >= 1) args(0) else 0
        val d      = if (n >= 2) args(1) else 3
        val period = if (n >= 3) args(2) else 1
        var sum    = 0.0
        var count  = 0

        // step back one period at a time, skipping steps that would precede index 0
        for (k <- 1 to d if i >= k * period) {
            sum   += Vec (c, i - (k * period)).asInstanceOf [Double]
            count += 1
        } // for

        sum / count
    } // impute

} // ImputeMovingAverage object


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ImputeNormalWin` object imputes the missing values in the vector by generating
 *  them from a Normal distribution whose mean and variance are estimated from a
 *  sliding window over the last 'n' values.
 */
object ImputeNormalWin
{
    import scalation.util.CircularQueue

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Impute the missing values in this Vector using a Normal Distribution for a sliding window.
     *  Each missing value (one equal to `noDouble`) is replaced by a value generated by
     *  `Normal (mean, variance)`, where the mean and variance are computed from the values
     *  currently in the window; imputed values are themselves added to the window.
     *  The estimates are based on the number of values actually in the window, so positions
     *  before the window has filled up are not biased by dividing by the full window size.
     *  With no value in the window the mean is taken as 0.0, and with fewer than two values
     *  the variance is taken as 0.0.
     *  @param c     the vector with the missing values
     *  @param args  an optional parameter which takes a variable no. of numeric values,
     *               args(0) - size 'n' of the sliding window (default 5)
     *  @return      a new vector holding the values of 'c' with the missing ones imputed
     */
    def imputeAll (c: Vec, args: Int*): VectorD =
    {
        val n          = if (args.length > 0) args(0) else 5
        val queue      = new CircularQueue [Double] (n)
        val result     = new VectorD (c.size)
        var isFullOnce = false                      // set once 'n' values have entered the window
        var sumSq      = 0.0                        // running sum of squares over the window
        var sum        = 0.0                        // running sum over the window

        // Update the running sums for new value 'x', first removing the contribution
        // of the oldest value once the window has filled up.
        def tally (x: Double): Unit =
        {
            if (isFullOnce) {
                val first = queue.dequeue
                sumSq -= first * first
                sum   -= first
            } // if
            sumSq += x * x
            sum   += x
            // NOTE(review): clamping 'sum' at zero corrupts the running sum whenever the data
            // can be negative - confirm that only non-negative data is expected here
            sum    = math.max (sum, 0.0)
            sumSq  = math.max (sumSq, 0.0)          // guard against round-off going negative
        } // tally

        for (i <- c.indices) {
            if (Vec (c, i) equals noDouble) {
                val m        = math.min (i, n)      // no. of values currently in the window
                val mean     = if (m > 0) sum / m else 0.0
                val variance = if (m > 1) math.max ((sumSq - sum * sum / m) / (m - 1.0), 0.0)
                               else 0.0
                val normal   = Normal (mean, variance)
                result(i)    = normal.gen
            } else {
                result(i) = Vec (c, i).asInstanceOf [Double]
            } // if
            if (i == n) isFullOnce = true           // window holds 'n' values from here on
            tally (result(i))
            queue += result(i)
        } // for

        result
    } // imputeAll

} // ImputeNormalWin object