//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author Hao Peng * @version 1.6 * @date Sun Nov 19 12:27:00 EST 2017 * @see LICENSE (MIT style license file). * * @title Model Support: Statistical Test for Time Series Stationarity * * Kwiatkowski–Phillips–Schmidt–Shin (KPSS) Test * Time Series Stationarity around a deterministic trend * @see debis.deu.edu.tr/userweb//onder.hanedar/dosyalar/kpss.pdf * @see github.com/olmallet81/URT * * Unit Root Tests for Time Series Stationarity * (1 is a root of the process characteristic equation) * @see github.com/olmallet81/URT */ package scalation.analytics package forecaster import scala.collection.immutable.HashMap import scala.Double.NaN import scala.util.control.Breaks.{break, breakable} import scalation.linalgebra.{MatrixD, VectoD, VectorD, VectorS} import scalation.math.double_exp import scalation.random.Normal //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `UnitRoot` abstract class provides a common framework for various unit * root testers for Time Series Stationarity. * This code is translated from the C++ code found in * @see github.com/olmallet81/URT. * @param data the time series vector * @param lags the number of lags to use * @param trend type of trend to test for */ abstract class UnitRoot (data: VectoD, var lags: Int, var trend: String) { protected var lagsType: String = null // default lags value long or short protected var maxLags = 0 // maximum number of lags for lag length optimization protected var validTrends: VectorS = null // vector of test valid regression trends protected var y = data // copy of the reference to the original time series protected var coeff: HashMap [Double, HashMap [Int, VectorD]] = null // HashMap containing critical values coefficients protected var testName: String = null // test name protected var stat = 0.0 // test statistic protected var pval = 1.0 // test p-value protected var nobs = data.size // number of observations protected var npar = 0 // number of parameters excluding lag difference terms protected var newLags = false // control if a new number of lags has been chosen protected var newTrend = false // control if a new trend has been chosen private var prevLagsType: String = null // previous type of lags private var prevTrend: String = null // previous regression trend private var trendType: String = null // regression trend for outputting test results private var prevLags = 0 // previous number of lags private var optim = false // control if lag length is optimized private var newTest = false // control if a new test is run (true) or all parameters remain the same (false) private val probas = Array (0.001, 0.005, 0.01, 0.025, 0.05, 0.10 , 0.20 , 0.50, 0.80 , 0.90, 0.95 , 0.975, 0.99, 0.995, 0.999) // array of probabilities for p-value computation private val criticalValues = Array.ofDim [Double] (probas.length) // array containing tne test critical values //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Get test statistic. */ def getStat (): Double = stat //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Get test pvalue. */ def getPval (): Double = pval //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Get test valid trends. */ def getTrends (): VectorS = validTrends //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute test statistic. */ def statistic (): Double //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute test statistic p-value. */ def pvalue (): Double = { if (stat.isNaN) pval = NaN else if (newTest) { // if a new test has been run computeCV () // computing critical values computePval () // computing p-value newTest = false } // if pval } // pvalue //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Output test results (can be overridden by derived classes). */ def show (): Unit = { println ("\n " + testName + " Test Results") // outputting test name println (" ====================================") println (s" Statistic = $stat") // outputting test statistic print (" P-value = ") // outputting p-value if (pval <= probas(0)) print ("< 0.001") else if (pval >= probas(14)) print ("> 0.999") else print (pval) println () println (s" Lags = $lags") println (s" Trend = $trendType") println (" ------------------------------------\n") println (" Test Hypothesis") // outputting test hypothesis println (" ------------------------------------") println (" H0: The process is weakly stationary") // KPSS hypotheses println (" H1: The process contains a unit root") println () println (" Critical Values") // outputting critical values println (" ---------------") val idx = Array (12, 10, 9) // declaring array of critical value indexes if (stat.isNaN) { println (s" 1% $NaN") println (s" 5% $NaN") println (s" 10% $NaN") } else { println (s" 1% ${criticalValues(idx(0))}") println (s" 5% ${criticalValues(idx(1))}") println (s" 10% ${criticalValues(idx(2))}") } // if println () println (" Test Conclusion") // outputting test conclusion println (" ---------------") if (pval <= 0.01) println ("We can reject H0 at the 1% significance level") else if (pval <= 0.05) println ("We can reject H0 at the 5% significance level") else if (pval <= 0.10) println ("We can reject H0 at the 10% significance level") else if (! pval.isNaN) println ("We cannot reject H0") else println ("We cannot conclude, NaN produced") } // show //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Reset 'y' to the original data. */ protected def setData (): Unit = { y = data } //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Set number of lags, checking input validity. This method needs to know * optim so it needs to be run after set_method (). */ protected def setLags (): Unit = { if (! optim) { if (lags < 0) { // number of lags cannot be strictly negative lags = 0 println ("\n WARNING: number of lags cannot be negative, it has been set to 0 by default.\n") } // if if (! lagsType.isEmpty && lags != prevLags) lagsType = "" // if user has switched from a default lags value // to a value of his choice (for all tests) } else if (maxLags < 0) { // maxLags cannot be strictly negative maxLags = 0 println ("\n WARNING: maximum number of lags cannot be negative, it has been set to a default value (L12-rule).\n") } // if // updating lags only for PP and KPSS tests, for ADF and DFGLS tests lags will be updated at the next optimization or // set back to prevLags if maxLags, trend, method and level are the same as before if ((optim && (maxLags == 0)) || !lagsType.isEmpty) { // computing default lags value for KPSS test maxLags = if (lagsType == "short") (4 * (0.01 * nobs)~^0.25).toInt // short => L4-rule (Schwert 1989) else (12 * (0.01 * nobs)~^0.25).toInt // long => L12-rule (Schwert 1989) lags = maxLags } // if if (!optim && lags != prevLags) { // for all tests if the number of lags is still different than its previous value newTest = true newLags = true prevLags = lags } // if } // setLags //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Set lags type long or short for PP and KPSS default lags value or ADF * and DFGLS default maxlags value. */ protected def setLagsType (): Unit = { if (lagsType.isEmpty || (lagsType == prevLagsType)) return // skipping this method if lagsType is empty or did not change if ((lagsType != "long") && (lagsType != "short")) { println("\n WARNING: unknown default type of lags, long has been selected by default.\n") lagsType = "long" // default lags type is long } // if prevLagsType = lagsType } // setLagsType //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Set regression trend. */ protected def setTrend (): Unit = { if (trend == prevTrend) return // skipping this method if trend did not change if (! validTrends.contains (trend)) { // checking input trend validity, setting default trend to constant trend = "c" trendType = "constant" npar = 2 println ("\n WARNING: unknown regression trend selected, regression with constant term has been selected by default.\n") println (s" Possible trends for this test are $validTrends") } else { if (trend == "c") { trendType = "constant" npar = 2 } else if (trend == "nc") { trendType = "no constant" npar = 1 } else if (trend == "ct") { trendType = "constant trend" npar = 3 } else if (trend == "ctt") { trendType = "quadratic trend" npar = 4 } // if } // if if (trend != prevTrend) { newTest = true newTrend = true prevTrend = trend } // if } // setTrend //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** OLS demeaning or detrending. */ protected def olsDetrend (): Unit = { var w: MatrixD = null if (trend == "ct") { val v = Array.ofDim [Double] (nobs, 2) for (i <- 0 until v.length) { v(i)(0) = 1.0; v(i)(1) = i } w = new MatrixD (v) } else if (trend == "nc") { throw new IllegalArgumentException ("\n ERROR: olsDetrend: no detrending possible when regression trend set to no constant.\n") } // if if (trend == "c") { val mu = y.mean y = VectorD (for (i <- y.range) yield y(i) - mu) } else { val reg = new Regression (w, y) reg.train ().eval () y = reg.residual } // if } // olsDetrend //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute critical value from probabilities. */ private def computeCV (): Unit = { val n = nobs - lags - 1 // computing adjusted number of observations for (i <- probas.indices) { criticalValues(i) = 0 // computing critical value val n0 = coeff.getOrElse(probas(i), null).getOrElse (0, null).size for (j <- 0 until n0) criticalValues(i) += coeff.getOrElse (probas(i), null).getOrElse (0, null)(j) / (n~^j) val n1 = coeff.getOrElse(probas(i), null).getOrElse (1, null).size for (j <- 0 until n1) criticalValues(i) += coeff.getOrElse (probas(i), null).getOrElse (1, null)(j) * ((lags.toDouble/n)~^(j+1)) } // for } // computeCV //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute p-value by linear interpolation from critical values. */ private def computePval (): Unit = { if (stat <= criticalValues(0)) pval = probas(0) // if stat is smaller than critical value for first probability (in absolute value) else { breakable {for (i <- 1 until probas.length if stat <= criticalValues(i)) { pval = probas(i-1) + (stat - criticalValues(i-1)) * (probas(i) - probas(i-1)) / (criticalValues(i) - criticalValues(i-1)) break () }} // breakable for } // if if (stat > criticalValues.last) pval = probas.last // if stat is greater than critical value for last probability in absolute value } // computePval } // UnitRoot abstract class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The companion object for `KPSS` class, containing critical value coefficients * needed in KPSS tests for Time Series Stationarity around a deterministic trend. */ object KPSS_Stationarity { val coeffKpss = HashMap ( "c" -> HashMap ( (0.001 -> HashMap ( (0 -> VectorD (0.0167565, 0.1628078, -0.01441111)), (1 -> VectorD (0.2036426, 0.1065976, -0.5134231)))), (0.005 -> HashMap ( (0 -> VectorD (0.02160067, 0.1649124, -0.005748196)), (1 -> VectorD (0.1939746, 0.180736, -0.6535936)))), (0.01 -> HashMap ( (0 -> VectorD (0.02466286, 0.1635288, 0.04500264)), (1 -> VectorD (0.1893402, 0.2089429, -0.6943796)))), (0.025 -> HashMap ( (0 -> VectorD (0.03026605, 0.1623029, 0.09417188)), (1 -> VectorD (0.1847549, 0.2238625, -0.6818411)))), (0.05 -> HashMap ( (0 -> VectorD (0.03650665, 0.1643194, 0.118059)), (1 -> VectorD (0.1819509, 0.2103519, -0.5991279)))), (0.1 -> HashMap ( (0 -> VectorD (0.04597858, 0.1695003, 0.1046518)), (1 -> VectorD (0.1809212, 0.1596069, -0.4150671)))), (0.2 -> HashMap ( (0 -> VectorD (0.06222337, 0.1785042, 0.06728867)), (1 -> VectorD (0.1794514, 0.1031811, -0.2438007)))), (0.5 -> HashMap ( (0 -> VectorD (0.1188952, 0.2136505, -0.1988974)), (1 -> VectorD (0.1738762, -0.04517147, 0.1061083)))), (0.8 -> HashMap ( (0 -> VectorD (0.2413558, 0.2427114, -0.4925517)), (1 -> VectorD (0.07161341, 0.3635483, -0.8456676)))), (0.9 -> HashMap ( (0 -> VectorD (0.347477, 0.1815891, -0.8897294)), (1 -> VectorD (-0.1026364, 0.4255538, -0.6916551)))), (0.95 -> HashMap ( (0 -> VectorD (0.461564, -0.03241585, -0.4093091)), (1 -> VectorD (-0.4579306, 1.037593, -0.9517771)))), (0.975 -> HashMap ( (0 -> VectorD (0.5808698, -0.4198131, 1.286591)), (1 -> VectorD (-1.021283, 2.743161, -2.926945)))), (0.99 -> HashMap ( (0 -> VectorD (0.7434379, -1.282285, 7.296387)), (1 -> VectorD (-2.025517, 6.584114, -8.361188)))), (0.995 -> HashMap ( (0 -> VectorD (0.8686703, -2.108955, 13.70689)), (1 -> VectorD (-3.003148, 11.11568, -15.74966)))), (0.999 -> HashMap ( (0 -> VectorD (1.162377, -4.578843, 34.27324)), (1 -> VectorD (-5.808165, 25.97152, -41.79132)))) ), "ct" -> HashMap ( (0.001 -> HashMap( (0 -> VectorD (0.01234275, 0.1655343, 0.01672944, -1.675843)), (1 -> VectorD (0.2106948, 0.03970888, -0.3368414)))), (0.005 -> HashMap( (0 -> VectorD (0.01526251, 0.1588324, 0.2212439, -3.176791)), (1 -> VectorD (0.2015772, 0.1179992, -0.5024903)))), (0.01 -> HashMap( (0 -> VectorD (0.01699618, 0.1544975, 0.3604586, -4.262545)), (1 -> VectorD (0.1969925, 0.1538411, -0.5702144)))), (0.025 -> HashMap( (0 -> VectorD (0.02005522, 0.1485785, 0.5907839, -6.211238)), (1 -> VectorD (0.1880815, 0.2169062, -0.6777426)))), (0.05 -> HashMap( (0 -> VectorD (0.02326679, 0.1414555, 0.8224264, -7.893967)), (1 -> VectorD (0.1804144, 0.2639471, -0.7486399)))), (0.1 -> HashMap( (0 -> VectorD (0.02781531, 0.1377224, 0.917243, -8.218599)), (1 -> VectorD (0.170477, 0.3089857, -0.7857759)))), (0.2 -> HashMap( (0 -> VectorD (0.03489574, 0.1345627, 0.9657117, -7.923499)), (1 -> VectorD (0.1579158, 0.3369968, -0.7515043)))), (0.5 -> HashMap( (0 -> VectorD (0.0556109, 0.1397416, 0.4620311, -1.559948)), (1 -> VectorD (0.1300005, 0.2833583, -0.4283284)))), (0.8 -> HashMap( (0 -> VectorD (0.09159799, 0.1345566, -0.619965, 9.817577)), (1 -> VectorD (0.05585951, 0.2773908, -0.1216728)))), (0.9 -> HashMap( (0 -> VectorD (0.1193112, 0.09514355, -1.079983, 16.87997)), (1 -> VectorD (-0.03271029, 0.4379497, -0.2279858)))), (0.95 -> HashMap( (0 -> VectorD (0.1478756, 0.03863841, -1.89105, 30.13683)), (1 -> VectorD (-0.149998, 0.6506317, -0.2305911)))), (0.975 -> HashMap( (0 -> VectorD (0.1773233, -0.03755152, -3.125617, 51.66143)), (1 -> VectorD (-0.3091562, 1.057115, -0.4266199)))), (0.99 -> HashMap( (0 -> VectorD (0.2172606, -0.2049983, -4.308647, 82.65278)), (1 -> VectorD (-0.5765132, 1.915267, -1.057833)))), (0.995 -> HashMap( (0 -> VectorD (0.2480191, -0.3967449, -4.07506, 101.2076)), (1 -> VectorD (-0.8216993, 2.85189, -1.957697)))), (0.999 -> HashMap( (0 -> VectorD (0.3203646, -1.046171, -0.8434172, 144.566)), (1 -> VectorD (-1.515429, 6.124994, -6.22971)))) ) // HashMap ) // HashMap } // KPSS_Stationarity object import KPSS_Stationarity.coeffKpss //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `KPSS` class provides capabilities of performing KPSS test to determine * if a time series is stationary around a deterministic trend. * This code is translated from the C++ code found in * @see github.com/olmallet81/URT. * @param data_ the time series vector * @param lags_ the number of lags to use * @param lagsType_ type of lags, long or short * @param trend_ type of trend to test for */ class KPSS_Stationarity (data_ : VectoD, lags_ : Int, lagsType_ : String, trend_ : String = "c") extends UnitRoot (data_, lags_, trend_) { testName = "KPSS" validTrends = VectorS ("c", "ct") lagsType = lagsType_ //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Alternative constructor that only provides the number of lags. * @param data_ the time series vector * @param lags_ the number of lags to use * @param trend_ type of trend to test for */ def this (data_ : VectoD, lags_ : Int, trend_ : String) = { this (data_, lags_, "", trend_) } // constructor //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Alternative constructor that only provides the type of lags. * @param data_ the time series vector * @param lagsType_ type of lags, long or short * @param trend_ type of trend to test for */ def this (data_ : VectoD, lagsType_ : String, trend_ : String) = { this (data_, 0, lagsType_, trend_) } // constructor //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Computes test statistics. */ def computeStat (): Unit = { val factor = 1.0 / nobs val s = new VectorD (nobs) s(0) = y(0) for (i <- 1 until nobs) s(i) = y(i) + s(i-1) val eta = factor * factor * (s dot s) var s2 = factor * (y dot y) var tmp1 = 0.0 // estimating long run variance for (i <- 1 to lags) { val tmp2 = y.slice(i, nobs) dot y.slice(0, nobs - i) val weight = 1.0 - i / (lags + 1.0) // computing Bartlett weight tmp1 += weight * tmp2 } // for s2 += factor * 2.0 * tmp1 stat = eta / s2 } // computeStat //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute KPSS test statistic. */ def statistic (): Double = { setLagsType () // setting type of lags (if a default type of lags value has been chosen) setLags () // setting number of lags setTrend () // setting regression trend if (newTrend) { // if new trend setData () // setting pointer back to original data for new detrending olsDetrend () // detrending data by OLS } // if if (newTrend || newLags) { // if new trend or new lags computeStat () // computing statistic newTrend = false newLags = false } // if stat } // statistics //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Compute test statistic p-value. */ override def pvalue (): Double = { statistic () // computing test statistic coeff = coeffKpss.getOrElse(trend, null) // setting critical values coefficients pval = 1 - super.pvalue() // computing p-value pval } // pvalue //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Output test results. */ override def show (): Unit = { pvalue () // computing p-value super.show () // outputting results } // show //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Check to see if H0 can be rejected. Must call pvalue first to compute pval. */ def canReject (alpha: Double = 0.05): Boolean = pval < alpha } // KPSS_Stationarity class //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `KPSS_StationarityTest` object is used to test the `KPSS_Stationarity` class. * > runMain scalation.analytics.forecaster.KPSS_StationarityTest */ object KPSS_StationarityTest extends App { val nobs = 1000 val ran = new Normal () // generating stationary random data val data = new VectorD (nobs) for (i <- data.range) data(i) = ran.gen val test = new KPSS_Stationarity (data, "short", "c") // initializing KPSS test with lags of type short and constant trend test.show() // outputting test results test.lags = 5 // switching to test with 5 lags and constant term test.trend = "ct" test.show () // outputting test results } // KPSS_StationarityTest