//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Fri Jul  5 14:47:06 EDT 2019
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   (Two-Sided) One-Sample T-Test
 *
 *  @see     www.itl.nist.gov/div898/handbook/eda/section3/eda353.htm
 */

package scalation.stat

import scala.math.{abs, sqrt}

import scalation.linalgebra.VectorD
import scalation.random.CDF.studentTCDF
import scalation.random.Quantile.studentTInv
import scalation.util.banner

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `T_Test1` class is used to test whether a data sample 'x' comes from a
 *  population with "known" mean 'μ0', by comparing the sample mean 'μ' with 'μ0',
 *  using a (Two-Sided/Two-Tailed) One-Sample t-test.
 *  Assumes the sample is drawn from a Normal distribution.
 *  The error in the test is measured by the conditional probability 'p' that
 *  a "difference is detected" when there "actually is none".
 *
 *      p  =  P(different | μ = μ0)
 *
 *  The power of the test is the ability to detect actual differences.  '1 - power' is
 *  measured by the conditional probability 'p2' that a "difference is not detected"
 *  when there "actually is one".
 *
 *      p2  =  P(! different | μ ≠ μ0)
 *
 *  These are called Type I (measured by p) and Type II (measured by p2) errors.
 *  @see en.wikipedia.org/wiki/Type_I_and_type_II_errors
 *-----------------------------------------------------------------------------
 *  H0:  μ = μ0    Null Hypothesis
 *  H1:  μ ≠ μ0    Alternative Hypothesis (Two-Sided)
 *-----------------------------------------------------------------------------
 *  @param x   the sample's vector of data
 *  @param μ0  the "known" population mean
 */
class T_Test1 (x: VectorD, μ0: Double)
{
    private val DEBUG  = true                                  // debug flag
    private val n      = x.dim                                 // sample size for x
    private val df     = n - 1                                 // degrees of freedom
    private val (μ, σ) = (x.mean, x.stddev)                    // sample mean and standard deviation of x
    private val δ      = abs (μ - μ0)                          // absolute difference in sample and population means

    if (DEBUG) {
        println (s"n = $n")                                    // sample size
        println (s"μ0 = $μ0")                                  // "known" population mean
        println (s"μ = $μ")                                    // sample mean
        println (s"σ = $σ")                                    // sample standard deviation
        println (s"δ = $δ")                                    // absolute difference in means
    } // if

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the standard error for mean difference, i.e., the sample
     *  standard deviation divided by the square root of the given size.
     *  @param n  the sample size to use (shadows the class-level sample size)
     */
    def se (n: Int): Double = σ / sqrt (n)

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the Student's t statistic as the absolute mean difference
     *  divided by the standard error (hence it is never negative for se > 0).
     *  @param se  the standard error, e.g., as returned by `se`
     */
    def t (se: Double): Double = δ / se

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the probability of a Type I Error using the Student's t distribution.
     *  @see scalation.random.CDF.studentTCDF
     *  Mistake probability: in rejecting the null hypothesis.
     *  @param t   the value of the Student's t statistic
     *  @param df  the effective degrees of freedom
     */
    def p (t: Double, df: Int): Double = 2.0 * (1.0 - studentTCDF (t, df))

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the probability of a Type II Error using the Student's t distribution.
     *  Mistake probability: in not rejecting the null hypothesis.
     *  NOTE(review): assuming `studentTCDF` is the CDF of the (symmetric) central
     *  t distribution, the returned sum equals P(T > c - t) + P(T < -c - t);
     *  confirm this is the quantity intended as the Type II error probability.
     *  @see statweb.stanford.edu/~susan/courses/s141/hopower.pdf
     *  @param t   the value of the Student's t statistic
     *  @param df  the effective degrees of freedom
     *  @param α   the desired significance level
     */
    def p2 (t: Double, df: Int, α: Double = 0.05): Double =
    {
        val c = T_Test1.c (df, α)                              // critical value
        studentTCDF (-t - c, df) + studentTCDF (t - c, df)
    } // p2

} // T_Test1 class


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `T_Test1` companion object provides a convenient method for performing t-test.
 *  If 'different' is false (fail to reject), making a decision on this basis
 *  can be risky, so 'same' should be called.
 *  If neither 'different' nor 'same' is true, need to collect more data.
 */
object T_Test1
{
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the critical value for the Student's t-distribution.
     *  For a two-sided/two-tailed test, the critical values are '-c' and 'c' and
     *  the total probability mass in the tails being 'α'.
     *  @param df  the effective degrees of freedom
     *  @param α   the desired significance level
     */
    def c (df: Int, α: Double = 0.05): Double = studentTInv (1 - α/2, df)

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Determine whether the difference in the two means is statistically
     *  significant.
     *  If true, "reject" the null hypothesis that μ = μ0, else "fail to reject"
     *  @param p  the probability of a Type I error
     *  @param α  the desired significance level
     */
    def different (p: Double, α: Double = 0.05): Boolean = p < α

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Determine whether the difference in the two means is statistically
     *  insignificant.  Note, typically this is a weaker test than 'different'.
     *  @param p2  the probability of a Type II error
     *  @param pw  the desired power level (p2 = 1 - power)
     */
    def same (p2: Double, pw: Double = 0.9): Boolean = p2 < 1 - pw

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Perform a one-sample t-test based on comparing the sample mean with the
     *  "known" population mean, printing each intermediate quantity and the
     *  two decisions ('different' and 'same').
     *  @param x   the sample's vector of data
     *  @param μ0  the "known" population mean
     *  @param α   the desired significance level
     *  @param pw  the desired power of the test
     */
    def test (x: VectorD, μ0: Double, α: Double = 0.05, pw: Double = 0.9): Unit =
    {
        banner (s"T_Test1 - one sample: μ =? μ0 = $μ0")
        val ttest = new T_Test1 (x, μ0)                        // construct a t-test
        val se    = ttest.se (x.dim)                           // standard error
        val df    = ttest.df                                   // degrees of freedom
        val t     = ttest.t (se)                               // Student's t statistic
        val c     = T_Test1.c (df, α)                          // Student's t critical value
        val p     = ttest.p (t, df)                            // probability of Type I error
        // forward the caller's α so p2 uses the same critical value as 'c' above,
        // rather than silently falling back to the default of 0.05
        val p2    = ttest.p2 (t, df, α)                        // probability of Type II error
        val diff  = T_Test1.different (p, α)                   // are the means different?
        val same  = T_Test1.same (p2, pw)                      // are the means same?

        println (s"T_Test1 se = $se \t standard error")
        println (s"T_Test1 df = $df \t\t\t degrees of freedom")
        println (s"T_Test1 t = $t \t Student's t statistic")
        println (s"T_Test1 c = $c \t Student's t critical value")
        println (s"T_Test1 p = $p \t probability of Type I error")
        println (s"T_Test1 p2 = $p2 \t probability of Type II error")
        println (s"T_Test1 diff = $diff \t\t\t are the means different?")
        println (s"T_Test1 same = $same \t\t\t are the means same?")
    } // test

} // T_Test1 object


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `T_Test1Test` object is used to test the `T_Test1` class.
 *  > runMain scalation.stat.T_Test1Test
 */
object T_Test1Test extends App
{
    val x  = VectorD (20.4, 24.2, 15.4, 21.4, 20.2, 18.5, 21.5)
    val y  = VectorD (20.2, 16.9, 18.5, 17.3, 20.5)
    val μ0 = y.mean                      // assume the second sample gives the true mean
                                         // @see `T_Test`
    T_Test1.test (x, μ0)                 // one sample t-test

} // T_Test1Test object


//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `T_Test1Test2` object is used to test the `T_Test1` class.
 *  @see www.statsdirect.com/help/parametric_methods/single_sample_t.htm
 *  > runMain scalation.stat.T_Test1Test2
 */
object T_Test1Test2 extends App
{
    val x = VectorD (128, 127, 118, 115, 144, 142, 133, 140, 132, 131,
                     111, 132, 149, 122, 139, 119, 136, 129, 126, 128)

    for (μ0 <- 120 to 130) {             // for several given population means
        T_Test1.test (x, μ0)             // one sample t-test
    } // for

} // T_Test1Test2 object