//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.6
 *  @date    Thu Oct 24 11:59:36 EDT 2013
 *  @see     LICENSE (MIT style license file).
 *
 *  @title   Model Support: Probability and Entropy
 */

package scalation.stat

import scala.math.abs

import scalation.linalgebra._
import scalation.linalgebra.MatrixD.outer
import scalation.math.{logb, log2, double_exp}
import scalation.plot.Plot
import scalation.util.{banner, Error}

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `Probability` object provides methods for operating on univariate and
 *  bivariate probability distributions of discrete random variables 'X' and 'Y'.
 *  A probability distribution is specified by its probability mass function (pmf),
 *  stored either as a "probability vector" for a univariate distribution or
 *  a "probability matrix" for a bivariate distribution.
 *
 *      joint probability matrix:       pxy(i, j)  = P(X = x_i, Y = y_j)
 *      marginal probability vector:    px(i)      = P(X = x_i)
 *      conditional probability matrix: px_y(i, j) = P(X = x_i | Y = y_j)
 *
 *  In addition to computing joint, marginal and conditional probabilities,
 *  methods for computing entropy and mutual information are also provided.
 *  Entropy provides a measure of disorder or randomness.  If there is
 *  little randomness, entropy will be close to 0, while when randomness is
 *  high, entropy will be close to its maximum, e.g., 'log2 (px.dim)'.
 *  Mutual information provides a robust measure of dependency between
 *  random variables (contrast with correlation).
 *  @see scalation.stat.StatVector
 */
object Probability extends Error
{
    private val DEBUG   = false                              // debug flag
    private val EPSILON = 1E-9                               // a number close to zero

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Determine whether the vector 'px' is a legitimate "probability vector".
     *  The elements of the vector must be non-negative and add to one.
     *  @param px  the probability vector
     */
    def isProbability (px: VectoD): Boolean = px.min () >= 0.0 && abs (px.sum - 1.0) < EPSILON

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Determine whether the matrix 'pxy' is a legitimate joint "probability matrix".
     *  The elements of the matrix must be non-negative and add to one.
     *  @param pxy  the probability matrix
     */
    def isProbability (pxy: MatriD): Boolean = pxy.min () >= 0.0 && abs (pxy.sum - 1.0) < EPSILON

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Count the frequency of occurrence of each distinct value within integer vector 'y'
     *  (e.g., result 'nu' = (5, 9) didn't play 5, played 9).
     *  Restriction: 'y' may not contain negative integer values.
     *  @param y     the feature/column vector of integer values whose frequency counts are sought
     *  @param k     the maximum value of y + 1
     *  @param idx_  the index positions within y (if null, use all index positions)
     */
    def frequency (y: VectoI, k: Int, idx_ : IndexedSeq [Int] = null): VectoI =
    {
        val idx = if (idx_ == null) IndexedSeq.range (0, y.dim) else idx_
        val nu  = new VectorI (k)
        for (i <- idx) nu(y(i)) += 1
        nu
    } // frequency

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Count the frequency of occurrence in vector 'x' of value 'vl' for each of 'y's
     *  classification values, e.g., 'x' is column 2 (Humidity), 'vl' is 1 (High) and
     *  'y' can be 0 (no) or 1 (yes).  Also, determine the fraction of training cases
     *  where the feature has this value (e.g., fraction where Humidity is High = 7/14).
     *  @param x     the feature/column vector (e.g., column j of matrix)
     *  @param y     the response/classification vector
     *  @param k     the maximum value of y + 1
     *  @param vl    one of the possible branch values for feature x (e.g., 1 (High))
     *  @param idx_  the index positions within x (if null, use all index positions)
     */
    def frequency (x: VectoI, y: VectoI, k: Int, vl: Int, idx_ : IndexedSeq [Int]): (Double, VectoI) =
    {
        val idx = if (idx_ == null) IndexedSeq.range (0, y.dim) else idx_
        val nu  = new VectorI (k)                            // frequency counts
        var cnt = 0                                          // count for branch value
        for (i <- idx if x(i) == vl) { nu(y(i)) += 1; cnt += 1 }
        if (DEBUG) println (s"frequency: k = $k, vl = $vl, nu = $nu")
        (cnt.toDouble / idx.size, nu)                        // return fraction and frequency vector
    } // frequency
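
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // Usage sketch for the 'frequency' methods above, with made-up sample data
    // (the vectors below are illustrative only, not part of the ScalaTion API):
    //
    //     val y = VectorI (0, 1, 1, 0, 1, 1, 1, 0, 1)      // two-class response (k = 2)
    //     frequency (y, 2)                                 // -> VectorI (3, 6)
    //     val x = VectorI (1, 1, 0, 1, 0, 0, 1, 1, 0)      // hypothetical binary feature
    //     frequency (x, y, 2, 1, null)                     // -> (5/9, VectorI (3, 2)):
    //                                                      // 5 of 9 cases have x == 1; among
    //                                                      // them, y is 0 three times, 1 twice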

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Count the frequency of occurrence in vector 'x' of value 'vl' for each of 'y's
     *  classification values, e.g., 'x' is column 2 (Humidity), 'vl' is 1 (High) and
     *  'y' can be 0 (no) or 1 (yes).  Also, determine the fraction of training cases
     *  where the feature has this value (e.g., fraction where Humidity is High = 7/14).
     *  This method works for vectors with integer or continuous values.
     *  @param x      the feature/column vector (e.g., column j of matrix)
     *  @param y      the response/classification vector
     *  @param k      the maximum value of y + 1
     *  @param vl     one of the possible branch values for feature x (e.g., 1 (High))
     *  @param idx_   the index positions within x (if null, use all index positions)
     *  @param cont   whether feature/variable x is to be treated as continuous
     *  @param thres  the splitting threshold for features/variables treated as continuous
     */
    def frequency (x: VectoD, y: VectoI, k: Int, vl: Int, idx_ : IndexedSeq [Int],
                   cont: Boolean, thres: Double): (Double, VectoI) =
    {
        val idx = if (idx_ == null) IndexedSeq.range (0, y.dim) else idx_
        val nu  = new VectorI (k)                            // frequency counts
        var cnt = 0                                          // count for the value branch
        if (cont) {
            if (vl == 0) {
                for (i <- idx) if (x(i) <= thres) { nu(y(i)) += 1; cnt += 1 }
            } else {
                for (i <- idx) if (x(i) > thres)  { nu(y(i)) += 1; cnt += 1 }
            } // if
        } else {
            for (i <- idx) if (x(i) == vl) { nu(y(i)) += 1; cnt += 1 }
        } // if
        if (DEBUG) println (s"frequency: k = $k, vl = $vl, cont = $cont, thres = $thres, nu = $nu")
        (cnt.toDouble / idx.size, nu)                        // return fraction and frequency vector
    } // frequency

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Count the total number of occurrences in vector 'x' of value 'vl', e.g.,
     *  'x' is column 2 (Humidity), 'vl' is 1 (High) matches 7 rows.
     *  This method works for vectors with integer or continuous values.
     *  @param x      the feature/column vector (e.g., column j of matrix)
     *  @param vl     one of the possible branch values for feature x (e.g., 1 (High))
     *  @param idx_   the index positions within x (if null, use all index positions)
     *  @param cont   whether feature/variable x is to be treated as continuous
     *  @param thres  the splitting threshold for features/variables treated as continuous
     */
    def count (x: VectoD, vl: Int, idx_ : IndexedSeq [Int], cont: Boolean, thres: Double): Int =
    {
        val idx = if (idx_ == null) IndexedSeq.range (0, x.dim) else idx_
        var cnt = 0                                          // count for the value branch
        if (cont) {
            if (vl == 0) {
                for (i <- idx) if (x(i) <= thres) cnt += 1
            } else {
                for (i <- idx) if (x(i) > thres)  cnt += 1
            } // if
        } else {
            for (i <- idx) if (x(i) == vl) cnt += 1
        } // if
        cnt
    } // count

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a frequency vector, convert it to a probability vector.
     *  @param nu  the frequency vector
     */
    def toProbability (nu: VectoI): VectoD = nu.toDouble / nu.sum.toDouble

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a frequency vector, convert it to a probability vector.
     *  @param nu  the frequency vector
     *  @param n   the total number of instances/trials collected
     */
    def toProbability (nu: VectoI, n: Int): VectoD = nu.toDouble / n.toDouble

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a frequency matrix, convert it to a probability matrix.
     *  @param nu  the frequency matrix
     */
    def toProbability (nu: MatriI): MatriD = nu.toDouble / nu.sum.toDouble
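
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // Usage sketch for the continuous case and for 'toProbability', with made-up
    // data (illustrative only):
    //
    //     val x = VectorD (70.0, 90.0, 85.0, 60.0)         // hypothetical Humidity values
    //     val y = VectorI (1, 0, 0, 1)                     // hypothetical play/no-play labels
    //     frequency (x, y, 2, 0, null, true, 75.0)         // vl = 0 branch: x <= 75
    //                                                      // -> (0.5, VectorI (0, 2))
    //     count (x, 1, null, true, 75.0)                   // x > 75 in 2 of 4 rows -> 2
    //     toProbability (VectorI (3, 6))                   // -> VectorD (0.3333..., 0.6666...)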

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a frequency matrix, convert it to a probability matrix.
     *  @param nu  the frequency matrix
     *  @param n   the total number of instances/trials collected
     */
    def toProbability (nu: MatriI, n: Int): MatriD = nu.toDouble / n.toDouble

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given two independent random variables 'X' and 'Y', compute their
     *  "joint probability", which is the outer product of their probability
     *  vectors 'px' and 'py', i.e., P(X = x_i, Y = y_j) = px(i) * py(j).
     *  @param px  the probability vector for X
     *  @param py  the probability vector for Y
     */
    def jointProbXY (px: VectoD, py: VectoD): MatriD = outer (px, py)

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a joint probability matrix 'pxy', compute the "marginal probability"
     *  for random variable 'X', i.e., P(X = x_i).
     *  @param pxy  the probability matrix
     */
    def margProbX (pxy: MatriD): VectoD =
    {
        val px = new VectorD (pxy.dim1)
        for (i <- pxy.range1) px(i) = pxy(i).sum
        px
    } // margProbX

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a joint probability matrix 'pxy', compute the "marginal probability"
     *  for random variable 'Y', i.e., P(Y = y_j).
     *  @param pxy  the probability matrix
     */
    def margProbY (pxy: MatriD): VectoD =
    {
        val py = new VectorD (pxy.dim2)
        for (j <- pxy.range2) py(j) = pxy.col(j).sum
        py
    } // margProbY

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a joint probability matrix 'pxy', compute the "conditional probability"
     *  for random variable 'Y' given random variable 'X', i.e., P(Y = y_j | X = x_i).
     *  @param pxy  the joint probability matrix
     *  @param px_  the marginal probability vector for X
     */
    def condProbY_X (pxy: MatriD, px_ : VectoD = null): MatriD =
    {
        val px   = if (px_ == null) margProbX (pxy) else px_
        val py_x = new MatrixD (pxy.dim1, pxy.dim2)
        for (i <- pxy.range1) py_x(i) = pxy(i) / px(i)
        py_x
    } // condProbY_X

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a joint probability matrix 'pxy', compute the "conditional probability"
     *  for random variable 'X' given random variable 'Y', i.e., P(X = x_i | Y = y_j).
     *  @param pxy  the joint probability matrix
     *  @param py_  the marginal probability vector for Y
     */
    def condProbX_Y (pxy: MatriD, py_ : VectoD = null): MatriD =
    {
        val py   = if (py_ == null) margProbY (pxy) else py_
        val px_y = new MatrixD (pxy.dim1, pxy.dim2)
        for (j <- pxy.range2) px_y.setCol (j, pxy.col(j) / py(j))
        px_y
    } // condProbX_Y

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a probability vector 'px', compute the "log-probability" as the
     *  negative base-2 logarithm, '-log2 (p)', element-wise.
     *  Requires each probability to be non-zero.
     *  @param px  the probability vector
     */
    def logProb (px: VectoD): VectoD = px.map (- log2 (_))

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a probability vector 'px', compute the "entropy" of random
     *  variable 'X'.
     *  @see http://en.wikipedia.org/wiki/Entropy_%28information_theory%29
     *  @param px  the probability vector
     */
    def entropy (px: VectoD): Double =
    {
        var sum = 0.0
        for (p <- px if p > 0.0) sum -= p * log2 (p)
        sum
    } // entropy
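
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // Usage sketch for the marginal/conditional methods, with a made-up 2-by-2
    // joint distribution (illustrative only):
    //
    //     val pxy = new MatrixD ((2, 2), 0.4, 0.2,
    //                                    0.1, 0.3)         // P(X = x_i, Y = y_j)
    //     margProbX (pxy)                                  // -> VectorD (0.6, 0.4)
    //     margProbY (pxy)                                  // -> VectorD (0.5, 0.5)
    //     condProbY_X (pxy)                                // row i = pxy(i) / px(i),
    //                                                      // e.g., row 0 = (2/3, 1/3)
    //
    // Sanity check: by the product rule, px(i) * py_x(i, j) recovers pxy(i, j).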

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a frequency vector 'nu', compute the "entropy" of random variable 'X'.
     *  @see http://en.wikipedia.org/wiki/Entropy_%28information_theory%29
     *  @param nu  the frequency vector
     */
    def entropy (nu: VectoI): Double =
    {
        val tot = nu.sum.toDouble
        var sum = 0.0
        for (c <- nu if c > 0) { val p = c / tot; sum -= p * log2 (p) }
        sum
    } // entropy

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a probability vector 'px', compute the "base-b entropy" of random
     *  variable 'X'.
     *  @see http://en.wikipedia.org/wiki/Entropy_%28information_theory%29
     *  @param px  the probability vector
     *  @param b   the base for the logarithm
     */
    def entropy (px: VectoD, b: Int): Double =
    {
        var sum = 0.0
        for (p <- px if p > 0.0) sum -= p * logb (b, p)
        sum
    } // entropy

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a probability vector 'px', compute the "normalized entropy" of
     *  random variable 'X', i.e., the base-k entropy where k = px.dim, so the
     *  result lies in the interval [0, 1].
     *  @see http://en.wikipedia.org/wiki/Entropy_%28information_theory%29
     *  @param px  the probability vector
     */
    def nentropy (px: VectoD): Double =
    {
        val k = px.dim                       // let the base k = # elements in probability vector
        var sum = 0.0
        for (p <- px if p > 0.0) sum -= p * logb (k, p)
        sum
    } // nentropy

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given probability vectors 'px' and 'qx', compute the "relative entropy"
     *  (Kullback-Leibler divergence) of 'px' with respect to 'qx'.
     *  @param px  the first probability vector
     *  @param qx  the second probability vector (requires qx.dim >= px.dim)
     */
    def rentropy (px: VectoD, qx: VectoD): Double =
    {
        var sum = 0.0
        for (i <- px.indices if px(i) > 0.0) sum += px(i) * log2 (px(i) / qx(i))
        sum
    } // rentropy

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given probability vectors 'px' and 'qx', compute the "cross entropy".
     *  @param px  the first probability vector
     *  @param qx  the second probability vector (requires qx.dim >= px.dim)
     */
    def centropy (px: VectoD, qx: VectoD): Double =
    {
        var sum = 0.0
        for (i <- px.indices if px(i) > 0.0) sum -= px(i) * log2 (qx(i))
        sum
    } // centropy

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a joint probability matrix 'pxy', compute the "joint entropy"
     *  of random variables 'X' and 'Y'.
     *  @param pxy  the joint probability matrix
     */
    def entropy (pxy: MatriD): Double =
    {
        var sum = 0.0
        for (i <- pxy.range1; j <- pxy.range2) {
            val p = pxy(i, j)
            if (p > 0.0) sum -= p * log2 (p)
        } // for
        sum
    } // entropy

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a joint probability matrix 'pxy' and a conditional probability
     *  matrix 'px_y', compute the "conditional entropy" of random variable 'X'
     *  given random variable 'Y'.
     *  @param pxy   the joint probability matrix
     *  @param px_y  the conditional probability matrix
     */
    def entropy (pxy: MatriD, px_y: MatriD): Double =
    {
        if (pxy.dim1 != px_y.dim1 || pxy.dim2 != px_y.dim2)
            flaw ("entropy", "joint and conditional probability matrices are not compatible")
        var sum = 0.0
        for (i <- pxy.range1; j <- pxy.range2) {
            val p = pxy(i, j)
            if (p > 0.0) sum -= p * log2 (px_y(i, j))
        } // for
        sum
    } // entropy
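
    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    // Worked sketch for the entropy family of methods (the values are exact
    // for these simple distributions):
    //
    //     entropy (VectorD (0.5, 0.5))                       // fair coin -> 1 bit
    //     entropy (VectorD (1.0, 0.0))                       // no randomness -> 0 bits
    //     entropy (VectorD (0.25, 0.25, 0.25, 0.25))         // -> 2 bits = log2 (4)
    //     nentropy (VectorD (0.25, 0.25, 0.25, 0.25))        // -> 1 (maximum, base k = 4)
    //     rentropy (VectorD (0.5, 0.5), VectorD (0.5, 0.5))  // identical vectors -> 0
    //
    // Identity (verified in ProbabilityTest6):
    //     centropy (px, qx) == entropy (px) + rentropy (px, qx)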

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a joint probability matrix 'pxy', compute the mutual information
     *  for random variables 'X' and 'Y'.
     *  @param pxy  the probability matrix
     *  @param px   the marginal probability vector for X
     *  @param py   the marginal probability vector for Y
     */
    def muInfo (pxy: MatriD, px: VectoD, py: VectoD): Double =
    {
        var sum = 0.0
        for (i <- pxy.range1; j <- pxy.range2) {
            val p = pxy(i, j)
            if (p > 0.0) sum += p * log2 (p / (px(i) * py(j)))
        } // for
        sum
    } // muInfo

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Given a joint probability matrix 'pxy', compute the mutual information
     *  for random variables 'X' and 'Y'.
     *  @param pxy  the probability matrix
     */
    def muInfo (pxy: MatriD): Double = muInfo (pxy, margProbX (pxy), margProbY (pxy))

} // Probability object

import Probability._

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ProbabilityTest` object is used to test the `Probability` object.
 *  > runMain scalation.stat.ProbabilityTest
 */
object ProbabilityTest extends App
{
    // coin experiment: probability for 0, 1, 2 heads, when flipping 2 coins
    val px = VectorD (.25, .5, .25)

    // dice experiment: probability for 2, 3, ... 11, 12, when rolling 2 dice
    val py = VectorD (1/36.0, 2/36.0, 3/36.0, 4/36.0, 5/36.0, 6/36.0,
                      5/36.0, 4/36.0, 3/36.0, 2/36.0, 1/36.0)

    // joint probability for coin and dice experiments
    val pxy = jointProbXY (px, py)

    println ("isProbability (" + px + ") = " + isProbability (px))
    println ("isProbability (" + py + ") = " + isProbability (py))
    println ("joint probability pxy = " + pxy)
    println ("isProbability (pxy)   = " + isProbability (pxy))

    val x = VectorD.range (0, 3)
    new Plot (x, px, null, "Plot px vs. x")                  // plot the pmf for random variable X
    val y = VectorD.range (2, 13)
    new Plot (y, py, null, "Plot py vs. y")                  // plot the pmf for random variable Y

    val mpx  = margProbX (pxy)                               // marginal probability should be the same as px
    val mpy  = margProbY (pxy)                               // marginal probability should be the same as py
    val px_y = condProbX_Y (pxy)                             // conditional probability P(X = x_i|Y = y_j)
    val py_x = condProbY_X (pxy)                             // conditional probability P(Y = y_j|X = x_i)

    println ("marginal probability mpx     = " + mpx)
    println ("marginal probability mpy     = " + mpy)
    println ("conditional probability px_y = " + px_y)
    println ("conditional probability py_x = " + py_x)

    val hx   = entropy (px)                                  // entropy of random variable X
    val hy   = entropy (py)                                  // entropy of random variable Y
    val hkx  = nentropy (px)                                 // nentropy of random variable X
    val hky  = nentropy (py)                                 // nentropy of random variable Y
    val hxy  = entropy (pxy)                                 // joint entropy of random variables X and Y
    val hx_y = entropy (pxy, px_y)                           // conditional entropy of random variable X given Y
    val hy_x = entropy (pxy.t, py_x.t)                       // conditional entropy of random variable Y given X
                                                             // (both arguments transposed for compatible dimensions)
    val ixy  = muInfo (pxy)                                  // mutual information of random variables X and Y

    println ("entropy hx               = " + hx)
    println ("entropy hy               = " + hy)
    println ("nentropy hkx             = " + hkx)
    println ("nentropy hky             = " + hky)
    println ("joint entropy hxy        = " + hxy)
    println ("conditional entropy hx_y = " + hx_y)
    println ("conditional entropy hy_x = " + hy_x)
    println ("mutual information ixy   = " + ixy)

} // ProbabilityTest object
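
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ProbabilityDemo` object is an illustrative sketch (added for exposition,
 *  not part of the original test suite) that checks the standard identity
 *  I(X; Y) = H(X) + H(Y) - H(X, Y) on a small, made-up joint distribution.
 *  > runMain scalation.stat.ProbabilityDemo
 */
object ProbabilityDemo extends App
{
    val pxy = new MatrixD ((2, 2), 0.4, 0.2,                 // hypothetical joint pmf
                                   0.1, 0.3)
    val px  = margProbX (pxy)                                // marginal pmf of X
    val py  = margProbY (pxy)                                // marginal pmf of Y
    val ixy = muInfo (pxy)                                   // mutual information I(X; Y)
    println (s"muInfo (pxy)  = $ixy")
    println (s"hx + hy - hxy = ${entropy (px) + entropy (py) - entropy (pxy)}")

} // ProbabilityDemo object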

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ProbabilityTest2` object provides upper bounds for 'entropy' and 'nentropy'.
 *  > runMain scalation.stat.ProbabilityTest2
 */
object ProbabilityTest2 extends App
{
    import Probability._

    banner ("maximum entropy for k-dimensional probability vector")
    for (k <- 2 to 32) {
        println (s"max entropy for k = $k \tis " + log2 (k))
    } // for

    banner ("maximum normalized entropy for k-dimensional probability vector")
    for (k <- 2 to 32) {
        println (s"max nentropy for k = $k \tis " + logb (k, k))
    } // for

} // ProbabilityTest2 object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ProbabilityTest3` object is used to test the `Probability` object.
 *  Plot entropy.
 *  > runMain scalation.stat.ProbabilityTest3
 */
object ProbabilityTest3 extends App
{
    banner ("Test: ProbabilityTest3: Plot entropy")
    val _1 = VectorD.one (99)
    val p  = VectorD.range (1, 100) / 100.0
    val h  = p.map (p => -p * log2 (p) - (1-p) * log2 (1-p))
    new Plot (p, h, _1, "Plot of entropy h vs. p")

    val h2 = p.map (q => { val px = VectorD (q, 1-q); entropy (px) })
    new Plot (p, h2, _1, "Plot of entropy h2 vs. p")

} // ProbabilityTest3 object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ProbabilityTest4` object is used to test the `Probability` object.
 *  Plot probability and log-probability for binomial distributions.
 *  > runMain scalation.stat.ProbabilityTest4
 */
object ProbabilityTest4 extends App
{
    import scalation.random.Binomial

    val p = 0.6
    for (n <- 2 to 16) {
        val k  = VectorD.range (0, n+1)
        val y  = Binomial (p, n)
        val pp = VectorD (y.pmf (0))                         // probability vector
        val lp = logProb (pp)                                // log-probability vector
        new Plot (k, pp, null, s"Plot for n = $n of pp vs. k", lines = true)
        new Plot (k, lp, null, s"Plot for n = $n of lp vs. k", lines = true)
    } // for

} // ProbabilityTest4 object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ProbabilityTest5` object is used to test the `Probability` object.
 *  Plot entropy and normalized entropy for binomial distributions.
 *  > runMain scalation.stat.ProbabilityTest5
 */
object ProbabilityTest5 extends App
{
    import scalation.random.Binomial

    for (n <- 1 to 16) {
        val p  = VectorD.range (0, 100) / 100.0
        val h  = new VectorD (p.dim)                         // vector of entropies
        val hk = new VectorD (p.dim)                         // vector of normalized entropies
        for (i <- p.indices) {
            val y  = Binomial (p(i), n)
            val pp = VectorD (y.pmf (0))
            h(i)   = entropy (pp)
            hk(i)  = nentropy (pp)
        } // for
        new Plot (p, h, hk, s"Plot for n = $n of entropy h, hk vs. p", lines = true)
    } // for

} // ProbabilityTest5 object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ProbabilityTest6` object is used to test the `Probability` object.
 *  It computes entropy, relative entropy and cross entropy, and verifies that
 *  cross entropy == entropy + relative entropy.
 *  @see arxiv.org/pdf/0808.4111.pdf
 *  > runMain scalation.stat.ProbabilityTest6
 */
object ProbabilityTest6 extends App
{
    val px = Array (VectorD (0.1, 0.9), VectorD (0.2, 0.8), VectorD (0.3, 0.7),
                    VectorD (0.4, 0.6), VectorD (0.5, 0.5), VectorD (0.6, 0.4),
                    VectorD (0.7, 0.3), VectorD (0.8, 0.2), VectorD (0.9, 0.1))

    banner ("entropy")
    for (i <- px.indices) {
        println (s"entropy (${px(i)}) = ${entropy (px(i))}")
    } // for

    banner ("relative entropy (KL divergence)")
    for (i <- px.indices) {
        for (j <- px.indices) {
            val re = rentropy (px(i), px(j))
            println (s"rentropy (${px(i)}, ${px(j)}) = $re")
        } // for
        println ("--------")
    } // for

    banner ("cross entropy")
    for (i <- px.indices) {
        for (j <- px.indices) {
            val ce = centropy (px(i), px(j))
            println (s"centropy (${px(i)}, ${px(j)}) = $ce")
            assert (ce =~ entropy (px(i)) + rentropy (px(i), px(j)))
        } // for
        println ("--------")
    } // for

} // ProbabilityTest6 object

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ProbabilityTest7` object is used to test the `Probability` object.
 *  It computes joint, marginal and conditional probabilities as well as
 *  measures of independence.
 *  @see ocw.mit.edu/courses/mathematics/18-05-introduction-to-probability-and-statistics-spring-2014/readings/MIT18_05S14_Reading7a.pdf
 *  > runMain scalation.stat.ProbabilityTest7
 */
object ProbabilityTest7 extends App
{
    // X  - dice 1: 1, 2, 3, 4, 5, 6
    // X2 - dice 2: 1, 2, 3, 4, 5, 6
    // Y = X + X2:  2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12

    val nuxy = new MatrixI ((6, 11), 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0,
                                     0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0,
                                     0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0,
                                     0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0,
                                     0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0,
                                     0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1)

    println ("nuxy = " + nuxy)
    println ("1/6  = " + 1/6.0)
    println ("1/36 = " + 1/36.0)

    banner ("Joint Probability Distribution")
    val pxy = toProbability (nuxy)
    println ("Joint XY: pxy = " + pxy)

    banner ("Marginal Probability Distributions")
    val px = margProbX (pxy)
    val py = margProbY (pxy)
    println ("Marginal X: px = " + px)
    println ("Marginal Y: py = " + py)

    banner ("Conditional Probability Distributions")
    val py_x = condProbY_X (pxy, px)
    val px_y = condProbX_Y (pxy, py)
    println ("Y given X: py_x = " + py_x)
    println ("X given Y: px_y = " + px_y)

    banner ("Independence: zero relative entropy, mutual information")
    val re = rentropy (px, py)
    val mi = muInfo (pxy)
    println (s"rentropy = $re")
    println (s"muInfo   = $mi")

    banner ("Mutual Information = Information Gain")
    val hx   = entropy (px)
    val hy   = entropy (py)
    val hy_x = entropy (pxy.t, py_x.t)                       // both arguments transposed for compatible dimensions
    val hx_y = entropy (pxy, px_y)
    println ("Entropy: hx    = " + hx)
    println ("Entropy: hy    = " + hy)
    println ("CondEnt: hy_x  = " + hy_x)
    println ("CondEnt: hx_y  = " + hx_y)
    println ("mi = hx - hx_y = " + (hx - hx_y))
    println ("mi = hy - hy_x = " + (hy - hy_x))
    assert (mi =~ hx - hx_y)
    assert (mi =~ hy - hy_x)

} // ProbabilityTest7 object