//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author John Miller * @version 1.6 * @date Fri Feb 16 16:14:34 EST 2018 * @see LICENSE (MIT style license file). * * @title Example Dataset: Continuous Version of Play Tennis */ package scalation.analytics.classifier import scala.collection.mutable.Set import scalation.linalgebra.{MatrixD, MatrixI, VectoI} import scalation.util.banner //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `ExampleTennisCont` object is used to test all integer based classifiers. * This is the well-known classification problem on whether to play tennis * based on given weather conditions. Applications may need to slice 'xy'. * The 'Cont' version uses continuous values for Temperature and Humidity. *

* val x = xy.sliceCol (0, 4) // columns 0, 1, 2, 3 * val y = xy.col (4) // column 4 *

* @see euclid.nmu.edu/~mkowalcz/cs495f09/slides/lesson004.pdf
* @see sefiks.com/2018/05/13/a-step-by-step-c4-5-decision-tree-example
*/
object ExampleTennisCont
{
    // Column encoding of the combined data matrix [ x | y ]
    // -------------------------------------------------------------------------
    //   x0: Outlook:     Rain (0), Overcast (1), Sunny (2)
    //   x1: Temperature: Continuous
    //   x2: Humidity:    Continuous
    //   x3: Wind:        Weak (0), Strong (1)
    //   y:  the response/classification decision

    // one row per day (14 days), five columns per row
    //                             x0  x1  x2  x3   y
    val xy = new MatrixD ((14, 5),  2, 85, 85,  0,  0,      // day  1
                                    2, 80, 90,  1,  0,      // day  2
                                    1, 83, 78,  0,  1,      // day  3
                                    0, 70, 96,  0,  1,      // day  4
                                    0, 68, 80,  0,  1,      // day  5
                                    0, 65, 70,  1,  0,      // day  6
                                    1, 64, 65,  1,  1,      // day  7
                                    2, 72, 95,  0,  0,      // day  8
                                    2, 69, 70,  0,  1,      // day  9
                                    0, 75, 80,  0,  1,      // day 10
                                    2, 75, 70,  1,  1,      // day 11
                                    1, 72, 90,  1,  1,      // day 12
                                    1, 81, 75,  0,  1,      // day 13
                                    0, 71, 80,  1,  0)      // day 14

    // names of the four feature columns, in column order
    val fn = Array ("Outlook", "Temp", "Humidity", "Wind")

    // column indices of the continuous features (Temperature and Humidity)
    val conts = Set (1, 2)

    // class names for y
    val cn = Array ("No", "Yes")

    // number of classes, taken from the number of class names
    val k = cn.length

    // feature matrix: columns 0, 1, 2, 3 of the combined matrix
    val x = xy.sliceCol (0, 4)

    // response vector: column 4 of the combined matrix, converted to integers
    val y = xy.col (4).toInt

} // ExampleTennisCont object

import ClassifierReal.analyzer
import ExampleTennisCont._
import Round.roundMat

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ExampleTennisContTest` object tests several classifiers on the Tennis dataset.
 *  Tests all classes that extend from `ClassifierReal`.
* Each section below prints a banner, builds one classifier from the dataset,
* hands it to the analysis routine and prints the resulting summary.
* > runMain scalation.analytics.classifier.ExampleTennisContTest
*/
object ExampleTennisContTest extends App
{
    // built from the response vector y only; analyzed via `ClassifierInt.analyze`
    banner ("NullModel")
    val nm = new NullModel (y)
    ClassifierInt.analyze (nm)
    println (nm.summary (nm.parameter))

    // every remaining model goes through the imported `analyzer`

    banner ("NaiveBayesR")
    val nb = new NaiveBayesR (x, y)
    analyzer (nb)
    println (nb.summary (nb.parameter))

    banner ("SimpleLogisticRegression")
    val srg = new SimpleLogisticRegression (x, y)
    analyzer (srg)
    println (srg.summary (srg.parameter))

    banner ("LogisticRegression")
    val lrg = new LogisticRegression (x, y)
    analyzer (lrg)
    println (lrg.summary (lrg.parameter))

    // given only the first feature column (x0) rather than the full matrix
    banner ("SimpleLDA")
    val sda = new SimpleLDA (x.col (0), y)
    analyzer (sda)
    println (sda.summary (sda.parameter))

    banner ("LDA")
    val lda = new LDA (x, y)
    analyzer (lda)
    println (lda.summary (lda.parameter))

    banner ("KNN_Classifier")
    val knn = new KNN_Classifier (x, y)
    analyzer (knn)
    println (knn.summary (knn.parameter))

    // the tree-based models are additionally passed the set of continuous columns

    banner ("DecisionTreeC45")
    val dtc = new DecisionTreeC45 (x, y, conts = conts)
    analyzer (dtc)
    println (dtc.summary (dtc.parameter))

    banner ("RandomForest")
    val rf = new RandomForest (x, y, conts = conts)
    analyzer (rf)
    println (rf.summary (rf.parameter))

    banner ("SupportVectorMachine")
    val svm = new SupportVectorMachine (x, y)
    analyzer (svm)
    println (svm.summary (svm.parameter))

} // ExampleTennisContTest object