//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  Khalid Jahangeer
 *  @version 1.6
 *  @date    Sat Aug 8 20:26:34 EDT 2015
 *  @see     LICENSE (MIT style license file).
 */

// U N D E R   D E V E L O P M E N T

package scalation.analytics.recommender

import scalation.analytics.BASE_DIR
import scalation.linalgebra.{VectorD, MatrixD, VectorI, MatrixI, SVD, SVDImputed, SVDReg}
import scala.math.{abs, round, sqrt}
import scalation.random.{PermutedVecI, Randi}
import scalation.stat.{Statistic, vectorD2StatVector}
import scalation.math._
import scalation.⁄
import scalation.plot.Plot
import scalation.stat.StatVector._
import scalation.util._

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ModelBasedRecommender` class is used to perform predictions based on
 *  Model-Based Collaborative Filtering techniques (Pure SVD, Regularized SVD).
 *  @param input  the original (user, item, rating) data matrix
 *  @param m      the number of rows (users)
 *  @param n      the number of columns (items)
 */
class ModelBasedRecommender (input: MatrixI, m: Int, n: Int)
      extends Recommender
{
    private val ratings       = makeRatings (input, m, n)                  // original ratings matrix
    private var training      = new MatrixD (ratings.dim1, ratings.dim2)   // training dataset
    private var copy_training = new MatrixD (ratings.dim1, ratings.dim2)   // copy of training dataset
    private var predicted     = new MatrixD (m, n)                         // matrix for storing the SVD predicted values
    private val k             = 10                                         // number of latent factors for SVD

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Fill the training matrix with the ratings up to position 'limit' of the
     *  input and replace all ratings beyond that interval with 0.
     *  Corresponds to Phase 3 of the experiments.
     *  @param limit  cut-off point: ratings up to this index are kept, later ones are zeroed
     *  @param input  original data matrix
     */
    def zRatings (limit: Int, input: MatrixI): Unit =
    {
        for (i <- input.range1) {
            if (i <= limit) training(input(i, 0), input(i, 1)) = input(i, 2)
            else            training(input(i, 0), input(i, 1)) = 0.0
        } // for
        copy_training = training.copy
    } // zRatings

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Generate the training matrix for the dataset for Phase 2 and return the
     *  corresponding test matrix ('ratings - training').
     *  @param exl    vector of index values to be excluded from the training set
     *  @param input  original data matrix
     */
    def genTrainTest (exl: VectorI, input: MatrixI): MatrixD =
    {
        for (i <- input.range1) {
            if (exl.indexOf (i) != -1) training(input(i, 0), input(i, 1)) = 0.0          // excluded => held out for testing
            else                       training(input(i, 0), input(i, 1)) = input(i, 2)
        } // for
        copy_training = training.copy
        ratings - training
    } // genTrainTest

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Generate the training matrix for the dataset for Phase 1.
     *  @param train  training data matrix
     */
    def genTrain2 (train: MatrixI): Unit =
    {
        for (i <- train.range1) training(train(i, 0), train(i, 1)) = train(i, 2)
        copy_training = training.copy
    } // genTrain2

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Impute the zero (unrated) entries of the training matrix with their column means.
     */
    def impute: Unit =
    {
        val colmeans = colMeans                                      // column means (or matrix minimum) used for all 0-valued entries
        for (i <- training.range1; j <- training.range2) {
            if (training(i, j) =~ 0.0) training(i, j) = colmeans(j)  // column mean imputation
        } // for
    } // impute
    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Create a normalized version of the training matrix: every nonzero rating
     *  is replaced by the rating minus its row (user) mean, where the mean is
     *  computed over nonzero entries only (see `NormalizeSketch` at the end of
     *  this file for a toy illustration of this convention).
     */
    def normalize (): Unit =
    {
        val norm_a = new MatrixD (training.dim1, training.dim2)
        for (i <- training.range1; j <- training.range2) {
            var temp = 0.0
            if (training(i).filter (_ !=~ 0.0).sum !=~ 0.0) temp = training(i).filter (_ !=~ 0.0).mean
            if (training(i, j) !=~ 0.0) norm_a(i, j) = training(i, j) - temp
        } // for
        training = norm_a.copy
    } // normalize

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the denormalization offset for row 'i', i.e., the mean of the
     *  nonzero entries of that row in the original (copied) training matrix.
     *  @param i  the row (user) index
     */
    def denormalize (i: Int): Double =
    {
        if (copy_training(i).filter (_ !=~ 0.0).sum !=~ 0.0) copy_training(i).filter (_ !=~ 0.0).mean
        else 0.0
    } // denormalize

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return a vector of the mean of each column, computed without including 0's.
     *  A column consisting entirely of 0's is assigned the minimum nonzero value
     *  of the training matrix.
     */
    def colMeans: VectorD =
    {
        val minimum  = minNZ                                    // minimum nonzero value of the training matrix
        val colmeans = new VectorD (training.dim2)              // column means used to impute the 0-valued entries
        for (j <- training.range2) {
            if (training.col(j).filter (_ != 0.0).size == 0) colmeans(j) = minimum   // all-0 column => assign minimum value
            else colmeans(j) = training.col(j).filter (_ !=~ 0.0).mean
        } // for
        colmeans
    } // colMeans

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the minimum nonzero element of the entire training matrix.
     */
    def minNZ: Double =
    {
        var mn = Double.MaxValue                                // start above any rating, so zero entries are ignored
        for (i <- training.range1; j <- training.range2 if training(i, j) > 0.0) {
            if (training(i, j) < mn) mn = training(i, j)
        } // for
        mn
    } // minNZ

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Calculate the final predicted matrix based on Pure Singular Value Decomposition.
     */
    def SVDImp: Unit =
    {
        val svdI    = new SVDImputed (training, k)              // constructor call
        val ia      = SVDImputed.impute (training)              // column mean imputation
        val na      = svdI.normalize (ia)                       // centralized normalization
        val factors = new SVD (na.t).factor123 ()               // SVD factorization
        svdI.train (factors)                                    // compute the predicted values
        for (i <- predicted.range1; j <- predicted.range2) predicted(i, j) = svdI.predict (i, j)
    } // SVDImp

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Calculate the final predicted matrix based on Regularized Singular Value
     *  Decomposition (a toy sketch of the underlying model appears in
     *  `RegSVDSketch` below).
     */
    def SVDR: Unit =
    {
        val svdreg = new SVDReg (training, k)
        svdreg.factor
        for (i <- predicted.range1; j <- predicted.range2) predicted(i, j) = svdreg.predict (i, j)
    } // SVDR

    //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the rating for user 'i' and item 'j' from the SVD-based prediction matrix.
     *  @param i  the user index
     *  @param j  the item index
     */
    def rate (i: Int, j: Int): Double = predicted(i, j)

} // ModelBasedRecommender
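
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `RegSVDSketch` object is a minimal, hedged sketch of the model behind 'SVDR':
 *  it is NOT ScalaTion's `SVDReg` implementation, only a toy illustration.  Each user
 *  'u' gets a latent factor vector p(u) and each item 'i' a vector q(i); a rating is
 *  predicted as the dot product p(u) . q(i), and the factors are fitted by stochastic
 *  gradient descent on the regularized squared error
 *      (r - p(u) . q(i))^2 + lambda * (|p(u)|^2 + |q(i)|^2).
 *  All sizes, ratings and hyper-parameters below are made-up values for illustration.
 *  > run-main scalation.analytics.recommender.RegSVDSketch
 */
object RegSVDSketch extends App
{
    val nf   = 2                                                  // number of latent factors (toy value)
    val eta  = 0.01                                               // learning rate (assumed)
    val lam  = 0.02                                               // regularization weight lambda (assumed)
    val data = Array ((0, 0, 5.0), (0, 1, 3.0), (1, 0, 4.0),      // six known (user, item, rating) triples
                      (1, 2, 1.0), (2, 1, 2.0), (2, 2, 4.0))
    val p    = Array.tabulate (3, nf)((u, f) => 0.1 * (f + 1))    // user factor vectors, small initial values
    val q    = Array.tabulate (3, nf)((i, f) => 0.1 * (f + 1))    // item factor vectors, small initial values

    def dot (a: Array [Double], b: Array [Double]): Double =
        (for (f <- 0 until nf) yield a(f) * b(f)).sum             // dot product of two factor vectors

    for (epoch <- 0 until 200; (u, i, r) <- data) {               // SGD sweeps over the known ratings
        val err = r - dot (p(u), q(i))                            // prediction error for this rating
        for (f <- 0 until nf) {
            val (pu, qi) = (p(u)(f), q(i)(f))
            p(u)(f) += eta * (err * qi - lam * pu)                // gradient step with L2 shrinkage
            q(i)(f) += eta * (err * pu - lam * qi)
        } // for
    } // for

    println ("predicted rating for (user 2, item 0) = " + dot (p(2), q(0)))
} // RegSVDSketch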

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ModelBasedRecommenderTest` object is used to test the `Recommender` trait
 *  using the MovieLens dataset.  Corresponds to Phase 1 of the experiments.
 *  > run-main scalation.analytics.recommender.ModelBasedRecommenderTest
 */
object ModelBasedRecommenderTest extends App
{
    val data_file = BASE_DIR + "recommender" + ⁄ + "sorted_data.txt"
    val (m, n)    = (943, 1682)                                   // MovieLens 100k: 943 users, 1682 movies
    MatrixI.setSp ('\t')                                          // use tab as the column separator
    var input = MatrixI (data_file)
    input.setCol (0, input.col(0) - 1)                            // make user ids 0-based
    input.setCol (1, input.col(1) - 1)                            // make item ids 0-based

    val recMB      = new ModelBasedRecommender (input, m, n)
    val train_file = BASE_DIR + "recommender" + ⁄ + "u2.base"     // replace with u(1-5).base
    val test_file  = BASE_DIR + "recommender" + ⁄ + "u2.test"     // replace with u(1-5).test

    var train = MatrixI (train_file)
    train.setCol (0, train.col(0) - 1)
    train.setCol (1, train.col(1) - 1)

    var tester = MatrixI (test_file)
    tester.setCol (0, tester.col(0) - 1)
    tester.setCol (1, tester.col(1) - 1)

    recMB.genTrain2 (train)

    for (i <- 0 until 1) {
        val t = time {
            println ("Training Time")
            val t1 = time {                                       // training time
                recMB.SVDImp                                      // Pure SVD
//              recMB.SVDR                                        // Regularized SVD
            } // time
            println ("Prediction Time")
            val t2 = time { recMB.error_metrics (tester) }        // testing time
        } // time
    } // for

} // ModelBasedRecommenderTest

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ModelBasedRecommenderTest2` object is used to test the `Recommender` trait
 *  using the MovieLens dataset.  Corresponds to Phase 2 of the experiments
 *  (k-fold cross-validation); the MAE/RMSE figures it prints are defined as in
 *  `ErrorMetricsSketch` below.
 *  > run-main scalation.analytics.recommender.ModelBasedRecommenderTest2
 */
object ModelBasedRecommenderTest2 extends App
{
    val data_file = BASE_DIR + "recommender" + ⁄ + "sorted_data.txt"
    val kfold  = 5                              // value for k-fold cross-validation
    val diff   = new VectorD (kfold)            // MAE values
    val rdiff  = new VectorD (kfold)            // rounded MAE values
    val diff2  = new VectorD (kfold)            // RMSE values
    val rdiff2 = new VectorD (kfold)            // rounded RMSE values
    val hit    = new VectorD (kfold)            // percentage of successful (hit) predictions

    MatrixI.setSp ('\t')
    var input = MatrixI (data_file)
    input.setCol (0, input.col(0) - 1)
    input.setCol (1, input.col(1) - 1)

    val foldsize = input.dim1 / kfold
    val (m, n)   = (943, 1682)
    val recMB    = new ModelBasedRecommender (input, m, n)

    for (x <- 0 until 1) {
        val t = time {
            val indx       = List.range (0, input.dim1)
            val rand_index = scala.util.Random.shuffle (indx)
            val index      = new VectorI (input.dim1)
            val fold       = VectorD.range (0, kfold)
            for (i <- 0 until input.dim1) index(i) = rand_index(i)        // randomly permuted row indices

            for (i <- 0 until kfold) {
                val excl = new VectorI (foldsize)                         // indices of the ratings excluded from training
                println (s"--------------$i------------------")
                for (j <- 0 until excl.dim) excl(j) = index(i * foldsize + j)
                val tester = recMB.genTrainTest (excl, input)

                println ("Training Time")
                val t1 = time {                                           // training time
                    recMB.SVDImp                                          // Pure SVD
//                  recMB.SVDR                                            // Regularized SVD
                } // time
                println ("Prediction time")
                val t2 = time { recMB.crossValidate (tester) }            // prediction time

                val stats = recMB.getStats
                diff(i)   = stats(0).ma
                rdiff(i)  = stats(1).ma
                diff2(i)  = stats(0).rms
                rdiff2(i) = stats(1).rms
                hit(i)    = stats(2).mean * 100
                for (j <- 0 until 3) stats(j).reset ()
            } // for

            println ("MAE          = " + diff.mean)
            println ("MAE rounded  = " + rdiff.mean)
            println ("RMSE         = " + diff2.mean)
            println ("RMSE rounded = " + rdiff2.mean)
            println ("HIT          = " + hit.mean)
        } // time
    } // for

} // ModelBasedRecommenderTest2
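
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ErrorMetricsSketch` object is a small, hedged illustration (not part of the
 *  original test suite) of how the error figures reported above are defined:
 *  MAE is the mean of |actual - predicted| and RMSE is the square root of the mean
 *  of (actual - predicted)^2.  The two vectors below are made-up toy ratings.
 *  > run-main scalation.analytics.recommender.ErrorMetricsSketch
 */
object ErrorMetricsSketch extends App
{
    val actual    = VectorD (4.0, 3.0, 5.0, 2.0)                  // toy actual ratings (hypothetical)
    val predicted = VectorD (3.5, 3.0, 4.0, 2.5)                  // toy predicted ratings (hypothetical)

    var sae = 0.0                                                 // sum of absolute errors
    var sse = 0.0                                                 // sum of squared errors
    for (i <- 0 until actual.dim) {
        val e = actual(i) - predicted(i)                          // error on the i-th rating
        sae  += abs (e)
        sse  += e * e
    } // for

    println ("MAE  = " + sae / actual.dim)                        // mean absolute error
    println ("RMSE = " + sqrt (sse / actual.dim))                 // root mean squared error
} // ErrorMetricsSketch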

//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `ModelBasedRecommenderTest3` object is used to test the `Recommender` trait
 *  using the MovieLens dataset.  Corresponds to Phase 3 of the experiments.
 *  > run-main scalation.analytics.recommender.ModelBasedRecommenderTest3
 */
object ModelBasedRecommenderTest3 extends App
{
    val data_file = BASE_DIR + "recommender" + ⁄ + "sorted_data.txt"
    val kfold     = 5                                  // value for k-fold cross-validation
    val INTERVALS = 100                                // time intervals for observation of statistics
    val INT_SIZE  = 1000                               // number of ratings in each interval
    val INT_START = 75                                 // starting point of the intervals
    val diff   = new VectorD (INTERVALS - INT_START)   // MAE
    val rdiff  = new VectorD (INTERVALS - INT_START)   // rounded MAE
    val diff2  = new VectorD (INTERVALS - INT_START)   // RMSE
    val rdiff2 = new VectorD (INTERVALS - INT_START)   // rounded RMSE
    val hit    = new VectorD (INTERVALS - INT_START)   // percentage of successful (hit) predictions

    MatrixI.setSp ('\t')
    var input = MatrixI (data_file)
    input.setCol (0, input.col(0) - 1)
    input.setCol (1, input.col(1) - 1)

    val (m, n) = (943, 1682)
    val recMB  = new ModelBasedRecommender (input, m, n)
    val t_idx  = VectorD.range (INT_START, INTERVALS)

    for (i <- INT_START until INTERVALS) {
        recMB.zRatings ((i-1) * INT_SIZE, input)       // zero the ratings beyond the current interval
        println (i)
        recMB.SVDImp                                   // Pure SVD
//      recMB.SVDR                                     // Regularized SVD
        recMB.test ((i-1) * INT_SIZE, i * INT_SIZE, input)

        val stats = recMB.getStats
        diff(i - INT_START)   = stats(0).ma
        rdiff(i - INT_START)  = stats(1).ma
        diff2(i - INT_START)  = stats(0).rms
        rdiff2(i - INT_START) = stats(1).rms
        hit(i - INT_START)    = stats(2).mean * 100
        for (j <- 0 until 3) stats(j).reset ()
    } // for

    println ("MAE          = " + diff.mean)
    println ("MAE rounded  = " + rdiff.mean)
    println ("RMSE         = " + diff2.mean)
    println ("RMSE rounded = " + rdiff2.mean)
    println ("HIT          = " + hit.mean)
    println (diff)
    println (rdiff)
    println (hit)

    new Plot (t_idx, diff, rdiff, "DIFF")
    new Plot (t_idx, hit, null, "HIT")

} // ModelBasedRecommenderTest3
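
//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `NormalizeSketch` object is a small, hedged illustration (not part of the
 *  original code) of the centering convention used by the 'normalize' and
 *  'denormalize' methods of `ModelBasedRecommender`: a user's row mean is computed
 *  over nonzero (rated) entries only and subtracted from the rated entries, while
 *  unrated entries stay 0.  The row below is made-up toy data.
 *  > run-main scalation.analytics.recommender.NormalizeSketch
 */
object NormalizeSketch extends App
{
    val row = VectorD (5.0, 0.0, 3.0, 0.0, 4.0)                   // one user's ratings (0 = unrated)
    val nz  = row.filter (_ !=~ 0.0)                              // rated entries only: (5, 3, 4)
    val mu  = if (nz.sum =~ 0.0) 0.0 else nz.mean                 // mean over rated entries = 4, not 12/5

    val cent = new VectorD (row.dim)                              // centered row, unrated entries stay 0
    for (j <- 0 until row.dim if row(j) !=~ 0.0) cent(j) = row(j) - mu

    println ("row mean over rated entries = " + mu)               // 4.0
    println ("centered row                = " + cent)             // (1, 0, -1, 0, 0)
} // NormalizeSketch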