//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** @author  John Miller
 *  @version 1.5
 *  @date    Wed May 9 13:14:48 EDT 2018
 *  @see     LICENSE (MIT style license file).
 *
 *  Model-Free Markov Decision Processes
 *  @see https://pdfs.semanticscholar.org/968b/ab782e52faf0f7957ca0f38b9e9078454afe.pdf
 */

package scalation.state

import scala.collection.mutable.{Map, Set}

import scalation.linalgebra.{MatriD, VectoD}
import scalation.linalgebra.gen.HMatrix3
import scalation.random.Randi0
import scalation.graphalytics.{Tree, TreeNode}

//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `MDP` class provides support for Partially-Observable Markov Decision
 *  Processes (POMDPs).
 *  NOTE: in this sketch a belief state is summarized by a single `Double`;
 *  a full implementation would represent it as a probability vector (`VectoD`).
 *  @param n  the number of states S = {0, 1, ..., n-1}
 *  @param p  the number of actions A = {0, 1, ..., p-1}
 *  @param m  the number of observations O = {0, 1, ..., m-1}
 *  @param t  the transition conditional probability t(s, a, q) = P(q | s, a),
 *            where s, q in S, a in A
 *  @param z  the observation conditional probability z(s, a, o) = P(o | s, a),
 *            where o in O, s in S, a in A
 *  @param r  the reward matrix r(s, a)
 *  @param g  the discount factor in (0, 1)
 */
class MDP (n: Int, p: Int, m: Int, t: HMatrix3 [Double], z: HMatrix3 [Double],
           r: MatriD, g: Double)
{
    private val bgen = Randi0 (n-1)                    // random variate generator for beliefs
    private val agen = Randi0 (p-1)                    // random variate generator for actions
    private val ogen = Randi0 (m-1)                    // random variate generator for observations
    private val b0   = bgen.gen                        // initial belief state probability value
    private var bid  = -1                              // belief id
    private val bmap = Map [Int, Double] ()            // map from bid to belief state value
    private val root = makeNode (b0)                   // make and map the root node (bid = 0)
    private val xt   = new Tree (root, 0.0)            // make x-tree with the given root
    private val pi   = Map [Double, Int] ()            // policy maps belief state into action

    private val s_r = 0 until n                        // range of states
    private val a_r = 0 until p                        // range of actions
    private val o_r = 0 until m                        // range of observations

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Optimize the value function, returning the final set of alpha-values
     *  (scalar stand-ins for alpha-vectors in this sketch).
     */
    def optimize (): Set [Double] =
    {
        val b    = 0                                   // id of the root belief state
        val al   = Set (0.0)                           // initial alpha set, seeded with a zero
                                                       // lower bound so 'max' in backup is defined
        var cont = true
        while (cont) {
            sample (al)                                // sample a new belief state into the x-tree
            backup (al, b)                             // point-based backup at belief b
            prune (al)                                 // prune dominated alpha-values
            if (terminate) cont = false                // check termination condition
        } // while
        al
    } // optimize

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Make a new tree node for belief state 'b' and record it in 'bmap'.
     *  @param b  the belief state value
     */
    def makeNode (b: Double): TreeNode =
    {
        bid += 1
        bmap += bid -> b
        new TreeNode (bid, 0)
    } // makeNode

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Check the termination condition.
     */
    def terminate: Boolean =
    {
        true                                           // FIX - stops after one iteration; add a convergence test
    } // terminate

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Sample a new belief state and add it to the x-tree.
     *  @param al  the current set of alpha-values
     */
    def sample (al: Set [Double])
    {
        val (b, a, o) = pick                           // pick a belief, action and observation
        val bb   = tau (b, a, o)                       // create a new belief state bb
        val node = makeNode (bb)                       // make a tree node for bb
        xt.add (node)                                  // make bb a child of b in the x-tree
    } // sample

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Pick a (belief, action, observation) triple to expand.
     */
    def pick: (Int, Int, Int) =
    {
        (0, agen.igen, ogen.igen)                      // FIX - always expands the root belief; action and
                                                       //       observation are picked uniformly at random
    } // pick

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Compute the updated belief state after taking action 'a' in belief 'b'
     *  and observing 'o'.
     *  @param b  the id of the current belief state
     *  @param a  the action taken
     *  @param o  the observation made
     */
    def tau (b: Int, a: Int, o: Int): Double =
    {
        -1.0                                           // FIX - with vector beliefs, bb(q) = z(q, a, o) *
                                                       //       sum_s t(s, a, q) * b(s), normalized;
                                                       //       see the 'TauSketch' object below
    } // tau

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Perform a point-based backup at belief state 'b', adding the best
     *  backed-up alpha-value to the set 'al'.
     *  FIX - with vector beliefs the full backup is
     *        alpha_{a,o} = argmax_{alpha in al} alpha . tau (b, a, o)
     *        alpha_a (s) = r(s, a) + g * sum_o sum_q t(s, a, q) * z(q, a, o) * alpha_{a,o}(q)
     *        al         += argmax_a (alpha_a . b)
     *        here the immediate reward term is omitted since beliefs are scalars.
     *  @param al  the current set of alpha-values
     *  @param b   the id of the belief state to back up
     */
    def backup (al: Set [Double], b: Int)
    {
        val al_ao = Map [(Int, Int), Double] ()        // best alpha-value for each (action, observation)
        for (a <- a_r; o <- o_r) {
            al_ao ((a, o)) = al.map (alp => alp * tau (b, a, o)).max
        } // for
        val al_a = for (a <- a_r) yield g * o_r.map (o => al_ao ((a, o))).sum
        val alb  = (for (a <- a_r) yield al_a (a) * bmap (b)).max
        al += alb                                      // add the best backed-up alpha-value at b
    } // backup

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Prune dominated alpha-values from the set 'al'.
     *  @param al  the current set of alpha-values
     */
    def prune (al: Set [Double])
    {
        // FIX - remove alpha-values dominated by others; for scalar alpha-values,
        //       all but the maximum are dominated
    } // prune

} // MDP class


//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `MDPTest` object is used to test the `MDP` class.
 *  > runMain scalation.state.MDPTest
 */
object MDPTest extends App
{
    // FIX - add test cases exercising 'optimize'

} // MDPTest object
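
//::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
/** The `TauSketch` object is a hedged sketch (not part of the original design)
 *  showing what the belief update 'tau' looks like when beliefs are represented
 *  as probability vectors rather than scalars.  It assumes the standard
 *  ScalaTion `VectorD` operations (apply, update, 'sum', '/') and `HMatrix3`
 *  element access 'apply (i, j, k)'; the object and method names are
 *  hypothetical.
 */
object TauSketch
{
    import scalation.linalgebra.VectorD
    import scalation.linalgebra.gen.HMatrix3

    //::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::
    /** Return the updated belief vector bb after taking action 'a' in belief 'b'
     *  and observing 'o', where
     *  bb(q) = z(q, a, o) * sum_s t(s, a, q) * b(s), normalized to sum to one.
     *  @param t  the transition probabilities t(s, a, q) = P(q | s, a)
     *  @param z  the observation probabilities z(q, a, o) = P(o | q, a)
     *  @param b  the current belief vector over states
     *  @param a  the action taken
     *  @param o  the observation made
     */
    def tauV (t: HMatrix3 [Double], z: HMatrix3 [Double],
              b: VectorD, a: Int, o: Int): VectorD =
    {
        val n  = b.dim                                 // number of states
        val bb = new VectorD (n)                       // unnormalized updated belief
        for (q <- 0 until n) {
            var sum = 0.0                              // sum_s t(s, a, q) * b(s)
            for (s <- 0 until n) sum += t(s, a, q) * b(s)
            bb(q) = z(q, a, o) * sum                   // weight by the observation probability
        } // for
        bb / bb.sum                                    // normalize to a probability vector
    } // tauV

} // TauSketch object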