//:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** @author John Miller * @version 2.0 * @date Mon Jul 4 12:55:00 EDT 2022 * @see LICENSE (MIT style license file). * * @note Map Implemented Using Linear Hashing * * Split Home Buckets when load factor is too high * Merge Home Buckets when load factor is too low (not yet implemented) */ package scalation package database import scala.collection.mutable.{AbstractMap, ArrayBuffer} import scala.reflect.ClassTag //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `LinHashMap` class provides hash maps that use the Linear Hashing algorithm. * A hash table is created that is an expandable array-list (`ArrayBuffer`) of buckets. * Inserts may cause the allocation of overflow buckets as well as the splitting of * home buckets. Uses the SRI rule: Split, Rehash, and then Insert. * @tparam K the type of the keys contained in this hash map * @tparam V the type of the values assigned to keys in this hash map * @param order the number of slots per bucket * @param loadFac the (lower, upper) bound on the load factor (# keys over # home slots) */ class LinHashMap [K: ClassTag, V: ClassTag] (order: Int = 4, loadFac: (Double, Double) = (0.2, 1.2)) extends AbstractMap [K, V] with Serializable: var count = 0 // count # buckets accessed (performance) private val debug = debugf ("LinHashMap", true) // debug function //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `Bucket` inner class defines buckets that are stored in this hash table. */ class Bucket () extends Serializable: val key = Array.ofDim [K] (order) // array to hold keys val value = Array.ofDim [V] (order) // array to hold values var nKeys = 0 // number of active keys var next: Bucket = null // reference to next bucket in chain //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Find key k in this bucket, returning the corresponding value or * null if not found. * @param k the key to find in this bucket */ def find (k: K): V = val j = findPos (k) if j == -1 then null.asInstanceOf [V] else value(j) end find //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Find the position of key k in this bucket, returning its position in * the bucket, or -1 if not found. * @param k the key to find in this bucket */ inline def findPos (k: K): Int = key.indexOf (k) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Add the given key-value pair to this bucket, returning true if successful * and false otherwise (when this bucket is full). * @param k the key * @param v the value */ def add (k: K, v: V): Boolean = if nKeys < order then key(nKeys) = k value(nKeys) = v nKeys += 1 true else false end add //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Move the j-th element (key-value pair) to bucket b2, returning true * if successful and false otherwise (when bucket b2 is full). * Simply remove the j-th element when b2 is null. * @param j the j-th key-value pair * @param b2 the other bucket to move the pair to */ def move (j : Int, b2: Bucket): Boolean = val (ky, vl) = (key(j), value(j)) if b2 == null || b2.add (ky, vl) then nKeys -= 1 // remove element j if j != nKeys then key(j) = key(nKeys) // overwrite with last pair value(j) = value(nKeys) end if true else false end move //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Show/print the contents of this bucket. */ def show (): Unit = print ("[ " ) for j <- 0 until nKeys do print (s"${key(j)} . ") println ("]" ) end show end Bucket private var mod1 = 4 // modulus for low resolution hashing private var mod2 = 2 * mod1 // modulus for high resolution hashing private var isplit = 0 // index of the next bucket to split private var keyCount = 0 // counter for total number of keys /** The list of buckets making up this hash table. */ private val hTable = new ArrayBuffer [Bucket] () for i <- 0 until mod1 do hTable += new Bucket () //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `HTIterator` inner class supports iterating over all the elements * in a Hash-Table by traversing through the buckets in this hash table. * @param is the starting bucket chain (defaults to 0) * @param bs the starting home bucket (defaults to hTable(0)) * @param js the starting within node index (defaults to 0) */ class HTIterator (is: Int = 0, bs: Bucket = hTable(0), js: Int = 0) extends Iterator [(K, V)]: var (i, b, j) = (is, bs, js) def hasNext: Boolean = j < b.nKeys - 1 || b.next != null || is < size0 - 1 def next (): (K, V) = if j < b.nKeys - 1 then j += 1 // next position in bucket else if b.next != null then { b = b.next; j = 0 } // next overflow bucket else { i += 1; b = hTable(i); j = 0 } // next bucket chain (b.key(j), b.value(j)) end next end HTIterator //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return an iterator for retrieving all the elements in this hash table. * @see scala.collection.IterableOnce */ def iterator: Iterator [(K, V)] = new HTIterator () //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the value associated with the key by looking it up in this hash table. * @param key the key used for look up */ def get (key: K): Option [V] = find (key, hTable(h (key)), true) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Add one key-value pair into this hash table and return this (called by put). * Note: it splits the 'isplit' bucket chain when the load factor is exceeded. * @param elem the key-value pair to add/insert */ def addOne (elem: (K, V)): this.type = val (key, value) = elem keyCount += 1 // increment the key count val lf = loadFactor // compute the load factor debug ("addOne", s"$key -> $value, load factor = $lf") if lf > loadFac._2 then split () // split when load factor is too high var b = hTable(h (key)) // find space in bucket chain while { if b.nKeys < order then { b.add (key, value); return this } b.next != null } do b = b.next end while val bnew = new Bucket () // needs a new overflow bucket bnew.add (key, value) // put pair in new bucket b.next = bnew // add new bucket at end of chain this end addOne //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Subtract/remove the one element (key-value pair) with the given key. * Note: it merges home buckets when load factor is too low. * @param key the key whose element is to be removed */ def subtractOne (key: K): this.type = keyCount -= 1 // decrement the key count val lf = loadFactor // compute the load factor debug ("subtractOne", s"key = $key, load factor = $lf") if lf < loadFac._1 then merge () // merge when load factor is too low val bh = hTable(h (key)) // home bucket var b = bh // current bucket var bp = bh // previous bucket var removed = false cfor (! removed && b != null, {bp = b; b = b.next}) { // iterate through bucket chain val j = b.findPos (key) if j >= 0 then // position of key is found b.move (j, null) // move element j to null bucket if b != bh && b.nKeys == 0 then bp.next = b.next // remove empty overflow bucket end if removed = true // element successfully removed, end loop end if } // cfor debug ("subtractOne", s"could not find key $key to remove it") this end subtractOne //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Show/print this hash table. */ def show (): Unit = println ("LinHashMap") println ("-" * 60) for i <- hTable.indices do print (s"Bucket [ $i ] = ") var j = 0 var b = hTable(i) cfor (b != null, {b = b.next}) { if j > 0 then print (" \t\t --> ") b.show () j += 1 } // cfor end for println ("-" * 60) end show //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the size (order * number of home buckets) of this hash table. */ override def size: Int = order * (mod1 + isplit) inline def size0: Int = order * (mod1 + isplit) //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Split bucket chain 'isplit' by creating a new bucket chain at the end of the * hash table and redistributing the keys according to the high resolution hash * function 'h2'. Increment 'isplit'. If current split phase is complete, * reset 'isplit' to zero, and update the hash functions. */ private def split (): Unit = debug ("split", s"bucket chain $isplit") val bnew = new Bucket () // create a new home bucket hTable += bnew // add to end of hash table val b = hTable(isplit) // b is the bucket chain to be split val isp = isplit // original value of isplit isplit += 1 // increment to split pointer if isplit == mod1 then // has it reached end in current phase? isplit = 0 // if so, reset to zero mod1 = mod2 // double first hash function's modulus mod2 *= 2 // double second hash function's modulus end if rehash (isp, b, bnew) // rehash some keys in chain b to bnew show () end split //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Merge bucket chain 'isplit-1' by merging this bucket chain with the last * bucket chain. Deccrement 'isplit'. If 'isplit' becomes negative, * set back to previous phase. */ private def merge (): Unit = debug ("merge", s"bucket chain $isplit") val bend = hTable.last // reference the last bucket chain hTable -= bend // remove the last chain from the table isplit -= 1 // decrement to split pointer if isplit < 0 then // reset back to previous phase mod2 = mod1 mod1 /= 2 isplit = mod1 - 1 end if val bh = hTable(isplit) // home bucket for merge chain var b = bh cfor (b.next != null, {b = b.next}) {} // move to the end of this chain b.next = bend // link last chain to merge chain show () end merge //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Rehash/redistribute the key-value pairs in the bucket chain for b1. * Based on h2 some pairs may need to move to bucket chain for b2. * @param isp the original value of isplit (before the split) * @param b1 the first bucket chain's home bucket (split bucket) * @param b2 the second bucket chain's home bucket (new bucket) */ private def rehash (isp: Int, b1: Bucket, b2: Bucket): Unit = var b = b1 // bucket chain for isp var c = b2 // bucket chain at end of hash table var bp = b1 // previous bucket cfor (b != null, {bp = b; b = b.next}) { // iterate through isp bucket chain var j = 0 while b.nKeys > 0 && j < b.nKeys do val keyj = b.key(j) // j-th key in bucket b val i2 = h2(keyj) // recompute its hash function debug ("rehash", s"keyj = $keyj, is = $isp, i2 = $i2") if i2 != isp then // see if the hash has changed? if ! b.move (j, c) then // move j-th pair to bucket chain c val b3 = new Bucket () // requires a new overflow bucket c.next = b3 // link it in th chain c = b3 // set c to new overflow bucket b.move (j, c) // move j-th pair to overflow bucket else if b != b1 && b.nKeys == 0 then bp.next = b.next // remove empty overflow bucket end if end if else j += 1 end if end while } // cfor end rehash //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Return the load factor (# keys over # home slots) for this hash table. */ private def loadFactor: Double = keyCount / size.toDouble //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Find the key in the bucket chain that starts with home bucket bh and * return the current value stored stored for the key. * @param key the key to find * @param bh the given home bucket * @param by_get whether 'find' is called from 'get' (performance monitoring) */ private def find (key: K, bh: Bucket, by_get: Boolean): Option [V] = var b = bh var res: Option [V] = None cfor (b != null && res == None, {b = b.next}) { if by_get then count += 1 val result = b.find (key) if result != null then res = Some (result) } // cfor res end find //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Hash the key using the low resolution hash function and if needed, the * high resolution hash function. Return the location of the bucket chain * containing the key-value pair. * @param key the key to hash */ private def h (key: K): Int = val i = key.hashCode () % mod1 if i < isplit then h2 (key) else i end h //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** Hash the key using the high resolution hash function. Return the location * of the bucket chain containing the key-value pair. * @param key the key to hash */ private def h2 (key: K): Int = key.hashCode () % mod2 end LinHashMap //:::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::::: /** The `linHashMapTest` main function used for testing the `LinHashMap` class. * > runMain scalation.database.linHashMapTest */ @main def linHashMapTest (): Unit = import scalation.random.Randi0 val RANDOMLY = false val totalKeys = 50 val ht = new LinHashMap [Int, Int] () banner ("Insert Keys") if RANDOMLY then val rng = Randi0 (2 * totalKeys) for i <- 1 to totalKeys by 2 do ht.put (rng.igen, i * i) else for i <- 1 to totalKeys by 2 do ht.put (i, i * i) end if ht.show () banner ("Find Keys") for i <- 0 to totalKeys do println (s"key = $i, value = ${ht.get (i)}") banner ("Delete Keys") ht -= 41 ht.show () ht -= 33 ht.show () banner ("Analysis Performance") println (s"Average number of buckets accessed = ${ht.count / totalKeys.toDouble}") end linHashMapTest