Intro

keyundefinedHash Functionvaluekey \xrightarrow{\text{Hash Function}} value

What is a hash function?

Hash Collisions

Hash Collision

Screen Shot 2022-06-26 at 12.42.03 PM
Screen Shot 2022-06-26 at 12.42.33 PM

Hash table in different languages: Refer to this page for a comprehensive

Implement a Hash table

Note: Both set and get functions are O(1)\text{O(1)}. In case of small memory size (size), where collisions could happen, the get can become O(n)\text{O(n)}. But in practice, with enough size memory, this implementation is O(1)\text{O(1)} and runs fast.

class HashTable {
  constructor(size){
    this.data = new Array(size);
    // this.data = [];
  }

  _hash(key) {
    let hash = 0;
    for (let i =0; i < key.length; i++){
        hash = (hash + key.charCodeAt(i) * i) % this.data.length
    }
    return hash;
  }

  set(key, value) {
    let address = this._hash(key);
    if (!this.data[address]) {
      this.data[address] = [];
    }
    this.data[address].push([key, value]);
    return this.data;
  }

  get(key){
    const address = this._hash(key);
    const currentBucket = this.data[address]
    if (currentBucket) {
      for(let i = 0; i < currentBucket.length; i++){
        if(currentBucket[i][0] === key) {
          return currentBucket[i][1]
        }
      }
    }
    return undefined;
  }
}

const myHashTable = new HashTable(50);
myHashTable.set('grapes', 10000)
myHashTable.get('grapes')
myHashTable.set('apples', 9)
myHashTable.get('apples')

Python implementation

# implement a hash table
class HashTable:
    def __init__(self, size):
        self.data = [None] * size

    def _hash(self, key):
        hash = 0
        for i in range(len(key)):
            hash = (hash + ord(list(key)[i]) * i) % len(self.data)
        return hash

    def set(self, key, value):
        address = self._hash(key)
        if self.data[address] is None:
            self.data[address] = [key, value]
        else:
            self.data[address] = [self.data[address], [key, value]]

    def get(self, key):
        address = self._hash(key)
        current_bucket = self.data[address]
        if current_bucket[0] != key:
            for i in range(len(current_bucket)):
                if current_bucket[i][0] == key:
                    value = current_bucket[i][1]
                    break
        else:
            value = current_bucket[1]
        return value
        


hasht = HashTable(5)
hasht.set('grapes', 10000)
hasht.set('grapes', 10000)
hasht.set('apples', 15000)
hasht.set('orange', 20000)
print(hasht.data)
print(hasht.get('grapes'))
print(hasht.get('apples'))
print(hasht.get('orange'))

Iterate through keys

We want to add another method to our hash table implementation above to be able to loop through the keys and give us all the keys.

class HashTable {
  constructor(size){
    this.data = new Array(size);
    // this.data = [];
  }

  _hash(key) {
    let hash = 0;
    for (let i =0; i < key.length; i++){
        hash = (hash + key.charCodeAt(i) * i) % this.data.length
    }
    return hash;
  }

  set(key, value) {
    let address = this._hash(key);
    if (!this.data[address]) {
      this.data[address] = [];
    }
    this.data[address].push([key, value]);
    return this.data;
  }

  get(key){
    const address = this._hash(key);
    const currentBucket = this.data[address]
    if (currentBucket) {
      for(let i = 0; i < currentBucket.length; i++){
        if(currentBucket[i][0] === key) {
          return currentBucket[i][1]
        }
      }
    }
    return undefined;
  }
  
	keys()  {
		if  (!this.data.length)  {
			return  undefined
		}
		let result =  []
		// loop through all the elements
		for  (let i =  0; i <  this.data.length; i++)  {
			// if it's not an empty memory cell
			if  (this.data[i]  &&  this.data[i].length)  {
				// but also loop through all the potential collisions
				if  (this.data.length >  1)  {
					for  (let j =  0; j <  this.data[i].length; j++)  {
					result.push(this.data[i][j][0])
					}
				}  else  {
				result.push(this.data[i][0])
				}
			}
		}
		return result;
		}
}

const myHashTable = new HashTable(50);
myHashTable.set('grapes', 10000)
myHashTable.set('grapes', 10000)
myHashTable.get('grapes')
myHashTable.set('apples', 9)
myHashTable.get('apples')
myHashTable.keys()

Python implementation

# implement a hash table
class HashTable:
    def __init__(self, size):
        self.data = [None] * size

    def _hash(self, key):
        hash = 0
        for i in range(len(key)):
            hash = (hash + ord(list(key)[i]) * i) % len(self.data)
        return hash

    def set(self, key, value):
        address = self._hash(key)
        if self.data[address] is None:
            self.data[address] = [key, value]
        else:
            self.data[address] = [self.data[address], [key, value]]

    def get(self, key):
        address = self._hash(key)
        current_bucket = self.data[address]
        if current_bucket[0] != key:
            for i in range(len(current_bucket)):
                if current_bucket[i][0] == key:
                    value = current_bucket[i][1]
                    break
        else:
            value = current_bucket[1]
        return value

    def keys(self):
        all_keys = []
        for i in range(len(self.data)):
            if self.data[i] is not None:
                if isinstance(self.data[i][0], list):
                    for j in range(len(self.data[i])):
                        all_keys.append(self.data[i][j][0])
                else:
                    all_keys.append(self.data[i][0])
        return all_keys
        


hasht = HashTable(5)
hasht.set('grapes', 10000)
hasht.set('grapes', 10000)
hasht.set('apples', 15000)
hasht.set('orange', 20000)
print(hasht.data)
print(hasht.get('grapes'))
print(hasht.get('apples'))
print(hasht.get('orange'))
print(hasht.keys())

Note: This shows the downside of hash tables. To get the keys, we have to loop through all memory space even though not all the buckets might have data. In arrays, we just have to loop through the size of of the array. But in hash tables, because of random assignment to different memory buckets, we have to check all the buckets, even when they’re not occupied. The larger the size of the hash table, the longer this loop will take.

Hash tables vs. Arrays

Arrays Hash Tables
search O(n)\text{O(n)} O(1)\text{O(1)}
lookup O(1)\text{O(1)} O(1)\text{O(1)}
insert O(n)\text{O(n)} O(1)\text{O(1)}
delete O(n)\text{O(n)} O(1)\text{O(1)}
push O(1)\text{O(1)}

Hash table pros:

Hash table cons:

Exercise: (Google interview question)

Given an array, return the first recurring element. For example:

//Google Question
//Given an array = [2,5,1,2,3,5,1,2,4]:
//It should return 2

//Given an array = [2,1,1,2,3,5,1,2,4]:
//It should return 1

//Given an array = [2,3,4,5]:
//It should return undefined


function firstRecurringCharacter(input) {
  for (let i = 0; i < input.length; i++) {
    for (let j = i + 1; j < input.length; j++) {
      if(input[i] === input[j]) {
        return input[i];
      }
    }
  }
  return undefined
}

function firstRecurringCharacter2(input) {
  let map = {};
  for (let i = 0; i < input.length; i++) {
    if (map[input[i]] !== undefined) {
      return input[i]
    } else {
      map[input[i]] = i;
    }
  }
  return undefined
}

firstRecurringCharacter2([1,5,5,1,3,4,6])


//Bonus... What if we had this:
// [2,5,5,2,3,5,1,2,4]
// return 5 because the pairs are before 2,2

Python implementation

# Google Question: Given an array, return the first recurring element.

# Example:
# --------
# Given an array = [2,5,1,2,3,5,1,2,4]:
# It should return 2
# Given an array = [2,1,1,2,3,5,1,2,4]:
# It should return 1
# Given an array = [2,3,4,5]:
# It should return undefined

def count_recurring_character(arr):
    char_count = {}
    for i in arr:
        if i in char_count.keys():
            char_count[i] += 1
        else:
            char_count[i] = 1
    return char_count

def find_first_second_distance(key, arr):
    first_index = arr.index(key)
    del arr[first_index]
    second_index = arr.index(key)
    return second_index - first_index

def find_first_recurring_character(arr):
    char_count = count_recurring_character(arr)
    key = None
    min_distance = 1000
    for k, v in char_count.items():
        if v > 1:
            index_distance = find_first_second_distance(k, arr)
            if index_distance < min_distance:
                min_distance = index_distance
                key = k
    return key
            

arr = [2,5,1,2,3,5,1,2,4]
print(find_first_recurring_character(arr))