Different languages have different names for hash tables with a slight variation on the hash table objects.:
Hash tables are widely used in computer science. You see them a lot in:
In hash tables, instead of index (as in arrays), we get to set a key value pair. The key is used as the index to find the value in the memory.
The mapping from key to values is done through a hash function,
Hash table in different languages: Refer to this page for a comprehensive
Note: Both set and get functions are . In case of small memory size (size), where collisions could happen, the get can become . But in practice, with enough size memory, this implementation is and runs fast.
class HashTable {
constructor(size){
this.data = new Array(size);
// this.data = [];
}
_hash(key) {
let hash = 0;
for (let i =0; i < key.length; i++){
hash = (hash + key.charCodeAt(i) * i) % this.data.length
}
return hash;
}
set(key, value) {
let address = this._hash(key);
if (!this.data[address]) {
this.data[address] = [];
}
this.data[address].push([key, value]);
return this.data;
}
get(key){
const address = this._hash(key);
const currentBucket = this.data[address]
if (currentBucket) {
for(let i = 0; i < currentBucket.length; i++){
if(currentBucket[i][0] === key) {
return currentBucket[i][1]
}
}
}
return undefined;
}
}
const myHashTable = new HashTable(50);
myHashTable.set('grapes', 10000)
myHashTable.get('grapes')
myHashTable.set('apples', 9)
myHashTable.get('apples')
Python implementation
# implement a hash table
class HashTable:
def __init__(self, size):
self.data = [None] * size
def _hash(self, key):
hash = 0
for i in range(len(key)):
hash = (hash + ord(list(key)[i]) * i) % len(self.data)
return hash
def set(self, key, value):
address = self._hash(key)
if self.data[address] is None:
self.data[address] = [key, value]
else:
self.data[address] = [self.data[address], [key, value]]
def get(self, key):
address = self._hash(key)
current_bucket = self.data[address]
if current_bucket[0] != key:
for i in range(len(current_bucket)):
if current_bucket[i][0] == key:
value = current_bucket[i][1]
break
else:
value = current_bucket[1]
return value
hasht = HashTable(5)
hasht.set('grapes', 10000)
hasht.set('grapes', 10000)
hasht.set('apples', 15000)
hasht.set('orange', 20000)
print(hasht.data)
print(hasht.get('grapes'))
print(hasht.get('apples'))
print(hasht.get('orange'))
We want to add another method to our hash table implementation above to be able to loop through the keys and give us all the keys.
class HashTable {
constructor(size){
this.data = new Array(size);
// this.data = [];
}
_hash(key) {
let hash = 0;
for (let i =0; i < key.length; i++){
hash = (hash + key.charCodeAt(i) * i) % this.data.length
}
return hash;
}
set(key, value) {
let address = this._hash(key);
if (!this.data[address]) {
this.data[address] = [];
}
this.data[address].push([key, value]);
return this.data;
}
get(key){
const address = this._hash(key);
const currentBucket = this.data[address]
if (currentBucket) {
for(let i = 0; i < currentBucket.length; i++){
if(currentBucket[i][0] === key) {
return currentBucket[i][1]
}
}
}
return undefined;
}
keys() {
if (!this.data.length) {
return undefined
}
let result = []
// loop through all the elements
for (let i = 0; i < this.data.length; i++) {
// if it's not an empty memory cell
if (this.data[i] && this.data[i].length) {
// but also loop through all the potential collisions
if (this.data.length > 1) {
for (let j = 0; j < this.data[i].length; j++) {
result.push(this.data[i][j][0])
}
} else {
result.push(this.data[i][0])
}
}
}
return result;
}
}
const myHashTable = new HashTable(50);
myHashTable.set('grapes', 10000)
myHashTable.set('grapes', 10000)
myHashTable.get('grapes')
myHashTable.set('apples', 9)
myHashTable.get('apples')
myHashTable.keys()
Python implementation
# implement a hash table
class HashTable:
def __init__(self, size):
self.data = [None] * size
def _hash(self, key):
hash = 0
for i in range(len(key)):
hash = (hash + ord(list(key)[i]) * i) % len(self.data)
return hash
def set(self, key, value):
address = self._hash(key)
if self.data[address] is None:
self.data[address] = [key, value]
else:
self.data[address] = [self.data[address], [key, value]]
def get(self, key):
address = self._hash(key)
current_bucket = self.data[address]
if current_bucket[0] != key:
for i in range(len(current_bucket)):
if current_bucket[i][0] == key:
value = current_bucket[i][1]
break
else:
value = current_bucket[1]
return value
def keys(self):
all_keys = []
for i in range(len(self.data)):
if self.data[i] is not None:
if isinstance(self.data[i][0], list):
for j in range(len(self.data[i])):
all_keys.append(self.data[i][j][0])
else:
all_keys.append(self.data[i][0])
return all_keys
hasht = HashTable(5)
hasht.set('grapes', 10000)
hasht.set('grapes', 10000)
hasht.set('apples', 15000)
hasht.set('orange', 20000)
print(hasht.data)
print(hasht.get('grapes'))
print(hasht.get('apples'))
print(hasht.get('orange'))
print(hasht.keys())
Note: This shows the downside of hash tables. To get the keys, we have to loop through all memory space even though not all the buckets might have data. In arrays, we just have to loop through the size of of the array. But in hash tables, because of random assignment to different memory buckets, we have to check all the buckets, even when they’re not occupied. The larger the size of the hash table, the longer this loop will take.
| Arrays | Hash Tables | |
|---|---|---|
| search | ||
| lookup | ||
| insert | ||
| delete | ||
| push |
Hash table pros:
Hash table cons:
Unordered
Slow key iteration
Note: Hash tables are usually the answer to improve time complexity.
Note: In cases of time vs. space tradeoff, sometimes storing extra state in memory can help the time (runtime). Hash tables usually solves this. You use more space, but you can get a time optimization to the process.
Exercise: (Google interview question)
Given an array, return the first recurring element. For example:
[2,5,1,2,3,5,1,2,4], it should return 2.[2,1,1,2,3,5,1,2,4], it should return 1.[2,3,4,5], it should return undefined (or None in Python).//Google Question
//Given an array = [2,5,1,2,3,5,1,2,4]:
//It should return 2
//Given an array = [2,1,1,2,3,5,1,2,4]:
//It should return 1
//Given an array = [2,3,4,5]:
//It should return undefined
function firstRecurringCharacter(input) {
for (let i = 0; i < input.length; i++) {
for (let j = i + 1; j < input.length; j++) {
if(input[i] === input[j]) {
return input[i];
}
}
}
return undefined
}
function firstRecurringCharacter2(input) {
let map = {};
for (let i = 0; i < input.length; i++) {
if (map[input[i]] !== undefined) {
return input[i]
} else {
map[input[i]] = i;
}
}
return undefined
}
firstRecurringCharacter2([1,5,5,1,3,4,6])
//Bonus... What if we had this:
// [2,5,5,2,3,5,1,2,4]
// return 5 because the pairs are before 2,2
Python implementation
# Google Question: Given an array, return the first recurring element.
# Example:
# --------
# Given an array = [2,5,1,2,3,5,1,2,4]:
# It should return 2
# Given an array = [2,1,1,2,3,5,1,2,4]:
# It should return 1
# Given an array = [2,3,4,5]:
# It should return undefined
def count_recurring_character(arr):
char_count = {}
for i in arr:
if i in char_count.keys():
char_count[i] += 1
else:
char_count[i] = 1
return char_count
def find_first_second_distance(key, arr):
first_index = arr.index(key)
del arr[first_index]
second_index = arr.index(key)
return second_index - first_index
def find_first_recurring_character(arr):
char_count = count_recurring_character(arr)
key = None
min_distance = 1000
for k, v in char_count.items():
if v > 1:
index_distance = find_first_second_distance(k, arr)
if index_distance < min_distance:
min_distance = index_distance
key = k
return key
arr = [2,5,1,2,3,5,1,2,4]
print(find_first_recurring_character(arr))