Code:
from typing import List, Set, Dict, TypeVar
import math

# Problem 1 a)
def compute_frequency_map(words: List[str]) -> Dict[str, float]:
counts: Dict[str, int] = {}
for word in words:
if word in counts:
counts[word] += 1
else:
counts[word] = 1
size = len(words)
freqs: Dict[str, float] = {}
for word, count in counts.items():
freqs[word] = count / size
return freqs

# Problem 1 b)
def compute_entropy(probs: Dict[str, float]) -> float:
sumparts = 0.0
for p_i in probs.values():
sumparts += p_i * math.log2(p_i)
return -sumparts

# dummy functions just to make the code run fo real
def build_huffman_code(probs: Dict[str, float]) -> Dict[str, str]:
return {"un": "1", "chat": "01", "est": "00"}
def encode_words(words: List[str], code: Dict[str, str]) -> str:
return "".join(map(lambda w: code[w], words))
S = TypeVar('S')
T = TypeVar('T')
def inverse_dict(dict: Dict[S, T]) -> Dict[T, S]:
idict: Dict[T, S] = {}
for k, v in dict.items():
idict[v] = k
return idict

# Problem 1 c)
def decode_string(bin_string: str, inverse_code: Dict[str, str]) -> List[str]:
words = []
start = 0
for end in range(1, len(bin_string) + 1):
potential_code_word = bin_string[start:end]
if potential_code_word in inverse_code:
words.append(inverse_code[potential_code_word])
start = end
return words

words: List[str] = ["un", "chat", "est", "un", "chat"]
print(f"words = {words}")
probs: Dict[str, float] = compute_frequency_map(words)
print(f"probs = {probs}")
entropy: float = compute_entropy(probs)
print(f"entropy = {entropy}")
code: Dict[str, str] = build_huffman_code(probs)
print(f"code = {code}")
message: str = encode_words(words, code)
print(f"message = {repr(message)}")
inverse_code: Dict[str, str] = inverse_dict(code)
print(f"inverse_code = {inverse_code}")
decoded_words: List[str] = decode_string(message, inverse_code)
print(f"decoded_words = {decoded_words}")

# Problem 2 a)
def hamming_distance1(c1: str, c2: str) -> int:
n: int = 0
for i in range(len(c1)):
if c1[i] != c2[i]:
n += 1
return n

# Problem 2 a) - variant
def hamming_distance2(c1: str, c2: str) -> int:
return sum(b1 != b2 for b1, b2 in zip(c1, c2))

# Problem 2 a) - test
for hamming_distance in (hamming_distance1, hamming_distance2):
print(hamming_distance("111", "010"))

# Problem 2 b)
def min_distance1(code_words: List[str]) -> int:
n = len(code_words)
min_dist = float('inf') # any large int or float is OK
for i in range(n - 1):
c1 = code_words[i]
for j in range(i + 1, n):
dist = hamming_distance1(c1, code_words[j])
if dist < min_dist:
min_dist = dist
return int(min_dist) # no need to cast, this just for mypy

# Problem 2 b) - variant
def min_distance2(code_words: List[str]) -> int:
n = len(code_words)
return min(hamming_distance(code_words[i], code_words[j]) for i in range(n - 1) for j in range(i + 1, n))

# Problem 2 b) - test
code_words = ["000000", "000111", "111000", "111111"]
for min_distance in (min_distance1, min_distance2):
print(min_distance(code_words))

# Problem 2 c)
def closest_code_word1(code_words: Set[str], c: str) -> str:
min_dist = float('inf') # any large int or float is OK
for word in code_words:
dist = hamming_distance1(word, c)
if dist < min_dist:
min_dist = dist
closest_word = word
return closest_word

# Problem 2 c) - variant
def closest_code_word2(code_words: Set[str], c: str) -> str:
return min(code_words, key=lambda c_i: hamming_distance(c_i, c))

# Problem 2 c) - test
for closest_code_word in (closest_code_word1, closest_code_word2):
print(closest_code_word(set(code_words), "101111"))
Last modified: Wednesday, 14 December 2022, 12:58