Code:
from typing import List, Set, Dict, TypeVar
import math

# Problem 1 a)
def compute_frequency_map(words: List[str]) -> Dict[str, float]:
    counts: Dict[str, int] = {}
    for word in words:
        if word in counts:
            counts[word] += 1
        else:
            counts[word] = 1
    size = len(words)
    freqs: Dict[str, float] = {}
    for word, count in counts.items():
        freqs[word] = count / size
    return freqs
   

# Problem 1 b)
def compute_entropy(probs: Dict[str, float]) -> float:
    sumparts = 0.0
    for p_i in probs.values():
        sumparts += p_i * math.log2(p_i)
    return -sumparts
   

# dummy functions just to make the code run fo real
def build_huffman_code(probs: Dict[str, float]) -> Dict[str, str]:
    return {"un": "1", "chat": "01", "est": "00"}
def encode_words(words: List[str], code: Dict[str, str]) -> str:
    return "".join(map(lambda w: code[w], words))
S = TypeVar('S')
T = TypeVar('T')
def inverse_dict(dict: Dict[S, T]) -> Dict[T, S]:
    idict: Dict[T, S] = {}
    for k, v in dict.items():
        idict[v] = k
    return idict


# Problem 1 c)
def decode_string(bin_string: str, inverse_code: Dict[str, str]) -> List[str]:
    words = []
    start = 0
    for end in range(1, len(bin_string) + 1):
        potential_code_word = bin_string[start:end]
        if potential_code_word in inverse_code:
            words.append(inverse_code[potential_code_word])
            start = end
    return words


words: List[str] = ["un", "chat", "est", "un", "chat"]
print(f"words = {words}")
probs: Dict[str, float] = compute_frequency_map(words)
print(f"probs = {probs}")
entropy: float = compute_entropy(probs)
print(f"entropy = {entropy}")
code: Dict[str, str] = build_huffman_code(probs)
print(f"code = {code}")
message: str = encode_words(words, code)
print(f"message = {repr(message)}")
inverse_code: Dict[str, str] = inverse_dict(code)
print(f"inverse_code = {inverse_code}")
decoded_words: List[str] = decode_string(message, inverse_code)
print(f"decoded_words = {decoded_words}")


# Problem 2 a)
def hamming_distance1(c1: str, c2: str) -> int:
    n: int = 0
    for i in range(len(c1)):
        if c1[i] != c2[i]:
            n += 1
    return n      

# Problem 2 a) - variant
def hamming_distance2(c1: str, c2: str) -> int:
    return sum(b1 != b2 for b1, b2 in zip(c1, c2))

# Problem 2 a) - test
for hamming_distance in (hamming_distance1, hamming_distance2):
    print(hamming_distance("111", "010"))


# Problem 2 b)
def min_distance1(code_words: List[str]) -> int:
    n = len(code_words)
    min_dist = float('inf') # any large int or float is OK
    for i in range(n - 1):
        c1 = code_words[i]
        for j in range(i + 1, n):
            dist = hamming_distance1(c1, code_words[j])
            if dist < min_dist:
                min_dist = dist
    return int(min_dist) # no need to cast, this just for mypy

# Problem 2 b) - variant
def min_distance2(code_words: List[str]) -> int:
    n = len(code_words)
    return min(hamming_distance(code_words[i], code_words[j]) for i in range(n - 1) for j in range(i + 1, n))

# Problem 2 b) - test
code_words = ["000000", "000111", "111000", "111111"]
for min_distance in (min_distance1, min_distance2):
    print(min_distance(code_words))


# Problem 2 c)
def closest_code_word1(code_words: Set[str], c: str) -> str:
    min_dist = float('inf') # any large int or float is OK
    for word in code_words:
        dist = hamming_distance1(word, c)
        if dist < min_dist:
            min_dist = dist
            closest_word = word
    return closest_word

# Problem 2 c) - variant
def closest_code_word2(code_words: Set[str], c: str) -> str:
    return min(code_words, key=lambda c_i: hamming_distance(c_i, c))

# Problem 2 c) - test
for closest_code_word in (closest_code_word1, closest_code_word2):
    print(closest_code_word(set(code_words), "101111"))
Last modified: Wednesday, 14 December 2022, 12:58