Code:
import math
from typing import TypeVar


# Problem 1 a)
def compute_frequency_map(words: list[str]) -> dict[str, float]:
    counts: dict[str, int] = {}
    for word in words:
        if word in counts:
            counts[word] += 1
        else:
            counts[word] = 1
    size = len(words)
    freqs: dict[str, float] = {}
    for word, count in counts.items():
        freqs[word] = count / size
    return freqs


# Problem 1 b)
def compute_entropy(probs: dict[str, float]) -> float:
    sumparts = 0.0
    for p_i in probs.values():
        sumparts += p_i * math.log2(p_i)
    return -sumparts


# dummy functions just to make the code run fo real
def build_huffman_code(probs: dict[str, float]) -> dict[str, str]:
    return {"un": "1", "chat": "01", "est": "00"}


def encode_words(words: list[str], code: dict[str, str]) -> str:
    return "".join(map(lambda w: code[w], words))


S = TypeVar("S")
T = TypeVar("T")


def inverse_dict(mydict: dict[S, T]) -> dict[T, S]:
    idict: dict[T, S] = {}
    for k, v in mydict.items():
        idict[v] = k
    return idict


# Problem 1 c)
def decode_string(bin_string: str, inverse_code: dict[str, str]) -> list[str]:
    words = []
    start = 0
    for end in range(1, len(bin_string) + 1):
        potential_code_word = bin_string[start:end]
        if potential_code_word in inverse_code:
            words.append(inverse_code[potential_code_word])
            start = end
    return words


words: list[str] = ["un", "chat", "est", "un", "chat"]
print(f"words = {words}")
probs: dict[str, float] = compute_frequency_map(words)
print(f"probs = {probs}")
entropy: float = compute_entropy(probs)
print(f"entropy = {entropy}")
code: dict[str, str] = build_huffman_code(probs)
print(f"code = {code}")
message: str = encode_words(words, code)
print(f"message = {repr(message)}")
inverse_code: dict[str, str] = inverse_dict(code)
print(f"inverse_code = {inverse_code}")
decoded_words: list[str] = decode_string(message, inverse_code)
print(f"decoded_words = {decoded_words}")


# Problem 2 a)
def hamming_distance1(c1: str, c2: str) -> int:
    n: int = 0
    for i in range(len(c1)):
        if c1[i] != c2[i]:
            n += 1
    return n


# Problem 2 a) - variant
def hamming_distance2(c1: str, c2: str) -> int:
    return sum(b1 != b2 for b1, b2 in zip(c1, c2))


# Problem 2 a) - test
for hamming_distance in (hamming_distance1, hamming_distance2):
    print(hamming_distance("111", "010"))


# Problem 2 b)
def min_distance1(code_words: list[str]) -> int:
    n = len(code_words)
    min_dist = float("inf")  # any large int or float is OK
    for i in range(n - 1):
        c1 = code_words[i]
        for j in range(i + 1, n):
            dist = hamming_distance1(c1, code_words[j])
            if dist < min_dist:
                min_dist = dist
    return int(min_dist)  # no need to cast, this just for mypy


# Problem 2 b) - variant
def min_distance2(code_words: list[str]) -> int:
    n = len(code_words)
    return min(hamming_distance(code_words[i], code_words[j]) for i in range(n - 1) for j in range(i + 1, n))


# Problem 2 b) - test
code_words = ["000000", "000111", "111000", "111111"]
for min_distance in (min_distance1, min_distance2):
    print(min_distance(code_words))


# Problem 2 c)
def closest_code_word1(code_words: set[str], c: str) -> str:
    min_dist = float("inf")  # any large int or float is OK
    for word in code_words:
        dist = hamming_distance1(word, c)
        if dist < min_dist:
            min_dist = dist
            closest_word = word
    return closest_word


# Problem 2 c) - variant
def closest_code_word2(code_words: set[str], c: str) -> str:
    return min(code_words, key=lambda c_i: hamming_distance(c_i, c))


# Problem 2 c) - test
for closest_code_word in (closest_code_word1, closest_code_word2):
    print(closest_code_word(set(code_words), "101111"))
Last modified: Saturday, 11 May 2024, 13:11