import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl
import pandas as pd
# Set matplotlib's global default font family for all figure text.
# NOTE(review): presumably chosen so emoji glyphs can be rendered in plots;
# confirm that this font is installed on the target machine.
mpl.rcParams["font.family"] = "Segoe UI Emoji"
text1 = """
π§π§― π€π₯π¦ π€π€«π₯πΌπ€« π€π€ π¦π¦π₯Ύ
π€―π€―π§ππΌ π₯π₯¦π€π€π¦π π§π§π§π¦π§― π¦ππ¦π¦π€―π¦Ί π₯Ύπ₯Ύπ₯π₯π₯
π€π§π₯¦ π₯¦πΌπ¦π₯Ύπ¦Ίπ€ π§―π¦π¦π€Άπ¦Ίπ¦π₯·π§― ππ¦π¦π€
π§ππ¦Ίπ¦π€«π₯π§― π¦Ίπ π¦πΌπ₯¦ππ₯ π€―π¦π¦π¦Ίπ¦Ί πΌπ€π₯Ύπ€―π₯Ύπ€Άπ₯¦π¦
π¦ππ€π¦ΊπΌπ€Άπ€ π§π¦ππ€―π§π€π€ π§―π€ ππ¦π₯π§π¦π€Άπ¦Ίπ¦ π€Άπ€Άπ€«π€π¦Ί
π€π₯ππ₯π€π€ π₯¦π¦Ίπ¦π€π₯Ύπ₯· π€―π¦π¦Ίπ π§―π¦π¦ π€π§―π₯ π¦π€π§π§π€π€―π§
π€«π¦ π₯¦π€Άπ₯π€π₯ π₯¦π¦π₯¦ πΌπ€π¦ππ₯ π§π¦π€―π€«π¦π₯Ύπ¦
ππΌ π§ππ§π¦π€π¦π¦Ίπ₯· π€―π¦π€Άπ§π§π§π₯ π₯·π€Άπ¦π¦ππ§ πΌππ€«π§"""
text2 = """
π¦Ίπ€Άπ¦Ίπ¦ π¦π§― π§π₯π€― π€Άπ₯π€π₯·π
π§π¦π₯Ύ ππ€― π€«π§π€π¦Ίπ€π§― π¦Ίπ€π¦π€«πΌ
π€Άππ¦Ίπ₯Ύ π§π€Άπ₯·π§π§π€«π§ π€π₯Ύπ€«π€ππ¦π¦ π€π₯¦π₯·π§π₯
ππ§―π¦π¦ π§π¦π€π§π€« πΌπ€π¦ ππ¦π€―
π₯·π€―π€π§―π§ π§π€Άπ§π§π¦ π§π€Άπ§―π₯π€―π π₯¦π₯Ύπ€Άπ€«π πππ€―πΌπ€Άπ§π₯·π¦
π§πΌπ€ π₯π€π₯Ύ π₯Ύππ€« π§ππ§π€π¦Ίπ₯π€―π€ π¦π§―π₯· π₯Ύπ€π₯Ύ
π₯¦π§π¦ππ§π ππ€πΌπ€―π¦π₯Ύπ₯· ππ₯π€―π¦π§π§π§ π¦π§ πππ¦π§ π₯Ύπ₯Ύπ€Άπ€―
πππ§π€―π§ π€π¦Ίπ€―π§ π¦π€Άπ₯Ύπ₯π₯·π¦π¦Ί π¦πΌπ¦ π¦π€ΆπΌπ€« π¦π¦πΌπ€―π€―"""
text3 = """
π§―π§π¦π€«π₯¦π€π¦ ππ₯π€π₯·π§―π€«π€« π₯·π§―π¦Ίπ¦Ί π§π₯π₯Ύπ¦Ίπ§
π€π¦π§π₯π€π₯Ύπ€Άπ₯ π₯Ύπ€« πΌπ₯π¦Ί π€Άπ¦Ίπ¦ π€«π¦Ί π₯¦πΌπ§―π€«π¦Ίπ€Ά
π€π₯Ύπ¦Ίπ€ π€―π₯¦π€«π¦π€―π€π€π€Ά π§π₯¦π§ π¦π§―π₯Ύπ§π§― π₯π€ΆπΌ
πππ₯Ύπ§π§ π¦π₯π§πΌπ₯π€«π€ π€π€π€Άπ¦π₯Ύ π€«πΌπ€«π₯Ύπ¦π§ π€π¦π¦Ίπ§πΌ π₯Ύπ€π₯π§π€«π₯·π¦π€«
π₯π§π§π€«π₯π¦π€π₯ π§π₯π₯Ύ πΌπΌπ€Άπ€π¦ π€π¦Ίπ€π¦π§π€―π₯ π§π¦π€―π₯Ύ
π¦π₯ πΌπ₯¦π¦π§π₯π§―π₯ π€πΌ πΌπ€«
π₯Ύπ¦π₯· π¦π€ π¦π₯π€―π€«π₯π§ π€π€«ππ¦Ίπ₯ π₯Ύπ₯¦π₯π¦π§ π₯πΌππ€«π€―
π₯¦π€Άπ§π€ππ¦π§π§― π€π€«π§π₯¦π¦Ίπ€ π₯¦π€Άπ₯π¦ π¦Ίπ§π¦π₯¦π§―π¦Ί"""
text4 = """
ππ§π₯¦π¦π¦π₯¦ π€Άπ§―π₯π§―π€ π§―π¦Ίπ§―ππ§π§ π¦π§π₯π€«π€Ά π€Άπ§π¦Ίπ¦ππ€«π§π₯· ππ€π₯π€―π€π€Άπ€Άπ₯Ύ
π¦πΌπ¦π¦ππ§π€ ππ€π€―π§π€π¦π¦πΌ π₯π₯·π₯π€π€π€ π¦π₯π₯πΌπ¦π€ πΌπ€ π¦Ίπ¦π₯·ππ₯¦π€―
π€π¦ π§π€π₯Ύπ€―π₯¦ π₯¦πππ₯π₯·π€π₯Ύ π₯π€Ά π¦Ίπ€«π₯·ππ¦Ίπ€ π₯¦ππ§π§―π€π₯π€Άπ₯Ύ
ππ€π¦Ίππ₯Ύπ€π€―π€Ά π₯¦π¦π₯¦π€π€«π€―π₯π€ π€π¦π₯Ύπ€π€Άπ§― πΌπ€―π π₯¦π€π§
ππ€π₯π₯π€«π§πΌ π€―π¦π₯π€π₯ π₯π¦ππΌπ π¦π¦π₯¦π€π¦
π€―ππ₯¦π¦πΌππ€Άπ€ πΌπ€―π€―ππ₯ΎπΌπ₯· π€―π₯π¦πΌ π₯·ππ€π₯ π¦Ίπ¦Ίπ¦Ίπ₯π¦π§π€«π€
π₯·π§π₯¦π π§π€ππ¦π€―π€―ππΌ π§―π§π₯¦ ππ€― π¦π€«π₯π€π€«π¦ πΌπ§π¦Ίπππ¦Ίπ€«π₯¦
π§π₯¦π¦πΌπππ¦ π€π₯Ύ π₯π₯¦π§π€―π€Ά π¦π₯·π€π₯ππ€Ά π¦π€―πΌπΌ"""
text5 = """
π₯π¦π₯Ύπ¦π§π§―π§π€― π§ππ¦π₯π¦π₯Ύπ€― ππ€Άπ§π₯π¦π€ π§π¦π€― π¦π¦π€Άπ¦π¦π₯ π₯·π¦π§
π§―ππ€« πΌπ₯π§π₯ΎπΌππ€Ά π§π€«π§π₯Ύπ€« π¦π¦π¦π€Άπ₯πΌ
π§π¦π€―π₯Ύ π₯π¦π₯π€πΌ ππ¦ π¦π§π π€πΌ
ππ₯Ύπ€Άππ₯Ύ π€π¦ π₯ΎπΌ π₯¦π¦π₯π¦π¦
π€Άπ¦π¦ π§π§― π€«π€πππ₯¦ π₯¦π§π₯π¦πΌπ€π€Ά
π€«π₯π¦π€π₯·π§ π₯π§ π€Άπ§π π₯·π₯¦
π§―π¦π§ πΌπ€―π₯π€ π§―πΌπ§―π¦ππ₯¦ π₯Ύππ¦π€«π¦Ίπ€π§― π§ππ§π§πΌπ€― π§ππ¦Ίπ¦Ίππ¦Ί
π§―πΌπ€―π§π₯π€π§π¦ π§―π€― ππ€πΌπ₯Ύπ€π€« ππ€«"""
sequence = "πππ₯¦π¦π¦Ίπ₯π€π§π₯¦π¦π¦πΌπ₯Ύπ₯¦π€π₯π¦Ίπ€Άπ¦ππ¦π₯·π₯·π₯ππ₯π¦π¦Ίπ€Άπ€Άπ₯Ύπ₯ΎπΌπ₯ππ€«ππ¦π₯·π¦π€―π€―π¦π€π€―ππ₯·π¦π₯Ύπ¦"
def clean_text(text):
    """
    Remove every whitespace character (spaces, tabs, line breaks) from a text.

    :param text: The text to clean
    :return: The same text without any whitespace or new line characters
    """
    # str.split() without arguments splits on runs of arbitrary whitespace, so
    # joining the pieces again drops spaces and line breaks in a single pass.
    return "".join(text.split())
def character_propabilities(text, all_chars):
    """
    Calculate the empirical observation probability of every character in ``all_chars``.

    The observation probability of a character is its number of occurrences in
    ``text`` divided by the total number of characters in ``text``. Characters
    of ``all_chars`` that never occur in ``text`` are mapped to ``0.0``.
    Characters are counted per Unicode code point, exactly as Python iterates
    over a string.

    :param text: The text for which character observation probabilities are to be calculated
    :param all_chars: A set of unique characters. The probability for each such character is calculated.
    :return: A dictionary mapping every character of ``all_chars`` to its observation probability in ``text``.
    """
    # Local import keeps this function usable without touching the file's import block.
    from collections import Counter

    total = len(text)
    if total == 0:
        # Nothing was observed: report zero everywhere instead of dividing by zero.
        return {char: 0.0 for char in all_chars}

    # Counter returns 0 for characters that do not occur, so no special casing is needed.
    counts = Counter(text)
    return {char: counts[char] / total for char in all_chars}
def get_emmision_propabilities(all_texts):
    """
    Return the emission probabilities of every character for each text.

    The result is a list with one dictionary per text, each produced by
    :py:meth:`character_propabilities`:

    * Every text is cleaned with :py:meth:`clean_text`.
    * The unique characters across all cleaned texts form the shared alphabet.
    * For each cleaned text the observation probability of every character of
      that alphabet is calculated.

    :param all_texts: A list of texts
    :return: A list of dictionaries with emission probabilities, one per text and in the same order
    """
    # Clean each text up front: whitespace must neither end up in the alphabet
    # nor inflate the character total the probabilities are divided by.
    cleaned_texts = [clean_text(text) for text in all_texts]

    # Shared alphabet: every character that occurs in at least one text.
    all_chars = set("".join(cleaned_texts))

    # Every dictionary uses the same key set, so a character that is missing
    # from one text still has an (zero) entry for that text.
    return [character_propabilities(text, all_chars) for text in cleaned_texts]
def get_initial_alpha():
    """
    Return the initial alpha vector for the forward algorithm.

    :return: np.array of shape 5x1 with the initial (equally likely) alpha values.
    """
    # In the beginning we don't know which text was chosen to start with,
    # so all five texts (states) are equally likely.
    num_states = 5
    return np.full((num_states, 1), 1.0 / num_states)
def get_state_transition_matrix():
    """
    Return the state transition matrix for the forward algorithm.

    With 90% chance the state stays the same, while the remaining 10% are
    divided equally between the four other states (2.5% each).

    :return: np.array of shape 5x5 with the state transition probabilities; every row sums to 1
    """
    num_states = 5
    stay_probability = 0.9
    # The remaining probability mass is shared evenly by all other states.
    switch_probability = (1.0 - stay_probability) / (num_states - 1)

    matrix = np.full((num_states, num_states), switch_probability)
    np.fill_diagonal(matrix, stay_probability)
    return matrix
def forward(alpha, character, state_transition_matrix, emmision_propabilities):
    """
    Perform one step of the forward algorithm.

    * Prediction: the past alpha values are propagated through the state
      transition matrix to obtain the predicted state probabilities.
    * Update: the predicted probabilities are multiplied with the emission
      probability of the observed character under each state.
    * Normalization: the result is divided by its sum, which keeps the values
      numerically stable and interpretable as probabilities.

    If the observed character has zero probability under every state, the
    observation carries no information and the (normalized) prediction is
    returned instead of dividing by zero.

    :param alpha: np.array of shape (5,1) holding the past alpha values (a flat vector is accepted as well)
    :param character: Observed character in this step
    :param state_transition_matrix: np.array of shape (5,5) holding the state transition probabilities;
        entry ``[i, j]`` is taken as the probability of moving from state ``i`` to state ``j``
    :param emmision_propabilities: List of dictionaries holding the character emission probabilities for each state.
    :return: New alpha vector after state transition and observation update, in the same shape as ``alpha``
    """
    alpha = np.asarray(alpha, dtype=float)
    transition = np.asarray(state_transition_matrix, dtype=float)

    # State transition: predicted[j] = sum_i alpha[i] * A[i, j]
    predicted = transition.T @ alpha.reshape(-1)

    # Emission probability of the observed character for each state; a character
    # that is unknown to a state counts as impossible under that state.
    emission = np.array(
        [probabilities.get(character, 0.0) for probabilities in emmision_propabilities],
        dtype=float,
    )
    updated = predicted * emission

    total = updated.sum()
    if total == 0.0:
        # Observation impossible under every state: fall back to the prediction.
        updated = predicted
        total = updated.sum()
    if total > 0.0:
        updated = updated / total

    # Hand the result back in the caller's layout, e.g. (5, 1).
    return updated.reshape(alpha.shape)
if __name__ == "__main__":
    # Get initial alpha values
    alpha = get_initial_alpha()

    # Estimate the emission probabilities for the five texts
    emmision_propabilities = get_emmision_propabilities(
        [text1, text2, text3, text4, text5]
    )

    # Build the state transition matrix
    state_transition_matrix = get_state_transition_matrix()

    # Clean the sequence before allocating the result buffer, so the buffer is
    # sized from the actual number of observations instead of a fixed 50.
    sequence = clean_text(sequence)

    states = ["Text 1", "Text 2", "Text 3", "Text 4", "Text 5"]
    alpha_matrix = np.zeros((len(sequence), len(states)))  # shape: (T, num_states)

    # Iterate over the whole sequence
    for t, character in enumerate(sequence):
        # Run one step of the forward algorithm
        alpha = forward(
            alpha, character, state_transition_matrix, emmision_propabilities
        )
        # Store the current alpha for later. forward() is documented to return
        # shape (5, 1), which cannot be assigned to a 1-D row slice directly,
        # so the vector is flattened first.
        alpha_matrix[t, :] = np.ravel(alpha)

    # Visualize the alpha vectors as a heat map: one row per state, one column per time step
    df = pd.DataFrame(
        alpha_matrix.T,
        index=states,
        columns=[f"t{t}" for t in range(1, len(sequence) + 1)],
    )
    plt.ioff()
    plt.figure(figsize=(10, 4))
    sns.heatmap(df, annot=False, cmap="YlGnBu", cbar=True)
    plt.title("Alpha-Werte pro Zustand über die Zeit")
    plt.xlabel("Zeit (Position in Sequenz)")
    plt.ylabel("Zustand")
    plt.tight_layout()
    plt.show()