2021-10-17 14:54:25 +00:00
|
|
|
from utils import AlphabetUtils as au
|
|
|
|
|
|
|
|
# Reference letter frequencies for German text, stored as fractions (not
# percentages); the 26 entries sum to 1.0.
# NOTE(review): the per-letter labels below assume the list is indexed a..z,
# the same ordering calculate_frequency documents for its result (index 4,
# the largest value, would then be 'e') -- confirm against AlphabetUtils.
GERMAN_FREQUENCY_PROFILE = [
    0.0651,  # a
    0.0189,  # b
    0.0306,  # c
    0.0508,  # d
    0.1740,  # e
    0.0166,  # f
    0.0301,  # g
    0.0476,  # h
    0.0755,  # i
    0.0027,  # j
    0.0121,  # k
    0.0344,  # l
    0.0253,  # m
    0.0978,  # n
    0.0251,  # o
    0.0079,  # p
    0.0002,  # q
    0.0700,  # r
    0.0727,  # s
    0.0615,  # t
    0.0435,  # u
    0.0067,  # v
    0.0189,  # w
    0.0003,  # x
    0.0004,  # y
    0.0113,  # z
]
|
2021-10-15 16:22:31 +00:00
|
|
|
|
|
|
|
|
|
|
|
def calculate_frequency(text: str, fancy_printing: bool = False) -> list:
    """
    Calculates the relative frequency of every letter of the alphabet in the given text

    Each count is divided by the total length of the text (all characters, including
    spaces, digits and punctuation), so the result only sums to 1 when the text
    consists solely of letters.

    :param text: The text to calculate the letter frequency for
    :param fancy_printing: Whether to print the frequencies (as percentages) to the console
    :return: A list of 26 frequencies as fractions between 0 and 1, where index 0
        contains the frequency of a and so on. All entries are 0.0 for an empty text.
    """
    occurrence_count = [0] * 26

    for char in text:
        if au.is_letter_of_alphabet(char):
            # NOTE(review): assumes get_index_of_letter returns 0..25 for every
            # character accepted by is_letter_of_alphabet -- confirm in utils.
            occurrence_count[au.get_index_of_letter(char)] += 1

    total_chars = len(text)
    if total_chars == 0:
        # Nothing to count; avoid dividing by zero.
        occurrence_frequency = [0.0] * 26
    else:
        occurrence_frequency = [count / total_chars for count in occurrence_count]

    if fancy_printing:
        for i, frequency in enumerate(occurrence_frequency):
            print(f'{au.get_letter_at_index(i, True)}: {frequency * 100}%')

    return occurrence_frequency
|
|
|
|
|
|
|
|
|
2021-10-17 14:54:25 +00:00
|
|
|
def transform_invalid_chars(input: str) -> str:
    """
    Transforms invalid characters like german umlauts into their allowed alternatives

    Lowercase umlauts become their two-letter transcription ('ä' -> 'ae'), uppercase
    umlauts become a capitalised transcription ('Ä' -> 'Ae') and 'ß' becomes 'ss'.
    All other characters, including a plain 'A', are left unchanged.

    :param input: The text to check
    :return: The improved text
    """
    replacements = {
        'ä': 'ae',
        'Ä': 'Ae',
        'ö': 'oe',
        'Ö': 'Oe',
        'ü': 'ue',
        'Ü': 'Ue',
        'ß': 'ss',
    }
    # Single pass over the text; none of the replacement strings contains a
    # character that is itself replaced, so ordering does not matter.
    return input.translate(str.maketrans(replacements))
|
|
|
|
|
|
|
|
|
2021-10-15 16:22:31 +00:00
|
|
|
if __name__ == '__main__':
    # Demo run: prints the per-letter percentages (fancy_printing=True) and
    # then the returned list of frequencies for the sample text.
    sample_frequencies = calculate_frequency(
        'Hier den Text eingeben, für den die Wahrscheinlichkeiten berechnet werden sollen',
        True,
    )
    print(sample_frequencies)
|