2021-10-17 14:54:25 +00:00
|
|
|
from utils import AlphabetUtils as au
|
|
|
|
|
|
|
|
# Reference letter distribution for German text, one entry per letter.
# Values are fractions (not percentages) and add up to 1.0; position 0
# belongs to 'a' and position 25 to 'z'.
GERMAN_FREQUENCY_PROFILE = [
    0.0651,  # a
    0.0189,  # b
    0.0306,  # c
    0.0508,  # d
    0.1740,  # e
    0.0166,  # f
    0.0301,  # g
    0.0476,  # h
    0.0755,  # i
    0.0027,  # j
    0.0121,  # k
    0.0344,  # l
    0.0253,  # m
    0.0978,  # n
    0.0251,  # o
    0.0079,  # p
    0.0002,  # q
    0.0700,  # r
    0.0727,  # s
    0.0615,  # t
    0.0435,  # u
    0.0067,  # v
    0.0189,  # w
    0.0003,  # x
    0.0004,  # y
    0.0113,  # z
]
|
2021-10-15 16:22:31 +00:00
|
|
|
|
|
|
|
|
|
|
|
def calculate_frequency(text: str, fancy_printing: bool = False) -> list:
    """
    Calculates the relative frequency of every letter of the alphabet for the given text

    A character is counted when ``au.is_letter_of_alphabet`` accepts it, and the slot it
    is counted in is the index returned by ``au.get_index_of_letter``. Every count is
    divided by ``len(text)``, i.e. by the number of all characters in the text
    (including spaces, punctuation and other non-letters).

    :param text: The text to calculate the letter frequency for
    :param fancy_printing: Whether to print the frequencies (as percentages) to the console
    :return: A list of 26 frequencies as fractions between 0 and 1 (multiply by 100 for
             percent), where index 0 contains the frequency of a and so on.
             An empty text yields 26 zeros.
    """
    occurrence_count = [0] * 26
    for char in text:
        if au.is_letter_of_alphabet(char):
            occurrence_count[au.get_index_of_letter(char)] += 1

    total_chars = len(text)
    if total_chars:
        occurrence_frequency = [count / total_chars for count in occurrence_count]
    else:
        # Nothing to count: report zeros instead of dividing by zero.
        occurrence_frequency = [0.0] * 26

    if fancy_printing:
        for i, frequency in enumerate(occurrence_frequency):
            print(f'{au.get_letter_at_index(i, True)}: {frequency * 100}%')

    return occurrence_frequency
|
2021-10-15 16:22:31 +00:00
|
|
|
|
|
|
|
|
2021-10-17 14:54:25 +00:00
|
|
|
# Translation table mapping each German special character to its ASCII
# transliteration. Built once at import time; str.translate applies all
# replacements in a single pass over the text.
_UMLAUT_TRANSLATION = str.maketrans({
    'ä': 'ae',
    'Ä': 'Ae',
    'ö': 'oe',
    'Ö': 'Oe',
    'ü': 'ue',
    'Ü': 'Ue',
    'ß': 'ss',
})


def transform_invalid_chars(input: str) -> str:
    """
    Transforms invalid characters like german umlauts into their allowed alternatives

    Replaces ä/ö/ü with ae/oe/ue, Ä/Ö/Ü with Ae/Oe/Ue and ß with ss. All other
    characters - including a plain capital ``A`` - are left untouched.

    :param input: The text to check
    :return: The improved text
    """
    # NOTE: the parameter name shadows the builtin ``input``; it is kept because
    # callers may pass it by keyword.
    return input.translate(_UMLAUT_TRANSLATION)
|
|
|
|
|
|
|
|
|
|
|
|
def shift_char(char: str, incrementation: int = 1) -> str:
    """
    This method shifts one char by the value of incrementation

    The shift wraps around inside the letter's own case (``z`` + 1 -> ``a``,
    ``Z`` + 1 -> ``A``). Any integer is accepted as incrementation: negative values
    shift backwards and values beyond 26 wrap around as often as needed.
    Only the first character of ``char`` is considered. Characters outside
    ``A-Z`` / ``a-z`` (digits, spaces, punctuation, umlauts, ...) cannot be shifted
    within the alphabet and are returned unchanged; an empty string yields an
    empty string.

    :param char: Char to be shifted
    :param incrementation: How much the char should be shifted
    :return: Shifted letter
    """
    symbol = char[:1]
    if 'a' <= symbol <= 'z':
        base = ord('a')
    elif 'A' <= symbol <= 'Z':
        base = ord('A')
    else:
        # Not an ASCII letter (or empty): there is nothing sensible to rotate.
        return symbol

    # Rotate within the 26-letter alphabet; Python's % always returns a
    # non-negative result, so negative shifts wrap correctly as well.
    return chr(base + (ord(symbol) - base + incrementation) % 26)
|
2021-10-17 14:54:25 +00:00
|
|
|
|
|
|
|
|
2021-10-15 16:22:31 +00:00
|
|
|
if __name__ == '__main__':
    # Demo run: print the per-letter table for a sample sentence, then the
    # raw list of frequencies returned by calculate_frequency.
    print(calculate_frequency(
        'Hier den Text eingeben, für den die Wahrscheinlichkeiten berechnet werden sollen',
        fancy_printing=True,
    ))
|