Adding util method to replace invalid input characters

- Also better type hints
This commit is contained in:
2021-10-17 16:54:25 +02:00
parent d4bd9638a4
commit 2bd8dbc6db
4 changed files with 74 additions and 11 deletions
+49 -1
View File
@@ -1,10 +1,40 @@
import AlphabetUtils as au
from utils import AlphabetUtils as au
# Reference letter-frequency profile: 26 relative frequencies expressed as
# fractions (the entries add up to 1.0).
# NOTE(review): presumably index 0 is 'a' through index 25 'z' for German text;
# the per-row letter labels below rely on that assumption - confirm.
GERMAN_FREQUENCY_PROFILE = [
    0.0651, 0.0189, 0.0306, 0.0508, 0.1740,  # a - e
    0.0166, 0.0301, 0.0476, 0.0755, 0.0027,  # f - j
    0.0121, 0.0344, 0.0253, 0.0978, 0.0251,  # k - o
    0.0079, 0.0002, 0.0700, 0.0727, 0.0615,  # p - t
    0.0435, 0.0067, 0.0189, 0.0003, 0.0004,  # u - y
    0.0113,                                  # z
]
def calculate_frequency(text: str, fancy_printing: bool = False):
"""
Calculates the frequency of every letter in the german alphabet for the given text
:param text: The text to calculate the letter frequency for
:param fancy_printing: Whether to print the frequencies to the console
:return: A list of frequencies, where index 0 contains the frequency of a in percent and so on.
"""
occurrence_count = [0 for i in range(26)]
@@ -26,6 +56,24 @@ def calculate_frequency(text: str, fancy_printing: bool = False):
return occurrence_frequency
def transform_invalid_chars(input: str) -> str:
    """
    Transforms invalid characters like German umlauts into their allowed alternatives
    :param input: The text to check
    :return: A copy of the text in which ä, Ä, ö, Ö, ü, Ü and ß are replaced by
             ae, Ae, oe, Oe, ue, Ue and ss; all other characters are left untouched
    """
    # NOTE: `input` shadows the builtin, but the parameter name is part of the
    # public signature and is kept so keyword callers continue to work.
    # The capital umlaut 'Ä' (not the plain letter 'A') must be the key here,
    # otherwise every ordinary capital A would be rewritten to "Ae".
    replacements = {
        'ä': 'ae',
        'Ä': 'Ae',
        'ö': 'oe',
        'Ö': 'Oe',
        'ü': 'ue',
        'Ü': 'Ue',
        'ß': 'ss',
    }
    # A single translate pass replaces all characters at once.
    return input.translate(str.maketrans(replacements))
if __name__ == '__main__':
    # Demo entry point: run the frequency analysis on a German placeholder
    # sentence (roughly "enter the text here for which the probabilities are
    # to be calculated") with fancy printing switched on, then print the
    # returned frequency list as well.
    sample_text = 'Hier den Text eingeben, für den die Wahrscheinlichkeiten berechnet werden sollen'
    frequencies = calculate_frequency(sample_text, True)
    print(frequencies)