Adding util method to replace invalid input characters
- Also better type hints
This commit is contained in:
+49
-1
@@ -1,10 +1,40 @@
|
||||
import AlphabetUtils as au
|
||||
from utils import AlphabetUtils as au
|
||||
|
||||
# Reference letter-frequency profile for German text.
# The list holds 26 relative frequencies expressed as fractions that sum to 1.0.
# NOTE(review): the entries appear to be ordered a..z (index 0 = 'a', index 25 = 'z');
# index 4 (0.1740) matches 'e', the most common German letter - confirm against callers.
GERMAN_FREQUENCY_PROFILE = [
    0.0651, 0.0189, 0.0306, 0.0508, 0.1740,  # presumably a-e
    0.0166, 0.0301, 0.0476, 0.0755, 0.0027,  # presumably f-j
    0.0121, 0.0344, 0.0253, 0.0978, 0.0251,  # presumably k-o
    0.0079, 0.0002, 0.0700, 0.0727, 0.0615,  # presumably p-t
    0.0435, 0.0067, 0.0189, 0.0003, 0.0004,  # presumably u-y
    0.0113,                                  # presumably z
]
|
||||
|
||||
|
||||
def calculate_frequency(text: str, fancy_printing: bool = False):
|
||||
"""
|
||||
Calculates the frequency of every letter in the german alphabet for the given text
|
||||
:param text: The text to calculate the letter frequency for
|
||||
:param fancy_printing: Whether to print the frequencies to the console
|
||||
:return: A list of frequencies, where index 0 contains the frequency of a in percent and so on.
|
||||
"""
|
||||
occurrence_count = [0 for i in range(26)]
|
||||
@@ -26,6 +56,24 @@ def calculate_frequency(text: str, fancy_printing: bool = False):
|
||||
return occurrence_frequency
|
||||
|
||||
|
||||
def transform_invalid_chars(input: str) -> str:
    """
    Transforms invalid characters like german umlauts into their allowed alternatives

    Every umlaut is replaced by its two-letter transcription (the case of the first
    letter is preserved) and 'ß' is replaced by 'ss'. All other characters, including
    the plain letters 'A', 'O' and 'U', are returned unchanged.

    :param input: The text to check
    :return: The improved text
    """
    # NOTE: the parameter name shadows the builtin `input`; it is kept as-is so that
    # callers passing it by keyword continue to work.
    replacements = str.maketrans({
        'ä': 'ae',
        'Ä': 'Ae',  # was matching plain 'A' before, which mangled every capital A
        'ö': 'oe',
        'Ö': 'Oe',
        'ü': 'ue',
        'Ü': 'Ue',
        'ß': 'ss',
    })
    # A single translate pass replaces the chain of seven str.replace calls.
    return input.translate(replacements)
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Manual demo: compute the letter frequencies of a placeholder sentence with
    # fancy printing enabled, then print the returned frequency list.
    print(
        calculate_frequency(
            'Hier den Text eingeben, für den die Wahrscheinlichkeiten berechnet werden sollen',
            True,
        )
    )
|
||||
|
||||
Reference in New Issue
Block a user