diff options
Diffstat (limited to 'quality_check.py')
-rwxr-xr-x | quality_check.py | 26 |
1 files changed, 26 insertions, 0 deletions
#!/usr/bin/env python3
"""Estimate the quality of a German text file by spell-checking its tokens.

The file is split on whitespace, every token that is not pure punctuation is
looked up in the ``de_DE`` enchant dictionary, and one line of the form
``ratio:length:filename`` is printed, where *ratio* is the fraction of checked
tokens the dictionary accepted and *length* is the number of checked tokens.

use example: ./quality_check.py text/ffmpeg_2.txt
"""

import functools
import sys

# Characters that are not words; tokens consisting solely of these are skipped.
non_words = "\n,.;!?'"


@functools.lru_cache(maxsize=None)
def _german_check():
    """Return the ``check`` method of the ``de_DE`` enchant dictionary.

    The import and the dictionary construction are deferred to the first call
    (and cached afterwards) so that importing this module neither requires
    pyenchant nor loads a dictionary as an import-time side effect.
    """
    import enchant

    return enchant.Dict('de_DE').check


def word_ratio(words, check=None):
    """Return ``(ratio, count)`` for the spell-checkable tokens in *words*.

    Args:
        words: Iterable of string tokens.
        check: Optional predicate ``check(token) -> bool`` deciding whether a
            token is a valid word. Defaults to the German enchant dictionary.

    Returns:
        A tuple ``(ratio, count)``: *count* is the number of tokens that were
        checked and *ratio* is the fraction of them accepted by *check*.
        ``(0, 0)`` is returned when there is nothing to check.
    """
    # Stripping instead of a substring test against ``non_words`` also drops
    # punctuation-only tokens such as "..." or "?!", which would otherwise be
    # counted as misspelled words.
    # NOTE(review): punctuation attached to a word (e.g. "Haus,") is still
    # passed to the checker unchanged, exactly as before.
    candidates = [w for w in words if w.strip(non_words)]
    if not candidates:
        return 0, 0
    if check is None:
        check = _german_check()
    accepted = sum(1 for w in candidates if check(w))
    return accepted / len(candidates), len(candidates)


def read_into_list(filename):
    """Read *filename* as UTF-8 text and return its whitespace-separated tokens.

    The encoding is given explicitly so that non-ASCII characters (umlauts,
    eszett) are decoded the same way regardless of the system locale.
    """
    with open(filename, "r", encoding="utf-8") as lf:
        return lf.read().split()


if __name__ == "__main__":
    # Read the argument here rather than at import time so the module can be
    # imported safely and a missing argument yields a usage hint, not a traceback.
    if len(sys.argv) < 2:
        sys.exit(f"usage: {sys.argv[0]} TEXTFILE")
    textfile = sys.argv[1]
    words = read_into_list(textfile)
    ratio, length = word_ratio(words)
    print(f"{ratio}:{length}:{textfile}")