summary | refs | log | tree | commit | diff
path: root/quality_check.py
diff options
context:
space:
mode:
Diffstat (limited to 'quality_check.py')
-rwxr-xr-x quality_check.py 26
1 files changed, 26 insertions, 0 deletions
diff --git a/quality_check.py b/quality_check.py
new file mode 100755
index 0000000..6b55432
--- /dev/null
+++ b/quality_check.py
@@ -0,0 +1,26 @@
+#!/usr/bin/env python
+
+import sys
+import enchant
+
+# Path of the text file to score, taken from the first command-line argument.
+textfile = sys.argv[1]
+
+# Usage example: ./quality_check.py text/ffmpeg_2.txt
+
+# word_ratio() skips every token w for which `w in non_words` is true; because
+# non_words is a str, that is a substring test against this string.
+non_words = "\n ,.;!?'"
+# Enchant dictionary requested with the 'de_DE' tag; word_ratio() calls its
+# check() method on each token that was not skipped.
+ge_dict = enchant.Dict('de_DE')
+
+def word_ratio(words):
+    """Return the share of dictionary-approved tokens and how many were checked.
+
+    Tokens for which ``token in non_words`` is true (a substring test against
+    the module-level ``non_words`` string) are skipped. Every other token is
+    passed to ``ge_dict.check``.
+
+    Returns:
+        A ``(ratio, count)`` tuple: ``count`` is the number of tokens checked
+        and ``ratio`` is the fraction of them the dictionary accepted. When no
+        token was checked the result is ``(0, 0)``.
+    """
+    verdicts = []
+    for token in words:
+        if token in non_words:
+            continue
+        verdicts.append(ge_dict.check(token))
+
+    checked = len(verdicts)
+    if not checked:
+        # Nothing to score; also avoids dividing by zero below.
+        return 0, 0
+    return sum(verdicts) / checked, checked
+
+def read_into_list(filename, encoding="utf-8"):
+    """Read a text file and return its whitespace-separated tokens.
+
+    Args:
+        filename: Path of the file to read.
+        encoding: Text encoding used to decode the file. Defaults to UTF-8 so
+            the result does not depend on the locale of the machine running
+            the script; pass another codec for files stored differently.
+
+    Returns:
+        A list of strings as produced by ``str.split()`` with no arguments;
+        an empty or whitespace-only file yields ``[]``.
+
+    Raises:
+        OSError: If the file cannot be opened.
+        UnicodeDecodeError: If the file is not valid in ``encoding``.
+    """
+    with open(filename, "r", encoding=encoding) as lf:
+        return lf.read().split()
+
+if __name__ == "__main__":
+    # Score the file named on the command line and print a single record of
+    # the form "<ratio>:<checked token count>:<path>".
+    ratio, length = word_ratio(read_into_list(textfile))
+    print(f"{ratio}:{length}:{textfile}")