#!/usr/bin/env python
"""Rate a text file by the share of its tokens found in a German dictionary.

Usage example:
    ./quality_check.py text/ffmpeg_2.txt

Prints a single line of the form ``ratio:word_count:filename``.
"""
import sys

import enchant

# Characters that are never counted as a word on their own.
non_words = "\n ,.;!?'-"
ge_dict = enchant.Dict('de_DE')


def word_ratio(words):
    """Return ``(ratio, count)`` for the spell-checked tokens in *words*.

    Tokens that consist solely of characters from ``non_words`` (and empty
    tokens) are skipped.  ``count`` is the number of remaining tokens and
    ``ratio`` is the fraction of them accepted by ``ge_dict.check``.
    Returns ``(0, 0)`` when no token remains.
    """
    # strip() leaves an empty string for pure-punctuation tokens such as
    # "..." or "?!", which a plain substring test against non_words would
    # let through and count as misspelled words.
    # NOTE(review): punctuation attached to a word ("Haus,") is still passed
    # to the dictionary unchanged, exactly as before.
    checked = [ge_dict.check(w) for w in words if w.strip(non_words)]
    if not checked:
        return 0, 0
    return sum(checked) / len(checked), len(checked)


def read_into_list(filename):
    """Return the whitespace-separated tokens of the file *filename*."""
    # Explicit encoding so decoding does not depend on the platform locale.
    # NOTE(review): assumes the input files are UTF-8 -- confirm.
    with open(filename, "r", encoding="utf-8") as lf:
        return lf.read().split()


def main(argv=None):
    """Check the file named by the first argument and print the result.

    *argv* defaults to ``sys.argv``; arguments after the first are ignored.
    Exits with a usage message when no file name is given.
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 2:
        sys.exit(f"usage: {argv[0] if argv else 'quality_check.py'} TEXTFILE")
    textfile = argv[1]
    words = read_into_list(textfile)
    ratio, length = word_ratio(words)
    print(f"{ratio}:{length}:{textfile}")


if __name__ == "__main__":
    main()