diff options
author | Grail Finder <wohilas@gmail.com> | 2023-01-21 11:58:08 +0600 |
---|---|---|
committer | Grail Finder <wohilas@gmail.com> | 2023-01-21 11:58:08 +0600 |
commit | faf2369a17e4f118f5c65fa3a7c5d954068e0757 (patch) | |
tree | 72d7b6e7f179f22c3b567dcd1f61abab170c7a44 /extract_text.sh | |
parent | 3e42d4fb500811c992cd91f089525d66cd1a2fe2 (diff) |
Feat(extraction): allow to work with other dirs
Diffstat (limited to 'extract_text.sh')
-rwxr-xr-x | extract_text.sh | 19 |
1 files changed, 12 insertions, 7 deletions
```diff
diff --git a/extract_text.sh b/extract_text.sh
index 7b7174c..bb22de2 100755
--- a/extract_text.sh
+++ b/extract_text.sh
@@ -2,12 +2,16 @@
 
 set -e
 
+currDir=$pwd
 framespath=${1:-}
 lang=${2:-deu}
 
 [ -z "$framespath" ] && echo "no framespath provided" && exit 1
 
-mkdir -p text result
+parentDIR=$(dirname "$framespath")
+textDIR="$parentDIR"/text
+grayDIR="$parentDIR"/gray
+mkdir -p ${textDIR} ${grayDIR}
 
 # split video on frames (every 90 seconds)
 # ffmpeg -i "$videopath" -r 0.011 frames/ffmpeg_%0d.jpg
@@ -16,13 +20,14 @@ mkdir -p text result
 
 # lets say we dont know extention
 # ext=".jpg"
+
 # convert each frame to grayscale then crop to text if any
 for img in "$framespath"/*
 do
     iname=$(basename "$img")
-    sub_img_name="frames/sub_${iname}"
-    sub_out="text/$(echo "$iname" | sed 's/.\(png\|gif\|jpg\|jpeg\|bmp\)//')"
-    magick convert "frames/$img" -chop 220x0 -gravity East -chop 220x0 \
+    sub_img_name=$"${grayDIR}/sub_${iname}"
+    sub_out="$textDIR/$(echo "$iname" | sed 's/.\(png\|gif\|jpg\|jpeg\|bmp\)//')"
+    magick convert "$img" -chop 220x0 -gravity East -chop 220x0 \
         -gravity South -chop 0x50 \
         -colorspace Gray -resize 600x "$sub_img_name"
     # dpi=$(magick identify -format '%x' $sub_img_name)
@@ -37,10 +42,10 @@ do
 done
 
 resultfile=$(basename "$framespath")
-find text -type f | sort -n -k 1.13,1.15 | xargs cat > "../result/${resultfile}.txt"
-cd ../
+find "$textDIR" -type f | sort -n | xargs cat > "${parentDIR}/${resultfile}.txt"
+cd $currDir
 
-./remove_dups.py text "result/${resultfile}_clean"
+./remove_dups.py "$textDIR" "${parentDIR}/${resultfile}_clean"
 
 # rm -rf text
 # rm -rf frames
```