From 3f99abb502d373cb4b520b212afd4be9ed4da07f Mon Sep 17 00:00:00 2001 From: Grail Finder Date: Sun, 26 Feb 2023 14:48:15 +0600 Subject: Feat: use md5 hash to save on space with shorter names --- ffmpeg.go | 14 ++++++++++++-- main.go | 37 +++++++++++++++++++++++-------------- main_test.go | 26 ++++++++++++++++++++++++++ workers.go | 3 ++- 4 files changed, 63 insertions(+), 17 deletions(-) diff --git a/ffmpeg.go b/ffmpeg.go index 7c5b6fd..4dead58 100644 --- a/ffmpeg.go +++ b/ffmpeg.go @@ -6,6 +6,16 @@ import ( ffmpeg "github.com/u2takey/ffmpeg-go" ) +func buildFFmpegCall(ut *Utterance) string { + return fmt.Sprintf( + `yes no | ffmpeg -i "%s" -ss %s -to %s \ + -metadata text_source="%s" \ + -ar 22050 "%s"`, + ut.FD.AudioPath, ut.LeftTime, ut.RightTime, + ut.FD.VttPath, ut.OutPath, + ) +} + func cutoutClipAndTranscode(ut *Utterance) error { err := ffmpeg.Input(ut.FD.AudioPath, ffmpeg.KwArgs{ @@ -19,9 +29,9 @@ func cutoutClipAndTranscode(ut *Utterance) error { return err } -func cutOnEqualParts(filepath, segment string) error { +func cutOnEqualParts(filepath, outname, segment string) error { err := ffmpeg.Input(filepath). - Output(filepath+"%03d.opus", + Output(outname+"%02d.opus", ffmpeg.KwArgs{ "c": "copy", "map": 0, diff --git a/main.go b/main.go index fe7ac06..4cfa197 100644 --- a/main.go +++ b/main.go @@ -1,6 +1,7 @@ package main import ( + "crypto/md5" "flag" "fmt" "os" @@ -12,7 +13,6 @@ import ( const ( subExt = ".vtt" - outdir = "/mnt/nvme/data" ffCmdOut = "data/ff_commands" timeSep = "-->" metadataPath = "data/metadata.json" @@ -20,6 +20,10 @@ const ( segmentSize = "00:08:00" ) +var ( + outdir = "/mnt/nvme/data" +) + type Utterance struct { LeftTime string RightTime string @@ -59,6 +63,16 @@ func mapToCSV(req map[string]string) [][]string { return resp } +func hashStr(req string) string { + return fmt.Sprintf("%x", md5.Sum([]byte(req))) +} + +func fpathToOutname(req string) string { + basename := filepath.Base(req) + h := hashStr(basename) + return path.Join(outdir, h+".opus") +} + func linesToUtterances(lines []string, fd *FileData) []*Utterance { resp := []*Utterance{} for i, line := range lines { @@ -107,16 +121,6 @@ func adequateTimes(left, right string) bool { return false } -func buildFFmpegCall(ut *Utterance) string { - return fmt.Sprintf( - `yes no | ffmpeg -i "%s" -ss %s -to %s \ - -metadata text_source="%s" \ - -ar 22050 "%s"`, - ut.FD.AudioPath, ut.LeftTime, ut.RightTime, - ut.FD.VttPath, ut.OutPath, - ) -} - func utterancesToFileTextMap(utterances []*Utterance) map[string]string { resp := make(map[string]string) for _, ut := range utterances { @@ -163,7 +167,7 @@ func getFileList(dirpath string, filter string) []string { return resp } -func equalSlice(dirpath string) { +func equalSliceRun(dirpath string) { auFiles := getFileList(dirpath, "opus") fQueue := make(chan string, len(auFiles)) @@ -171,7 +175,7 @@ func equalSlice(dirpath string) { fQueue <- fpath } - workers := 3 + workers := 20 for i := 0; i < workers; i++ { go cutterQueue(fQueue, i) } @@ -190,17 +194,22 @@ func equalSlice(dirpath string) { func main() { vttFilepath := flag.String("f", "", "path to a vtt file") vttDir := flag.String("d", "", "path to a vtt dir") + outDirArg := flag.String("o", "", "output dir") sliceAudioDir := flag.String("slice-audio-dir", "", "for equal segmentation only without subs") flag.Parse() + if outDirArg != nil && *outDirArg != "" { + outdir = *outDirArg + } + utterances := []*Utterance{} if vttDir != nil && *vttDir != "" { utterances = dirRun(*vttDir) } else if vttFilepath != nil && *vttFilepath != "" { utterances = oneFileRun(*vttFilepath) } else if sliceAudioDir != nil && *sliceAudioDir != "" { - equalSlice(*sliceAudioDir) + equalSliceRun(*sliceAudioDir) return } else { fmt.Println("no flags provided;") diff --git a/main_test.go b/main_test.go index e494dc6..83a1649 100644 --- a/main_test.go +++ b/main_test.go @@ -63,5 +63,31 @@ func TestFullyIncludes(t *testing.T) { } }) } +} +func TestHashStr(t *testing.T) { + cases := []struct { + Input string + Want string + Description string + }{ + { + Input: "Let's Play Reļ¼šKinder [Rpg Maker Horror] #5 - Im Meer der traurigen Erinnerungen [iU9Un035p3A].opus", + Want: "b047af2cb102a2a6b15007108f99cfef", + Description: "sum of filename example", + }, + { + Input: "", + Want: "d41d8cd98f00b204e9800998ecf8427e", + Description: "sum of empty string", + }, + } + for i, tc := range cases { + t.Run(fmt.Sprintf("run: #%d; %q", i, tc.Description), func(t *testing.T) { + got := hashStr(tc.Input) + if got != tc.Want { + t.Errorf("want: %v; got: %v", tc.Want, got) + } + }) + } } diff --git a/workers.go b/workers.go index 91f9eaf..4255e33 100644 --- a/workers.go +++ b/workers.go @@ -28,6 +28,7 @@ func cutterQueue(fQueue chan string, workerID int) { return } fpath := <-fQueue - cutOnEqualParts(fpath, segmentSize) + outname := fpathToOutname(fpath) + cutOnEqualParts(fpath, outname, segmentSize) } } -- cgit v1.2.3