summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Grail Finder <wohilas@gmail.com> 2023-02-26 14:48:15 +0600
committer: Grail Finder <wohilas@gmail.com> 2023-02-26 14:48:15 +0600
commit: 3f99abb502d373cb4b520b212afd4be9ed4da07f (patch)
tree: 221c7ccc943a4e4262d536cb268fd7dd04c59074
parent: f60be7df2d741b73863df63c8a275d1b9471db58 (diff)
Feat: use md5 hash to save on space with shorter names
-rw-r--r-- ffmpeg.go 14
-rw-r--r-- main.go 37
-rw-r--r-- main_test.go 26
-rw-r--r-- workers.go 3
4 files changed, 63 insertions, 17 deletions
diff --git a/ffmpeg.go b/ffmpeg.go
index 7c5b6fd..4dead58 100644
--- a/ffmpeg.go
+++ b/ffmpeg.go
@@ -6,6 +6,16 @@ import (
ffmpeg "github.com/u2takey/ffmpeg-go"
)
+func buildFFmpegCall(ut *Utterance) string { // buildFFmpegCall renders a shell command line that invokes ffmpeg to cut ut's time range out of its audio file; it only builds the string and does not run it.
+ return fmt.Sprintf(
+ `yes no | ffmpeg -i "%s" -ss %s -to %s \
+ -metadata text_source="%s" \
+ -ar 22050 "%s"`,
+ ut.FD.AudioPath, ut.LeftTime, ut.RightTime, // in order: -i input file, -ss start time, -to end time
+ ut.FD.VttPath, ut.OutPath, // in order: text_source metadata value, output file. NOTE(review): values are placed inside double quotes without escaping — confirm paths cannot contain `"` or `$`.
+ )
+}
+
func cutoutClipAndTranscode(ut *Utterance) error {
err := ffmpeg.Input(ut.FD.AudioPath,
ffmpeg.KwArgs{
@@ -19,9 +29,9 @@ func cutoutClipAndTranscode(ut *Utterance) error {
return err
}
-func cutOnEqualParts(filepath, segment string) error {
+func cutOnEqualParts(filepath, outname, segment string) error {
err := ffmpeg.Input(filepath).
- Output(filepath+"%03d.opus",
+ Output(outname+"%02d.opus",
ffmpeg.KwArgs{
"c": "copy",
"map": 0,
diff --git a/main.go b/main.go
index fe7ac06..4cfa197 100644
--- a/main.go
+++ b/main.go
@@ -1,6 +1,7 @@
package main
import (
+ "crypto/md5"
"flag"
"fmt"
"os"
@@ -12,7 +13,6 @@ import (
const (
subExt = ".vtt"
- outdir = "/mnt/nvme/data"
ffCmdOut = "data/ff_commands"
timeSep = "-->"
metadataPath = "data/metadata.json"
@@ -20,6 +20,10 @@ const (
segmentSize = "00:08:00"
)
+var (
+ outdir = "/mnt/nvme/data"
+)
+
type Utterance struct {
LeftTime string
RightTime string
@@ -59,6 +63,16 @@ func mapToCSV(req map[string]string) [][]string {
return resp
}
+func hashStr(req string) string { // hashStr returns the MD5 digest of req formatted as a lowercase hexadecimal string; it is used to derive short file names, not for security.
+ return fmt.Sprintf("%x", md5.Sum([]byte(req))) // md5.Sum yields a fixed-size byte array; %x prints it as hex
+}
+
+func fpathToOutname(req string) string { // fpathToOutname maps an input file path to an output path inside the package-level outdir, named by the MD5 hex digest of the input's base name plus ".opus".
+ basename := filepath.Base(req) // only the file name is hashed, so the same name in different directories yields the same output path
+ h := hashStr(basename)
+ return path.Join(outdir, h+".opus") // NOTE(review): path.Join is used here while filepath.Base is used above — confirm whether filepath.Join was intended. NOTE(review): cutOnEqualParts appends "%02d.opus" to this value, so segment files would end in ".opusNN.opus" — confirm that is intended.
+}
+
func linesToUtterances(lines []string, fd *FileData) []*Utterance {
resp := []*Utterance{}
for i, line := range lines {
@@ -107,16 +121,6 @@ func adequateTimes(left, right string) bool {
return false
}
-func buildFFmpegCall(ut *Utterance) string {
- return fmt.Sprintf(
- `yes no | ffmpeg -i "%s" -ss %s -to %s \
- -metadata text_source="%s" \
- -ar 22050 "%s"`,
- ut.FD.AudioPath, ut.LeftTime, ut.RightTime,
- ut.FD.VttPath, ut.OutPath,
- )
-}
-
func utterancesToFileTextMap(utterances []*Utterance) map[string]string {
resp := make(map[string]string)
for _, ut := range utterances {
@@ -163,7 +167,7 @@ func getFileList(dirpath string, filter string) []string {
return resp
}
-func equalSlice(dirpath string) {
+func equalSliceRun(dirpath string) {
auFiles := getFileList(dirpath, "opus")
fQueue := make(chan string, len(auFiles))
@@ -171,7 +175,7 @@ func equalSlice(dirpath string) {
fQueue <- fpath
}
- workers := 3
+ workers := 20
for i := 0; i < workers; i++ {
go cutterQueue(fQueue, i)
}
@@ -190,17 +194,22 @@ func equalSlice(dirpath string) {
func main() {
vttFilepath := flag.String("f", "", "path to a vtt file")
vttDir := flag.String("d", "", "path to a vtt dir")
+ outDirArg := flag.String("o", "", "output dir")
sliceAudioDir := flag.String("slice-audio-dir", "",
"for equal segmentation only without subs")
flag.Parse()
+ if outDirArg != nil && *outDirArg != "" {
+ outdir = *outDirArg
+ }
+
utterances := []*Utterance{}
if vttDir != nil && *vttDir != "" {
utterances = dirRun(*vttDir)
} else if vttFilepath != nil && *vttFilepath != "" {
utterances = oneFileRun(*vttFilepath)
} else if sliceAudioDir != nil && *sliceAudioDir != "" {
- equalSlice(*sliceAudioDir)
+ equalSliceRun(*sliceAudioDir)
return
} else {
fmt.Println("no flags provided;")
diff --git a/main_test.go b/main_test.go
index e494dc6..83a1649 100644
--- a/main_test.go
+++ b/main_test.go
@@ -63,5 +63,31 @@ func TestFullyIncludes(t *testing.T) {
}
})
}
+}
+func TestHashStr(t *testing.T) {
+ cases := []struct {
+ Input string
+ Want string
+ Description string
+ }{
+ {
+ Input: "Let's Play Re：Kinder [Rpg Maker Horror] #5 - Im Meer der traurigen Erinnerungen [iU9Un035p3A].opus",
+ Want: "b047af2cb102a2a6b15007108f99cfef",
+ Description: "sum of filename example",
+ },
+ {
+ Input: "",
+ Want: "d41d8cd98f00b204e9800998ecf8427e",
+ Description: "sum of empty string",
+ },
+ }
+ for i, tc := range cases {
+ t.Run(fmt.Sprintf("run: #%d; %q", i, tc.Description), func(t *testing.T) {
+ got := hashStr(tc.Input)
+ if got != tc.Want {
+ t.Errorf("want: %v; got: %v", tc.Want, got)
+ }
+ })
+ }
}
diff --git a/workers.go b/workers.go
index 91f9eaf..4255e33 100644
--- a/workers.go
+++ b/workers.go
@@ -28,6 +28,7 @@ func cutterQueue(fQueue chan string, workerID int) {
return
}
fpath := <-fQueue
- cutOnEqualParts(fpath, segmentSize)
+ outname := fpathToOutname(fpath)
+ cutOnEqualParts(fpath, outname, segmentSize)
}
}