From c3ee8c6c88e6f879c7dce7316629e1fe18f08c71 Mon Sep 17 00:00:00 2001 From: GrailFinder Date: Sun, 12 Feb 2023 09:55:01 +0300 Subject: Feat: add support for directory arg --- .gitignore | 2 ++ go.mod | 3 +++ main.go | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-------- readme.md | 1 + 4 files changed, 64 insertions(+), 8 deletions(-) create mode 100644 .gitignore create mode 100644 go.mod diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c924002 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +prep-dataset +ff_commands diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..88466ed --- /dev/null +++ b/go.mod @@ -0,0 +1,3 @@ +module prep-dataset + +go 1.20 diff --git a/main.go b/main.go index 0b5d981..a0fe70a 100644 --- a/main.go +++ b/main.go @@ -3,11 +3,13 @@ package main import ( "bufio" "encoding/json" + "flag" "fmt" "io/ioutil" "log" "os" "path" + "path/filepath" "strings" ) @@ -84,7 +86,9 @@ func readLines(filepath string) []string { // writeLines writes the lines to the given file. func writeLines(lines []string, path string) error { - file, err := os.Create(path) + // file, err := os.Create(path) + file, err := os.OpenFile(path, + os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0666) if err != nil { return err } @@ -117,7 +121,7 @@ func writeJson(data map[string]string) { func buildFFmpegCall(fd *FileData, ut *Utterance) string { return fmt.Sprintf( - `ffmpeg -i %s -ss %s -to %s \ + `yes no | ffmpeg -i %s -ss %s -to %s \ -metadata text_source="%s" \ -ar 22050 %s`, fd.AudioPath, ut.LeftTime, ut.RightTime, @@ -128,18 +132,16 @@ func buildFFmpegCall(fd *FileData, ut *Utterance) string { func utterancesToFileTextMap(utterances []*Utterance) map[string]string { resp := make(map[string]string) for _, ut := range utterances { - resp[ut.OutPath] = ut.Text + resp[path.Base(ut.OutPath)] = ut.Text } return resp } -func main() { - vttFilepath := os.Args[1] - - fd := NewFileData(vttFilepath) +func oneFileRun(filepath string) []*Utterance { + fd := NewFileData(filepath) fmt.Println("working with:", fd) - lines := readLines(vttFilepath) + lines := readLines(fd.VttPath) utterances := linesToUtterances(lines, fd) ffmpegCommands := make([]string, len(utterances)) @@ -149,6 +151,54 @@ func main() { fmt.Println("utterances len:", len(utterances)) writeLines(ffmpegCommands, ffCmdOut) + return utterances +} + +func dirRun(dirpath string) []*Utterance { + resp := []*Utterance{} + vttFiles := getVttList(dirpath) + for _, vtt := range vttFiles { + utterances := oneFileRun(vtt) + resp = append(resp, utterances...) + } + return resp +} + +func getVttList(dirpath string) []string { + resp := []string{} + err := filepath.Walk(dirpath, + func(path string, info os.FileInfo, err error) error { + if err != nil { + fmt.Printf("prevent panic by handling failure accessing a path %q: %v\n", path, err) + return err + } + if strings.Contains(info.Name(), subExt) { + resp = append(resp, info.Name()) + } + return nil + }) + if err != nil { + fmt.Println(err) + } + return resp +} + +func main() { + vttFilepath := flag.String("vttfile", "", "path to a vtt file") + vttDir := flag.String("vttdir", "", "path to a vtt dir") + // vttFilepath := os.Args[1] + flag.Parse() + + utterances := []*Utterance{} + if vttDir != nil && *vttDir != "" { + utterances = dirRun(*vttDir) + } else if vttFilepath != nil && *vttFilepath != "" { + utterances = oneFileRun(*vttFilepath) + } else { + fmt.Println("no flags provided;") + return + } + metadata := readJson(metadataPath) newMeta := utterancesToFileTextMap(utterances) diff --git a/readme.md b/readme.md index f6db8f6..9a159b7 100644 --- a/readme.md +++ b/readme.md @@ -3,3 +3,4 @@ - key-pair in metadata doesnt guarantee existence of audiofile; - metadata gets generated per vtt file, but it rather should be common between; - instead of writing ffmpeg commands to file maybe better to run them in go; +- support directory call -- cgit v1.2.3