diff options
-rw-r--r-- | .gitignore | 2 | ||||
-rw-r--r-- | go.mod | 3 | ||||
-rw-r--r-- | main.go | 66 | ||||
-rw-r--r-- | readme.md | 1 |
4 files changed, 64 insertions, 8 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c924002
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+prep-dataset
+ff_commands
diff --git a/go.mod b/go.mod
new file mode 100644
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module prep-dataset
+
+go 1.20
diff --git a/main.go b/main.go
--- a/main.go
+++ b/main.go
@@ -3,11 +3,13 @@ package main
 import (
 	"bufio"
 	"encoding/json"
+	"flag"
 	"fmt"
 	"io/ioutil"
 	"log"
 	"os"
 	"path"
+	"path/filepath"
 	"strings"
 )
 
@@ -84,7 +86,9 @@ func readLines(filepath string) []string {
 
 // writeLines writes the lines to the given file.
 func writeLines(lines []string, path string) error {
-	file, err := os.Create(path)
+	// file, err := os.Create(path)
+	file, err := os.OpenFile(path,
+		os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0666)
 	if err != nil {
 		return err
 	}
@@ -117,7 +121,7 @@ func writeJson(data map[string]string) {
 
 func buildFFmpegCall(fd *FileData, ut *Utterance) string {
 	return fmt.Sprintf(
-		`ffmpeg -i %s -ss %s -to %s \
+		`yes no | ffmpeg -i %s -ss %s -to %s \
 		-metadata text_source="%s" \
 		-ar 22050 %s`,
 		fd.AudioPath, ut.LeftTime, ut.RightTime,
@@ -128,18 +132,16 @@ func buildFFmpegCall(fd *FileData, ut *Utterance) string {
 func utterancesToFileTextMap(utterances []*Utterance) map[string]string {
 	resp := make(map[string]string)
 	for _, ut := range utterances {
-		resp[ut.OutPath] = ut.Text
+		resp[path.Base(ut.OutPath)] = ut.Text
 	}
 	return resp
 }
 
-func main() {
-	vttFilepath := os.Args[1]
-
-	fd := NewFileData(vttFilepath)
+func oneFileRun(filepath string) []*Utterance {
+	fd := NewFileData(filepath)
 	fmt.Println("working with:", fd)
 
-	lines := readLines(vttFilepath)
+	lines := readLines(fd.VttPath)
 	utterances := linesToUtterances(lines, fd)
 
 	ffmpegCommands := make([]string, len(utterances))
@@ -149,6 +151,54 @@ func main() {
 	fmt.Println("utterances len:", len(utterances))
 
 	writeLines(ffmpegCommands, ffCmdOut)
+	return utterances
+}
+
+func dirRun(dirpath string) []*Utterance {
+	resp := []*Utterance{}
+	vttFiles := getVttList(dirpath)
+	for _, vtt := range vttFiles {
+		utterances := oneFileRun(vtt)
+		resp = append(resp, utterances...)
+	}
+	return resp
+}
+
+func getVttList(dirpath string) []string {
+	resp := []string{}
+	err := filepath.Walk(dirpath,
+		func(path string, info os.FileInfo, err error) error {
+			if err != nil {
+				fmt.Printf("prevent panic by handling failure accessing a path %q: %v\n", path, err)
+				return err
+			}
+			if !info.IsDir() && strings.Contains(info.Name(), subExt) { // a directory is never a vtt file
+				resp = append(resp, path) // walked path, not the base name, so the file opens from any working directory
+			}
+			return nil
+		})
+	if err != nil {
+		fmt.Println(err)
+	}
+	return resp
+}
+
+func main() {
+	vttFilepath := flag.String("vttfile", "", "path to a vtt file")
+	vttDir := flag.String("vttdir", "", "path to a vtt dir")
+	// vttFilepath := os.Args[1]
+	flag.Parse()
+
+	utterances := []*Utterance{}
+	if *vttDir != "" { // flag.String never returns nil
+		utterances = dirRun(*vttDir)
+	} else if *vttFilepath != "" {
+		utterances = oneFileRun(*vttFilepath)
+	} else {
+		fmt.Println("no flags provided;")
+		return
+	}
+
 	metadata := readJson(metadataPath)
 	newMeta := utterancesToFileTextMap(utterances)
 
diff --git a/readme.md b/readme.md
--- a/readme.md
+++ b/readme.md
@@ -3,3 +3,4 @@
 - key-pair in metadata doesnt guarantee existence of audiofile;
 - metadata gets generated per vtt file, but it rather should be common between;
 - instead of writing ffmpeg commands to file maybe better to run them in go;
+- support directory call