summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore2
-rw-r--r--go.mod3
-rw-r--r--main.go66
-rw-r--r--readme.md1
4 files changed, 64 insertions, 8 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..c924002
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+prep-dataset
+ff_commands
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..88466ed
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module prep-dataset
+
+go 1.20
diff --git a/main.go b/main.go
index 0b5d981..a0fe70a 100644
--- a/main.go
+++ b/main.go
@@ -3,11 +3,13 @@ package main
import (
"bufio"
"encoding/json"
+ "flag"
"fmt"
"io/ioutil"
"log"
"os"
"path"
+ "path/filepath"
"strings"
)
@@ -84,7 +86,9 @@ func readLines(filepath string) []string {
// writeLines writes the lines to the given file.
func writeLines(lines []string, path string) error {
- file, err := os.Create(path)
+ // file, err := os.Create(path)
+ file, err := os.OpenFile(path,
+ os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0666)
if err != nil {
return err
}
@@ -117,7 +121,7 @@ func writeJson(data map[string]string) {
func buildFFmpegCall(fd *FileData, ut *Utterance) string {
return fmt.Sprintf(
- `ffmpeg -i %s -ss %s -to %s \
+ `yes no | ffmpeg -i %s -ss %s -to %s \
-metadata text_source="%s" \
-ar 22050 %s`,
fd.AudioPath, ut.LeftTime, ut.RightTime,
@@ -128,18 +132,16 @@ func buildFFmpegCall(fd *FileData, ut *Utterance) string {
func utterancesToFileTextMap(utterances []*Utterance) map[string]string {
resp := make(map[string]string)
for _, ut := range utterances {
- resp[ut.OutPath] = ut.Text
+ resp[path.Base(ut.OutPath)] = ut.Text
}
return resp
}
-func main() {
- vttFilepath := os.Args[1]
-
- fd := NewFileData(vttFilepath)
+func oneFileRun(filepath string) []*Utterance {
+ fd := NewFileData(filepath)
fmt.Println("working with:", fd)
- lines := readLines(vttFilepath)
+ lines := readLines(fd.VttPath)
utterances := linesToUtterances(lines, fd)
ffmpegCommands := make([]string, len(utterances))
@@ -149,6 +151,54 @@ func main() {
fmt.Println("utterances len:", len(utterances))
writeLines(ffmpegCommands, ffCmdOut)
+ return utterances
+}
+
+func dirRun(dirpath string) []*Utterance {
+ resp := []*Utterance{}
+ vttFiles := getVttList(dirpath)
+ for _, vtt := range vttFiles {
+ utterances := oneFileRun(vtt)
+ resp = append(resp, utterances...)
+ }
+ return resp
+}
+
+func getVttList(dirpath string) []string {
+ resp := []string{}
+ err := filepath.Walk(dirpath,
+ func(path string, info os.FileInfo, err error) error {
+ if err != nil {
+ fmt.Printf("prevent panic by handling failure accessing a path %q: %v\n", path, err)
+ return err
+ }
+ if strings.Contains(info.Name(), subExt) {
+ resp = append(resp, info.Name())
+ }
+ return nil
+ })
+ if err != nil {
+ fmt.Println(err)
+ }
+ return resp
+}
+
+func main() {
+ vttFilepath := flag.String("vttfile", "", "path to a vtt file")
+ vttDir := flag.String("vttdir", "", "path to a vtt dir")
+ // vttFilepath := os.Args[1]
+ flag.Parse()
+
+ utterances := []*Utterance{}
+ if vttDir != nil && *vttDir != "" {
+ utterances = dirRun(*vttDir)
+ } else if vttFilepath != nil && *vttFilepath != "" {
+ utterances = oneFileRun(*vttFilepath)
+ } else {
+ fmt.Println("no flags provided;")
+ return
+ }
+
metadata := readJson(metadataPath)
newMeta := utterancesToFileTextMap(utterances)
diff --git a/readme.md b/readme.md
index f6db8f6..9a159b7 100644
--- a/readme.md
+++ b/readme.md
@@ -3,3 +3,4 @@
- key-pair in metadata doesnt guarantee existence of audiofile;
- metadata gets generated per vtt file, but it rather should be common between;
- instead of writing ffmpeg commands to file maybe better to run them in go;
+- support directory call