summaryrefslogtreecommitdiff
path: root/main.go
diff options
context:
space:
mode:
Diffstat (limited to 'main.go')
-rw-r--r--main.go139
1 files changed, 139 insertions, 0 deletions
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..c09e0e3
--- /dev/null
+++ b/main.go
@@ -0,0 +1,139 @@
+package main
+
+import (
+ "bufio"
+ "encoding/json"
+ "fmt"
+ "io/ioutil"
+ "log"
+ "os"
+ "path"
+ "strings"
+)
+
// Fixed settings shared by the functions in this file.
const (
	// timeSep is the arrow found on cue timing lines; linesToUtterances uses
	// it both to recognise those lines and to split them.
	timeSep = "-->"
	// subExt is the subtitle extension NewFileData uses when deriving the
	// audio path from the subtitle path.
	subExt = ".vtt"
	// outdir is the directory prefix of every utterance output path.
	outdir = "/mnt/desktop/media/datasets/nesfatelp_voice/utterances"
	// ffCmdOut is the file main writes the generated ffmpeg commands to.
	ffCmdOut = "./ff_commands"
)
+
// Utterance describes one subtitle cue and the audio clip cut for it.
type Utterance struct {
	// LeftTime and RightTime are the timestamps found before and after the
	// "-->" separator of the cue timing line.
	LeftTime, RightTime string
	// Text is the subtitle line that follows the timing line.
	Text string
	// OutPath is the path of the .wav file generated for this cue.
	OutPath string
}
+
// FileData groups the paths that belong to one subtitle file.
type FileData struct {
	// VttPath is the subtitle path exactly as handed to NewFileData.
	VttPath string
	// AudioPath is the audio file path NewFileData derives from VttPath;
	// AudioBase is the last path element of AudioPath.
	AudioPath, AudioBase string
}
+
+func NewFileData(vttPath string) *FileData {
+ fd := &FileData{
+ VttPath: vttPath,
+ AudioPath: strings.Trim(vttPath, subExt),
+ }
+ fd.AudioBase = path.Base(fd.AudioPath)
+ return fd
+}
+
+func linesToUtterances(lines []string, fd *FileData) []*Utterance {
+ resp := []*Utterance{}
+ for i, line := range lines {
+ if !strings.Contains(line, timeSep) {
+ continue
+ }
+ // get times
+ splitted := strings.Split(line, timeSep)
+
+ u := &Utterance{
+ Text: lines[i+1],
+ LeftTime: strings.TrimSpace(splitted[0]),
+ RightTime: strings.TrimSpace(splitted[1]),
+ }
+ u.OutPath = fmt.Sprintf("%s/%s_%s_%s.wav", outdir, fd.AudioBase,
+ u.LeftTime, u.RightTime)
+ resp = append(resp, u)
+
+ }
+ return resp
+}
+
// readLines returns the content of the file at filepath as a slice with one
// entry per line, line terminators removed. The slice is empty but non-nil
// for an empty file. Any failure to open or scan the file ends the program
// through log.Fatal.
func readLines(filepath string) []string {
	f, openErr := os.Open(filepath)
	if openErr != nil {
		log.Fatal(openErr)
	}
	defer f.Close()

	collected := make([]string, 0)
	sc := bufio.NewScanner(f)
	for {
		if !sc.Scan() {
			break
		}
		collected = append(collected, sc.Text())
	}

	// Scan returns false both at end of input and on failure; Err tells the
	// two cases apart.
	if scanErr := sc.Err(); scanErr != nil {
		log.Fatal(scanErr)
	}
	return collected
}
+
// writeLines creates (or truncates) the file at path and writes every element
// of lines to it, each followed by a newline.
//
// It returns the first error met while creating, writing, flushing or closing
// the file. The close error is reported too, because for a file that was just
// written a failing Close can be the only sign that data did not reach disk.
func writeLines(lines []string, path string) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// Keep an earlier write/flush error; otherwise surface the close error.
		if closeErr := file.Close(); closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	w := bufio.NewWriter(file)
	for _, line := range lines {
		if _, writeErr := fmt.Fprintln(w, line); writeErr != nil {
			return writeErr
		}
	}
	return w.Flush()
}
+
+func buildFFmpegCall(fd *FileData, ut *Utterance) string {
+ return fmt.Sprintf(
+ `ffmpeg -i %s -ss %s -to %s \
+ -metadata text_source="%s" \
+ -ar 22050 %s`,
+ fd.AudioPath, ut.LeftTime, ut.RightTime,
+ fd.VttPath, ut.OutPath,
+ )
+}
+
+func utterancesToFileTextMap(utterances []*Utterance) map[string]string {
+ resp := make(map[string]string)
+ for _, ut := range utterances {
+ resp[ut.OutPath] = ut.Text
+ }
+ return resp
+}
+
+func main() {
+ vttFilepath := os.Args[1]
+
+ fd := NewFileData(vttFilepath)
+ fmt.Println("working with:", fd)
+
+ lines := readLines(vttFilepath)
+ utterances := linesToUtterances(lines, fd)
+
+ ffmpegCommands := make([]string, len(utterances))
+ for i, ut := range utterances {
+ ffmpegCommands[i] = buildFFmpegCall(fd, ut)
+ }
+ fmt.Println("utterances len:", len(utterances))
+ writeLines(ffmpegCommands, ffCmdOut)
+
+ metadata := utterancesToFileTextMap(utterances)
+ metadataJson, _ := json.MarshalIndent(metadata, "", " ")
+ err := ioutil.WriteFile("metadata.json", metadataJson, 0644)
+ if err != nil {
+ log.Fatal(err)
+ }
+}