summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--.gitignore1
-rw-r--r--main.go70
-rw-r--r--readme.md1
3 files changed, 56 insertions, 16 deletions
diff --git a/.gitignore b/.gitignore
index c924002..382e7a7 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,2 +1,3 @@
prep-dataset
ff_commands
+data
diff --git a/main.go b/main.go
index 2c9d3a4..32a49e5 100644
--- a/main.go
+++ b/main.go
@@ -2,6 +2,7 @@ package main
import (
"bufio"
+ "encoding/csv"
"encoding/json"
"flag"
"fmt"
@@ -13,11 +14,12 @@ import (
)
const (
- subExt = ".vtt"
- outdir = "/mnt/desktop/media/datasets/nesfatelp_voice/utterances"
- ffCmdOut = "./ff_commands"
- timeSep = "-->"
- metadataPath = "/mnt/desktop/media/datasets/nesfatelp_voice/metadata.json"
+ subExt = ".vtt"
+ outdir = "data/utterances"
+ ffCmdOut = "data/ff_commands"
+ timeSep = "-->"
+ metadataPath = "data/metadata.json"
+ metadataPathCSV = "data/metadata.tsv"
)
type Utterance struct {
@@ -25,6 +27,7 @@ type Utterance struct {
RightTime string
Text string
OutPath string
+ FD *FileData
}
type FileData struct {
@@ -42,6 +45,22 @@ func NewFileData(vttPath string) *FileData {
return fd
}
+// keysToSlice returns the keys of req as a slice.
+// The result follows Go's map iteration order, which is unspecified.
+func keysToSlice(req map[string]struct{}) []string {
+	// Start with length 0 and capacity len(req): allocating a non-zero length
+	// and then appending would leave len(req) empty strings at the front.
+	resp := make([]string, 0, len(req))
+	for k := range req {
+		resp = append(resp, k)
+	}
+	return resp
+}
+
+// mapToCSV converts every key/value pair of req into a two-element row
+// of the form [key, value]. Rows are produced in Go's map iteration order,
+// which is unspecified.
+func mapToCSV(req map[string]string) [][]string {
+	rows := make([][]string, 0, len(req))
+	for key, value := range req {
+		row := []string{key, value}
+		rows = append(rows, row)
+	}
+	return rows
+}
+
func linesToUtterances(lines []string, fd *FileData) []*Utterance {
resp := []*Utterance{}
for i, line := range lines {
@@ -55,6 +74,7 @@ func linesToUtterances(lines []string, fd *FileData) []*Utterance {
Text: lines[i+1],
LeftTime: strings.TrimSpace(splitted[0]),
RightTime: strings.TrimSpace(splitted[1]),
+ FD: fd,
}
u.OutPath = fmt.Sprintf("%s/%s_%s_%s.wav", outdir, fd.AudioBase,
u.LeftTime, u.RightTime)
@@ -121,13 +141,30 @@ func writeJson(data map[string]string) {
}
}
-func buildFFmpegCall(fd *FileData, ut *Utterance) string {
+// writeCSV writes data as tab-separated records to metadataPathCSV.
+// It panics if the file cannot be created or if writing fails.
+func writeCSV(data [][]string) {
+	f, err := os.Create(metadataPathCSV)
+	if err != nil {
+		panic(err)
+	}
+	// Register Close only after the error check, once f is known to be valid.
+	defer f.Close()
+
+	w := csv.NewWriter(f)
+	w.Comma = '\t'
+
+	// WriteAll flushes the writer itself and returns any flush error, so no
+	// separate Flush call is needed.
+	if err := w.WriteAll(data); err != nil {
+		panic(err)
+	}
+}
+
+func buildFFmpegCall(ut *Utterance) string {
return fmt.Sprintf(
`yes no | ffmpeg -i %s -ss %s -to %s \
-metadata text_source="%s" \
-ar 22050 %s`,
- fd.AudioPath, ut.LeftTime, ut.RightTime,
- fd.VttPath, ut.OutPath,
+ ut.FD.AudioPath, ut.LeftTime, ut.RightTime,
+ ut.FD.VttPath, ut.OutPath,
)
}
@@ -141,18 +178,10 @@ func utterancesToFileTextMap(utterances []*Utterance) map[string]string {
func oneFileRun(filepath string) []*Utterance {
fd := NewFileData(filepath)
- fmt.Println("working with:", filepath)
lines := readLines(fd.VttPath)
utterances := linesToUtterances(lines, fd)
- ffmpegCommands := make([]string, len(utterances))
- for i, ut := range utterances {
- ffmpegCommands[i] = buildFFmpegCall(fd, ut)
- }
- fmt.Println("utterances len:", len(utterances))
- writeLines(ffmpegCommands, ffCmdOut)
-
return utterances
}
@@ -201,6 +230,14 @@ func main() {
return
}
+ fmt.Println("sum of utterances:", len(utterances))
+
+ ffmpegCommands := make(map[string]struct{})
+ for _, ut := range utterances {
+ ffmpegCommands[buildFFmpegCall(ut)] = struct{}{}
+ }
+ writeLines(keysToSlice(ffmpegCommands), ffCmdOut)
+
metadata := readJson(metadataPath)
newMeta := utterancesToFileTextMap(utterances)
@@ -209,4 +246,5 @@ func main() {
}
writeJson(metadata)
+ writeCSV(mapToCSV(metadata))
}
diff --git a/readme.md b/readme.md
index f5a6239..fde427b 100644
--- a/readme.md
+++ b/readme.md
@@ -4,3 +4,4 @@
- instead of writing ffmpeg commands to file maybe better to run them in go;
- change metadata format from json to csv for lower memory consumption;
- add config file; move constants to config file;
+- ffmpeg call should be unique;