diff options
-rw-r--r-- | .gitignore | 1 | ||||
-rw-r--r-- | main.go | 70 | ||||
-rw-r--r-- | readme.md | 1 |
3 files changed, 56 insertions, 16 deletions
@@ -1,2 +1,3 @@ prep-dataset ff_commands +data @@ -2,6 +2,7 @@ package main import ( "bufio" + "encoding/csv" "encoding/json" "flag" "fmt" @@ -13,11 +14,12 @@ import ( ) const ( - subExt = ".vtt" - outdir = "/mnt/desktop/media/datasets/nesfatelp_voice/utterances" - ffCmdOut = "./ff_commands" - timeSep = "-->" - metadataPath = "/mnt/desktop/media/datasets/nesfatelp_voice/metadata.json" + subExt = ".vtt" + outdir = "data/utterances" + ffCmdOut = "data/ff_commands" + timeSep = "-->" + metadataPath = "data/metadata.json" + metadataPathCSV = "data/metadata.tsv" ) type Utterance struct { @@ -25,6 +27,7 @@ type Utterance struct { RightTime string Text string OutPath string + FD *FileData } type FileData struct { @@ -42,6 +45,22 @@ func NewFileData(vttPath string) *FileData { return fd } +func keysToSlice(req map[string]struct{}) []string { + resp := make([]string, len(req)) + for k := range req { + resp = append(resp, k) + } + return resp +} + +func mapToCSV(req map[string]string) [][]string { + resp := [][]string{} + for k, v := range req { + resp = append(resp, []string{k, v}) + } + return resp +} + func linesToUtterances(lines []string, fd *FileData) []*Utterance { resp := []*Utterance{} for i, line := range lines { @@ -55,6 +74,7 @@ func linesToUtterances(lines []string, fd *FileData) []*Utterance { Text: lines[i+1], LeftTime: strings.TrimSpace(splitted[0]), RightTime: strings.TrimSpace(splitted[1]), + FD: fd, } u.OutPath = fmt.Sprintf("%s/%s_%s_%s.wav", outdir, fd.AudioBase, u.LeftTime, u.RightTime) @@ -121,13 +141,30 @@ func writeJson(data map[string]string) { } } -func buildFFmpegCall(fd *FileData, ut *Utterance) string { +func writeCSV(data [][]string) { + f, err := os.Create(metadataPathCSV) + defer f.Close() + + if err != nil { + panic(err) + } + + w := csv.NewWriter(f) + w.Comma = '\t' + defer w.Flush() + + if err := w.WriteAll(data); err != nil { + panic(err) + } +} + +func buildFFmpegCall(ut *Utterance) string { return fmt.Sprintf( `yes no | ffmpeg -i %s -ss %s -to %s \ -metadata text_source="%s" \ -ar 22050 %s`, - fd.AudioPath, ut.LeftTime, ut.RightTime, - fd.VttPath, ut.OutPath, + ut.FD.AudioPath, ut.LeftTime, ut.RightTime, + ut.FD.VttPath, ut.OutPath, ) } @@ -141,18 +178,10 @@ func utterancesToFileTextMap(utterances []*Utterance) map[string]string { func oneFileRun(filepath string) []*Utterance { fd := NewFileData(filepath) - fmt.Println("working with:", filepath) lines := readLines(fd.VttPath) utterances := linesToUtterances(lines, fd) - ffmpegCommands := make([]string, len(utterances)) - for i, ut := range utterances { - ffmpegCommands[i] = buildFFmpegCall(fd, ut) - } - fmt.Println("utterances len:", len(utterances)) - writeLines(ffmpegCommands, ffCmdOut) - return utterances } @@ -201,6 +230,14 @@ func main() { return } + fmt.Println("sum of utterances:", len(utterances)) + + ffmpegCommands := make(map[string]struct{}) + for _, ut := range utterances { + ffmpegCommands[buildFFmpegCall(ut)] = struct{}{} + } + writeLines(keysToSlice(ffmpegCommands), ffCmdOut) + metadata := readJson(metadataPath) newMeta := utterancesToFileTextMap(utterances) @@ -209,4 +246,5 @@ func main() { } writeJson(metadata) + writeCSV(mapToCSV(metadata)) } @@ -4,3 +4,4 @@ - instead of writing ffmpeg commands to file maybe better to run them in go; - change metadata format from json to csv for lower memory consumption; - add config file; move constance to config file; +- ffmpeg call should be unique; |