// Command main turns a WebVTT subtitle file into training material for a
// voice dataset: it reads the .vtt file named by the first command-line
// argument, emits one ffmpeg command per cue into ffCmdOut (each command cuts
// the cue's time range out of the matching audio file), and merges an
// "output wav path -> cue text" entry for every cue into the JSON file at
// metadataPath.
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"path"
	"strings"
)

const (
	// subExt is the subtitle extension stripped from the input path to
	// obtain the audio path.
	subExt = ".vtt"
	// outdir is the directory the generated ffmpeg commands write clips to.
	outdir = "/mnt/desktop/media/datasets/nesfatelp_voice/utterances"
	// ffCmdOut is the file the generated ffmpeg commands are written to.
	ffCmdOut = "./ff_commands"
	// timeSep separates the start and end timestamps on a cue timing line.
	timeSep = "-->"
	// metadataPath is the JSON file mapping clip paths to their text.
	metadataPath = "/mnt/desktop/media/datasets/nesfatelp_voice/metadata.json"
)

// ffmpegCallFormat is the template for one ffmpeg invocation. Its arguments
// are, in order: input audio path, start time, end time, source subtitle
// path, output clip path. The command spans three lines joined by shell
// line continuations.
const ffmpegCallFormat = "ffmpeg -i %s -ss %s -to %s \\\n" +
	"\t-metadata text_source=\"%s\" \\\n" +
	"\t-ar 22050 %s"

// Utterance is a single subtitle cue: its time range, its text and the path
// of the audio clip that will hold it.
type Utterance struct {
	LeftTime  string // start timestamp, copied verbatim from the timing line
	RightTime string // end timestamp, copied verbatim from the timing line
	Text      string // the line that follows the timing line
	OutPath   string // destination of the extracted .wav clip
}

// FileData groups the paths derived from one subtitle file.
type FileData struct {
	VttPath   string // subtitle path as given by the caller
	AudioPath string // VttPath without its trailing subExt
	AudioBase string // last path element of AudioPath
}

// NewFileData derives the audio path and its base name from vttPath.
//
// Only a trailing subExt is removed. (strings.Trim would treat subExt as a
// set of characters and also eat leading/trailing '.', 'v' and 't' that
// belong to the file name, e.g. "tv_talk.wav.vtt" -> "_talk.wa".)
func NewFileData(vttPath string) *FileData {
	fd := &FileData{
		VttPath:   vttPath,
		AudioPath: strings.TrimSuffix(vttPath, subExt),
	}
	fd.AudioBase = path.Base(fd.AudioPath)
	return fd
}

// linesToUtterances scans lines for cue timing lines (those containing
// timeSep) and returns one Utterance per timing line, in input order.
//
// The cue text is the single line following the timing line; a timing line
// that is the last line of the input gets empty text. Anything after the end
// timestamp on the timing line (such as cue settings) is ignored, and a
// timing line without an end timestamp is skipped.
func linesToUtterances(lines []string, fd *FileData) []*Utterance {
	resp := []*Utterance{}
	for i, line := range lines {
		if !strings.Contains(line, timeSep) {
			continue
		}

		// Contains succeeded, so the split has at least two parts.
		parts := strings.Split(line, timeSep)
		// The end timestamp is the first field after the separator; cue
		// settings may follow it and must not leak into paths or commands.
		endFields := strings.Fields(parts[1])
		if len(endFields) == 0 {
			continue
		}

		// Guard against a timing line with nothing after it.
		text := ""
		if i+1 < len(lines) {
			text = lines[i+1]
		}

		u := &Utterance{
			Text:      text,
			LeftTime:  strings.TrimSpace(parts[0]),
			RightTime: endFields[0],
		}
		u.OutPath = fmt.Sprintf("%s/%s_%s_%s.wav", outdir, fd.AudioBase, u.LeftTime, u.RightTime)
		resp = append(resp, u)
	}
	return resp
}

// readLines returns the lines of the file at filepath, without line
// terminators. Any open or read error terminates the program.
func readLines(filepath string) []string {
	file, err := os.Open(filepath)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	resp := []string{}
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		resp = append(resp, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
	return resp
}

// writeLines writes the lines to the given file, creating or truncating it,
// with a newline after each entry. It returns the first error encountered
// while creating, writing, flushing or closing the file.
func writeLines(lines []string, path string) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	// A failed Close can mean lost data, so report it unless an earlier
	// error is already being returned.
	defer func() {
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(file)
	for _, line := range lines {
		if _, werr := fmt.Fprintln(w, line); werr != nil {
			return werr
		}
	}
	return w.Flush()
}

// readJson loads the JSON object stored at filepath into a string map. A
// read or parse error terminates the program. The returned map is never nil,
// so callers may add entries to it.
func readJson(filepath string) map[string]string {
	plan, err := ioutil.ReadFile(filepath)
	if err != nil {
		log.Fatal(err)
	}

	data := make(map[string]string)
	if err := json.Unmarshal(plan, &data); err != nil {
		log.Fatal(err)
	}
	// A JSON null document resets the map to nil; writing to it would panic.
	if data == nil {
		data = make(map[string]string)
	}
	return data
}

// writeJson serialises data as indented JSON and writes it to metadataPath.
// An encoding or write error terminates the program.
func writeJson(data map[string]string) {
	metadataJson, err := json.MarshalIndent(data, "", " ")
	if err != nil {
		log.Fatal(err)
	}
	if err := ioutil.WriteFile(metadataPath, metadataJson, 0644); err != nil {
		log.Fatal(err)
	}
}

// buildFFmpegCall returns the ffmpeg command that cuts ut's time range out of
// fd's audio file, resamples it to 22050 Hz and stores it at ut.OutPath, with
// the subtitle path recorded in the text_source metadata tag.
//
// NOTE(review): the input and output paths are inserted unquoted, so paths
// containing spaces or shell metacharacters will not survive a shell.
func buildFFmpegCall(fd *FileData, ut *Utterance) string {
	return fmt.Sprintf(
		ffmpegCallFormat,
		fd.AudioPath,
		ut.LeftTime,
		ut.RightTime,
		fd.VttPath,
		ut.OutPath,
	)
}

// utterancesToFileTextMap maps each utterance's OutPath to its Text. When
// several utterances share an OutPath the last one wins.
func utterancesToFileTextMap(utterances []*Utterance) map[string]string {
	resp := make(map[string]string, len(utterances))
	for _, ut := range utterances {
		resp[ut.OutPath] = ut.Text
	}
	return resp
}

func main() {
	if len(os.Args) < 2 {
		log.Fatal("usage: pass the path of a " + subExt + " subtitle file as the first argument")
	}
	vttFilepath := os.Args[1]

	fd := NewFileData(vttFilepath)
	fmt.Println("working with:", fd)

	lines := readLines(vttFilepath)
	utterances := linesToUtterances(lines, fd)

	ffmpegCommands := make([]string, len(utterances))
	for i, ut := range utterances {
		ffmpegCommands[i] = buildFFmpegCall(fd, ut)
	}
	fmt.Println("utterances len:", len(utterances))

	if err := writeLines(ffmpegCommands, ffCmdOut); err != nil {
		log.Fatal(err)
	}

	// Merge the new entries into the existing metadata; new values replace
	// old ones recorded under the same clip path.
	metadata := readJson(metadataPath)
	for k, v := range utterancesToFileTextMap(utterances) {
		metadata[k] = v
	}
	writeJson(metadata)
}