// Command main turns WebVTT subtitle files into (a) a list of ffmpeg commands
// that cut the matching audio file into one clip per cue and (b) a JSON
// metadata file mapping every clip file name to the cue text.
package main

import (
	"bufio"
	"encoding/json"
	"flag"
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"path"
	"path/filepath"
	"strings"
)

const (
	// subExt is the extension of the subtitle files this tool consumes.
	subExt = ".vtt"
	// outdir is the directory the generated ffmpeg commands write clips to.
	outdir = "/mnt/desktop/media/datasets/nesfatelp_voice/utterances"
	// ffCmdOut is the file the generated ffmpeg commands are appended to.
	ffCmdOut = "./ff_commands"
	// timeSep separates the start and end timestamps on a cue timing line.
	timeSep = "-->"
	// metadataPath is the JSON file mapping clip file names to their text.
	metadataPath = "/mnt/desktop/media/datasets/nesfatelp_voice/metadata.json"
)

// Utterance is one subtitle cue: its time range, the first line of its text
// and the path of the audio clip that will hold it.
type Utterance struct {
	LeftTime  string
	RightTime string
	Text      string
	OutPath   string
}

// FileData groups the paths derived from a single subtitle file.
type FileData struct {
	VttPath   string
	AudioPath string
	AudioBase string
}

// NewFileData derives the audio path for vttPath by removing a trailing
// subExt, and records that path's base name.
func NewFileData(vttPath string) *FileData {
	// TrimSuffix, not Trim: Trim interprets its second argument as a set of
	// characters and strips them from both ends, which mangles names such as
	// "video.wav.vtt" into "ideo.wa".
	audioPath := strings.TrimSuffix(vttPath, subExt)
	return &FileData{
		VttPath:   vttPath,
		AudioPath: audioPath,
		AudioBase: path.Base(audioPath),
	}
}

// linesToUtterances scans lines for cue timing lines (those containing
// timeSep) and builds one Utterance per cue. The text is the line directly
// following the timing line, or "" when the timing line is the last line.
// Timing lines with a missing start or end timestamp are skipped.
func linesToUtterances(lines []string, fd *FileData) []*Utterance {
	var resp []*Utterance
	for i, line := range lines {
		if !strings.Contains(line, timeSep) {
			continue
		}

		parts := strings.Split(line, timeSep)
		left := strings.TrimSpace(parts[0])
		// The end timestamp may be followed by cue settings such as
		// "align:start position:0%"; only the first token is the time.
		rightFields := strings.Fields(parts[1])
		if left == "" || len(rightFields) == 0 {
			continue
		}

		// Guard against a timing line at the very end of the file.
		text := ""
		if i+1 < len(lines) {
			text = lines[i+1]
		}

		u := &Utterance{
			Text:      text,
			LeftTime:  left,
			RightTime: rightFields[0],
		}
		u.OutPath = fmt.Sprintf("%s/%s_%s_%s.wav", outdir, fd.AudioBase, u.LeftTime, u.RightTime)
		resp = append(resp, u)
	}
	return resp
}

// readLines returns every line of the named file. Any open or read error
// terminates the program.
func readLines(name string) []string {
	file, err := os.Open(name)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	var resp []string
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		resp = append(resp, scanner.Text())
	}
	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
	return resp
}

// writeLines appends the lines to the given file, creating it if needed.
// It returns the first write, flush or close error encountered.
func writeLines(lines []string, dst string) (err error) {
	file, err := os.OpenFile(dst, os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0666)
	if err != nil {
		return err
	}
	// A failed Close on a written file can mean lost data, so report it
	// unless an earlier error is already being returned.
	defer func() {
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(file)
	for _, line := range lines {
		if _, err := fmt.Fprintln(w, line); err != nil {
			return err
		}
	}
	return w.Flush()
}

// readJson loads a JSON object of string values from the named file. A file
// that does not exist yields an empty map; any other read or decode error
// terminates the program. The returned map is never nil.
func readJson(name string) map[string]string {
	data := make(map[string]string)

	raw, err := ioutil.ReadFile(name)
	if err != nil {
		if os.IsNotExist(err) {
			// First run: nothing to merge with yet.
			return data
		}
		log.Fatal(err)
	}
	if err := json.Unmarshal(raw, &data); err != nil {
		log.Fatal(err)
	}
	// A JSON "null" document sets the map to nil; callers write into it.
	if data == nil {
		data = make(map[string]string)
	}
	return data
}

// writeJson serialises data as indented JSON into metadataPath. Any encode
// or write error terminates the program.
func writeJson(data map[string]string) {
	metadataJson, err := json.MarshalIndent(data, "", " ")
	if err != nil {
		log.Fatal(err)
	}
	if err := ioutil.WriteFile(metadataPath, metadataJson, 0644); err != nil {
		log.Fatal(err)
	}
}

// shellQuote wraps s in single quotes so a POSIX shell treats it as one
// literal word, escaping any single quote it contains.
func shellQuote(s string) string {
	return "'" + strings.Replace(s, "'", `'\''`, -1) + "'"
}

// buildFFmpegCall returns a single-line shell command that cuts the cue's
// time range out of fd.AudioPath into ut.OutPath, resampled to 22050 Hz and
// tagged with the subtitle file it came from. "yes no" is piped into ffmpeg's
// standard input. Every interpolated value is shell-quoted because file names
// and subtitle content may contain spaces or shell metacharacters.
func buildFFmpegCall(fd *FileData, ut *Utterance) string {
	return fmt.Sprintf(
		"yes no | ffmpeg -i %s -ss %s -to %s -metadata text_source=%s -ar 22050 %s",
		shellQuote(fd.AudioPath),
		shellQuote(ut.LeftTime),
		shellQuote(ut.RightTime),
		shellQuote(fd.VttPath),
		shellQuote(ut.OutPath),
	)
}

// utterancesToFileTextMap maps the base name of every utterance's output
// clip to its text.
func utterancesToFileTextMap(utterances []*Utterance) map[string]string {
	resp := make(map[string]string, len(utterances))
	for _, ut := range utterances {
		resp[path.Base(ut.OutPath)] = ut.Text
	}
	return resp
}

// oneFileRun parses one subtitle file, appends one ffmpeg command per cue to
// ffCmdOut and returns the parsed utterances. A failure to write the commands
// terminates the program.
func oneFileRun(vttPath string) []*Utterance {
	fd := NewFileData(vttPath)
	fmt.Println("working with:", fd)

	lines := readLines(fd.VttPath)
	utterances := linesToUtterances(lines, fd)

	ffmpegCommands := make([]string, len(utterances))
	for i, ut := range utterances {
		ffmpegCommands[i] = buildFFmpegCall(fd, ut)
	}
	fmt.Println("utterances len:", len(utterances))

	if err := writeLines(ffmpegCommands, ffCmdOut); err != nil {
		log.Fatalf("writing ffmpeg commands to %q: %v", ffCmdOut, err)
	}
	return utterances
}

// dirRun runs oneFileRun on every subtitle file found under dirpath and
// returns all utterances combined.
func dirRun(dirpath string) []*Utterance {
	var resp []*Utterance
	for _, vtt := range getVttList(dirpath) {
		resp = append(resp, oneFileRun(vtt)...)
	}
	return resp
}

// getVttList walks dirpath recursively and returns the path of every regular
// entry whose name ends in subExt. Paths are returned as produced by the walk
// (dirpath joined with the relative location) so they can be opened directly.
// A walk error is printed and the files collected so far are returned.
func getVttList(dirpath string) []string {
	var resp []string
	err := filepath.Walk(dirpath, func(p string, info os.FileInfo, err error) error {
		if err != nil {
			fmt.Printf("prevent panic by handling failure accessing a path %q: %v\n", p, err)
			return err
		}
		if !info.IsDir() && strings.HasSuffix(info.Name(), subExt) {
			resp = append(resp, p)
		}
		return nil
	})
	if err != nil {
		fmt.Println(err)
	}
	return resp
}

// main processes either a directory (-vttdir, which takes precedence) or a
// single file (-vttfile), then merges the resulting clip-name-to-text entries
// into the metadata file.
func main() {
	vttFilepath := flag.String("vttfile", "", "path to a vtt file")
	vttDir := flag.String("vttdir", "", "path to a vtt dir")
	flag.Parse()

	if *vttDir == "" && *vttFilepath == "" {
		fmt.Println("no flags provided;")
		return
	}

	// Load the existing metadata first so an unreadable metadata file is
	// reported before any commands are appended to ffCmdOut.
	metadata := readJson(metadataPath)

	var utterances []*Utterance
	if *vttDir != "" {
		utterances = dirRun(*vttDir)
	} else {
		utterances = oneFileRun(*vttFilepath)
	}

	for k, v := range utterancesToFileTextMap(utterances) {
		metadata[k] = v
	}
	writeJson(metadata)
}