package main import ( "bufio" "encoding/csv" "encoding/json" "flag" "fmt" "io/ioutil" "os" "path" "path/filepath" "strings" ) const ( subExt = ".vtt" outdir = "data/utterances" ffCmdOut = "data/ff_commands" timeSep = "-->" metadataPath = "data/metadata.json" metadataPathCSV = "data/metadata.tsv" ) type Utterance struct { LeftTime string RightTime string Text string OutPath string FD *FileData } type FileData struct { VttPath string AudioPath string AudioBase string } func NewFileData(vttPath string) *FileData { fd := &FileData{ VttPath: vttPath, AudioPath: strings.Trim(vttPath, subExt), } fd.AudioBase = path.Base(fd.AudioPath) return fd } func keysToSlice(req map[string]struct{}) []string { resp := []string{} for k := range req { resp = append(resp, k) } return resp } func mapToCSV(req map[string]string) [][]string { resp := [][]string{} for k, v := range req { resp = append(resp, []string{k, v}) } return resp } func linesToUtterances(lines []string, fd *FileData) []*Utterance { resp := []*Utterance{} for i, line := range lines { if !strings.Contains(line, timeSep) { continue } // get times splitted := strings.Split(line, timeSep) u := &Utterance{ Text: lines[i+1], LeftTime: strings.TrimSpace(splitted[0]), RightTime: strings.TrimSpace(splitted[1]), FD: fd, } u.OutPath = fmt.Sprintf("%s/%s_%s_%s.wav", outdir, fd.AudioBase, u.LeftTime, u.RightTime) if u.LeftTime == u.RightTime { continue } resp = append(resp, u) } return resp } func readLines(filepath string) []string { file, err := os.Open(filepath) if err != nil { panic(err) } defer file.Close() resp := []string{} scanner := bufio.NewScanner(file) for scanner.Scan() { resp = append(resp, scanner.Text()) } if err := scanner.Err(); err != nil { panic(err) } return resp } // writeLines writes the lines to the given file. func writeLines(lines []string, path string) error { file, err := os.Create(path) // file, err := os.OpenFile(path, // os.O_APPEND|os.O_WRONLY|os.O_CREATE, 0666) if err != nil { return err } defer file.Close() w := bufio.NewWriter(file) for _, line := range lines { fmt.Fprintln(w, line) } return w.Flush() } func readJson(filepath string) map[string]string { data := make(map[string]string) plan, err := ioutil.ReadFile(filepath) if err != nil { return data } err = json.Unmarshal(plan, &data) if err != nil { panic(err) } return data } func writeJson(data map[string]string) { metadataJson, _ := json.MarshalIndent(data, "", " ") err := ioutil.WriteFile(metadataPath, metadataJson, 0644) if err != nil { panic(err) } } func writeCSV(data [][]string) { f, err := os.Create(metadataPathCSV) defer f.Close() if err != nil { panic(err) } w := csv.NewWriter(f) w.Comma = '\t' defer w.Flush() if err := w.WriteAll(data); err != nil { panic(err) } } func buildFFmpegCall(ut *Utterance) string { return fmt.Sprintf( `yes no | ffmpeg -i "%s" -ss %s -to %s \ -metadata text_source="%s" \ -ar 22050 "%s"`, ut.FD.AudioPath, ut.LeftTime, ut.RightTime, ut.FD.VttPath, ut.OutPath, ) } func utterancesToFileTextMap(utterances []*Utterance) map[string]string { resp := make(map[string]string) for _, ut := range utterances { resp[path.Base(ut.OutPath)] = ut.Text } return resp } func oneFileRun(filepath string) []*Utterance { fd := NewFileData(filepath) lines := readLines(fd.VttPath) utterances := linesToUtterances(lines, fd) return utterances } func dirRun(dirpath string) []*Utterance { resp := []*Utterance{} vttFiles := getVttList(dirpath) for _, vtt := range vttFiles { utterances := oneFileRun(vtt) resp = append(resp, utterances...) } return resp } func getVttList(dirpath string) []string { resp := []string{} err := filepath.Walk(dirpath, func(path string, info os.FileInfo, err error) error { if err != nil { fmt.Printf("prevent panic by handling failure accessing a path %q: %v\n", path, err) return err } if strings.Contains(info.Name(), subExt) { resp = append(resp, path) } return nil }) if err != nil { fmt.Println(err) } return resp } func main() { vttFilepath := flag.String("f", "", "path to a vtt file") vttDir := flag.String("d", "", "path to a vtt dir") flag.Parse() utterances := []*Utterance{} if vttDir != nil && *vttDir != "" { utterances = dirRun(*vttDir) } else if vttFilepath != nil && *vttFilepath != "" { utterances = oneFileRun(*vttFilepath) } else { fmt.Println("no flags provided;") return } fmt.Println("sum of utterances:", len(utterances)) if err := os.MkdirAll(outdir, 0755); err != nil { panic(err) } filteredUtterances := []*Utterance{} for _, ut := range utterances { if _, err := os.Stat(ut.OutPath); os.IsNotExist(err) { if err := cutoutClipAndTranscode(ut); err == nil { filteredUtterances = append(filteredUtterances, ut) } } } newMeta := utterancesToFileTextMap(filteredUtterances) writeCSV(mapToCSV(newMeta)) }