diff options
author | GrailFinder <wohilas@gmail.com> | 2023-02-11 10:32:45 +0300 |
---|---|---|
committer | GrailFinder <wohilas@gmail.com> | 2023-02-11 10:32:45 +0300 |
commit | 6bb47e668bd929cbc8edec6824cc729857ad3c9a (patch) | |
tree | 2027ec4210c30b086f691b321b517b8af1d08057 /main.go | |
parent | faa4e859d23670c5750304418c245004fe59e102 (diff) |
Feat: add go version
Diffstat (limited to 'main.go')
-rw-r--r-- | main.go | 139 |
1 files changed, 139 insertions, 0 deletions
// This program reads a WebVTT subtitle file and prepares the material needed
// to cut the matching audio file into one clip per subtitle cue. It writes:
//
//   - a file of ffmpeg commands (ffCmdOut), one command per cue;
//   - metadata.json, mapping every clip path to the text of its cue.
//
// The program does not run ffmpeg itself.
package main

import (
	"bufio"
	"encoding/json"
	"fmt"
	"io/ioutil"
	"log"
	"os"
	"path"
	"strings"
)

const (
	// subExt is the subtitle extension removed from the input path to obtain
	// the path of the audio file.
	subExt = ".vtt"
	// outdir is the directory the generated ffmpeg commands write clips to.
	outdir = "/mnt/desktop/media/datasets/nesfatelp_voice/utterances"
	// ffCmdOut is the file that receives the generated ffmpeg commands.
	ffCmdOut = "./ff_commands"
	// timeSep separates the start and end timestamps on a cue timing line.
	timeSep = "-->"
)

// Utterance describes one subtitle cue: its start (LeftTime) and end
// (RightTime) timestamps exactly as written in the subtitle file, the cue
// text, and the path (OutPath) of the audio clip to be cut for it.
type Utterance struct {
	LeftTime  string
	RightTime string
	Text      string
	OutPath   string
}

// FileData groups the paths derived from one subtitle file: the subtitle file
// itself (VttPath), the audio file it belongs to (AudioPath) and the base name
// of that audio file (AudioBase).
type FileData struct {
	VttPath   string
	AudioPath string
	AudioBase string
}

// NewFileData derives the audio path from vttPath by removing a trailing
// subExt (".vtt"). A path without that suffix is used unchanged.
func NewFileData(vttPath string) *FileData {
	// TrimSuffix, not Trim: Trim treats its second argument as a set of
	// characters and would also strip leading/trailing '.', 'v' and 't' that
	// belong to the audio file name (e.g. "x.wav.vtt" -> "x.wa").
	audioPath := strings.TrimSuffix(vttPath, subExt)
	return &FileData{
		VttPath:   vttPath,
		AudioPath: audioPath,
		AudioBase: path.Base(audioPath),
	}
}

// linesToUtterances scans lines for cue timing lines (lines containing
// timeSep) and returns one Utterance per timing line, in file order.
//
// The line directly after the timing line is taken as the cue text; when the
// timing line is the last line the text is empty. Only that single line is
// used, so additional lines of a multi-line cue are not included.
func linesToUtterances(lines []string, fd *FileData) []*Utterance {
	resp := []*Utterance{}
	for i, line := range lines {
		if !strings.Contains(line, timeSep) {
			continue
		}

		// Contains succeeded, so SplitN always yields exactly two parts.
		parts := strings.SplitN(line, timeSep, 2)
		left := strings.TrimSpace(parts[0])
		right := strings.TrimSpace(parts[1])
		// WebVTT allows cue settings after the end timestamp
		// ("align:start position:0%"). Keep only the timestamp so the
		// settings leak neither into the file name nor into the command.
		if fields := strings.Fields(right); len(fields) > 0 {
			right = fields[0]
		}

		// A timing line at the very end of the input has no text line.
		var text string
		if i+1 < len(lines) {
			text = lines[i+1]
		}

		u := &Utterance{
			LeftTime:  left,
			RightTime: right,
			Text:      text,
		}
		u.OutPath = fmt.Sprintf("%s/%s_%s_%s.wav", outdir, fd.AudioBase,
			u.LeftTime, u.RightTime)
		resp = append(resp, u)
	}
	return resp
}

// readLines returns the lines of the file at filepath without their line
// terminators. Any error while opening or reading the file terminates the
// program via log.Fatal.
func readLines(filepath string) []string {
	file, err := os.Open(filepath)
	if err != nil {
		log.Fatal(err)
	}
	defer file.Close()

	resp := []string{}
	scanner := bufio.NewScanner(file)
	for scanner.Scan() {
		resp = append(resp, scanner.Text())
	}

	if err := scanner.Err(); err != nil {
		log.Fatal(err)
	}
	return resp
}

// writeLines writes the lines to the given file, one per line, creating the
// file or truncating an existing one. It returns the first error encountered
// while creating, writing, flushing or closing the file.
func writeLines(lines []string, path string) (err error) {
	file, err := os.Create(path)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean lost data, so report it unless an earlier
		// error is already being returned.
		if cerr := file.Close(); err == nil {
			err = cerr
		}
	}()

	w := bufio.NewWriter(file)
	for _, line := range lines {
		if _, werr := fmt.Fprintln(w, line); werr != nil {
			return werr
		}
	}
	return w.Flush()
}

// shellQuote wraps s in single quotes so that a POSIX shell passes it to the
// command as one literal argument. Embedded single quotes are written as '\''.
func shellQuote(s string) string {
	return "'" + strings.ReplaceAll(s, "'", `'\''`) + "'"
}

// buildFFmpegCall returns the shell command that cuts the span
// ut.LeftTime..ut.RightTime out of fd.AudioPath into ut.OutPath, records the
// subtitle path in the text_source metadata tag and passes "-ar 22050".
//
// The command spans three lines joined by backslash continuations. Every
// interpolated value is shell-quoted so that paths containing spaces or shell
// metacharacters remain a single argument.
func buildFFmpegCall(fd *FileData, ut *Utterance) string {
	return fmt.Sprintf(
		`ffmpeg -i %s -ss %s -to %s \
		-metadata text_source=%s \
		-ar 22050 %s`,
		shellQuote(fd.AudioPath), shellQuote(ut.LeftTime), shellQuote(ut.RightTime),
		shellQuote(fd.VttPath), shellQuote(ut.OutPath),
	)
}

// utterancesToFileTextMap maps each utterance's OutPath to its Text. When two
// utterances share an OutPath the later one wins.
func utterancesToFileTextMap(utterances []*Utterance) map[string]string {
	resp := make(map[string]string, len(utterances))
	for _, ut := range utterances {
		resp[ut.OutPath] = ut.Text
	}
	return resp
}

// main expects the path of a subtitle file as its first argument, writes the
// ffmpeg commands to ffCmdOut and the clip-to-text mapping to metadata.json in
// the current directory. Any failure terminates the program with a log
// message.
func main() {
	if len(os.Args) < 2 {
		log.Fatalf("missing argument: path to a %s subtitle file", subExt)
	}
	vttFilepath := os.Args[1]

	fd := NewFileData(vttFilepath)
	fmt.Println("working with:", fd)

	lines := readLines(vttFilepath)
	utterances := linesToUtterances(lines, fd)

	ffmpegCommands := make([]string, len(utterances))
	for i, ut := range utterances {
		ffmpegCommands[i] = buildFFmpegCall(fd, ut)
	}
	fmt.Println("utterances len:", len(utterances))
	if err := writeLines(ffmpegCommands, ffCmdOut); err != nil {
		log.Fatal(err)
	}

	metadata := utterancesToFileTextMap(utterances)
	metadataJSON, err := json.MarshalIndent(metadata, "", " ")
	if err != nil {
		log.Fatal(err)
	}
	if err := ioutil.WriteFile("metadata.json", metadataJSON, 0644); err != nil {
		log.Fatal(err)
	}
}