summaryrefslogtreecommitdiff
path: root/extra
diff options
context:
space:
mode:
authorGrail Finder <wohilas@gmail.com>2026-01-10 10:16:10 +0300
committerGrail Finder <wohilas@gmail.com>2026-01-10 10:16:10 +0300
commit8474b87c43650b53aa562a62aaac12d760aa5fc7 (patch)
tree5ea6f1b351d3e5f3ca9648ef2fa04de0dcc4400f /extra
parent505477b8e388ee351f724e1b389db549bb4ce003 (diff)
Enha: clean text before sending to tts
Diffstat (limited to 'extra')
-rw-r--r--extra/tts.go68
-rw-r--r--extra/tts_test.go40
2 files changed, 95 insertions, 13 deletions
diff --git a/extra/tts.go b/extra/tts.go
index 0209072..e87afad 100644
--- a/extra/tts.go
+++ b/extra/tts.go
@@ -13,6 +13,7 @@ import (
"log/slog"
"net/http"
"os"
+ "regexp"
"strings"
"time"
@@ -31,6 +32,44 @@ var (
// endsWithPunctuation = regexp.MustCompile(`[;.!?]$`)
)
// Matchers used by cleanText. They are built once at package scope because
// cleanText runs for every sentence handed to the TTS backend.
var (
	// markdownStripper deletes markdown punctuation in a single pass.
	markdownStripper = strings.NewReplacer(
		"*", "", // bold/italic markers
		"#", "", // headers
		"_", "", // underline/italic markers
		"~", "", // strikethrough markers
		"`", "", // code markers
		"[", "", // link brackets
		"]", "", // link brackets
		"!", "", // exclamation marks (also the image-link prefix)
	)

	// htmlTagRE matches anything shaped like an HTML tag.
	htmlTagRE = regexp.MustCompile(`<[^>]*>`)

	// tableSeparatorRE matches a whitespace-trimmed markdown table delimiter
	// row: it starts and ends with '|' and holds only dashes, equals signs,
	// alignment colons, inner column bars and spaces in between
	// (e.g. |----|, |===|, | - - - |, |---|---|, | :--- | ---: |).
	tableSeparatorRE = regexp.MustCompile(`^\|[-=:|\s]+\|$`)
)

// cleanText removes markdown and special characters that are not suitable for TTS.
//
// It strips markdown punctuation (* # _ ~ ` [ ] !), removes HTML tags, drops
// lines that are table delimiter rows, removes the vertical bars from every
// other line while keeping the cell content, and finally trims leading and
// trailing whitespace. The result may be empty; callers are expected to skip
// empty results.
func cleanText(text string) string {
	text = markdownStripper.Replace(text)
	text = htmlTagRE.ReplaceAllString(text, "")

	lines := strings.Split(text, "\n")
	kept := make([]string, 0, len(lines))
	for _, line := range lines {
		// Delimiter rows carry no speakable content; skip them entirely.
		if tableSeparatorRE.MatchString(strings.TrimSpace(line)) {
			continue
		}
		// Ordinary line or table row: drop the bars but keep the content.
		kept = append(kept, strings.ReplaceAll(line, "|", ""))
	}

	return strings.TrimSpace(strings.Join(kept, "\n"))
}

+
type Orator interface {
Speak(text string) error
Stop()
@@ -97,9 +136,13 @@ func (o *KokoroOrator) readroutine() {
}
continue // if only one (often incomplete) sentence; wait for next chunk
}
- o.logger.Debug("calling Speak with sentence", "sent", sentence.Text)
- if err := o.Speak(sentence.Text); err != nil {
- o.logger.Error("tts failed", "sentence", sentence.Text, "error", err)
+ cleanedText := cleanText(sentence.Text)
+ if cleanedText == "" {
+ continue // Skip empty text after cleaning
+ }
+ o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
+ if err := o.Speak(cleanedText); err != nil {
+ o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
}
}
case <-TTSFlushChan:
@@ -122,6 +165,7 @@ func (o *KokoroOrator) readroutine() {
// but keeping in mind that remainder could be omitted by tokenizer
// Flush remaining text
remaining := o.textBuffer.String()
+ remaining = cleanText(remaining)
o.textBuffer.Reset()
if remaining != "" {
o.logger.Debug("calling Speak with remainder", "rem", remaining)
@@ -138,14 +182,12 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
if provider == "" {
provider = "kokoro"
}
-
switch strings.ToLower(provider) {
case "google", "google-translate", "google_translate":
language := cfg.TTS_LANGUAGE
if language == "" {
language = "en"
}
-
speech := &google_translate_tts.Speech{
Folder: os.TempDir() + "/gf-lt-tts", // Temporary directory for caching
Language: language,
@@ -153,7 +195,6 @@ func NewOrator(log *slog.Logger, cfg *config.Config) Orator {
Speed: cfg.TTS_SPEED,
Handler: &handlers.Beep{},
}
-
orator := &GoogleTranslateOrator{
logger: log,
speech: speech,
@@ -287,9 +328,13 @@ func (o *GoogleTranslateOrator) readroutine() {
}
continue // if only one (often incomplete) sentence; wait for next chunk
}
- o.logger.Debug("calling Speak with sentence", "sent", sentence.Text)
- if err := o.Speak(sentence.Text); err != nil {
- o.logger.Error("tts failed", "sentence", sentence.Text, "error", err)
+ cleanedText := cleanText(sentence.Text)
+ if cleanedText == "" {
+ continue // Skip empty text after cleaning
+ }
+ o.logger.Debug("calling Speak with sentence", "sent", cleanedText)
+ if err := o.Speak(cleanedText); err != nil {
+ o.logger.Error("tts failed", "sentence", cleanedText, "error", err)
}
}
case <-TTSFlushChan:
@@ -307,11 +352,8 @@ func (o *GoogleTranslateOrator) readroutine() {
}
}
}
- // INFO: if there is a lot of text it will take some time to make with tts at once
- // to avoid this pause, it might be better to keep splitting on sentences
- // but keepinig in mind that remainder could be ommited by tokenizer
- // Flush remaining text
remaining := o.textBuffer.String()
+ remaining = cleanText(remaining)
o.textBuffer.Reset()
if remaining != "" {
o.logger.Debug("calling Speak with remainder", "rem", remaining)
diff --git a/extra/tts_test.go b/extra/tts_test.go
new file mode 100644
index 0000000..a21d9b8
--- /dev/null
+++ b/extra/tts_test.go
@@ -0,0 +1,40 @@
+//go:build extra
+// +build extra
+
+package extra
+
+import (
+ "testing"
+)
+
+func TestCleanText(t *testing.T) {
+ tests := []struct {
+ input string
+ expected string
+ }{
+ {"Hello world", "Hello world"},
+ {"**Bold text**", "Bold text"},
+ {"*Italic text*", "Italic text"},
+ {"# Header", "Header"},
+ {"_Underlined text_", "Underlined text"},
+ {"~Strikethrough text~", "Strikethrough text"},
+ {"`Code text`", "Code text"},
+ {"[Link text](url)", "Link text(url)"},
+ {"Mixed *markdown* and #headers#!", "Mixed markdown and headers"},
+ {"<html>tags</html>", "tags"},
+ {"|---|", ""}, // Table separator
+ {"|====|", ""}, // Table separator with equals
+ {"| - - - |", ""}, // Table separator with spaced dashes
+ {"| cell1 | cell2 |", "cell1 cell2"}, // Table row with content
+ {" Trailing spaces ", "Trailing spaces"},
+ {"", ""},
+ {"***", ""},
+ }
+
+ for _, test := range tests {
+ result := cleanText(test.input)
+ if result != test.expected {
+ t.Errorf("cleanText(%q) = %q; expected %q", test.input, result, test.expected)
+ }
+ }
+} \ No newline at end of file