summaryrefslogtreecommitdiff
path: root/models/extra.go
blob: 5c60a26b17ea65142b94b5095f76feada9db1df2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
package models

import (
	"regexp"
	"strings"
)

// AudioFormat is a string-typed identifier for an audio format.
type AudioFormat string

// Known AudioFormat values.
const (
	// AFWav is the AudioFormat value "wav".
	AFWav AudioFormat = "wav"
	// AFMP3 is the AudioFormat value "mp3".
	AFMP3 AudioFormat = "mp3"
)

// Patterns and replacers used by CleanText. They are built once at package
// initialisation so CleanText never recompiles a regex per call or per line.
var (
	// threeOrMoreDashesRE matches a run of three or more consecutive hyphens,
	// e.g. a markdown horizontal rule.
	threeOrMoreDashesRE = regexp.MustCompile(`-{3,}`)

	// htmlTagRE matches anything shaped like an HTML tag: "<", any run of
	// non-">" characters, then ">".
	htmlTagRE = regexp.MustCompile(`<[^>]*>`)

	// tableSeparatorRE matches a whitespace-trimmed line that starts and ends
	// with "|" and otherwise contains only "-", "=", ":", "|" and whitespace.
	// This covers single-column rules (|----|), multi-column rules
	// (|---|---|) and alignment rows (|:---|:--:|).
	tableSeparatorRE = regexp.MustCompile(`^\|[-=:|\s]+\|$`)

	// markdownMarkerReplacer deletes single-character markdown markers in one
	// pass over the input.
	markdownMarkerReplacer = strings.NewReplacer(
		"*", "", // Bold/italic markers
		"#", "", // Headers
		"_", "", // Underline/italic markers
		"~", "", // Strikethrough markers
		"`", "", // Code markers
		"[", "", // Link brackets
		"]", "", // Link brackets
		"!", "", // Exclamation marks (all of them, including real punctuation)
	)
)

// CleanText removes markdown and special characters that are not suitable for TTS.
//
// Processing order:
//  1. Every occurrence of * # _ ~ ` [ ] ! is deleted.
//  2. Anything shaped like an HTML tag (<...>) is deleted.
//  3. Lines that are table separator rows are dropped entirely; on every
//     other line the "|" characters are removed and the content is kept.
//  4. Runs of three or more hyphens are deleted.
//  5. Leading and trailing whitespace of the whole text is trimmed.
//
// The returned string is the cleaned text; an empty input yields "".
func CleanText(text string) string {
	text = markdownMarkerReplacer.Replace(text)
	text = htmlTagRE.ReplaceAllString(text, "")

	// Work line by line so table separator rows can be discarded as a whole
	// instead of leaving an empty line behind.
	lines := strings.Split(text, "\n")
	kept := make([]string, 0, len(lines))
	for _, line := range lines {
		if tableSeparatorRE.MatchString(strings.TrimSpace(line)) {
			continue
		}
		// Regular line (including table data rows): drop the column bars only.
		kept = append(kept, strings.ReplaceAll(line, "|", ""))
	}

	text = strings.Join(kept, "\n")
	text = threeOrMoreDashesRE.ReplaceAllString(text, "")
	return strings.TrimSpace(text)
}