diff --git a/poll/ctl.go b/poll/ctl.go index a5760c7..09d8af5 100644 --- a/poll/ctl.go +++ b/poll/ctl.go @@ -8,12 +8,9 @@ import ( "log" "io" "os" - "github.com/microcosm-cc/bluemonday" ) func startctl(reportPostChan chan ReportPost) { - p = bluemonday.NewPolicy() - log.Print("Starting ctl listener on 127.0.0.1:5555") l, err := net.Listen("tcp", "127.0.0.1:5555") if err != nil { diff --git a/poll/instance.go b/poll/instance.go index e79b91f..ef13bf8 100644 --- a/poll/instance.go +++ b/poll/instance.go @@ -7,6 +7,7 @@ import ( "io/ioutil" "net/http" "strings" + "regexp" "html" "time" "fmt" @@ -14,6 +15,7 @@ import ( ) var p *bluemonday.Policy +var spaceReg *regexp.Regexp func PollMastodonPleroma(endpoint string, reportPostChan chan ReportPost) { newposts := make([]ReportPost, 0) @@ -163,6 +165,8 @@ func PollMastodonPleroma(endpoint string, reportPostChan chan ReportPost) { newpost.posthash = posthash.Sum(nil) newpost.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(newpost.Content))) + newpost.normalized = strings.ReplaceAll(newpost.normalized, "\t", " ") + newpost.normalized = spaceReg.ReplaceAllString(newpost.normalized, " ") // Validate time t, err := time.Parse(time.RFC3339, newpost.Created_at) diff --git a/poll/main.go b/poll/main.go index b33f8ae..de293b3 100644 --- a/poll/main.go +++ b/poll/main.go @@ -4,7 +4,9 @@ import ( _ "net/http/pprof" "net/http" "sync" + "regexp" "log" + "github.com/microcosm-cc/bluemonday" ) // Current instances @@ -26,6 +28,8 @@ func main() { pool := get_db_pool() + p = bluemonday.NewPolicy() + spaceReg = regexp.MustCompile(`\s+`) go startctl(reportPostChan)