diff --git a/fedilogue.go b/fedilogue.go index 913cfb3..0e12385 100644 --- a/fedilogue.go +++ b/fedilogue.go @@ -32,6 +32,7 @@ func main() { p = bluemonday.NewPolicy() spaceReg = regexp.MustCompile(`[\s\t\.]+`) + removeHTMLReg = regexp.MustCompile(`<\/?\s*br\s*>`) re = regexp.MustCompile("^https?://([^/]*)/(.*)$") for _, endpoint := range settings.Autostart { diff --git a/instance.go b/instance.go index 6cd81fa..6961817 100644 --- a/instance.go +++ b/instance.go @@ -2,19 +2,13 @@ package main import ( "encoding/json" - "github.com/microcosm-cc/bluemonday" "io/ioutil" "net/http" - "regexp" "strings" "time" "net" ) -var p *bluemonday.Policy -var spaceReg *regexp.Regexp -var re *regexp.Regexp - func DoTries(o *RunningInstance, req *http.Request) (*http.Response, error) { var resp *http.Response var err error diff --git a/retrieve.go b/retrieve.go index 00e2abd..5597a25 100644 --- a/retrieve.go +++ b/retrieve.go @@ -10,8 +10,15 @@ import ( "net/http" "strings" "time" + "regexp" + "github.com/microcosm-cc/bluemonday" ) +var p *bluemonday.Policy +var spaceReg *regexp.Regexp +var removeHTMLReg *regexp.Regexp +var re *regexp.Regexp + type ImageType struct { // Type string `json:"type"` Url string `json:"url"` @@ -137,7 +144,8 @@ func check_post(uri string) (PostJson, error) { return postjson, err } - postjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(postjson.Content))) + postjson.normalized = removeHTMLReg.ReplaceAllString(postjson.Content, " ") + postjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(postjson.normalized))) postjson.normalized = spaceReg.ReplaceAllString(postjson.normalized, " ") _, err = pool.Exec(context.Background(), "INSERT INTO posts (id, inreplyto, published, summary, content, normalized, attributedto, posthash, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9)", postjson.ID, postjson.InReplyTo, postjson.Published, postjson.Summary, postjson.Content, postjson.normalized, postjson.AttributedTo, postjson.posthash, postjson.instance)