Removing URLs from normalization

This commit is contained in:
farhan 2021-01-25 20:28:13 -05:00
parent 1203d4f164
commit 1bebd9064c
2 changed files with 3 additions and 0 deletions

View File

@ -34,6 +34,7 @@ func main() {
spaceReg = regexp.MustCompile(`[\s\t\.]+`) spaceReg = regexp.MustCompile(`[\s\t\.]+`)
removeHTMLReg = regexp.MustCompile(`<\/?\s*br\s*>`) removeHTMLReg = regexp.MustCompile(`<\/?\s*br\s*>`)
re = regexp.MustCompile("^https?://([^/]*)/(.*)$") re = regexp.MustCompile("^https?://([^/]*)/(.*)$")
// matchurl matches http:// and https:// URLs so they can be removed from the
// normalized post text. The "s" is the optional part of the scheme.
matchurl = regexp.MustCompile(`https?://[\w\-]+\.[\w\-]+\S*`)
for _, endpoint := range settings.Autostart { for _, endpoint := range settings.Autostart {
logInfo.Print("Autostarting " + endpoint) logInfo.Print("Autostarting " + endpoint)

View File

@ -18,6 +18,7 @@ var p *bluemonday.Policy
var spaceReg *regexp.Regexp var spaceReg *regexp.Regexp
var removeHTMLReg *regexp.Regexp var removeHTMLReg *regexp.Regexp
var re *regexp.Regexp var re *regexp.Regexp
var matchurl *regexp.Regexp
type ImageType struct { type ImageType struct {
// Type string `json:"type"` // Type string `json:"type"`
@ -146,6 +147,7 @@ func check_post(uri string) (PostJson, error) {
postjson.normalized = removeHTMLReg.ReplaceAllString(postjson.Content, " ") postjson.normalized = removeHTMLReg.ReplaceAllString(postjson.Content, " ")
postjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(postjson.normalized))) postjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(postjson.normalized)))
postjson.normalized = matchurl.ReplaceAllString(postjson.normalized, "")
postjson.normalized = spaceReg.ReplaceAllString(postjson.normalized, " ") postjson.normalized = spaceReg.ReplaceAllString(postjson.normalized, " ")
_, err = pool.Exec(context.Background(), "INSERT INTO posts (id, inreplyto, published, summary, content, normalized, attributedto, posthash, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9)", postjson.ID, postjson.InReplyTo, postjson.Published, postjson.Summary, postjson.Content, postjson.normalized, postjson.AttributedTo, postjson.posthash, postjson.instance) _, err = pool.Exec(context.Background(), "INSERT INTO posts (id, inreplyto, published, summary, content, normalized, attributedto, posthash, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9)", postjson.ID, postjson.InReplyTo, postjson.Published, postjson.Summary, postjson.Content, postjson.normalized, postjson.AttributedTo, postjson.posthash, postjson.instance)