removing urls from normalization
This commit is contained in:
parent
1203d4f164
commit
1bebd9064c
@ -34,6 +34,7 @@ func main() {
|
||||
spaceReg = regexp.MustCompile(`[\s\t\.]+`)
|
||||
removeHTMLReg = regexp.MustCompile(`<\/?\s*br\s*>`)
|
||||
re = regexp.MustCompile("^https?://([^/]*)/(.*)$")
|
||||
// matchurl matches http:// and https:// URLs so they can be removed from
// the normalized post text. The "s" is the optional part ("https?"); the
// previous "http?s" made the second "t" optional and never matched http://.
matchurl = regexp.MustCompile("https?://[\\w\\-]+\\.[\\w\\-]+\\S*")
|
||||
|
||||
for _, endpoint := range settings.Autostart {
|
||||
logInfo.Print("Autostarting " + endpoint)
|
||||
|
@ -18,6 +18,7 @@ var p *bluemonday.Policy
|
||||
var spaceReg *regexp.Regexp
|
||||
var removeHTMLReg *regexp.Regexp
|
||||
var re *regexp.Regexp
|
||||
var matchurl *regexp.Regexp
|
||||
|
||||
type ImageType struct {
|
||||
// Type string `json:"type"`
|
||||
@ -146,6 +147,7 @@ func check_post(uri string) (PostJson, error) {
|
||||
|
||||
postjson.normalized = removeHTMLReg.ReplaceAllString(postjson.Content, " ")
|
||||
postjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(postjson.normalized)))
|
||||
postjson.normalized = matchurl.ReplaceAllString(postjson.normalized, "")
|
||||
postjson.normalized = spaceReg.ReplaceAllString(postjson.normalized, " ")
|
||||
|
||||
_, err = pool.Exec(context.Background(), "INSERT INTO posts (id, inreplyto, published, summary, content, normalized, attributedto, posthash, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9)", postjson.ID, postjson.InReplyTo, postjson.Published, postjson.Summary, postjson.Content, postjson.normalized, postjson.AttributedTo, postjson.posthash, postjson.instance)
|
||||
|
Loading…
x
Reference in New Issue
Block a user