fedilogue/retrieve.go
2021-02-01 12:52:42 +00:00

223 lines
7.2 KiB
Go

package main
import (
"context"
"encoding/json"
"errors"
"html"
"io/ioutil"
"io"
"net/http"
"strings"
"time"
"regexp"
"github.com/microcosm-cc/bluemonday"
)
// Package-level text-processing helpers. None of them is initialised in this
// part of the file; they are nil until assigned elsewhere.
var (
	// p is the bluemonday policy whose Sanitize method is applied to post
	// content while building the normalized text in check_activity.
	p *bluemonday.Policy

	// spaceReg: every match is replaced by a single space in the last
	// normalization step (presumably it matches runs of whitespace; verify
	// against the place where it is compiled).
	spaceReg *regexp.Regexp

	// removeHTMLReg: every match in the raw post content is replaced by a
	// single space before sanitizing.
	removeHTMLReg *regexp.Regexp

	// re is not referenced by the functions visible in this part of the file.
	re *regexp.Regexp

	// matchurl: every match is deleted from the normalized text.
	matchurl *regexp.Regexp
)
// ImageType is the JSON shape used for an actor's "icon" and "image"
// members. Only the "url" member is decoded; the "type" member is
// deliberately left out for now.
type ImageType struct {
	// Type string `json:"type"`

	// Url is the location of the image.
	Url string `json:"url"`
}
// PublicKeyType is the JSON shape of an actor's "publicKey" member. Only the
// PEM text is decoded.
type PublicKeyType struct {
	// PublicKeyPem is the value of the "publicKeyPem" member.
	PublicKeyPem string `json:"publicKeyPem"`
}
// ActorJson is the subset of a remote actor document that this program
// decodes from JSON and mirrors in the actors table (see check_actor).
type ActorJson struct {
	// Identity.
	ID   string `json:"id"`
	Type string `json:"type"`

	// Collection / endpoint URIs advertised by the actor.
	Inbox     string `json:"inbox"`
	Outbox    string `json:"outbox"`
	Followers string `json:"followers"`
	Following string `json:"following"`
	Url       string `json:"url"`

	// Profile text.
	PreferredUsername string `json:"preferredUsername"`
	Name              string `json:"name"`
	Summary           string `json:"summary"`

	// Nested objects; only their url / publicKeyPem members are kept.
	Icon      ImageType     `json:"icon"`
	Image     ImageType     `json:"image"`
	PublicKey PublicKeyType `json:"publicKey"`

	// instance is unexported and therefore never filled by encoding/json.
	// check_actor sets it from the host part of the actor URI or from the
	// "instance" database column.
	instance string
}
// PostJson is the subset of a remote activity/post document that this
// program decodes from JSON and mirrors in the activities table (see
// check_activity).
type PostJson struct {
	ID        string `json:"id"`
	InReplyTo string `json:"inReplyTo"`

	// normalized is derived from Content by check_activity (HTML removed,
	// lower-cased, URLs stripped, whitespace collapsed). It is unexported and
	// never read from or written to JSON.
	normalized string

	// receivedAt is filled from the received_at database column only. It is
	// unexported, so encoding/json never touches it; the former
	// `json:"created_at"` tag had no effect and was flagged by go vet.
	receivedAt time.Time

	Content      string    `json:"content"`
	Conversation string    `json:"conversation"`
	Published    time.Time `json:"published"`
	Source       string    `json:"source"`
	Summary      string    `json:"summary"`
	// Ignoring tag for now
	To           []string `json:"to"`
	Type         string   `json:"type"`
	Actor        string   `json:"actor"`
	AttributedTo string   `json:"attributedTo"`

	// instance is set by check_activity from the host part of the post URI;
	// it is unexported and not part of the JSON document.
	instance string
}
// check_activity returns the post identified by uri, fetching and storing it
// if it is not yet in the activities table.
//
// Processing order:
//  1. URIs starting with "https://<banned>/" for any entry of settings.Banned
//     are rejected.
//  2. The instance name is the text between byte 8 of the URI (just past
//     "https://") and the next "/". URIs without such a "/" are rejected.
//  3. The per-instance recenturis cache is consulted; any Add result other
//     than -1 is treated as "recently requested" and rejected.
//  4. If a row with this id exists in the activities table it is returned
//     without any network traffic.
//  5. Otherwise the document is fetched, decoded, its author resolved through
//     check_actor, the normalized text computed, and the row inserted.
//
// On a non-nil error the returned PostJson may be partially populated and
// should not be used.
func check_activity(uri string) (PostJson, error) {
	var activityjson PostJson

	// Ignore banned
	for _, banned := range settings.Banned {
		if strings.HasPrefix(uri, "https://"+banned+"/") {
			return activityjson, errors.New("Banned instance")
		}
	}

	// Ignore invalid URIs. The length guard keeps uri[8:] from panicking on
	// inputs shorter than the "https://" prefix.
	if len(uri) < 8 {
		return activityjson, errors.New("Invalid URI " + uri)
	}
	endslash := strings.Index(uri[8:], "/")
	if endslash == -1 {
		return activityjson, errors.New("Invalid URI " + uri)
	}
	activityjson.instance = uri[8 : endslash+8]

	// NOTE(review): the second result of GetRunner is discarded; confirm it
	// carries nothing that has to be handled here.
	o, _ := GetRunner(activityjson.instance)

	// Check if there were any recent requests on this
	if o.recenturis.Add(uri) != -1 {
		return activityjson, errors.New("Recently requested within local cache")
	}

	// Already stored? Then there is nothing to fetch.
	selectRet := pool.QueryRow(context.Background(), "SELECT id, inReplyTo, published, summary, content, normalized, attributedto, received_at FROM activities WHERE id = $1", uri)
	err := selectRet.Scan(&activityjson.ID, &activityjson.InReplyTo, &activityjson.Published, &activityjson.Summary, &activityjson.Content, &activityjson.normalized, &activityjson.AttributedTo, &activityjson.receivedAt)
	if err == nil {
		return activityjson, nil
	}

	// A malformed URI makes NewRequest return a nil request; bail out instead
	// of dereferencing it.
	req, err := http.NewRequest("GET", uri, nil)
	if err != nil {
		return activityjson, err
	}
	req.Header.Set("User-Agent", "Tusky")
	req.Header.Add("Accept", "application/ld+json")

	resp, err := DoTries(&o, req)
	if err != nil {
		return activityjson, errors.New("Connection error to " + uri)
	}

	// Close the body right after reading, on success and on failure alike, so
	// the connection is released before the slower work below.
	body, err := ioutil.ReadAll(resp.Body)
	resp.Body.Close()
	if err != nil {
		return activityjson, errors.New("Read error on " + uri)
	}

	err = json.Unmarshal(body, &activityjson)
	if err != nil {
		return activityjson, err
	}

	// NOTE(review): InReplyTo looks like the URI of another activity, yet it
	// is handed to check_actor. Behaviour kept as-is; confirm whether
	// check_activity was intended.
	if activityjson.InReplyTo != "" && activityjson.InReplyTo != uri {
		go check_actor(activityjson.InReplyTo)
	}

	// If AttributedTo is blank, this is likely an authentication failure
	// For now, skip it...
	if activityjson.AttributedTo == "" {
		return activityjson, errors.New("Invalid AttributedTo value on " + uri)
	}

	_, err = check_actor(activityjson.AttributedTo) // This must be done BEFORE the `INSERT INTO activities'` below
	if err != nil {
		return activityjson, err
	}

	// Build the normalized text: replace removeHTMLReg matches with spaces,
	// sanitize, lower-case, unescape entities, delete matchurl matches and
	// replace spaceReg matches with a single space.
	activityjson.normalized = removeHTMLReg.ReplaceAllString(activityjson.Content, " ")
	activityjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(activityjson.normalized)))
	activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "")
	activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ")

	_, err = pool.Exec(context.Background(), "INSERT INTO activities (id, inreplyto, published, summary, content, normalized, attributedto, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8)", activityjson.ID, activityjson.InReplyTo, activityjson.Published, activityjson.Summary, activityjson.Content, activityjson.normalized, activityjson.AttributedTo, activityjson.instance)
	if err != nil {
		logDebug.Print(err)
		return activityjson, err
	}

	// Look up every explicit recipient in the background, skipping the public
	// collection, empty entries and followers collections.
	for _, to := range activityjson.To {
		if to == "https://www.w3.org/ns/activitystreams#Public" || to == "" {
			continue
		}
		if strings.HasSuffix(to, "/followers") {
			// This check is very much a bad solution, may consider removing the entire for-loop
			continue
		}
		go check_actor(to)
	}

	return activityjson, nil
}
// check_actor returns the actor identified by uri, fetching and storing it if
// it is not yet in the actors table.
//
// Processing order:
//  1. URIs starting with "https://<banned>/" for any entry of settings.Banned
//     are rejected.
//  2. If a row with this id exists in the actors table it is returned without
//     any network traffic.
//  3. The instance name is the text between byte 8 of the URI (just past
//     "https://") and the next "/". URIs without such a "/" are rejected.
//  4. The document is requested up to maxTries times, sleeping 30 seconds
//     between failed attempts, then decoded and inserted into the table.
//
// On a non-nil error the returned ActorJson may be partially populated and
// should not be used.
func check_actor(uri string) (ActorJson, error) {
	// maxTries is the number of connection attempts; the log messages below
	// hard-code the same figure.
	const maxTries = 10

	var actorjson ActorJson

	for _, banned := range settings.Banned {
		if strings.HasPrefix(uri, "https://"+banned+"/") {
			return actorjson, errors.New("Banned instance")
		}
	}

	// Already stored? Then there is nothing to fetch.
	selectRet := pool.QueryRow(context.Background(), "SELECT id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance FROM actors WHERE id = $1", uri)
	err := selectRet.Scan(&actorjson.ID, &actorjson.Type, &actorjson.Inbox, &actorjson.Outbox, &actorjson.Followers, &actorjson.Following, &actorjson.Url, &actorjson.PreferredUsername, &actorjson.Name, &actorjson.Summary, &actorjson.Icon.Url, &actorjson.Image.Url, &actorjson.PublicKey.PublicKeyPem, &actorjson.instance)
	if err == nil {
		return actorjson, nil
	}

	// The length guard keeps uri[8:] from panicking on inputs shorter than
	// the "https://" prefix.
	if len(uri) < 8 {
		return actorjson, errors.New("Invalid user: " + uri)
	}
	endslash := strings.Index(uri[8:], "/")
	if endslash == -1 {
		return actorjson, errors.New("Invalid user: " + uri)
	}
	actorjson.instance = uri[8 : endslash+8]

	// NOTE(review): the second result of GetRunner is discarded; confirm it
	// carries nothing that has to be handled here.
	o, _ := GetRunner(actorjson.instance)

	// A malformed URI makes NewRequest return a nil request; bail out instead
	// of dereferencing it.
	req, err := http.NewRequest("GET", uri, nil)
	if err != nil {
		return actorjson, err
	}
	req.Header.Set("User-Agent", "Tusky")
	req.Header.Add("Accept", "application/ld+json")

	// Exactly maxTries attempts; the previous loop made two more than its
	// log messages claimed.
	var resp *http.Response
	for tries := 1; ; tries++ {
		resp, err = o.client.Do(req)
		if err == nil {
			break
		}
		if tries >= maxTries {
			logErr.Print("Unable to connect to " + uri + " attempt 10/10, giving up.")
			return actorjson, err
		}
		logWarn.Print("Unable to connect to "+uri+", attempt ", tries, "/10 sleeping for 30 seconds.")
		time.Sleep(time.Second * 30)
	}

	// Drain and close the body whether or not decoding succeeds, so the
	// connection is never leaked and can be reused by the transport.
	err = json.NewDecoder(resp.Body).Decode(&actorjson)
	io.Copy(ioutil.Discard, resp.Body)
	resp.Body.Close()
	if err != nil {
		// Going forward, this might need to be double-checked, but for now just die
		return actorjson, err
	}

	_, err = pool.Exec(context.Background(), "INSERT INTO actors (id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)", actorjson.ID, actorjson.Type, actorjson.Inbox, actorjson.Outbox, actorjson.Followers, actorjson.Following, actorjson.Url, actorjson.PreferredUsername, actorjson.Name, actorjson.Summary, actorjson.Icon.Url, actorjson.Image.Url, actorjson.PublicKey.PublicKeyPem, actorjson.instance)
	if err != nil {
		return actorjson, err
	}
	return actorjson, nil
}