fedilogue/retrieve.go

228 lines
7.0 KiB
Go
Raw Normal View History

package main
import (
"context"
"encoding/json"
2020-12-29 20:20:02 +00:00
"errors"
"html"
2020-12-23 08:20:03 +00:00
"io/ioutil"
"io"
"net/http"
2020-12-29 20:20:02 +00:00
"strings"
"time"
2021-01-14 20:43:20 +00:00
"regexp"
"github.com/microcosm-cc/bluemonday"
)
2021-01-14 20:43:20 +00:00
// Package-level helpers used when normalising post content. Nothing in this
// file assigns them, so they are presumably initialised elsewhere before
// check_post runs — TODO confirm.
var (
	// p sanitises post content (check_post calls p.Sanitize).
	p *bluemonday.Policy
	// spaceReg matches spans that check_post replaces with a single space
	// after sanitising.
	spaceReg *regexp.Regexp
	// removeHTMLReg matches spans that check_post replaces with a space
	// before sanitising.
	removeHTMLReg *regexp.Regexp
	// re is not referenced by the code visible in this file.
	re *regexp.Regexp
)
// ImageType holds the part of an image object that is kept from the "icon"
// and "image" members of a user document: only its URL.
type ImageType struct {
	// Type string `json:"type"`
	Url string `json:"url"`
}
// PublicKeyType holds the PEM text taken from the "publicKey" member of a
// user document.
type PublicKeyType struct {
	PublicKeyPem string `json:"publicKeyPem"`
}
type UserJson struct {
2020-12-29 20:20:02 +00:00
ID string `json:"id"`
Type string `json:"type"`
Inbox string `json:"inbox"`
Outbox string `json:"outbox"`
Followers string `json:"followers"`
Following string `json:"following"`
Url string `json:"url"`
PreferredUsername string `json:"preferredUsername"`
Name string `json:"name"`
Summary string `json:"summary"`
Icon ImageType `json:"icon"`
Image ImageType `json:"image"`
PublicKey PublicKeyType `json:"publicKey"`
instance string
}
// PostJson is the subset of a remote post document that check_post decodes
// from JSON and stores in the posts table, plus locally derived fields.
//
// The unexported fields are never populated by encoding/json; they are set
// by check_post or scanned from the database.
type PostJson struct {
	ID        string `json:"id"`
	InReplyTo string `json:"inReplyTo"`

	// normalized is the lower-cased, sanitised form of Content built by
	// check_post.
	normalized string
	// posthash is stored in and read from the posts table; the code in this
	// file never computes it.
	posthash []byte
	// receivedAt is scanned from the received_at column. It previously had a
	// json tag, which has no effect on an unexported field and is reported
	// by go vet, so the tag was dropped.
	receivedAt time.Time

	Content      string    `json:"content"`
	Conversation string    `json:"conversation"`
	Published    time.Time `json:"published"`
	Source       string    `json:"source"`
	Summary      string    `json:"summary"`
	// Ignoring tag for now
	To   []string `json:"to"`
	Type string   `json:"type"`

	Actor        string `json:"actor"`
	AttributedTo string `json:"attributedTo"`

	// instance is the host part of the post's URI, set by check_post.
	instance string
}
2020-12-29 20:20:02 +00:00
func GetHTTPSession(endpoint string) RunningInstance {
2020-12-22 19:46:34 +00:00
ri_mutex.Lock()
o, exist := runninginstances[endpoint]
ri_mutex.Unlock()
if exist == false {
o := RunningInstance{}
new_client := http.Client{}
o.client = new_client
o.Status = KEEPALIVE
ri_mutex.Lock()
runninginstances[endpoint] = o
ri_mutex.Unlock()
}
return o
}
func check_post(uri string) (PostJson, error) {
var postjson PostJson
2020-12-29 15:47:49 +00:00
for _, banned := range settings.Banned {
2020-12-29 20:20:02 +00:00
if strings.Index(uri, "https://"+banned+"/") == 0 {
2020-12-29 15:47:49 +00:00
return postjson, errors.New("Banned instance")
}
}
selectRet := pool.QueryRow(context.Background(), "SELECT id, inReplyTo, published, summary, content, normalized, attributedto, posthash, received_at FROM posts WHERE id = $1", uri)
err := selectRet.Scan(&postjson.ID, &postjson.InReplyTo, &postjson.Published, &postjson.Summary, &postjson.Content, &postjson.normalized, &postjson.AttributedTo, &postjson.posthash, &postjson.receivedAt)
if err == nil {
return postjson, nil
}
endslash := strings.Index(uri[8:], "/")
2020-12-23 08:20:03 +00:00
if endslash == -1 {
2020-12-29 15:47:49 +00:00
return postjson, errors.New("Invalid URI " + uri)
2020-12-23 08:20:03 +00:00
}
2020-12-29 20:20:02 +00:00
postjson.instance = uri[8 : endslash+8]
2020-12-22 19:46:34 +00:00
o := GetHTTPSession(postjson.instance)
req, _ := http.NewRequest("GET", uri, nil)
req.Header.Add("Accept", "application/ld+json")
2020-12-22 19:46:34 +00:00
resp, err := o.client.Do(req)
2020-12-23 08:20:03 +00:00
if err != nil {
2020-12-29 15:47:49 +00:00
return postjson, errors.New("Connection error to " + uri)
2020-12-23 08:20:03 +00:00
}
2020-12-23 08:20:03 +00:00
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
2020-12-29 15:47:49 +00:00
return postjson, errors.New("Read error on " + uri)
2020-12-23 08:20:03 +00:00
}
resp.Body.Close()
2020-12-23 08:20:03 +00:00
err = json.Unmarshal(body, &postjson)
if err != nil {
2020-12-29 15:47:49 +00:00
return postjson, err
2020-12-23 08:20:03 +00:00
}
if postjson.InReplyTo != "" && postjson.InReplyTo != uri {
if postjson.InReplyTo != uri {
go check_post(postjson.InReplyTo)
}
}
2020-12-23 08:20:03 +00:00
// If AttributedTo is blank, this is likely an authentication failure
// For now, skip it...
if postjson.AttributedTo == "" {
2020-12-29 15:47:49 +00:00
return postjson, errors.New("Invalid AttributedTo value on " + uri)
2020-12-23 08:20:03 +00:00
}
_, err = check_user(postjson.AttributedTo) // This must be done BEFORE the `INSERT INTO posts` below
if err != nil {
return postjson, err
}
2021-01-14 20:43:20 +00:00
postjson.normalized = removeHTMLReg.ReplaceAllString(postjson.Content, " ")
postjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(postjson.normalized)))
postjson.normalized = spaceReg.ReplaceAllString(postjson.normalized, " ")
2020-12-25 05:45:08 +00:00
_, err = pool.Exec(context.Background(), "INSERT INTO posts (id, inreplyto, published, summary, content, normalized, attributedto, posthash, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9)", postjson.ID, postjson.InReplyTo, postjson.Published, postjson.Summary, postjson.Content, postjson.normalized, postjson.AttributedTo, postjson.posthash, postjson.instance)
if err != nil {
return postjson, err
}
2020-12-29 20:20:02 +00:00
for _, to := range postjson.To {
2020-12-23 08:20:03 +00:00
if to != "https://www.w3.org/ns/activitystreams#Public" && to != "" {
if strings.HasSuffix(to, "/followers") == true {
// This check is very much a bad solution, may consider removing the entire for-loop
continue
}
go check_user(to)
}
}
return postjson, nil
}
func check_user(uri string) (UserJson, error) {
var userjson UserJson
2020-12-29 15:47:49 +00:00
for _, banned := range settings.Banned {
2020-12-29 20:20:02 +00:00
if strings.Index(uri, "https://"+banned+"/") == 0 {
2020-12-29 15:47:49 +00:00
return userjson, errors.New("Banned instance")
}
}
selectRet := pool.QueryRow(context.Background(), "SELECT id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance FROM accounts WHERE id = $1", uri)
err := selectRet.Scan(&userjson.ID, &userjson.Type, &userjson.Inbox, &userjson.Outbox, &userjson.Followers, &userjson.Following, &userjson.Url, &userjson.PreferredUsername, &userjson.Name, &userjson.Summary, &userjson.Icon.Url, &userjson.Image.Url, &userjson.PublicKey.PublicKeyPem, &userjson.instance)
if err == nil {
2020-12-29 17:30:26 +00:00
return userjson, nil
}
endslash := strings.Index(uri[8:], "/")
if endslash == -1 {
2020-12-29 15:47:49 +00:00
return userjson, errors.New("Invalid user: " + uri)
}
2020-12-29 20:20:02 +00:00
userjson.instance = uri[8 : endslash+8]
2020-12-22 19:46:34 +00:00
o := GetHTTPSession(userjson.instance)
req, _ := http.NewRequest("GET", uri, nil)
req.Header.Add("Accept", "application/ld+json")
var resp *http.Response
tries := 0
for {
resp, err = o.client.Do(req)
if err != nil {
if tries > 10 {
logErr.Print("Unable to connect to "+uri+" attempt 10/10, giving up.")
return userjson, err
}
logWarn.Print("Unable to connect to "+uri+", attempt ",tries+1,"+/10 sleeping for 30 seconds.")
time.Sleep(time.Second * 30)
tries = tries + 1
continue
}
break
}
err = json.NewDecoder(resp.Body).Decode(&userjson)
if err != nil {
// Going forward, this might need to be double-checked, but for now just die
// log.Fatal("Retrieval error 2: ", err)
tries = tries + 1
return userjson, err
}
io.Copy(ioutil.Discard, resp.Body)
resp.Body.Close()
_, err = pool.Exec(context.Background(), "INSERT INTO accounts (id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)", userjson.ID, userjson.Type, userjson.Inbox, userjson.Outbox, userjson.Followers, userjson.Following, userjson.Url, userjson.PreferredUsername, userjson.Name, userjson.Summary, userjson.Icon.Url, userjson.Image.Url, userjson.PublicKey.PublicKeyPem, userjson.instance)
if err != nil {
return userjson, err
}
return userjson, nil
}