2020-12-22 17:35:05 +00:00
package main
import (
"context"
"encoding/json"
2020-12-29 20:20:02 +00:00
"errors"
"html"
2020-12-23 08:20:03 +00:00
"io/ioutil"
2021-01-04 18:40:11 +00:00
"io"
2020-12-22 17:35:05 +00:00
"net/http"
2020-12-29 20:20:02 +00:00
"strings"
"time"
2021-01-14 20:43:20 +00:00
"regexp"
"github.com/microcosm-cc/bluemonday"
2020-12-22 17:35:05 +00:00
)
2021-01-14 20:43:20 +00:00
var p * bluemonday . Policy
var spaceReg * regexp . Regexp
var removeHTMLReg * regexp . Regexp
var re * regexp . Regexp
2021-01-25 20:28:13 -05:00
var matchurl * regexp . Regexp
2021-01-14 20:43:20 +00:00
2020-12-22 17:35:05 +00:00
// ImageType is the part of an image object that is kept when decoding an
// actor's "icon" or "image" member: only the URL.
type ImageType struct {
	// Type string `json:"type"`

	// Url is decoded from the JSON "url" member.
	Url string `json:"url"`
}
// PublicKeyType is the part of an actor's "publicKey" object that is kept:
// only the PEM-encoded key text.
type PublicKeyType struct {
	// PublicKeyPem is decoded from the JSON "publicKeyPem" member.
	PublicKeyPem string `json:"publicKeyPem"`
}
2021-02-01 12:52:42 +00:00
type ActorJson struct {
2020-12-29 20:20:02 +00:00
ID string ` json:"id" `
Type string ` json:"type" `
Inbox string ` json:"inbox" `
Outbox string ` json:"outbox" `
Followers string ` json:"followers" `
Following string ` json:"following" `
Url string ` json:"url" `
PreferredUsername string ` json:"preferredUsername" `
Name string ` json:"name" `
Summary string ` json:"summary" `
Icon ImageType ` json:"icon" `
Image ImageType ` json:"image" `
PublicKey PublicKeyType ` json:"publicKey" `
instance string
2020-12-22 17:35:05 +00:00
}
// PostJson holds the members of a remote activity (post) document that
// check_activity decodes from JSON, plus locally derived values that are
// stored in / loaded from the activities table.
type PostJson struct {
	ID        string `json:"id"`
	InReplyTo string `json:"inReplyTo"`

	// normalized is the searchable form of Content produced by
	// check_activity. It is unexported, so encoding/json ignores it.
	normalized string

	// receivedAt is filled from the received_at column by check_activity.
	// It is unexported, so encoding/json ignores it; the struct tag it used
	// to carry had no effect and has been dropped.
	receivedAt time.Time

	Content      string    `json:"content"`
	Conversation string    `json:"conversation"`
	Published    time.Time `json:"published"`
	Source       string    `json:"source"`
	Summary      string    `json:"summary"`
	// Ignoring tag for now
	To   []string `json:"to"`
	Type string   `json:"type"`

	Actor        string `json:"actor"`
	AttributedTo string `json:"attributedTo"`

	// instance is the host part of the activity URI, set by check_activity.
	instance string
}
2021-02-01 12:52:42 +00:00
func check_activity ( uri string ) ( PostJson , error ) {
var activityjson PostJson
2021-02-01 00:28:20 +00:00
// Ignore banned
2020-12-29 15:47:49 +00:00
for _ , banned := range settings . Banned {
2020-12-29 20:20:02 +00:00
if strings . Index ( uri , "https://" + banned + "/" ) == 0 {
2021-02-01 12:52:42 +00:00
return activityjson , errors . New ( "Banned instance" )
2020-12-29 15:47:49 +00:00
}
}
2020-12-22 17:35:05 +00:00
2021-02-01 00:28:20 +00:00
// Ignore invalid URIs
2020-12-22 17:35:05 +00:00
endslash := strings . Index ( uri [ 8 : ] , "/" )
2020-12-23 08:20:03 +00:00
if endslash == - 1 {
2021-02-01 12:52:42 +00:00
return activityjson , errors . New ( "Invalid URI " + uri )
2020-12-23 08:20:03 +00:00
}
2021-02-01 12:52:42 +00:00
activityjson . instance = uri [ 8 : endslash + 8 ]
2020-12-22 17:35:05 +00:00
2021-02-01 12:52:42 +00:00
o , _ := GetRunner ( activityjson . instance )
2021-02-01 00:28:20 +00:00
// Check if there were any recent requests on this
if o . recenturis . Add ( uri ) != - 1 {
2021-02-01 12:52:42 +00:00
return activityjson , errors . New ( "Recently requested within local cache" )
2021-02-01 00:28:20 +00:00
}
2021-02-01 12:52:42 +00:00
selectRet := pool . QueryRow ( context . Background ( ) , "SELECT id, inReplyTo, published, summary, content, normalized, attributedto, received_at FROM activities WHERE id = $1" , uri )
err := selectRet . Scan ( & activityjson . ID , & activityjson . InReplyTo , & activityjson . Published , & activityjson . Summary , & activityjson . Content , & activityjson . normalized , & activityjson . AttributedTo , & activityjson . receivedAt )
2021-02-01 00:28:20 +00:00
if err == nil {
2021-02-01 12:52:42 +00:00
return activityjson , nil
2021-02-01 00:28:20 +00:00
}
2020-12-22 17:35:05 +00:00
req , _ := http . NewRequest ( "GET" , uri , nil )
2021-01-25 21:06:47 -05:00
req . Header . Set ( "User-Agent" , "Tusky" )
2020-12-22 17:35:05 +00:00
req . Header . Add ( "Accept" , "application/ld+json" )
2021-01-25 21:06:47 -05:00
resp , err := DoTries ( & o , req )
2020-12-23 08:20:03 +00:00
if err != nil {
2021-02-01 12:52:42 +00:00
return activityjson , errors . New ( "Connection error to " + uri )
2020-12-23 08:20:03 +00:00
}
2020-12-22 17:35:05 +00:00
2020-12-23 08:20:03 +00:00
body , err := ioutil . ReadAll ( resp . Body )
if err != nil {
2021-02-01 12:52:42 +00:00
return activityjson , errors . New ( "Read error on " + uri )
2020-12-23 08:20:03 +00:00
}
2021-01-02 07:00:21 +00:00
resp . Body . Close ( )
2021-02-01 12:52:42 +00:00
err = json . Unmarshal ( body , & activityjson )
2020-12-23 08:20:03 +00:00
if err != nil {
2021-02-01 12:52:42 +00:00
return activityjson , err
2020-12-23 08:20:03 +00:00
}
2020-12-22 17:35:05 +00:00
2021-02-01 12:52:42 +00:00
if activityjson . InReplyTo != "" && activityjson . InReplyTo != uri {
if activityjson . InReplyTo != uri {
go check_actor ( activityjson . InReplyTo )
2020-12-29 15:03:52 +00:00
}
2020-12-22 17:35:05 +00:00
}
2020-12-23 08:20:03 +00:00
// If AttributedTo is blank, this is likely an authentication failure
// For now, skip it...
2021-02-01 12:52:42 +00:00
if activityjson . AttributedTo == "" {
return activityjson , errors . New ( "Invalid AttributedTo value on " + uri )
2020-12-23 08:20:03 +00:00
}
2020-12-29 15:03:52 +00:00
2021-02-01 12:52:42 +00:00
_ , err = check_actor ( activityjson . AttributedTo ) // This must be done BEFORE the `INSERT INTO activities'` below
2020-12-29 15:03:52 +00:00
if err != nil {
2021-02-01 12:52:42 +00:00
return activityjson , err
2020-12-29 15:03:52 +00:00
}
2020-12-22 17:35:05 +00:00
2021-02-01 12:52:42 +00:00
activityjson . normalized = removeHTMLReg . ReplaceAllString ( activityjson . Content , " " )
activityjson . normalized = html . UnescapeString ( strings . ToLower ( p . Sanitize ( activityjson . normalized ) ) )
activityjson . normalized = matchurl . ReplaceAllString ( activityjson . normalized , "" )
activityjson . normalized = spaceReg . ReplaceAllString ( activityjson . normalized , " " )
2020-12-25 05:45:08 +00:00
2021-02-01 12:52:42 +00:00
_ , err = pool . Exec ( context . Background ( ) , "INSERT INTO activities (id, inreplyto, published, summary, content, normalized, attributedto, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8)" , activityjson . ID , activityjson . InReplyTo , activityjson . Published , activityjson . Summary , activityjson . Content , activityjson . normalized , activityjson . AttributedTo , activityjson . instance )
2020-12-22 17:35:05 +00:00
if err != nil {
2021-02-01 12:52:42 +00:00
logDebug . Print ( err )
return activityjson , err
2020-12-22 17:35:05 +00:00
}
2021-02-01 12:52:42 +00:00
for _ , to := range activityjson . To {
2020-12-23 08:20:03 +00:00
if to != "https://www.w3.org/ns/activitystreams#Public" && to != "" {
2021-01-04 18:40:11 +00:00
if strings . HasSuffix ( to , "/followers" ) == true {
// This check is very much a bad solution, may consider removing the entire for-loop
continue
}
2021-02-01 12:52:42 +00:00
go check_actor ( to )
2020-12-22 17:35:05 +00:00
}
}
2021-02-01 12:52:42 +00:00
return activityjson , nil
2020-12-22 17:35:05 +00:00
}
2021-02-01 12:52:42 +00:00
func check_actor ( uri string ) ( ActorJson , error ) {
var actorjson ActorJson
2020-12-29 15:47:49 +00:00
for _ , banned := range settings . Banned {
2020-12-29 20:20:02 +00:00
if strings . Index ( uri , "https://" + banned + "/" ) == 0 {
2021-02-01 12:52:42 +00:00
return actorjson , errors . New ( "Banned instance" )
2020-12-29 15:47:49 +00:00
}
}
2020-12-22 17:35:05 +00:00
2021-02-01 12:52:42 +00:00
selectRet := pool . QueryRow ( context . Background ( ) , "SELECT id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance FROM actors WHERE id = $1" , uri )
err := selectRet . Scan ( & actorjson . ID , & actorjson . Type , & actorjson . Inbox , & actorjson . Outbox , & actorjson . Followers , & actorjson . Following , & actorjson . Url , & actorjson . PreferredUsername , & actorjson . Name , & actorjson . Summary , & actorjson . Icon . Url , & actorjson . Image . Url , & actorjson . PublicKey . PublicKeyPem , & actorjson . instance )
2020-12-22 17:35:05 +00:00
if err == nil {
2021-02-01 12:52:42 +00:00
return actorjson , nil
2020-12-22 17:35:05 +00:00
}
endslash := strings . Index ( uri [ 8 : ] , "/" )
2020-12-29 15:03:52 +00:00
if endslash == - 1 {
2021-02-01 12:52:42 +00:00
return actorjson , errors . New ( "Invalid user: " + uri )
2020-12-29 15:03:52 +00:00
}
2021-02-01 12:52:42 +00:00
actorjson . instance = uri [ 8 : endslash + 8 ]
2020-12-22 17:35:05 +00:00
2021-02-01 12:52:42 +00:00
o , _ := GetRunner ( actorjson . instance )
2020-12-22 17:35:05 +00:00
req , _ := http . NewRequest ( "GET" , uri , nil )
2021-01-25 21:06:47 -05:00
req . Header . Set ( "User-Agent" , "Tusky" )
2020-12-22 17:35:05 +00:00
req . Header . Add ( "Accept" , "application/ld+json" )
2021-01-14 19:51:42 +00:00
var resp * http . Response
tries := 0
for {
resp , err = o . client . Do ( req )
if err != nil {
if tries > 10 {
logErr . Print ( "Unable to connect to " + uri + " attempt 10/10, giving up." )
2021-02-01 12:52:42 +00:00
return actorjson , err
2021-01-14 19:51:42 +00:00
}
logWarn . Print ( "Unable to connect to " + uri + ", attempt " , tries + 1 , "+/10 sleeping for 30 seconds." )
time . Sleep ( time . Second * 30 )
tries = tries + 1
continue
}
break
2020-12-22 17:35:05 +00:00
}
2021-02-01 12:52:42 +00:00
err = json . NewDecoder ( resp . Body ) . Decode ( & actorjson )
2020-12-22 17:35:05 +00:00
if err != nil {
2021-01-14 19:51:42 +00:00
// Going forward, this might need to be double-checked, but for now just die
tries = tries + 1
2021-02-01 12:52:42 +00:00
return actorjson , err
2020-12-22 17:35:05 +00:00
}
2021-01-04 18:40:11 +00:00
io . Copy ( ioutil . Discard , resp . Body )
2021-01-02 07:00:21 +00:00
resp . Body . Close ( )
2021-02-01 12:52:42 +00:00
_ , err = pool . Exec ( context . Background ( ) , "INSERT INTO actors (id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)" , actorjson . ID , actorjson . Type , actorjson . Inbox , actorjson . Outbox , actorjson . Followers , actorjson . Following , actorjson . Url , actorjson . PreferredUsername , actorjson . Name , actorjson . Summary , actorjson . Icon . Url , actorjson . Image . Url , actorjson . PublicKey . PublicKeyPem , actorjson . instance )
2020-12-22 17:35:05 +00:00
if err != nil {
2021-02-01 12:52:42 +00:00
return actorjson , err
2020-12-22 17:35:05 +00:00
}
2021-02-01 12:52:42 +00:00
return actorjson , nil
2020-12-22 17:35:05 +00:00
}