// Source: fedilogue/poll.go (341 lines, 8.7 KiB, Go).

package main
import (
	"crypto/sha1"
	"encoding/json"
	"fmt"
	"html"
	"io/ioutil"
	"log"
	"net/http"
	"strings"
	"time"
)
// ImageData describes an image attachment (such as an avatar) as it appears
// in a remote JSON document: its declared type and the URL it is served from.
type ImageData struct {
	Type string `json:"type"`
	Url  string `json:"url"`
}
// PublicKeyData is the public-key object embedded in a user document: the
// key's own id, the id of the owning account, and the PEM-encoded key.
type PublicKeyData struct {
	Id           string `json:"id"`
	Owner        string `json:"owner"`
	PublicKeyPem string `json:"publicKeyPem"`
}
type UserInfo struct {
2020-12-17 04:23:25 +00:00
Id string `"json:id"`
Type string `"json:type"`
Following string `"json:following"`
Followers string `"json:followers"`
Inbox string `"json:inbox"`
Outbox string `"json:outbox"`
Featured string `"json:featured"`
PreferredUsername string `"json:preferredUsername"`
PublicKey PublicKeyData `"json:publicKeyPem"`
Name string `"json:name"`
Summary string `"json:summary"`
Url string `"json:Url"`
Icon ImageData `"json:icon"`
Image ImageData `"json:image"`
2020-12-17 04:23:25 +00:00
// ManuallyApprovesFollowers string `"json:manuallyApprovesFollowers"`
// Discoverable bool `"json:discoverable"`
}
// PostInfo holds the fields decoded from a remote post's JSON document by
// fetch_post.
type PostInfo struct {
	Id        string `json:"id"`
	Type      string `json:"type"`
	Published string `json:"published"`
	Url       string `json:"url"`
	Content   string `json:"content"`
}
func fetch_user_info(http_client http.Client, uri string) (UserInfo, error) {
var userinfo UserInfo
2020-12-18 06:06:32 +00:00
accounttype, err := check_user(uri)
if err == nil {
userinfo.Id = uri
userinfo.Type = uri
if accounttype.Bot {
userinfo.Type = "Bot"
} else {
userinfo.Type = "Person"
}
userinfo.PreferredUsername = accounttype.Display_name
// userInfo.Url = Icon =
// userInfo.iconData = ImageData
userinfo.Icon.Type = "Image"
userinfo.Icon.Url = accounttype.Avatar
log.Print("This exit path!!!")
return userinfo, nil
}
req, err := http.NewRequest(http.MethodGet, uri, nil)
if err != nil {
return UserInfo{}, err
}
req.Header.Set("Accept", "application/ld+json")
resp, err := http_client.Do(req)
if err != nil {
return UserInfo{}, err
}
defer resp.Body.Close()
err = json.NewDecoder(resp.Body).Decode(&userinfo)
if err != nil {
return UserInfo{}, err
}
return userinfo, nil
}
func fetch_post(http_client http.Client, uri string) (PostInfo, error) {
var postinfo PostInfo
req, err := http.NewRequest(http.MethodGet, uri, nil)
if err != nil {
return PostInfo{}, err
}
req.Header.Set("Accept", "application/ld+json")
resp, err := http_client.Do(req)
if err != nil {
return PostInfo{}, err
}
defer resp.Body.Close()
err = json.NewDecoder(resp.Body).Decode(&postinfo)
if err != nil {
return PostInfo{}, err
}
return postinfo, nil
}
2020-12-18 06:06:32 +00:00
func PollMastodonPleroma(endpoint string, reportPostChan chan ReportPost, http_client http.Client) {
newposts := make([]ReportPost, 0)
min_id := ""
parsing_error := 0
unprocess_error := 0
use_auth := false
var last_refresh int64
var client_id string
var client_secret string
var oauthData OAuth
var err error
for _, extaccount := range settings.Externalaccounts {
if extaccount.Endpoint == endpoint {
use_auth = true
2020-12-17 04:23:25 +00:00
client_id, client_secret, err = get_client(endpoint, &http_client)
if err != nil {
log.Fatal("Unable to register client: ", err)
}
oauthData, err = oauth_login(endpoint, extaccount.Username, extaccount.Password, client_id, client_secret)
if err != nil {
log.Print("Unable to login: ", err)
return
}
last_refresh = time.Now().Unix()
}
}
for {
ri_mutex.Lock()
m := runninginstances[endpoint]
ri_mutex.Unlock()
api_timeline := "https://" + endpoint + "/api/v1/timelines/public?limit=40&since_id=" + min_id
req, err := http.NewRequest("GET", api_timeline, nil)
if err != nil {
log.Print("Unable to create new request")
return
}
if use_auth == true {
2020-12-17 04:23:25 +00:00
if time.Now().Unix() > last_refresh+oauthData.Expires_in {
oauthData, err = oauth_refresh(endpoint, client_id, client_secret, oauthData.Refresh_token)
if err != nil {
log.Print("Unable to refresh: ", err)
return
}
last_refresh = time.Now().Unix()
}
req.Header.Add("Authorization", oauthData.Access_token)
}
m.LastRun = time.Now().Format(time.RFC3339)
resp, err := http_client.Do(req)
if err != nil {
m.Status = CLIENT_ISSUE
ri_mutex.Lock()
runninginstances[endpoint] = m
ri_mutex.Unlock()
log.Fatal("Failure here", err.Error())
return
}
if resp.StatusCode == TOOMANYREQUESTS { // Short Delay, 30 seconds
2020-12-17 04:23:25 +00:00
log.Print("Delaying "+endpoint+", gave status ", resp.StatusCode, ", 1 hour delay")
_, _ = ioutil.ReadAll(resp.Body)
resp.Body.Close() // Release as soon as done
m.Status = resp.StatusCode
ri_mutex.Lock()
runninginstances[endpoint] = m
ri_mutex.Unlock()
if unprocess_error > 5 {
log.Print("Exiting for " + endpoint)
}
unprocess_error = unprocess_error + 1
time.Sleep(time.Second * 30)
continue
} else if resp.StatusCode == INTERNAL_ERROR { // Longer delay, 1 hour
2020-12-17 04:23:25 +00:00
log.Print("Suspending "+endpoint+", gave status ", resp.StatusCode, ", 1 hour delay")
_, _ = ioutil.ReadAll(resp.Body)
resp.Body.Close() // Release as soon as done
m.Status = 765
ri_mutex.Lock()
runninginstances[endpoint] = m
ri_mutex.Unlock()
time.Sleep(time.Second * 3600)
continue
} else if resp.StatusCode != 200 { // Crash
2020-12-17 04:23:25 +00:00
log.Print("Terminating "+endpoint+", gave status ", resp.StatusCode)
_, _ = ioutil.ReadAll(resp.Body)
resp.Body.Close() // Release as soon as done
m.Status = resp.StatusCode
ri_mutex.Lock()
runninginstances[endpoint] = m
ri_mutex.Unlock()
return
}
err = json.NewDecoder(resp.Body).Decode(&newposts)
if err != nil {
if parsing_error > 5 {
m.Status = BAD_RESPONSE
ri_mutex.Lock()
runninginstances[endpoint] = m
ri_mutex.Unlock()
log.Print("Giving up on " + endpoint)
return
}
parsing_error = parsing_error + 1
time.Sleep(time.Second * 30)
}
resp.Body.Close() // Release as soon as done
m.Status = RUNNING
ri_mutex.Lock()
runninginstances[endpoint] = m
ri_mutex.Unlock()
for _, newpost := range newposts {
if newpost.Account.Acct == "" {
continue
}
at_sign := strings.Index(newpost.Account.Acct, "@")
newinstance := newpost.Account.Acct[at_sign+1:]
// Trust the post if it comes from the same source
if newinstance != endpoint {
ri_mutex.Lock()
o, exist := runninginstances[newinstance]
ri_mutex.Unlock()
if exist == false {
o := RunningInstance{}
new_client := http.Client{}
o.client = new_client
o.Status = KEEPALIVE
ri_mutex.Lock()
runninginstances[newinstance] = o
ri_mutex.Unlock()
}
realuser, err := fetch_user_info(o.client, newpost.Account.Url)
if err != nil {
continue
}
realpost, err := fetch_post(o.client, newpost.Uri)
if err != nil {
continue
}
// Minor verification for now...
newpost.Account.Display_name = realuser.Name
newpost.Content = realpost.Content
newpost.Created_at = realpost.Published
}
posthash := sha1.New()
if at_sign == -1 {
at_sign = len(newpost.Account.Acct)
newpost.Account.Acct += "@" + endpoint
}
// Calculate the post hash
fmt.Fprint(posthash, newpost.Uri)
fmt.Fprint(posthash, newpost.normalized)
fmt.Fprint(posthash, newpost.Account.Acct)
fmt.Fprint(posthash, newpost.Account.Display_name)
newpost.posthash = posthash.Sum(nil)
newpost.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(newpost.Content)))
newpost.normalized = strings.ReplaceAll(newpost.normalized, "\t", " ")
newpost.normalized = spaceReg.ReplaceAllString(newpost.normalized, " ")
// Validate time
t, err := time.Parse(time.RFC3339, newpost.Created_at)
if err != nil {
newpost.Created_at = time.Now().Format(time.RFC3339)
}
if t.Unix() < 0 {
newpost.Created_at = time.Now().Format(time.RFC3339)
}
t, err = time.Parse(time.RFC3339, newpost.Account.Created_at)
if err != nil {
newpost.Account.Created_at = time.Now().Format(time.RFC3339)
}
if t.Unix() < 0 {
newpost.Account.Created_at = time.Now().Format(time.RFC3339)
}
reportPostChan <- newpost
// Check min_id
if newpost.Id > min_id {
min_id = newpost.Id
}
// Only done if we are crawling
2020-12-17 04:23:25 +00:00
if settings.Crawl == true && stringexists(endpoint, settings.Banned) == false {
// Skip over this if its the same as the endpoint
if newinstance == endpoint {
continue
}
ri_mutex.Lock()
o, exists := runninginstances[newinstance]
2020-12-17 04:23:25 +00:00
if exists == false || o.Status == KEEPALIVE {
m := RunningInstance{}
runninginstances[newinstance] = m
2020-12-18 06:06:32 +00:00
go StartInstance(newinstance, reportPostChan)
}
ri_mutex.Unlock()
}
}
time.Sleep(time.Second * 10)
}
}