2020-11-10 21:53:46 -05:00
|
|
|
package main
|
|
|
|
|
|
|
|
import (
|
|
|
|
"github.com/microcosm-cc/bluemonday"
|
|
|
|
"encoding/json"
|
|
|
|
"crypto/sha1"
|
|
|
|
"io/ioutil"
|
|
|
|
"net/http"
|
|
|
|
"strings"
|
|
|
|
"html"
|
|
|
|
"time"
|
|
|
|
"fmt"
|
2020-11-24 20:36:47 -05:00
|
|
|
"log"
|
2020-11-10 21:53:46 -05:00
|
|
|
)
|
|
|
|
|
2020-11-17 22:35:59 -05:00
|
|
|
// p is the bluemonday policy whose Sanitize method PollMastodonPleroma applies
// to post content before lower-casing and unescaping it. It is not assigned in
// this part of the file; presumably it is initialized elsewhere before polling
// starts (TODO confirm).
var p *bluemonday.Policy
|
|
|
|
|
2020-11-17 18:28:59 -05:00
|
|
|
func PollMastodonPleroma(endpoint string, reportPostChan chan ReportPost) {
|
2020-11-10 21:53:46 -05:00
|
|
|
newposts := make([]ReportPost, 0)
|
|
|
|
|
2020-11-17 18:28:59 -05:00
|
|
|
min_id := ""
|
2020-11-10 21:53:46 -05:00
|
|
|
|
2020-11-25 18:24:56 -05:00
|
|
|
http_client := http.Client{}
|
2020-11-28 11:42:31 -05:00
|
|
|
parsing_error := 0
|
|
|
|
unprocess_error := 0
|
2020-11-10 21:53:46 -05:00
|
|
|
|
2020-11-17 18:28:59 -05:00
|
|
|
for {
|
2020-11-18 12:53:25 -05:00
|
|
|
ri_mutex.Lock()
|
2020-11-17 19:57:39 -05:00
|
|
|
m := runninginstances[endpoint]
|
2020-11-18 12:53:25 -05:00
|
|
|
ri_mutex.Unlock()
|
2020-11-10 21:53:46 -05:00
|
|
|
|
2020-11-28 11:42:31 -05:00
|
|
|
api_timeline := "https://" + endpoint + "/api/v1/timelines/public?limit=40&since_id=" + min_id
|
2020-11-17 18:28:59 -05:00
|
|
|
resp, err := http_client.Get(api_timeline)
|
2020-11-28 11:42:31 -05:00
|
|
|
m.LastRun = time.Now().Format(time.RFC3339)
|
2020-11-17 18:28:59 -05:00
|
|
|
if err != nil {
|
|
|
|
ri_mutex.Lock()
|
|
|
|
m.Status = CLIENT_ISSUE
|
|
|
|
runninginstances[endpoint] = m
|
|
|
|
ri_mutex.Unlock()
|
2020-11-28 11:42:31 -05:00
|
|
|
log.Print("Failure here", err.Error())
|
2020-11-17 18:28:59 -05:00
|
|
|
return
|
|
|
|
}
|
2020-11-19 00:38:40 -05:00
|
|
|
|
2020-11-28 11:42:31 -05:00
|
|
|
if resp.StatusCode == TOOMANYREQUESTS { // Short Delay, 30 seconds
|
2020-11-30 19:50:21 -05:00
|
|
|
log.Print("Delaying " + endpoint + ", gave status ", resp.StatusCode, ", 1 hour delay")
|
2020-11-19 00:38:40 -05:00
|
|
|
_, _ = ioutil.ReadAll(resp.Body)
|
|
|
|
resp.Body.Close() // Release as soon as done
|
|
|
|
ri_mutex.Lock()
|
2020-11-28 11:42:31 -05:00
|
|
|
m.Status = resp.StatusCode
|
2020-11-17 19:57:39 -05:00
|
|
|
runninginstances[endpoint] = m
|
|
|
|
ri_mutex.Unlock()
|
2020-11-28 11:42:31 -05:00
|
|
|
if unprocess_error > 5 {
|
|
|
|
log.Print("Exiting for " + endpoint)
|
|
|
|
}
|
|
|
|
unprocess_error = unprocess_error + 1
|
2020-11-17 19:57:39 -05:00
|
|
|
time.Sleep(time.Second * 30)
|
|
|
|
continue
|
2020-11-30 19:50:21 -05:00
|
|
|
} else if resp.StatusCode == INTERNAL_ERROR { // Longer delay, 1 hour
|
|
|
|
log.Print("Suspending " + endpoint + ", gave status ", resp.StatusCode, ", 1 hour delay")
|
2020-11-25 18:24:56 -05:00
|
|
|
_, _ = ioutil.ReadAll(resp.Body)
|
|
|
|
resp.Body.Close() // Release as soon as done
|
|
|
|
ri_mutex.Lock()
|
2020-11-28 11:42:31 -05:00
|
|
|
m.Status = 765
|
2020-11-25 18:24:56 -05:00
|
|
|
runninginstances[endpoint] = m
|
|
|
|
ri_mutex.Unlock()
|
|
|
|
time.Sleep(time.Second * 3600)
|
|
|
|
continue
|
2020-11-28 11:42:31 -05:00
|
|
|
} else if resp.StatusCode != 200 { // Crash
|
|
|
|
log.Print("Terminating " + endpoint + ", gave status ", resp.StatusCode)
|
2020-11-25 18:24:56 -05:00
|
|
|
_, _ = ioutil.ReadAll(resp.Body)
|
|
|
|
resp.Body.Close() // Release as soon as done
|
|
|
|
ri_mutex.Lock()
|
2020-11-28 11:42:31 -05:00
|
|
|
m.Status = resp.StatusCode
|
2020-11-25 18:24:56 -05:00
|
|
|
runninginstances[endpoint] = m
|
|
|
|
ri_mutex.Unlock()
|
2020-11-28 11:42:31 -05:00
|
|
|
return
|
2020-11-17 19:57:39 -05:00
|
|
|
}
|
2020-11-25 18:24:56 -05:00
|
|
|
|
2020-11-19 00:38:40 -05:00
|
|
|
err = json.NewDecoder(resp.Body).Decode(&newposts)
|
2020-11-17 18:28:59 -05:00
|
|
|
if err != nil {
|
2020-11-28 11:42:31 -05:00
|
|
|
if parsing_error > 5 {
|
|
|
|
ri_mutex.Lock()
|
|
|
|
m.Status = BAD_RESPONSE
|
|
|
|
runninginstances[endpoint] = m
|
|
|
|
ri_mutex.Unlock()
|
|
|
|
log.Print("Giving up on " + endpoint)
|
|
|
|
return
|
|
|
|
}
|
|
|
|
parsing_error = parsing_error + 1
|
|
|
|
time.Sleep(time.Second * 30)
|
2020-11-10 21:53:46 -05:00
|
|
|
}
|
2020-11-17 19:57:39 -05:00
|
|
|
resp.Body.Close() // Release as soon as done
|
2020-11-10 21:53:46 -05:00
|
|
|
|
2020-11-17 19:57:39 -05:00
|
|
|
ri_mutex.Lock()
|
|
|
|
m.Status = RUNNING
|
|
|
|
runninginstances[endpoint] = m
|
|
|
|
ri_mutex.Unlock()
|
2020-11-10 21:53:46 -05:00
|
|
|
|
2020-11-17 18:28:59 -05:00
|
|
|
for _, newpost := range newposts {
|
|
|
|
if newpost.Account.Acct == "" {
|
|
|
|
continue
|
|
|
|
}
|
|
|
|
posthash := sha1.New()
|
2020-11-10 21:53:46 -05:00
|
|
|
|
2020-11-17 18:28:59 -05:00
|
|
|
at_sign := strings.Index(newpost.Account.Acct, "@")
|
|
|
|
|
|
|
|
if at_sign == -1 {
|
|
|
|
at_sign = len(newpost.Account.Acct)
|
|
|
|
newpost.Account.Acct += "@" + endpoint
|
|
|
|
}
|
|
|
|
|
|
|
|
// Calculate the post hash
|
|
|
|
fmt.Fprint(posthash, newpost.Url)
|
|
|
|
fmt.Fprint(posthash, newpost.normalized)
|
|
|
|
fmt.Fprint(posthash, newpost.Account.Acct)
|
|
|
|
fmt.Fprint(posthash, newpost.Account.Display_name)
|
|
|
|
newpost.posthash = posthash.Sum(nil)
|
|
|
|
|
|
|
|
newpost.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(newpost.Content)))
|
|
|
|
|
2020-11-24 00:00:04 -05:00
|
|
|
// Validate time
|
2020-11-25 18:24:56 -05:00
|
|
|
t, err := time.Parse(time.RFC3339, newpost.Created_at)
|
2020-11-24 00:00:04 -05:00
|
|
|
if err != nil {
|
2020-11-25 18:24:56 -05:00
|
|
|
log.Print("Time was: " + newpost.Created_at)
|
|
|
|
newpost.Created_at = time.Now().Format(time.RFC3339)
|
|
|
|
log.Print("Set to : " + newpost.Created_at)
|
|
|
|
}
|
|
|
|
if t.Unix() < 0 {
|
|
|
|
log.Print("Time was: " + newpost.Created_at)
|
|
|
|
newpost.Created_at = time.Now().Format(time.RFC3339)
|
|
|
|
log.Print("Set to : " + newpost.Created_at)
|
|
|
|
}
|
|
|
|
|
2020-11-28 11:42:31 -05:00
|
|
|
t, err = time.Parse(time.RFC3339, newpost.Account.Created_at)
|
2020-11-25 18:24:56 -05:00
|
|
|
if err != nil {
|
|
|
|
log.Print("Time was: " + newpost.Account.Created_at)
|
|
|
|
newpost.Account.Created_at = time.Now().Format(time.RFC3339)
|
|
|
|
log.Print("Set to : " + newpost.Account.Created_at)
|
|
|
|
}
|
|
|
|
if t.Unix() < 0 {
|
|
|
|
log.Print("Time was: " + newpost.Account.Created_at)
|
|
|
|
newpost.Account.Created_at = time.Now().Format(time.RFC3339)
|
|
|
|
log.Print("Set to : " + newpost.Account.Created_at)
|
2020-11-24 00:00:04 -05:00
|
|
|
}
|
|
|
|
|
2020-11-17 18:28:59 -05:00
|
|
|
reportPostChan <- newpost
|
|
|
|
|
|
|
|
// Check min_id
|
|
|
|
if newpost.Id > min_id {
|
|
|
|
min_id = newpost.Id
|
|
|
|
}
|
|
|
|
|
2020-12-03 17:23:52 -05:00
|
|
|
// Only done if we are crawling
|
|
|
|
if settings.Crawl == true {
|
|
|
|
newinstance := newpost.Account.Acct[at_sign+1:]
|
|
|
|
ri_mutex.Lock()
|
|
|
|
_, exists := runninginstances[newinstance]
|
|
|
|
if exists == false {
|
|
|
|
m := RunningInstance{}
|
|
|
|
runninginstances[newinstance] = m
|
|
|
|
go StartInstance(newinstance, reportPostChan)
|
|
|
|
}
|
|
|
|
|
|
|
|
ri_mutex.Unlock()
|
|
|
|
}
|
2020-11-10 21:53:46 -05:00
|
|
|
}
|
2020-11-17 18:28:59 -05:00
|
|
|
time.Sleep(time.Second * 10)
|
2020-11-10 21:53:46 -05:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// Change this to return a proper "err"
|
2020-11-17 18:28:59 -05:00
|
|
|
func GetNodeInfo(endpoint string) (NodeInfo) {
|
2020-11-25 18:24:56 -05:00
|
|
|
/* Checking order
|
|
|
|
* Mastodon/Pleroma
|
|
|
|
* Um..nothing else yet
|
|
|
|
*/
|
|
|
|
pleromastodon_nodeinfo_url := "https://" + endpoint + "/nodeinfo/2.0.json"
|
|
|
|
//http_client := http.Client{Timeout: 10 * time.Second}
|
|
|
|
http_client := http.Client{}
|
|
|
|
pleromastodon_api_resp, err := http_client.Get(pleromastodon_nodeinfo_url)
|
2020-11-10 21:53:46 -05:00
|
|
|
if err != nil {
|
2020-11-17 18:28:59 -05:00
|
|
|
return NodeInfo{}
|
2020-11-25 18:24:56 -05:00
|
|
|
} else {
|
|
|
|
defer pleromastodon_api_resp.Body.Close()
|
2020-11-10 21:53:46 -05:00
|
|
|
}
|
2020-11-19 00:38:40 -05:00
|
|
|
|
2020-11-25 18:24:56 -05:00
|
|
|
if pleromastodon_api_resp.StatusCode == 200 {
|
|
|
|
var nodeinfo NodeInfo
|
|
|
|
err = json.NewDecoder(pleromastodon_api_resp.Body).Decode(&nodeinfo)
|
2020-11-28 11:42:31 -05:00
|
|
|
if err == nil {
|
|
|
|
defer pleromastodon_api_resp.Body.Close()
|
|
|
|
return nodeinfo
|
2020-11-19 00:38:40 -05:00
|
|
|
}
|
|
|
|
}
|
2020-11-25 18:24:56 -05:00
|
|
|
|
|
|
|
// Check the front page
|
|
|
|
index_url := "https://" + endpoint + "/"
|
|
|
|
resp_index, err := http_client.Get(index_url)
|
|
|
|
if err != nil {
|
2020-11-28 11:42:31 -05:00
|
|
|
log.Print("Unable to connect to " + endpoint + ", giving up")
|
|
|
|
return NodeInfo{}
|
2020-11-25 18:24:56 -05:00
|
|
|
}
|
|
|
|
defer resp_index.Body.Close()
|
|
|
|
indexbin, err := ioutil.ReadAll(resp_index.Body)
|
|
|
|
if err != nil {
|
2020-11-28 11:42:31 -05:00
|
|
|
log.Print("Unable to read index of " + endpoint + ", giving up")
|
|
|
|
return NodeInfo{}
|
2020-11-25 18:24:56 -05:00
|
|
|
}
|
|
|
|
indexstr := string(indexbin)
|
|
|
|
nodeinfo := NodeInfo{}
|
|
|
|
if strings.Contains(indexstr, "Pleroma") || strings.Contains(indexstr, "Soapbox") {
|
|
|
|
log.Print("Manual view: Pleroma" + endpoint)
|
|
|
|
nodeinfo.Software.Name = "pleroma"
|
|
|
|
nodeinfo.Software.Version = "guess"
|
|
|
|
} else if strings.Contains(indexstr, "Mastodon") {
|
|
|
|
log.Print("Manual view: Mastodon" + endpoint)
|
|
|
|
nodeinfo.Software.Name = "mastodon"
|
|
|
|
nodeinfo.Software.Version = "guess"
|
|
|
|
} else if strings.Contains(indexstr, "Gab") {
|
|
|
|
log.Print("Manual view: Gab" + endpoint)
|
|
|
|
nodeinfo.Software.Name = "gab"
|
|
|
|
nodeinfo.Software.Version = "guess"
|
2020-11-19 00:38:40 -05:00
|
|
|
}
|
2020-11-10 21:53:46 -05:00
|
|
|
|
2020-11-17 18:28:59 -05:00
|
|
|
return nodeinfo
|
|
|
|
}
|
|
|
|
|
|
|
|
func StartInstance(endpoint string, reportPostChan chan ReportPost) {
|
|
|
|
nodeinfo := GetNodeInfo(endpoint)
|
|
|
|
if nodeinfo.Software.Name == "" {
|
|
|
|
var m = runninginstances[endpoint]
|
|
|
|
m.Software = ""
|
2020-11-25 18:24:56 -05:00
|
|
|
m.LastRun = time.Now().Format(time.RFC3339)
|
2020-11-17 19:57:39 -05:00
|
|
|
m.Status = UNSUPPORTED_INSTANCE
|
|
|
|
ri_mutex.Lock()
|
2020-11-17 18:28:59 -05:00
|
|
|
runninginstances[endpoint] = m
|
|
|
|
ri_mutex.Unlock()
|
2020-11-10 21:53:46 -05:00
|
|
|
return
|
|
|
|
}
|
2020-11-17 18:28:59 -05:00
|
|
|
|
|
|
|
if nodeinfo.Software.Name == "pleroma" || nodeinfo.Software.Name == "mastodon" {
|
2020-11-24 20:36:47 -05:00
|
|
|
log.Print("Starting " + endpoint + " as Mastodon/Pleroma instance")
|
2020-11-17 18:28:59 -05:00
|
|
|
go PollMastodonPleroma(endpoint, reportPostChan)
|
|
|
|
}
|
|
|
|
|
2020-11-10 21:53:46 -05:00
|
|
|
}
|