package main import ( "context" "encoding/json" "html" "io/ioutil" "net/http" "regexp" "strings" "time" "github.com/microcosm-cc/bluemonday" ) var p *bluemonday.Policy var spaceReg *regexp.Regexp var removeHTMLReg *regexp.Regexp var re *regexp.Regexp var matchurl *regexp.Regexp type ImageType struct { Url string `json:"url"` } type PublicKeyType struct { PublicKeyPem string `json:"publicKeyPem"` } type ActorJson struct { id int Uri string `json:"id"` Type string `json:"type"` Inbox string `json:"inbox"` Outbox string `json:"outbox"` Followers string `json:"followers"` Following string `json:"following"` Url string `json:"url"` PreferredUsername string `json:"preferredUsername"` Name string `json:"name"` Summary string `json:"summary"` Icon ImageType `json:"icon"` Image ImageType `json:"image"` PublicKey PublicKeyType `json:"publicKey"` bot bool instance string } type TagType struct { Type string `json:"type"` Name string `json:"name"` } type PostJson struct { id int Uri string `json:"id"` InReplyTo string `json:"inReplyTo"` normalized string receivedAt time.Time `json:"created_at"` Content string `json:"content"` Conversation string `json:"conversation"` Published time.Time `json:"published"` Summary string `json:"summary"` Tag []TagType `json:"tag"` To []string `json:"to"` Type string `json:"type"` Actor string `json:"actor"` AttributedTo string `json:"attributedTo"` bot bool instance string } func check_activity(uri string) { logDebug("Retrieving: " + uri) var activityjson PostJson // Ignore invalid URIs endslash := strings.Index(uri[8:], "/") if endslash == -1 { return } activityjson.instance = uri[8 : endslash+8] o, _ := GetRunner(activityjson.instance) if o.Banned == true { logDebug("Ignoring banned instance: ", uri) return // Banned instance } // Check if there were any recent requests on this o.Recentactivities.Mu.Lock() i, _ := o.Recentactivities.Contains(uri) if i != -1 { logDebug("Ignoring cached recent request: ", uri) o.Recentactivities.Mu.Unlock() return } o.Recentactivities.Add(uri, "") // Added blank entry o.Recentactivities.Mu.Unlock() var jsondocument string selectRet := pool.QueryRow(context.Background(), "SELECT FROM activities WHERE document->>'id' = $1", uri) err := selectRet.Scan() if err == nil { logDebug("Already in database, ignoring: ", uri) return } req, _ := http.NewRequest("GET", uri, nil) req.Header.Set("User-Agent", "Tusky") req.Header.Add("Accept", "application/ld+json") resp, err := DoTries(&o, req) if err != nil { logDebug("Gave up after multiple tries: ", uri) return } if resp.StatusCode != 200 { logDebug("Non-200 response code for ", uri, " was ", resp.StatusCode) resp.Body.Close() return } body, err := ioutil.ReadAll(resp.Body) if err != nil { logDebug("Failed to read the reply: ", uri) return } resp.Body.Close() jsondocument = string(body) err = json.Unmarshal(body, &activityjson) if err != nil { logDebug("Failed to Unmarshal, err: ", err, " uri: ", uri) return } if activityjson.InReplyTo != "" && activityjson.InReplyTo != uri { if activityjson.InReplyTo != uri { go check_activity(activityjson.InReplyTo) } } // If AttributedTo is blank, this is likely an authentication failure // For now, skip it... if activityjson.AttributedTo == "" { logDebug("AttributedTo field is blank, dropping for ", uri) return } // This must be done BEFORE the `INSERT INTO activities'` below actorjson := check_actor(activityjson.AttributedTo) if actorjson == nil { logDebug("Failed to add actor, dropping post: ", uri) return } if actorjson.bot || o.Alwaysbot { activityjson.bot = true } activityjson.normalized = removeHTMLReg.ReplaceAllString(activityjson.Content, " ") activityjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(activityjson.normalized))) activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "") activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ") var hashtags []string for _, tag := range activityjson.Tag { if tag.Type == "Hashtag" { hashtags = append(hashtags, strings.ToLower(tag.Name)) } } _, err = pool.Exec(context.Background(), "INSERT INTO activities (document, normalized, instance, hashtags, bot) VALUES($1, $2, $3, $4, $5)", jsondocument, activityjson.normalized, activityjson.instance, hashtags, activityjson.bot) if err != nil { logWarn("Error inserting ", uri, " into `activities`: ", err) return } for _, to := range activityjson.To { if to != "https://www.w3.org/ns/activitystreams#Public" && to != "" { if strings.HasSuffix(to, "/followers") { // This check is very much a bad solution, may consider removing the entire for-loop continue } go check_actor(to) } } } /* Test: TestCheck_actor */ func check_actor(uri string) *ActorJson { actorjson := &ActorJson{} if len(uri) <= 7 { return nil // Bad actor } endslash := strings.Index(uri[8:], "/") if endslash == -1 { return nil // Bad actor } actorjson.instance = uri[8 : endslash+8] // Check if there were any recent requests on this o, _ := GetRunner(actorjson.instance) if o.Banned { logDebug("Banned actor: ", uri) return nil // Banned actor } o.Recentactors.Mu.Lock() i, cachedactorjson := o.Recentactors.Contains(uri) if i != -1 { o.Recentactors.Mu.Unlock() cachedactorjson := cachedactorjson.(*ActorJson) return cachedactorjson } o.Recentactors.Mu.Unlock() selectRet := pool.QueryRow(context.Background(), "SELECT document FROM actors WHERE document->>'id' = $1", uri) err := selectRet.Scan(&actorjson) if err == nil { return actorjson // Actor already in database, good! } req, _ := http.NewRequest("GET", uri, nil) req.Header.Set("User-Agent", "Tusky") req.Header.Add("Accept", "application/ld+json") var resp *http.Response tries := 0 for { resp, err = o.Client.Do(req) if err != nil { if tries > 10 { logErr("Unable to connect to " + uri + " attempt 10/10, giving up.") return nil // Unable to connect to host after 10 attempts } logWarn("Unable to connect to "+uri+", attempt ", tries+1, "+/10 sleeping for 30 seconds.") time.Sleep(time.Second * 30) tries = tries + 1 continue } break } body, err := ioutil.ReadAll(resp.Body) if err != nil { logWarn("Unable to read body from ", uri) return nil // Unable to read body of message } resp.Body.Close() jsondocument := string(body) err = json.Unmarshal(body, &actorjson) if err != nil { logWarn("Unable to unmarshal body from ", uri) return nil // Unable to unmarshal body of message } o.Recentactors.Add(uri, actorjson) var bot bool if actorjson.Type == "Service" { actorjson.bot = true } else { actorjson.bot = o.Alwaysbot // default on host's classification } _, err = pool.Exec(context.Background(), "INSERT INTO actors (document, instance, bot) VALUES($1, $2, $3)", jsondocument, actorjson.instance, bot) if err != nil { logWarn("Error inserting ", uri, " into `actors`: ", err) return nil // Unable to insert actor } o.Recentactors.Add(uri, actorjson) return actorjson // Successful }