package main import ( "context" "encoding/json" "errors" "html" "io/ioutil" "net/http" "strings" "time" "regexp" "github.com/microcosm-cc/bluemonday" ) var p *bluemonday.Policy var spaceReg *regexp.Regexp var removeHTMLReg *regexp.Regexp var re *regexp.Regexp var matchurl *regexp.Regexp type ImageType struct { Url string `json:"url"` } type PublicKeyType struct { PublicKeyPem string `json:"publicKeyPem"` } type ActorJson struct { ID string `json:"id"` Type string `json:"type"` Inbox string `json:"inbox"` Outbox string `json:"outbox"` Followers string `json:"followers"` Following string `json:"following"` Url string `json:"url"` PreferredUsername string `json:"preferredUsername"` Name string `json:"name"` Summary string `json:"summary"` Icon ImageType `json:"icon"` Image ImageType `json:"image"` PublicKey PublicKeyType `json:"publicKey"` instance string } type PostJson struct { ID string `json:"id"` InReplyTo string `json:"inReplyTo"` normalized string receivedAt time.Time `json:"created_at"` Content string `json:"content"` Conversation string `json:"conversation"` Published time.Time `json:"published"` Source string `json:"source"` Summary string `json:"summary"` // Ignoring tag for now To []string `json:"to"` Type string `json:"type"` Actor string `json:"actor"` AttributedTo string `json:"attributedTo"` instance string } func check_activity(uri string) (PostJson, error) { var activityjson PostJson // Ignore banned for _, banned := range settings.Banned { if strings.Index(uri, "https://"+banned+"/") == 0 { return activityjson, errors.New("Banned instance") } } // Ignore invalid URIs endslash := strings.Index(uri[8:], "/") if endslash == -1 { return activityjson, errors.New("Invalid URI " + uri) } activityjson.instance = uri[8 : endslash+8] o, _ := GetRunner(activityjson.instance) // Check if there were any recent requests on this if o.recenturis.Add(uri) != -1 { return activityjson, errors.New("Recently requested within local cache") } var jsondocument string selectRet := pool.QueryRow(context.Background(), "SELECT document, normalized FROM activities WHERE document->'id' = $1", uri) err := selectRet.Scan(&activityjson.ID, &jsondocument, &activityjson.normalized) if err == nil { return activityjson, nil } req, _ := http.NewRequest("GET", uri, nil) req.Header.Set("User-Agent", "Tusky") req.Header.Add("Accept", "application/ld+json") resp, err := DoTries(&o, req) if err != nil { return activityjson, errors.New("Connection error to " + uri) } body, err := ioutil.ReadAll(resp.Body) if err != nil { return activityjson, errors.New("Read error on " + uri) } resp.Body.Close() jsondocument = string(body) err = json.Unmarshal(body, &activityjson) if err != nil { return activityjson, err } if activityjson.InReplyTo != "" && activityjson.InReplyTo != uri { if activityjson.InReplyTo != uri { go check_actor(activityjson.InReplyTo) } } // If AttributedTo is blank, this is likely an authentication failure // For now, skip it... if activityjson.AttributedTo == "" { return activityjson, errors.New("Invalid AttributedTo value on " + uri) } _, err = check_actor(activityjson.AttributedTo) // This must be done BEFORE the `INSERT INTO activities'` below if err != nil { return activityjson, err } activityjson.normalized = removeHTMLReg.ReplaceAllString(activityjson.Content, " ") activityjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(activityjson.normalized))) activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "") activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ") _, err = pool.Exec(context.Background(), "INSERT INTO activities (document, normalized, instance) VALUES($1, $2, $3)", jsondocument, activityjson.normalized, activityjson.instance) if err != nil { logDebug.Print(err) return activityjson, err } for _, to := range activityjson.To { if to != "https://www.w3.org/ns/activitystreams#Public" && to != "" { if strings.HasSuffix(to, "/followers") == true { // This check is very much a bad solution, may consider removing the entire for-loop continue } go check_actor(to) } } return activityjson, nil } func check_actor(uri string) (ActorJson, error) { var actorjson ActorJson for _, banned := range settings.Banned { if strings.Index(uri, "https://"+banned+"/") == 0 { return actorjson, errors.New("Banned instance") } } var jsondocument string selectRet := pool.QueryRow(context.Background(), "SELECT document, instance FROM actors WHERE document->'id' = $1", uri) err := selectRet.Scan(&actorjson.ID, &jsondocument, &actorjson.instance) if err == nil { return actorjson, nil } endslash := strings.Index(uri[8:], "/") if endslash == -1 { return actorjson, errors.New("Invalid user: " + uri) } actorjson.instance = uri[8 : endslash+8] logDebug.Print("CHECK: " + uri) o, _ := GetRunner(actorjson.instance) req, _ := http.NewRequest("GET", uri, nil) req.Header.Set("User-Agent", "Tusky") req.Header.Add("Accept", "application/ld+json") var resp *http.Response tries := 0 for { resp, err = o.client.Do(req) if err != nil { if tries > 10 { logErr.Print("Unable to connect to "+uri+" attempt 10/10, giving up.") return actorjson, err } logWarn.Print("Unable to connect to "+uri+", attempt ",tries+1,"+/10 sleeping for 30 seconds.") time.Sleep(time.Second * 30) tries = tries + 1 continue } break } body, err := ioutil.ReadAll(resp.Body) if err != nil { return actorjson, errors.New("Read error on " + uri) } resp.Body.Close() jsondocument = string(body) //logDebug.Print(string(jsondocument)) err = json.Unmarshal(body, &actorjson) if err != nil { return actorjson, err } _, err = pool.Exec(context.Background(), "INSERT INTO actors (document, instance) VALUES($1, $2)", jsondocument, actorjson.instance) if err != nil { logDebug.Print(err) return actorjson, err } return actorjson, nil }