package main import ( "context" "encoding/json" "errors" "html" "io/ioutil" "io" "net/http" "strings" "time" "regexp" "github.com/microcosm-cc/bluemonday" ) var p *bluemonday.Policy var spaceReg *regexp.Regexp var removeHTMLReg *regexp.Regexp var re *regexp.Regexp var matchurl *regexp.Regexp type ImageType struct { // Type string `json:"type"` Url string `json:"url"` } type PublicKeyType struct { PublicKeyPem string `json:"publicKeyPem"` } type ActorJson struct { ID string `json:"id"` Type string `json:"type"` Inbox string `json:"inbox"` Outbox string `json:"outbox"` Followers string `json:"followers"` Following string `json:"following"` Url string `json:"url"` PreferredUsername string `json:"preferredUsername"` Name string `json:"name"` Summary string `json:"summary"` Icon ImageType `json:"icon"` Image ImageType `json:"image"` PublicKey PublicKeyType `json:"publicKey"` instance string } type PostJson struct { ID string `json:"id"` InReplyTo string `json:"inReplyTo"` normalized string receivedAt time.Time `json:"created_at"` Content string `json:"content"` Conversation string `json:"conversation"` Published time.Time `json:"published"` Source string `json:"source"` Summary string `json:"summary"` // Ignoring tag for now To []string `json:"to"` Type string `json:"type"` Actor string `json:"actor"` AttributedTo string `json:"attributedTo"` instance string } func check_activity(uri string) (PostJson, error) { var activityjson PostJson // Ignore banned for _, banned := range settings.Banned { if strings.Index(uri, "https://"+banned+"/") == 0 { return activityjson, errors.New("Banned instance") } } // Ignore invalid URIs endslash := strings.Index(uri[8:], "/") if endslash == -1 { return activityjson, errors.New("Invalid URI " + uri) } activityjson.instance = uri[8 : endslash+8] o, _ := GetRunner(activityjson.instance) // Check if there were any recent requests on this if o.recenturis.Add(uri) != -1 { return activityjson, errors.New("Recently requested within local cache") } selectRet := pool.QueryRow(context.Background(), "SELECT id, inReplyTo, published, summary, content, normalized, attributedto, received_at FROM activities WHERE id = $1", uri) err := selectRet.Scan(&activityjson.ID, &activityjson.InReplyTo, &activityjson.Published, &activityjson.Summary, &activityjson.Content, &activityjson.normalized, &activityjson.AttributedTo, &activityjson.receivedAt) if err == nil { return activityjson, nil } req, _ := http.NewRequest("GET", uri, nil) req.Header.Set("User-Agent", "Tusky") req.Header.Add("Accept", "application/ld+json") resp, err := DoTries(&o, req) if err != nil { return activityjson, errors.New("Connection error to " + uri) } body, err := ioutil.ReadAll(resp.Body) if err != nil { return activityjson, errors.New("Read error on " + uri) } resp.Body.Close() err = json.Unmarshal(body, &activityjson) if err != nil { return activityjson, err } if activityjson.InReplyTo != "" && activityjson.InReplyTo != uri { if activityjson.InReplyTo != uri { go check_actor(activityjson.InReplyTo) } } // If AttributedTo is blank, this is likely an authentication failure // For now, skip it... if activityjson.AttributedTo == "" { return activityjson, errors.New("Invalid AttributedTo value on " + uri) } _, err = check_actor(activityjson.AttributedTo) // This must be done BEFORE the `INSERT INTO activities'` below if err != nil { return activityjson, err } activityjson.normalized = removeHTMLReg.ReplaceAllString(activityjson.Content, " ") activityjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(activityjson.normalized))) activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "") activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ") _, err = pool.Exec(context.Background(), "INSERT INTO activities (id, inreplyto, published, summary, content, normalized, attributedto, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8)", activityjson.ID, activityjson.InReplyTo, activityjson.Published, activityjson.Summary, activityjson.Content, activityjson.normalized, activityjson.AttributedTo, activityjson.instance) if err != nil { logDebug.Print(err) return activityjson, err } for _, to := range activityjson.To { if to != "https://www.w3.org/ns/activitystreams#Public" && to != "" { if strings.HasSuffix(to, "/followers") == true { // This check is very much a bad solution, may consider removing the entire for-loop continue } go check_actor(to) } } return activityjson, nil } func check_actor(uri string) (ActorJson, error) { var actorjson ActorJson for _, banned := range settings.Banned { if strings.Index(uri, "https://"+banned+"/") == 0 { return actorjson, errors.New("Banned instance") } } selectRet := pool.QueryRow(context.Background(), "SELECT id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance FROM actors WHERE id = $1", uri) err := selectRet.Scan(&actorjson.ID, &actorjson.Type, &actorjson.Inbox, &actorjson.Outbox, &actorjson.Followers, &actorjson.Following, &actorjson.Url, &actorjson.PreferredUsername, &actorjson.Name, &actorjson.Summary, &actorjson.Icon.Url, &actorjson.Image.Url, &actorjson.PublicKey.PublicKeyPem, &actorjson.instance) if err == nil { return actorjson, nil } endslash := strings.Index(uri[8:], "/") if endslash == -1 { return actorjson, errors.New("Invalid user: " + uri) } actorjson.instance = uri[8 : endslash+8] o, _ := GetRunner(actorjson.instance) req, _ := http.NewRequest("GET", uri, nil) req.Header.Set("User-Agent", "Tusky") req.Header.Add("Accept", "application/ld+json") var resp *http.Response tries := 0 for { resp, err = o.client.Do(req) if err != nil { if tries > 10 { logErr.Print("Unable to connect to "+uri+" attempt 10/10, giving up.") return actorjson, err } logWarn.Print("Unable to connect to "+uri+", attempt ",tries+1,"+/10 sleeping for 30 seconds.") time.Sleep(time.Second * 30) tries = tries + 1 continue } break } err = json.NewDecoder(resp.Body).Decode(&actorjson) if err != nil { // Going forward, this might need to be double-checked, but for now just die tries = tries + 1 return actorjson, err } io.Copy(ioutil.Discard, resp.Body) resp.Body.Close() _, err = pool.Exec(context.Background(), "INSERT INTO actors (id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)", actorjson.ID, actorjson.Type, actorjson.Inbox, actorjson.Outbox, actorjson.Followers, actorjson.Following, actorjson.Url, actorjson.PreferredUsername, actorjson.Name, actorjson.Summary, actorjson.Icon.Url, actorjson.Image.Url, actorjson.PublicKey.PublicKeyPem, actorjson.instance) if err != nil { return actorjson, err } return actorjson, nil }