package main import ( "context" "encoding/json" //"errors" "html" "io/ioutil" "net/http" "strings" "time" "regexp" "github.com/microcosm-cc/bluemonday" ) var p *bluemonday.Policy var spaceReg *regexp.Regexp var removeHTMLReg *regexp.Regexp var re *regexp.Regexp var matchurl *regexp.Regexp type ImageType struct { Url string `json:"url"` } type PublicKeyType struct { PublicKeyPem string `json:"publicKeyPem"` } type ActorJson struct { id int uri string `json:"id"` Type string `json:"type"` Inbox string `json:"inbox"` Outbox string `json:"outbox"` Followers string `json:"followers"` Following string `json:"following"` Url string `json:"url"` PreferredUsername string `json:"preferredUsername"` Name string `json:"name"` Summary string `json:"summary"` Icon ImageType `json:"icon"` Image ImageType `json:"image"` PublicKey PublicKeyType `json:"publicKey"` instance string } type PostJson struct { id int uri string `json:"id"` InReplyTo string `json:"inReplyTo"` normalized string receivedAt time.Time `json:"created_at"` Content string `json:"content"` Conversation string `json:"conversation"` Published time.Time `json:"published"` Source string `json:"source"` Summary string `json:"summary"` // Ignoring tag for now To []string `json:"to"` Type string `json:"type"` Actor string `json:"actor"` AttributedTo string `json:"attributedTo"` instance string } func check_activity(uri string) { var activityjson PostJson // Ignore banned for _, banned := range settings.Banned { if strings.Index(uri, "https://"+banned+"/") == 0 { //return activityjson, errors.New("Banned instance") return } } // Ignore invalid URIs endslash := strings.Index(uri[8:], "/") if endslash == -1 { //return activityjson, errors.New("Invalid URI " + uri) return } activityjson.instance = uri[8 : endslash+8] o, _ := GetRunner(activityjson.instance) // Check if there were any recent requests on this o.recentactivities.mu.Lock() if o.recentactivities.Add(uri) == true { o.recentactivities.mu.Unlock() //return activityjson, errors.New("Recently requested within local cache") return } o.recentactivities.mu.Unlock() var jsondocument string jsonmap := make(map[string]interface{}) selectRet := pool.QueryRow(context.Background(), "SELECT id, document FROM activities WHERE document->>'id' = $1", uri) err := selectRet.Scan(&activityjson.id, &jsonmap) if err == nil { /////////// BETTER RETURN VALUES!!!!! //return activityjson, nil return } req, _ := http.NewRequest("GET", uri, nil) req.Header.Set("User-Agent", "Tusky") req.Header.Add("Accept", "application/ld+json") resp, err := DoTries(&o, req) if err != nil { //return activityjson, errors.New("Connection error to " + uri) return } body, err := ioutil.ReadAll(resp.Body) if err != nil { //return activityjson, errors.New("Read error on " + uri) return } resp.Body.Close() jsondocument = string(body) err = json.Unmarshal(body, &activityjson) if err != nil { return //return activityjson, err } if activityjson.InReplyTo != "" && activityjson.InReplyTo != uri { if activityjson.InReplyTo != uri { go check_activity(activityjson.InReplyTo) } } // If AttributedTo is blank, this is likely an authentication failure // For now, skip it... if activityjson.AttributedTo == "" { //return activityjson, errors.New("Invalid AttributedTo value on " + uri) return } go check_actor(activityjson.AttributedTo) // This must be done BEFORE the `INSERT INTO activities'` below activityjson.normalized = removeHTMLReg.ReplaceAllString(activityjson.Content, " ") activityjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(activityjson.normalized))) activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "") activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ") _, err = pool.Exec(context.Background(), "INSERT INTO activities (document, normalized, instance) VALUES($1, $2, $3)", jsondocument, activityjson.normalized, activityjson.instance) if err != nil { logWarn.Printf("Error inserting %s into `activities`: %s", uri, err) //return activityjson, err return } for _, to := range activityjson.To { if to != "https://www.w3.org/ns/activitystreams#Public" && to != "" { if strings.HasSuffix(to, "/followers") == true { // This check is very much a bad solution, may consider removing the entire for-loop continue } go check_actor(to) } } //return activityjson, nil } func check_actor(uri string) { var actorjson ActorJson endslash := strings.Index(uri[8:], "/") if endslash == -1 { // return actorjson, errors.New("Invalid user: " + uri) return } actorjson.instance = uri[8 : endslash+8] for _, banned := range settings.Banned { if strings.Index(uri, "https://"+banned+"/") == 0 { // return actorjson, errors.New("Banned instance") return } } // Check if there were any recent requests on this o, _ := GetRunner(actorjson.instance) o.recentactors.mu.Lock() if o.recentactors.Add(uri) == true { o.recentactors.mu.Unlock() return // return actorjson, errors.New("Recently requested actor within local cache") } o.recentactors.mu.Unlock() jsonmap := make(map[string]interface{}) selectRet := pool.QueryRow(context.Background(), "SELECT id, document, instance FROM actors WHERE document->>'id' = $1", uri) err := selectRet.Scan(&actorjson.id, &jsonmap, &actorjson.instance) if err == nil { ///////// BETTER RETURN VALUES //////// // return actorjson, nil return } req, _ := http.NewRequest("GET", uri, nil) req.Header.Set("User-Agent", "Tusky") req.Header.Add("Accept", "application/ld+json") var resp *http.Response tries := 0 for { resp, err = o.client.Do(req) if err != nil { if tries > 10 { logErr.Print("Unable to connect to "+uri+" attempt 10/10, giving up.") // return actorjson, err return } logWarn.Print("Unable to connect to "+uri+", attempt ",tries+1,"+/10 sleeping for 30 seconds.") time.Sleep(time.Second * 30) tries = tries + 1 continue } break } body, err := ioutil.ReadAll(resp.Body) if err != nil { // return actorjson, errors.New("Read error on " + uri) return } resp.Body.Close() jsondocument := string(body) err = json.Unmarshal(body, &actorjson) if err != nil { // return actorjson, err return } _, err = pool.Exec(context.Background(), "INSERT INTO actors (document, instance) VALUES($1, $2)", jsondocument, actorjson.instance) if err != nil { logWarn.Printf("Error inserting %s into `actors`: %s", uri, err) // return actorjson, err return } // return actorjson, nil }