renaming posts to activities and accounts to actors

removed old posthash mechanism
This commit is contained in:
farhan 2021-02-01 12:52:42 +00:00
parent 102ddbe41c
commit 3662535b0d
6 changed files with 92 additions and 90 deletions

View File

@ -27,7 +27,7 @@ type ObjectType struct {
}
// Parsing Unmarshal JSON type
type ReportPost struct {
type ReportActivity struct {
// Retrieved values
Id string `json:"id"`
@ -38,7 +38,6 @@ type ReportPost struct {
// Derived values
normalized string
posthash []byte
}
type AccountType struct {

14
poll.go
View File

@ -47,7 +47,7 @@ type PostInfo struct {
}
func PollMastodonPleroma(endpoint string, o *RunningInstance) {
newposts := make([]ReportPost, 0)
newactivities := make([]ReportActivity, 0)
min_id := ""
@ -147,7 +147,7 @@ func PollMastodonPleroma(endpoint string, o *RunningInstance) {
return
}
err = json.NewDecoder(resp.Body).Decode(&newposts)
err = json.NewDecoder(resp.Body).Decode(&newactivities)
resp.Body.Close() // Release as soon as done
if err != nil {
if parsing_error > 5 {
@ -168,15 +168,15 @@ func PollMastodonPleroma(endpoint string, o *RunningInstance) {
runninginstances[endpoint] = m
ri_mutex.Unlock()
for _, newpost := range newposts {
go check_post(newpost.Uri)
matchset := re.FindStringSubmatch(newpost.Uri)
for _, newactivity := range newactivities {
go check_activity(newactivity.Uri)
matchset := re.FindStringSubmatch(newactivity.Uri)
if matchset != nil {
newinstance := matchset[1]
// Check min_id
if newpost.Id > min_id {
min_id = newpost.Id
if newactivity.Id > min_id {
min_id = newactivity.Id
}
go CheckInstance(newinstance, endpoint)

View File

@ -29,7 +29,7 @@ type PublicKeyType struct {
PublicKeyPem string `json:"publicKeyPem"`
}
type UserJson struct {
type ActorJson struct {
ID string `json:"id"`
Type string `json:"type"`
Inbox string `json:"inbox"`
@ -52,7 +52,6 @@ type PostJson struct {
InReplyTo string `json:"inReplyTo"`
normalized string
posthash []byte
receivedAt time.Time `json:"created_at"`
Content string `json:"content"`
@ -70,34 +69,34 @@ type PostJson struct {
instance string
}
func check_post(uri string) (PostJson, error) {
var postjson PostJson
func check_activity(uri string) (PostJson, error) {
var activityjson PostJson
// Ignore banned
for _, banned := range settings.Banned {
if strings.Index(uri, "https://"+banned+"/") == 0 {
return postjson, errors.New("Banned instance")
return activityjson, errors.New("Banned instance")
}
}
// Ignore invalid URIs
endslash := strings.Index(uri[8:], "/")
if endslash == -1 {
return postjson, errors.New("Invalid URI " + uri)
return activityjson, errors.New("Invalid URI " + uri)
}
postjson.instance = uri[8 : endslash+8]
activityjson.instance = uri[8 : endslash+8]
o, _ := GetRunner(postjson.instance)
o, _ := GetRunner(activityjson.instance)
// Check if there were any recent requests on this
if o.recenturis.Add(uri) != -1 {
return postjson, errors.New("Recently requested within local cache")
return activityjson, errors.New("Recently requested within local cache")
}
selectRet := pool.QueryRow(context.Background(), "SELECT id, inReplyTo, published, summary, content, normalized, attributedto, posthash, received_at FROM posts WHERE id = $1", uri)
err := selectRet.Scan(&postjson.ID, &postjson.InReplyTo, &postjson.Published, &postjson.Summary, &postjson.Content, &postjson.normalized, &postjson.AttributedTo, &postjson.posthash, &postjson.receivedAt)
selectRet := pool.QueryRow(context.Background(), "SELECT id, inReplyTo, published, summary, content, normalized, attributedto, received_at FROM activities WHERE id = $1", uri)
err := selectRet.Scan(&activityjson.ID, &activityjson.InReplyTo, &activityjson.Published, &activityjson.Summary, &activityjson.Content, &activityjson.normalized, &activityjson.AttributedTo, &activityjson.receivedAt)
if err == nil {
return postjson, nil
return activityjson, nil
}
req, _ := http.NewRequest("GET", uri, nil)
@ -106,81 +105,82 @@ func check_post(uri string) (PostJson, error) {
resp, err := DoTries(&o, req)
if err != nil {
return postjson, errors.New("Connection error to " + uri)
return activityjson, errors.New("Connection error to " + uri)
}
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return postjson, errors.New("Read error on " + uri)
return activityjson, errors.New("Read error on " + uri)
}
resp.Body.Close()
err = json.Unmarshal(body, &postjson)
err = json.Unmarshal(body, &activityjson)
if err != nil {
return postjson, err
return activityjson, err
}
if postjson.InReplyTo != "" && postjson.InReplyTo != uri {
if postjson.InReplyTo != uri {
go check_post(postjson.InReplyTo)
if activityjson.InReplyTo != "" && activityjson.InReplyTo != uri {
if activityjson.InReplyTo != uri {
go check_actor(activityjson.InReplyTo)
}
}
// If AttributedTo is blank, this is likely an authentication failure
// For now, skip it...
if postjson.AttributedTo == "" {
return postjson, errors.New("Invalid AttributedTo value on " + uri)
if activityjson.AttributedTo == "" {
return activityjson, errors.New("Invalid AttributedTo value on " + uri)
}
_, err = check_user(postjson.AttributedTo) // This must be done BEFORE the `INSERT INTO posts` below
_, err = check_actor(activityjson.AttributedTo) // This must be done BEFORE the `INSERT INTO activities'` below
if err != nil {
return postjson, err
return activityjson, err
}
postjson.normalized = removeHTMLReg.ReplaceAllString(postjson.Content, " ")
postjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(postjson.normalized)))
postjson.normalized = matchurl.ReplaceAllString(postjson.normalized, "")
postjson.normalized = spaceReg.ReplaceAllString(postjson.normalized, " ")
activityjson.normalized = removeHTMLReg.ReplaceAllString(activityjson.Content, " ")
activityjson.normalized = html.UnescapeString(strings.ToLower(p.Sanitize(activityjson.normalized)))
activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "")
activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ")
_, err = pool.Exec(context.Background(), "INSERT INTO posts (id, inreplyto, published, summary, content, normalized, attributedto, posthash, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9)", postjson.ID, postjson.InReplyTo, postjson.Published, postjson.Summary, postjson.Content, postjson.normalized, postjson.AttributedTo, postjson.posthash, postjson.instance)
_, err = pool.Exec(context.Background(), "INSERT INTO activities (id, inreplyto, published, summary, content, normalized, attributedto, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8)", activityjson.ID, activityjson.InReplyTo, activityjson.Published, activityjson.Summary, activityjson.Content, activityjson.normalized, activityjson.AttributedTo, activityjson.instance)
if err != nil {
return postjson, err
logDebug.Print(err)
return activityjson, err
}
for _, to := range postjson.To {
for _, to := range activityjson.To {
if to != "https://www.w3.org/ns/activitystreams#Public" && to != "" {
if strings.HasSuffix(to, "/followers") == true {
// This check is very much a bad solution, may consider removing the entire for-loop
continue
}
go check_user(to)
go check_actor(to)
}
}
return postjson, nil
return activityjson, nil
}
func check_user(uri string) (UserJson, error) {
var userjson UserJson
func check_actor(uri string) (ActorJson, error) {
var actorjson ActorJson
for _, banned := range settings.Banned {
if strings.Index(uri, "https://"+banned+"/") == 0 {
return userjson, errors.New("Banned instance")
return actorjson, errors.New("Banned instance")
}
}
selectRet := pool.QueryRow(context.Background(), "SELECT id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance FROM accounts WHERE id = $1", uri)
err := selectRet.Scan(&userjson.ID, &userjson.Type, &userjson.Inbox, &userjson.Outbox, &userjson.Followers, &userjson.Following, &userjson.Url, &userjson.PreferredUsername, &userjson.Name, &userjson.Summary, &userjson.Icon.Url, &userjson.Image.Url, &userjson.PublicKey.PublicKeyPem, &userjson.instance)
selectRet := pool.QueryRow(context.Background(), "SELECT id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance FROM actors WHERE id = $1", uri)
err := selectRet.Scan(&actorjson.ID, &actorjson.Type, &actorjson.Inbox, &actorjson.Outbox, &actorjson.Followers, &actorjson.Following, &actorjson.Url, &actorjson.PreferredUsername, &actorjson.Name, &actorjson.Summary, &actorjson.Icon.Url, &actorjson.Image.Url, &actorjson.PublicKey.PublicKeyPem, &actorjson.instance)
if err == nil {
return userjson, nil
return actorjson, nil
}
endslash := strings.Index(uri[8:], "/")
if endslash == -1 {
return userjson, errors.New("Invalid user: " + uri)
return actorjson, errors.New("Invalid user: " + uri)
}
userjson.instance = uri[8 : endslash+8]
actorjson.instance = uri[8 : endslash+8]
o, _ := GetRunner(userjson.instance)
o, _ := GetRunner(actorjson.instance)
req, _ := http.NewRequest("GET", uri, nil)
req.Header.Set("User-Agent", "Tusky")
req.Header.Add("Accept", "application/ld+json")
@ -192,7 +192,7 @@ func check_user(uri string) (UserJson, error) {
if err != nil {
if tries > 10 {
logErr.Print("Unable to connect to "+uri+" attempt 10/10, giving up.")
return userjson, err
return actorjson, err
}
logWarn.Print("Unable to connect to "+uri+", attempt ",tries+1,"+/10 sleeping for 30 seconds.")
time.Sleep(time.Second * 30)
@ -202,21 +202,21 @@ func check_user(uri string) (UserJson, error) {
break
}
err = json.NewDecoder(resp.Body).Decode(&userjson)
err = json.NewDecoder(resp.Body).Decode(&actorjson)
if err != nil {
// Going forward, this might need to be double-checked, but for now just die
tries = tries + 1
return userjson, err
return actorjson, err
}
io.Copy(ioutil.Discard, resp.Body)
resp.Body.Close()
_, err = pool.Exec(context.Background(), "INSERT INTO accounts (id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)", userjson.ID, userjson.Type, userjson.Inbox, userjson.Outbox, userjson.Followers, userjson.Following, userjson.Url, userjson.PreferredUsername, userjson.Name, userjson.Summary, userjson.Icon.Url, userjson.Image.Url, userjson.PublicKey.PublicKeyPem, userjson.instance)
_, err = pool.Exec(context.Background(), "INSERT INTO actors (id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)", actorjson.ID, actorjson.Type, actorjson.Inbox, actorjson.Outbox, actorjson.Followers, actorjson.Following, actorjson.Url, actorjson.PreferredUsername, actorjson.Name, actorjson.Summary, actorjson.Icon.Url, actorjson.Image.Url, actorjson.PublicKey.PublicKeyPem, actorjson.instance)
if err != nil {
return userjson, err
return actorjson, err
}
return userjson, nil
return actorjson, nil
}

View File

@ -74,7 +74,7 @@ func StreamMastodon(endpoint string, o *RunningInstance) {
for s.Scan() {
line := s.Text()
token := strings.SplitN(line, ":", 2)
var newpost ReportPost
var newactivity ReportActivity
if len(token) != 2 {
continue
}
@ -87,7 +87,7 @@ func StreamMastodon(endpoint string, o *RunningInstance) {
switch name {
case "update":
jsondata := token[1][1:]
err := json.Unmarshal([]byte(jsondata), &newpost)
err := json.Unmarshal([]byte(jsondata), &newactivity)
if err != nil {
logDebug.Print("Unable to parse data from "+endpoint+", but still connected.")
continue
@ -100,9 +100,9 @@ func StreamMastodon(endpoint string, o *RunningInstance) {
continue
}
go check_post(newpost.Uri)
go check_activity(newactivity.Uri)
matchset := re.FindStringSubmatch(newpost.Uri)
matchset := re.FindStringSubmatch(newactivity.Uri)
if matchset != nil {
newinstance := matchset[1]
go CheckInstance(newinstance, endpoint)

View File

@ -1,8 +1,8 @@
DROP TABLE IF EXISTS instances;
DROP TABLE IF EXISTS posts;
DROP TABLE IF EXISTS accounts;
DROP TABLE IF EXISTS activities CASCADE;
DROP TABLE IF EXISTS actors CASCADE;
DROP TABLE IF EXISTS instances CASCADE;
CREATE TABLE accounts (
CREATE TABLE actors (
actor_type VARCHAR(1000) NOT NULL,
id VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
inbox VARCHAR(2083) NOT NULL,
@ -20,15 +20,14 @@ CREATE TABLE accounts (
instance VARCHAR(1000) NOT NULL
);
CREATE TABLE posts (
CREATE TABLE activities (
id VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
inreplyto VARCHAR(2083),
published TIMESTAMP with time zone NOT NULL,
summary TEXT,
content TEXT,
normalized TEXT,
attributedto VARCHAR(2083) REFERENCES accounts,
posthash bytea,
attributedto VARCHAR(2083) REFERENCES actors,
received_at TIMESTAMP with time zone DEFAULT now(),
instance VARCHAR(1000) NOT NULL
);
@ -41,3 +40,7 @@ CREATE TABLE instances (
password VARCHAR(32),
software VARCHAR(50)
);
ALTER TABLE activities ADD COLUMN normalized_idx tsvector;
UPDATE activities SET normalized_idx = to_tsvector('english', normalized);
CREATE INDEX ON activities USING gin(normalized_idx);

46
web.go
View File

@ -120,7 +120,7 @@ func inboxHandler() http.HandlerFunc {
return
}
go check_post(createobject.ID)
go check_activity(createobject.ID)
slashend := strings.Index(createobject.ID[8:], "/")
newinstance := createobject.ID[8 : 8+slashend]
log.Print("The at sign is: ", newinstance)
@ -184,35 +184,35 @@ func usersFedilogue(w http.ResponseWriter, r *http.Request) {
endpoints := map[string]string{"oauthAuthorizationEndpoint": "https://" + host + "/oauth/authorize", "oauthRegistrationEndpoint": "https://" + host + "/api/v1/apps", "oauthTokenEndpoint": "https://" + host + "/oauth/token", "sharedInbox": "https://" + host + "/inbox", "uploadMedia": "https://" + host + "/api/ap/upload_media"}
context := []interface{}{"https://www.w3.org/ns/activitystreams", "https://" + host + "/schemas/litepub-0.1.jsonld", contextlist}
userjsonmap := make(map[string]interface{})
userjsonmap["@context"] = context
userjsonmap["attachment"] = attachment
userjsonmap["capabilities"] = capabilities
userjsonmap["discoverable"] = false
userjsonmap["endpoints"] = endpoints
userjsonmap["followers"] = "https://" + host + "/users/fedilogue/followers"
userjsonmap["following"] = "https://" + host + "/users/fedilogue/following"
userjsonmap["id"] = "https://" + host + "/users/fedilogue"
userjsonmap["inbox"] = "https://" + host + "/users/fedilogue/inbox"
userjsonmap["manuallyApprovesFollowers"] = false
userjsonmap["name"] = "Fedilogue Mass Follower"
userjsonmap["outbox"] = "https://" + host + "/users/fedilogue/outbox"
userjsonmap["preferredUsername"] = "fedilogue"
userjsonmap["publicKey"] = publicKey
userjsonmap["summary"] = ""
userjsonmap["tag"] = tag
userjsonmap["type"] = "Person"
userjsonmap["uri"] = "https://" + host + "/users/fedilogue"
actorjsonmap := make(map[string]interface{})
actorjsonmap["@context"] = context
actorjsonmap["attachment"] = attachment
actorjsonmap["capabilities"] = capabilities
actorjsonmap["discoverable"] = false
actorjsonmap["endpoints"] = endpoints
actorjsonmap["followers"] = "https://" + host + "/users/fedilogue/followers"
actorjsonmap["following"] = "https://" + host + "/users/fedilogue/following"
actorjsonmap["id"] = "https://" + host + "/users/fedilogue"
actorjsonmap["inbox"] = "https://" + host + "/users/fedilogue/inbox"
actorjsonmap["manuallyApprovesFollowers"] = false
actorjsonmap["name"] = "Fedilogue Mass Follower"
actorjsonmap["outbox"] = "https://" + host + "/users/fedilogue/outbox"
actorjsonmap["preferredUsername"] = "fedilogue"
actorjsonmap["publicKey"] = publicKey
actorjsonmap["summary"] = ""
actorjsonmap["tag"] = tag
actorjsonmap["type"] = "Person"
actorjsonmap["uri"] = "https://" + host + "/users/fedilogue"
userjsonbin, err := json.Marshal(userjsonmap)
actorjsonbin, err := json.Marshal(actorjsonmap)
if err != nil {
fmt.Println(err.Error())
return
}
userjsonstr := string(userjsonbin)
actorjsonstr := string(actorjsonbin)
w.Header().Set("Content-Type", "application/activity+json; charset=utf-8")
fmt.Fprintf(w, userjsonstr)
fmt.Fprintf(w, actorjsonstr)
}
func errorHandler(w http.ResponseWriter, r *http.Request) {