Migrate to storing data as a JSONB object in the database

Captures all data and avoids cache misses.
This commit is contained in:
farhan 2021-02-01 20:31:40 +00:00
parent 3662535b0d
commit f262de1dc3
2 changed files with 36 additions and 36 deletions

View File

@ -6,7 +6,6 @@ import (
"errors"
"html"
"io/ioutil"
"io"
"net/http"
"strings"
"time"
@ -21,7 +20,6 @@ var re *regexp.Regexp
var matchurl *regexp.Regexp
// ImageType carries a single image reference: the Url string, which is
// mapped to the JSON key "url" when encoding or decoding.
// NOTE(review): presumably the type of an actor's Icon and Image fields
// (both are accessed as .Url elsewhere in this file) — confirm.
type ImageType struct {
// The JSON "type" member is intentionally not captured; the field is kept
// commented out for reference.
// Type string `json:"type"`
Url string `json:"url"`
}
@ -93,8 +91,10 @@ func check_activity(uri string) (PostJson, error) {
return activityjson, errors.New("Recently requested within local cache")
}
selectRet := pool.QueryRow(context.Background(), "SELECT id, inReplyTo, published, summary, content, normalized, attributedto, received_at FROM activities WHERE id = $1", uri)
err := selectRet.Scan(&activityjson.ID, &activityjson.InReplyTo, &activityjson.Published, &activityjson.Summary, &activityjson.Content, &activityjson.normalized, &activityjson.AttributedTo, &activityjson.receivedAt)
var jsondocument string
selectRet := pool.QueryRow(context.Background(), "SELECT document, normalized FROM activities WHERE document->'id' = $1", uri)
err := selectRet.Scan(&activityjson.ID, &jsondocument, &activityjson.normalized)
if err == nil {
return activityjson, nil
}
@ -114,6 +114,8 @@ func check_activity(uri string) (PostJson, error) {
}
resp.Body.Close()
jsondocument = string(body)
err = json.Unmarshal(body, &activityjson)
if err != nil {
return activityjson, err
@ -127,7 +129,6 @@ func check_activity(uri string) (PostJson, error) {
// If AttributedTo is blank, this is likely an authentication failure
// For now, skip it...
if activityjson.AttributedTo == "" {
return activityjson, errors.New("Invalid AttributedTo value on " + uri)
}
@ -142,7 +143,7 @@ func check_activity(uri string) (PostJson, error) {
activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "")
activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ")
_, err = pool.Exec(context.Background(), "INSERT INTO activities (id, inreplyto, published, summary, content, normalized, attributedto, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8)", activityjson.ID, activityjson.InReplyTo, activityjson.Published, activityjson.Summary, activityjson.Content, activityjson.normalized, activityjson.AttributedTo, activityjson.instance)
_, err = pool.Exec(context.Background(), "INSERT INTO activities (document, normalized, instance) VALUES($1, $2, $3)", jsondocument, activityjson.normalized, activityjson.instance)
if err != nil {
logDebug.Print(err)
return activityjson, err
@ -169,8 +170,9 @@ func check_actor(uri string) (ActorJson, error) {
}
}
selectRet := pool.QueryRow(context.Background(), "SELECT id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance FROM actors WHERE id = $1", uri)
err := selectRet.Scan(&actorjson.ID, &actorjson.Type, &actorjson.Inbox, &actorjson.Outbox, &actorjson.Followers, &actorjson.Following, &actorjson.Url, &actorjson.PreferredUsername, &actorjson.Name, &actorjson.Summary, &actorjson.Icon.Url, &actorjson.Image.Url, &actorjson.PublicKey.PublicKeyPem, &actorjson.instance)
var jsondocument string
selectRet := pool.QueryRow(context.Background(), "SELECT document, instance FROM actors WHERE document->'id' = $1", uri)
err := selectRet.Scan(&actorjson.ID, &jsondocument, &actorjson.instance)
if err == nil {
return actorjson, nil
}
@ -180,6 +182,7 @@ func check_actor(uri string) (ActorJson, error) {
}
actorjson.instance = uri[8 : endslash+8]
logDebug.Print("CHECK: " + uri)
o, _ := GetRunner(actorjson.instance)
req, _ := http.NewRequest("GET", uri, nil)
req.Header.Set("User-Agent", "Tusky")
@ -202,19 +205,23 @@ func check_actor(uri string) (ActorJson, error) {
break
}
err = json.NewDecoder(resp.Body).Decode(&actorjson)
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return actorjson, errors.New("Read error on " + uri)
}
resp.Body.Close()
jsondocument = string(body)
//logDebug.Print(string(jsondocument))
err = json.Unmarshal(body, &actorjson)
if err != nil {
// Going forward, this might need to be double-checked, but for now just die
tries = tries + 1
return actorjson, err
}
io.Copy(ioutil.Discard, resp.Body)
resp.Body.Close()
_, err = pool.Exec(context.Background(), "INSERT INTO actors (id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)", actorjson.ID, actorjson.Type, actorjson.Inbox, actorjson.Outbox, actorjson.Followers, actorjson.Following, actorjson.Url, actorjson.PreferredUsername, actorjson.Name, actorjson.Summary, actorjson.Icon.Url, actorjson.Image.Url, actorjson.PublicKey.PublicKeyPem, actorjson.instance)
_, err = pool.Exec(context.Background(), "INSERT INTO actors (document, instance) VALUES($1, $2)", jsondocument, actorjson.instance)
if err != nil {
logDebug.Print(err)
return actorjson, err
}

View File

@ -3,35 +3,23 @@ DROP TABLE IF EXISTS actors CASCADE;
DROP TABLE IF EXISTS instances CASCADE;
CREATE TABLE actors (
actor_type VARCHAR(1000) NOT NULL,
id VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
inbox VARCHAR(2083) NOT NULL,
outbox VARCHAR(2083) NOT NULL,
followers VARCHAR(2083) NOT NULL,
following VARCHAR(2083) NOT NULL,
url VARCHAR(2083) NOT NULL,
preferredusername VARCHAR(1000) NOT NULL,
name VARCHAR(1000) NOT NULL,
summary TEXT,
icon VARCHAR(2083),
image VARCHAR(2083),
publickey TEXT,
id SERIAL PRIMARY KEY,
document JSONB,
identifiedat TIMESTAMP with time zone DEFAULT now(),
instance VARCHAR(1000) NOT NULL
);
-- Unique expression index over the jsonb value stored under the "id" key of
-- document, so two actors rows cannot hold documents with the same "id".
-- A query only benefits from this index when it filters on the same
-- expression, (document->'id'), which yields jsonb rather than text.
-- Rows whose document has no "id" key evaluate to NULL and are not
-- constrained by the uniqueness check.
CREATE UNIQUE INDEX actors_uri ON actors ( (document->'id') );
CREATE TABLE activities (
id VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
inreplyto VARCHAR(2083),
published TIMESTAMP with time zone NOT NULL,
summary TEXT,
content TEXT,
id SERIAL PRIMARY KEY,
document JSONB,
normalized TEXT,
attributedto VARCHAR(2083) REFERENCES actors,
received_at TIMESTAMP with time zone DEFAULT now(),
instance VARCHAR(1000) NOT NULL
);
-- Unique expression index over the jsonb value stored under the "id" key of
-- document, so two activities rows cannot hold documents with the same "id".
-- A query only benefits from this index when it filters on the same
-- expression, (document->'id'), which yields jsonb rather than text.
-- Rows whose document has no "id" key evaluate to NULL and are not
-- constrained by the uniqueness check.
CREATE UNIQUE INDEX activities_uri ON activities ( (document->'id') );
CREATE TABLE instances (
endpoint VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
autostart BOOLEAN,
@ -44,3 +32,8 @@ CREATE TABLE instances (
-- Full-text search support: keep a tsvector built from the normalized text
-- in its own column and index that column with GIN.
-- NOTE(review): the UPDATE only fills rows that exist when this script runs;
-- nothing visible here keeps normalized_idx in sync for rows inserted
-- later — confirm a trigger or generated column handles that.
ALTER TABLE activities ADD COLUMN normalized_idx tsvector;
UPDATE activities SET normalized_idx = to_tsvector('english', normalized);
CREATE INDEX ON activities USING gin(normalized_idx);
-- No additional indexes are created on actors(id) / activities(id) or on
-- (document->'id'):
--   * id is the PRIMARY KEY of both tables, and PostgreSQL already backs a
--     primary key with a unique B-tree index;
--   * (document->'id') is already covered by the unique indexes actors_uri
--     and activities_uri.
-- A second, non-unique index on the same column or expression would never be
-- preferred by the planner and would only add write overhead and storage.