Migrate to storing data as a JSONB object in the database

Captures all data and avoids cache misses.
This commit is contained in:
farhan 2021-02-01 20:31:40 +00:00
parent 3662535b0d
commit f262de1dc3
2 changed files with 36 additions and 36 deletions

View File

@ -6,7 +6,6 @@ import (
"errors" "errors"
"html" "html"
"io/ioutil" "io/ioutil"
"io"
"net/http" "net/http"
"strings" "strings"
"time" "time"
@ -21,7 +20,6 @@ var re *regexp.Regexp
var matchurl *regexp.Regexp var matchurl *regexp.Regexp
type ImageType struct { type ImageType struct {
// Type string `json:"type"`
Url string `json:"url"` Url string `json:"url"`
} }
@ -93,8 +91,10 @@ func check_activity(uri string) (PostJson, error) {
return activityjson, errors.New("Recently requested within local cache") return activityjson, errors.New("Recently requested within local cache")
} }
selectRet := pool.QueryRow(context.Background(), "SELECT id, inReplyTo, published, summary, content, normalized, attributedto, received_at FROM activities WHERE id = $1", uri) var jsondocument string
err := selectRet.Scan(&activityjson.ID, &activityjson.InReplyTo, &activityjson.Published, &activityjson.Summary, &activityjson.Content, &activityjson.normalized, &activityjson.AttributedTo, &activityjson.receivedAt)
selectRet := pool.QueryRow(context.Background(), "SELECT document, normalized FROM activities WHERE document->'id' = $1", uri)
err := selectRet.Scan(&activityjson.ID, &jsondocument, &activityjson.normalized)
if err == nil { if err == nil {
return activityjson, nil return activityjson, nil
} }
@ -114,6 +114,8 @@ func check_activity(uri string) (PostJson, error) {
} }
resp.Body.Close() resp.Body.Close()
jsondocument = string(body)
err = json.Unmarshal(body, &activityjson) err = json.Unmarshal(body, &activityjson)
if err != nil { if err != nil {
return activityjson, err return activityjson, err
@ -127,7 +129,6 @@ func check_activity(uri string) (PostJson, error) {
// If AttributedTo is blank, this is likely an authentication failure // If AttributedTo is blank, this is likely an authentication failure
// For now, skip it... // For now, skip it...
if activityjson.AttributedTo == "" { if activityjson.AttributedTo == "" {
return activityjson, errors.New("Invalid AttributedTo value on " + uri) return activityjson, errors.New("Invalid AttributedTo value on " + uri)
} }
@ -142,7 +143,7 @@ func check_activity(uri string) (PostJson, error) {
activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "") activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "")
activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ") activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ")
_, err = pool.Exec(context.Background(), "INSERT INTO activities (id, inreplyto, published, summary, content, normalized, attributedto, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8)", activityjson.ID, activityjson.InReplyTo, activityjson.Published, activityjson.Summary, activityjson.Content, activityjson.normalized, activityjson.AttributedTo, activityjson.instance) _, err = pool.Exec(context.Background(), "INSERT INTO activities (document, normalized, instance) VALUES($1, $2, $3)", jsondocument, activityjson.normalized, activityjson.instance)
if err != nil { if err != nil {
logDebug.Print(err) logDebug.Print(err)
return activityjson, err return activityjson, err
@ -169,8 +170,9 @@ func check_actor(uri string) (ActorJson, error) {
} }
} }
selectRet := pool.QueryRow(context.Background(), "SELECT id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance FROM actors WHERE id = $1", uri) var jsondocument string
err := selectRet.Scan(&actorjson.ID, &actorjson.Type, &actorjson.Inbox, &actorjson.Outbox, &actorjson.Followers, &actorjson.Following, &actorjson.Url, &actorjson.PreferredUsername, &actorjson.Name, &actorjson.Summary, &actorjson.Icon.Url, &actorjson.Image.Url, &actorjson.PublicKey.PublicKeyPem, &actorjson.instance) selectRet := pool.QueryRow(context.Background(), "SELECT document, instance FROM actors WHERE document->'id' = $1", uri)
err := selectRet.Scan(&actorjson.ID, &jsondocument, &actorjson.instance)
if err == nil { if err == nil {
return actorjson, nil return actorjson, nil
} }
@ -180,6 +182,7 @@ func check_actor(uri string) (ActorJson, error) {
} }
actorjson.instance = uri[8 : endslash+8] actorjson.instance = uri[8 : endslash+8]
logDebug.Print("CHECK: " + uri)
o, _ := GetRunner(actorjson.instance) o, _ := GetRunner(actorjson.instance)
req, _ := http.NewRequest("GET", uri, nil) req, _ := http.NewRequest("GET", uri, nil)
req.Header.Set("User-Agent", "Tusky") req.Header.Set("User-Agent", "Tusky")
@ -202,19 +205,23 @@ func check_actor(uri string) (ActorJson, error) {
break break
} }
err = json.NewDecoder(resp.Body).Decode(&actorjson) body, err := ioutil.ReadAll(resp.Body)
if err != nil {
return actorjson, errors.New("Read error on " + uri)
}
resp.Body.Close()
jsondocument = string(body)
//logDebug.Print(string(jsondocument))
err = json.Unmarshal(body, &actorjson)
if err != nil { if err != nil {
// Going forward, this might need to be double-checked, but for now just die
tries = tries + 1
return actorjson, err return actorjson, err
} }
io.Copy(ioutil.Discard, resp.Body) _, err = pool.Exec(context.Background(), "INSERT INTO actors (document, instance) VALUES($1, $2)", jsondocument, actorjson.instance)
resp.Body.Close()
_, err = pool.Exec(context.Background(), "INSERT INTO actors (id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)", actorjson.ID, actorjson.Type, actorjson.Inbox, actorjson.Outbox, actorjson.Followers, actorjson.Following, actorjson.Url, actorjson.PreferredUsername, actorjson.Name, actorjson.Summary, actorjson.Icon.Url, actorjson.Image.Url, actorjson.PublicKey.PublicKeyPem, actorjson.instance)
if err != nil { if err != nil {
logDebug.Print(err)
return actorjson, err return actorjson, err
} }

View File

@ -3,35 +3,23 @@ DROP TABLE IF EXISTS actors CASCADE;
DROP TABLE IF EXISTS instances CASCADE; DROP TABLE IF EXISTS instances CASCADE;
CREATE TABLE actors ( CREATE TABLE actors (
actor_type VARCHAR(1000) NOT NULL, id SERIAL PRIMARY KEY,
id VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE, document JSONB,
inbox VARCHAR(2083) NOT NULL,
outbox VARCHAR(2083) NOT NULL,
followers VARCHAR(2083) NOT NULL,
following VARCHAR(2083) NOT NULL,
url VARCHAR(2083) NOT NULL,
preferredusername VARCHAR(1000) NOT NULL,
name VARCHAR(1000) NOT NULL,
summary TEXT,
icon VARCHAR(2083),
image VARCHAR(2083),
publickey TEXT,
identifiedat TIMESTAMP with time zone DEFAULT now(), identifiedat TIMESTAMP with time zone DEFAULT now(),
instance VARCHAR(1000) NOT NULL instance VARCHAR(1000) NOT NULL
); );
CREATE UNIQUE INDEX actors_uri ON actors ( (document->'id') );
CREATE TABLE activities ( CREATE TABLE activities (
id VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE, id SERIAL PRIMARY KEY,
inreplyto VARCHAR(2083), document JSONB,
published TIMESTAMP with time zone NOT NULL,
summary TEXT,
content TEXT,
normalized TEXT, normalized TEXT,
attributedto VARCHAR(2083) REFERENCES actors,
received_at TIMESTAMP with time zone DEFAULT now(),
instance VARCHAR(1000) NOT NULL instance VARCHAR(1000) NOT NULL
); );
CREATE UNIQUE INDEX activities_uri ON activities ( (document->'id') );
CREATE TABLE instances ( CREATE TABLE instances (
endpoint VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE, endpoint VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
autostart BOOLEAN, autostart BOOLEAN,
@ -44,3 +32,8 @@ CREATE TABLE instances (
ALTER TABLE activities ADD COLUMN normalized_idx tsvector; ALTER TABLE activities ADD COLUMN normalized_idx tsvector;
UPDATE activities SET normalized_idx = to_tsvector('english', normalized); UPDATE activities SET normalized_idx = to_tsvector('english', normalized);
CREATE INDEX ON activities USING gin(normalized_idx); CREATE INDEX ON activities USING gin(normalized_idx);
CREATE INDEX actors_id_idx ON actors (id);
CREATE INDEX activities_id_idx ON activities (id);
CREATE INDEX actors_uri_idx ON actors ( (document->'id') );
CREATE INDEX activities_uri_idx ON activities ( (document->'id') );