Migrate to storing data as a JSONB object in the database
Captures all data and avoids cache misses
This commit is contained in:
parent
3662535b0d
commit
f262de1dc3
39
retrieve.go
39
retrieve.go
@ -6,7 +6,6 @@ import (
|
||||
"errors"
|
||||
"html"
|
||||
"io/ioutil"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
@ -21,7 +20,6 @@ var re *regexp.Regexp
|
||||
var matchurl *regexp.Regexp
|
||||
|
||||
type ImageType struct {
|
||||
// Type string `json:"type"`
|
||||
Url string `json:"url"`
|
||||
}
|
||||
|
||||
@ -93,8 +91,10 @@ func check_activity(uri string) (PostJson, error) {
|
||||
return activityjson, errors.New("Recently requested within local cache")
|
||||
}
|
||||
|
||||
selectRet := pool.QueryRow(context.Background(), "SELECT id, inReplyTo, published, summary, content, normalized, attributedto, received_at FROM activities WHERE id = $1", uri)
|
||||
err := selectRet.Scan(&activityjson.ID, &activityjson.InReplyTo, &activityjson.Published, &activityjson.Summary, &activityjson.Content, &activityjson.normalized, &activityjson.AttributedTo, &activityjson.receivedAt)
|
||||
var jsondocument string
|
||||
|
||||
selectRet := pool.QueryRow(context.Background(), "SELECT document, normalized FROM activities WHERE document->'id' = $1", uri)
|
||||
err := selectRet.Scan(&activityjson.ID, &jsondocument, &activityjson.normalized)
|
||||
if err == nil {
|
||||
return activityjson, nil
|
||||
}
|
||||
@ -114,6 +114,8 @@ func check_activity(uri string) (PostJson, error) {
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
jsondocument = string(body)
|
||||
|
||||
err = json.Unmarshal(body, &activityjson)
|
||||
if err != nil {
|
||||
return activityjson, err
|
||||
@ -127,7 +129,6 @@ func check_activity(uri string) (PostJson, error) {
|
||||
|
||||
// If AttributedTo is blank, this is likely an authentication failure
|
||||
// For now, skip it...
|
||||
|
||||
if activityjson.AttributedTo == "" {
|
||||
return activityjson, errors.New("Invalid AttributedTo value on " + uri)
|
||||
}
|
||||
@ -142,7 +143,7 @@ func check_activity(uri string) (PostJson, error) {
|
||||
activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "")
|
||||
activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ")
|
||||
|
||||
_, err = pool.Exec(context.Background(), "INSERT INTO activities (id, inreplyto, published, summary, content, normalized, attributedto, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8)", activityjson.ID, activityjson.InReplyTo, activityjson.Published, activityjson.Summary, activityjson.Content, activityjson.normalized, activityjson.AttributedTo, activityjson.instance)
|
||||
_, err = pool.Exec(context.Background(), "INSERT INTO activities (document, normalized, instance) VALUES($1, $2, $3)", jsondocument, activityjson.normalized, activityjson.instance)
|
||||
if err != nil {
|
||||
logDebug.Print(err)
|
||||
return activityjson, err
|
||||
@ -169,8 +170,9 @@ func check_actor(uri string) (ActorJson, error) {
|
||||
}
|
||||
}
|
||||
|
||||
selectRet := pool.QueryRow(context.Background(), "SELECT id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance FROM actors WHERE id = $1", uri)
|
||||
err := selectRet.Scan(&actorjson.ID, &actorjson.Type, &actorjson.Inbox, &actorjson.Outbox, &actorjson.Followers, &actorjson.Following, &actorjson.Url, &actorjson.PreferredUsername, &actorjson.Name, &actorjson.Summary, &actorjson.Icon.Url, &actorjson.Image.Url, &actorjson.PublicKey.PublicKeyPem, &actorjson.instance)
|
||||
var jsondocument string
|
||||
selectRet := pool.QueryRow(context.Background(), "SELECT document, instance FROM actors WHERE document->'id' = $1", uri)
|
||||
err := selectRet.Scan(&actorjson.ID, &jsondocument, &actorjson.instance)
|
||||
if err == nil {
|
||||
return actorjson, nil
|
||||
}
|
||||
@ -180,6 +182,7 @@ func check_actor(uri string) (ActorJson, error) {
|
||||
}
|
||||
actorjson.instance = uri[8 : endslash+8]
|
||||
|
||||
logDebug.Print("CHECK: " + uri)
|
||||
o, _ := GetRunner(actorjson.instance)
|
||||
req, _ := http.NewRequest("GET", uri, nil)
|
||||
req.Header.Set("User-Agent", "Tusky")
|
||||
@ -202,19 +205,23 @@ func check_actor(uri string) (ActorJson, error) {
|
||||
break
|
||||
}
|
||||
|
||||
err = json.NewDecoder(resp.Body).Decode(&actorjson)
|
||||
body, err := ioutil.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return actorjson, errors.New("Read error on " + uri)
|
||||
}
|
||||
resp.Body.Close()
|
||||
|
||||
jsondocument = string(body)
|
||||
//logDebug.Print(string(jsondocument))
|
||||
|
||||
err = json.Unmarshal(body, &actorjson)
|
||||
if err != nil {
|
||||
// Going forward, this might need to be double-checked, but for now just die
|
||||
tries = tries + 1
|
||||
return actorjson, err
|
||||
}
|
||||
|
||||
io.Copy(ioutil.Discard, resp.Body)
|
||||
|
||||
resp.Body.Close()
|
||||
|
||||
_, err = pool.Exec(context.Background(), "INSERT INTO actors (id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)", actorjson.ID, actorjson.Type, actorjson.Inbox, actorjson.Outbox, actorjson.Followers, actorjson.Following, actorjson.Url, actorjson.PreferredUsername, actorjson.Name, actorjson.Summary, actorjson.Icon.Url, actorjson.Image.Url, actorjson.PublicKey.PublicKeyPem, actorjson.instance)
|
||||
_, err = pool.Exec(context.Background(), "INSERT INTO actors (document, instance) VALUES($1, $2)", jsondocument, actorjson.instance)
|
||||
if err != nil {
|
||||
logDebug.Print(err)
|
||||
return actorjson, err
|
||||
}
|
||||
|
||||
|
33
tables.sql
33
tables.sql
@ -3,35 +3,23 @@ DROP TABLE IF EXISTS actors CASCADE;
|
||||
DROP TABLE IF EXISTS instances CASCADE;
|
||||
|
||||
CREATE TABLE actors (
|
||||
actor_type VARCHAR(1000) NOT NULL,
|
||||
id VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
|
||||
inbox VARCHAR(2083) NOT NULL,
|
||||
outbox VARCHAR(2083) NOT NULL,
|
||||
followers VARCHAR(2083) NOT NULL,
|
||||
following VARCHAR(2083) NOT NULL,
|
||||
url VARCHAR(2083) NOT NULL,
|
||||
preferredusername VARCHAR(1000) NOT NULL,
|
||||
name VARCHAR(1000) NOT NULL,
|
||||
summary TEXT,
|
||||
icon VARCHAR(2083),
|
||||
image VARCHAR(2083),
|
||||
publickey TEXT,
|
||||
id SERIAL PRIMARY KEY,
|
||||
document JSONB,
|
||||
identifiedat TIMESTAMP with time zone DEFAULT now(),
|
||||
instance VARCHAR(1000) NOT NULL
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX actors_uri ON actors ( (document->'id') );
|
||||
|
||||
CREATE TABLE activities (
|
||||
id VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
|
||||
inreplyto VARCHAR(2083),
|
||||
published TIMESTAMP with time zone NOT NULL,
|
||||
summary TEXT,
|
||||
content TEXT,
|
||||
id SERIAL PRIMARY KEY,
|
||||
document JSONB,
|
||||
normalized TEXT,
|
||||
attributedto VARCHAR(2083) REFERENCES actors,
|
||||
received_at TIMESTAMP with time zone DEFAULT now(),
|
||||
instance VARCHAR(1000) NOT NULL
|
||||
);
|
||||
|
||||
CREATE UNIQUE INDEX activities_uri ON activities ( (document->'id') );
|
||||
|
||||
CREATE TABLE instances (
|
||||
endpoint VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
|
||||
autostart BOOLEAN,
|
||||
@ -44,3 +32,8 @@ CREATE TABLE instances (
|
||||
ALTER TABLE activities ADD COLUMN normalized_idx tsvector;
|
||||
UPDATE activities SET normalized_idx = to_tsvector('english', normalized);
|
||||
CREATE INDEX ON activities USING gin(normalized_idx);
|
||||
|
||||
CREATE INDEX actors_id_idx ON actors (id);
|
||||
CREATE INDEX activities_id_idx ON activities (id);
|
||||
CREATE INDEX actors_uri_idx ON actors ( (document->'id') );
|
||||
CREATE INDEX activities_uri_idx ON activities ( (document->'id') );
|
||||
|
Loading…
x
Reference in New Issue
Block a user