diff --git a/retrieve.go b/retrieve.go
index 37f315f..4c1fa91 100644
--- a/retrieve.go
+++ b/retrieve.go
@@ -6,7 +6,6 @@ import (
 	"errors"
 	"html"
 	"io/ioutil"
-	"io"
 	"net/http"
 	"strings"
 	"time"
@@ -21,7 +20,6 @@ var re *regexp.Regexp
 var matchurl *regexp.Regexp
 
 type ImageType struct {
-	// Type string `json:"type"`
 	Url string `json:"url"`
 }
 
@@ -93,8 +91,15 @@ func check_activity(uri string) (PostJson, error) {
 		return activityjson, errors.New("Recently requested within local cache")
 	}
 
-	selectRet := pool.QueryRow(context.Background(), "SELECT id, inReplyTo, published, summary, content, normalized, attributedto, received_at FROM activities WHERE id = $1", uri)
-	err := selectRet.Scan(&activityjson.ID, &activityjson.InReplyTo, &activityjson.Published, &activityjson.Summary, &activityjson.Content, &activityjson.normalized, &activityjson.AttributedTo, &activityjson.receivedAt)
+	var jsondocument string
+
+	// ->> yields the id as text, so it can be compared with the string parameter.
+	selectRet := pool.QueryRow(context.Background(), "SELECT document, normalized FROM activities WHERE document->>'id' = $1", uri)
+	err := selectRet.Scan(&jsondocument, &activityjson.normalized)
+	if err == nil {
+		// Cache hit: rebuild the struct from the stored document.
+		err = json.Unmarshal([]byte(jsondocument), &activityjson)
+	}
 	if err == nil {
 		return activityjson, nil
 	}
@@ -114,6 +119,8 @@ func check_activity(uri string) (PostJson, error) {
 	}
 	resp.Body.Close()
 
+	jsondocument = string(body)
+
 	err = json.Unmarshal(body, &activityjson)
 	if err != nil {
 		return activityjson, err
@@ -127,7 +134,6 @@ func check_activity(uri string) (PostJson, error) {
 
 	// If AttributedTo is blank, this is likely an authentication failure
 	// For now, skip it...
-
 	if activityjson.AttributedTo == "" {
 		return activityjson, errors.New("Invalid AttributedTo value on " + uri)
 	}
@@ -142,7 +148,7 @@ func check_activity(uri string) (PostJson, error) {
 	activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "")
 	activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ")
 
-	_, err = pool.Exec(context.Background(), "INSERT INTO activities (id, inreplyto, published, summary, content, normalized, attributedto, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8)", activityjson.ID, activityjson.InReplyTo, activityjson.Published, activityjson.Summary, activityjson.Content, activityjson.normalized, activityjson.AttributedTo, activityjson.instance)
+	_, err = pool.Exec(context.Background(), "INSERT INTO activities (document, normalized, instance) VALUES($1, $2, $3)", jsondocument, activityjson.normalized, activityjson.instance)
 	if err != nil {
 		logDebug.Print(err)
 		return activityjson, err
@@ -169,8 +175,14 @@ func check_actor(uri string) (ActorJson, error) {
 		}
 	}
 
-	selectRet := pool.QueryRow(context.Background(), "SELECT id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance FROM actors WHERE id = $1", uri)
-	err := selectRet.Scan(&actorjson.ID, &actorjson.Type, &actorjson.Inbox, &actorjson.Outbox, &actorjson.Followers, &actorjson.Following, &actorjson.Url, &actorjson.PreferredUsername, &actorjson.Name, &actorjson.Summary, &actorjson.Icon.Url, &actorjson.Image.Url, &actorjson.PublicKey.PublicKeyPem, &actorjson.instance)
+	var jsondocument string
+	// ->> yields the id as text, so it can be compared with the string parameter.
+	selectRet := pool.QueryRow(context.Background(), "SELECT document, instance FROM actors WHERE document->>'id' = $1", uri)
+	err := selectRet.Scan(&jsondocument, &actorjson.instance)
+	if err == nil {
+		// Cache hit: rebuild the struct from the stored document.
+		err = json.Unmarshal([]byte(jsondocument), &actorjson)
+	}
 	if err == nil {
 		return actorjson, nil
 	}
@@ -180,6 +192,7 @@ func check_actor(uri string) (ActorJson, error) {
 	}
 	actorjson.instance = uri[8 : endslash+8]
 
+	logDebug.Print("CHECK: " + uri)
 	o, _ := GetRunner(actorjson.instance)
 	req, _ := http.NewRequest("GET", uri, nil)
 	req.Header.Set("User-Agent", "Tusky")
@@ -202,19 +215,23 @@ func check_actor(uri string) (ActorJson, error) {
 		break
 	}
 
-	err = json.NewDecoder(resp.Body).Decode(&actorjson)
+	body, err := ioutil.ReadAll(resp.Body)
+	// Close before checking the error so the read-failure path does not leak the body.
+	resp.Body.Close()
+	if err != nil {
+		return actorjson, errors.New("Read error on " + uri)
+	}
+
+	jsondocument = string(body)
+
+	err = json.Unmarshal(body, &actorjson)
 	if err != nil {
-		// Going forward, this might need to be double-checked, but for now just die
-		tries = tries + 1
 		return actorjson, err
 	}
-	io.Copy(ioutil.Discard, resp.Body)
 
-	resp.Body.Close()
-
-	_, err = pool.Exec(context.Background(), "INSERT INTO actors (id, actor_type, inbox, outbox, followers, following, url, preferredUsername, name, summary, icon, image, publicKey, instance) VALUES($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12, $13, $14)", actorjson.ID, actorjson.Type, actorjson.Inbox, actorjson.Outbox, actorjson.Followers, actorjson.Following, actorjson.Url, actorjson.PreferredUsername, actorjson.Name, actorjson.Summary, actorjson.Icon.Url, actorjson.Image.Url, actorjson.PublicKey.PublicKeyPem, actorjson.instance)
+	_, err = pool.Exec(context.Background(), "INSERT INTO actors (document, instance) VALUES($1, $2)", jsondocument, actorjson.instance)
 	if err != nil {
+		logDebug.Print(err)
 		return actorjson, err
 	}
 
diff --git a/tables.sql b/tables.sql
index 20f4a69..00226b8 100644
--- a/tables.sql
+++ b/tables.sql
@@ -3,35 +3,25 @@ DROP TABLE IF EXISTS actors CASCADE;
 DROP TABLE IF EXISTS instances CASCADE;
 
 CREATE TABLE actors (
-	actor_type VARCHAR(1000) NOT NULL,
-	id VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
-	inbox VARCHAR(2083) NOT NULL,
-	outbox VARCHAR(2083) NOT NULL,
-	followers VARCHAR(2083) NOT NULL,
-	following VARCHAR(2083) NOT NULL,
-	url VARCHAR(2083) NOT NULL,
-	preferredusername VARCHAR(1000) NOT NULL,
-	name VARCHAR(1000) NOT NULL,
-	summary TEXT,
-	icon VARCHAR(2083),
-	image VARCHAR(2083),
-	publickey TEXT,
+	id SERIAL PRIMARY KEY,
+	document JSONB,
 	identifiedat TIMESTAMP with time zone DEFAULT now(),
 	instance VARCHAR(1000) NOT NULL
 );
 
+-- Actor URIs are unique; index the text form used by document->>'id' lookups.
+CREATE UNIQUE INDEX actors_uri ON actors ( (document->>'id') );
+
 CREATE TABLE activities (
-	id VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
-	inreplyto VARCHAR(2083),
-	published TIMESTAMP with time zone NOT NULL,
-	summary TEXT,
-	content TEXT,
+	id SERIAL PRIMARY KEY,
+	document JSONB,
 	normalized TEXT,
-	attributedto VARCHAR(2083) REFERENCES actors,
-	received_at TIMESTAMP with time zone DEFAULT now(),
 	instance VARCHAR(1000) NOT NULL
 );
 
+-- Activity URIs are unique; index the text form used by document->>'id' lookups.
+CREATE UNIQUE INDEX activities_uri ON activities ( (document->>'id') );
+
 CREATE TABLE instances (
 	endpoint VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
 	autostart BOOLEAN,