diff --git a/fedilogue/retrieve.go b/fedilogue/retrieve.go index a452aea..04f9220 100644 --- a/fedilogue/retrieve.go +++ b/fedilogue/retrieve.go @@ -3,7 +3,6 @@ package main import ( "context" "encoding/json" - //"errors" "html" "io/ioutil" "net/http" @@ -46,6 +45,12 @@ type ActorJson struct { instance string } +type TagType struct { + Type string `json:"type"` + Name string `json:"name"` +} + + type PostJson struct { id int uri string `json:"id"` @@ -59,7 +64,7 @@ type PostJson struct { Published time.Time `json:"published"` Source string `json:"source"` Summary string `json:"summary"` - // Ignoring tag for now + Tag []TagType `json:"tag"` To []string `json:"to"` Type string `json:"type"` @@ -144,7 +149,13 @@ func check_activity(uri string) { activityjson.normalized = matchurl.ReplaceAllString(activityjson.normalized, "") activityjson.normalized = spaceReg.ReplaceAllString(activityjson.normalized, " ") - _, err = pool.Exec(context.Background(), "INSERT INTO activities (document, normalized, instance) VALUES($1, $2, $3)", jsondocument, activityjson.normalized, activityjson.instance) + var hashtags []string + for _, tag := range activityjson.Tag { + if tag.Type == "Hashtag" { + hashtags = append(hashtags, strings.ToLower(tag.Name)) + } + } + _, err = pool.Exec(context.Background(), "INSERT INTO activities (document, normalized, instance, hashtags) VALUES($1, $2, $3, $4)", jsondocument, activityjson.normalized, activityjson.instance, hashtags) if err != nil { logWarn("Error inserting %s into `activities`: "+ uri, err) return diff --git a/fedilogue/tables.sql b/fedilogue/tables.sql index 68ff439..9fae4f8 100644 --- a/fedilogue/tables.sql +++ b/fedilogue/tables.sql @@ -10,7 +10,8 @@ CREATE TABLE IF NOT EXISTS activities ( document JSONB, normalized TEXT, identifiedat TIMESTAMP with time zone DEFAULT now(), - instance VARCHAR(1000) NOT NULL + instance VARCHAR(1000) NOT NULL, + hashtags VARCHAR(140)[] ); @@ -33,6 +34,7 @@ CREATE UNIQUE INDEX IF NOT EXISTS activities_uri_idx ON activities ( (document- CREATE INDEX IF NOT EXISTS activities_published_idx ON activities ( (document->>'published') ); CREATE INDEX IF NOT EXISTS activities_identifiedat_idx ON activities (identifiedat); +CREATE INDEX IF NOT EXISTS hashtags_idx ON activities(hashtags); CREATE INDEX IF NOT EXISTS normalized_idx ON activities USING gin(normalized_tsvector);