Merge branch 'resttrendingwords' into 'master'

Updating tables, adding pre-populated data to database

See merge request khanzf/fedilogue!18
This commit is contained in:
Fikrān Mutasā'il 2021-12-20 04:43:46 +00:00
commit 12447ab3fa
2 changed files with 769 additions and 23 deletions

View File

@ -1,34 +1,49 @@
CREATE TABLE IF NOT EXISTS actors ( CREATE TABLE IF NOT EXISTS actors (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
document JSONB, document JSONB,
identifiedat TIMESTAMP with time zone DEFAULT now(), identifiedat TIMESTAMP with time zone DEFAULT now(),
instance VARCHAR(1000) NOT NULL, instance VARCHAR(1000) NOT NULL,
bot BOOL DEFAULT FALSE bot BOOLEAN DEFAULT FALSE
); );
CREATE TABLE IF NOT EXISTS activities ( CREATE TABLE IF NOT EXISTS activities (
id SERIAL PRIMARY KEY, id SERIAL PRIMARY KEY,
document JSONB, document JSONB,
normalized TEXT, normalized TEXT,
identifiedat TIMESTAMP with time zone DEFAULT now(), identifiedat TIMESTAMP with time zone DEFAULT now(),
instance VARCHAR(1000) NOT NULL, instance VARCHAR(1000) NOT NULL,
hashtags VARCHAR(140)[] hashtags VARCHAR(140)[]
); );
CREATE TABLE IF NOT EXISTS instances ( CREATE TABLE IF NOT EXISTS instances (
endpoint VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE, endpoint VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
autostart BOOLEAN, state VARCHAR(16),
state VARCHAR(16), username VARCHAR(32),
username VARCHAR(32), password VARCHAR(32),
password VARCHAR(32), software VARCHAR(50),
software VARCHAR(50) banned BOOLEAN DEFAULT FALSE,
alwaysbot BOOLEAN DEFAULT FALSE
); );
ALTER TABLE activities -- Autostart mastodon.social
ADD normalized_tsvector tsvector INSERT INTO instances (endpoint) VALUES('mastodon.social');
GENERATED ALWAYS AS (to_tsvector('english', normalized)) STORED; -- Banned Instances
-- Seed the instances that must never be crawled.
-- endpoint is the primary key, so a plain INSERT aborts the whole script the
-- second time it runs. The upsert keeps the script re-runnable and also flags
-- an endpoint that was already present in the table before this seed ran.
INSERT INTO instances (endpoint, banned) VALUES
    ('switter.at', TRUE),
    ('xxxtumblr.org', TRUE),
    ('sinblr.com', TRUE),
    ('twitiverse.com', TRUE),
    ('my.dirtyhobby.xyz', TRUE),
    ('bae.st', TRUE)
ON CONFLICT (endpoint) DO UPDATE
    SET banned = EXCLUDED.banned;
-- Alwaysbot instances: every actor hosted on these endpoints is treated as a bot.
-- endpoint is the primary key, so the upsert keeps the script re-runnable and
-- also flags an endpoint that already exists in the table.
INSERT INTO instances (endpoint, alwaysbot) VALUES
    ('mstdn.foxfam.club', TRUE),
    ('botsin.space', TRUE),
    ('newsbots.eu', TRUE)
ON CONFLICT (endpoint) DO UPDATE
    SET alwaysbot = EXCLUDED.alwaysbot;
-- Stored full-text vector derived from the normalized activity text.
-- IF NOT EXISTS keeps this statement re-runnable, matching the
-- CREATE ... IF NOT EXISTS statements used throughout the rest of the script.
ALTER TABLE activities
    ADD COLUMN IF NOT EXISTS normalized_tsvector tsvector
    GENERATED ALWAYS AS (to_tsvector('english', normalized)) STORED;
CREATE UNIQUE INDEX IF NOT EXISTS actors_uri_idx ON actors ( (document->>'id') ); CREATE UNIQUE INDEX IF NOT EXISTS actors_uri_idx ON actors ( (document->>'id') );
CREATE UNIQUE INDEX IF NOT EXISTS activities_uri_idx ON activities ( (document->>'id') ); CREATE UNIQUE INDEX IF NOT EXISTS activities_uri_idx ON activities ( (document->>'id') );
@ -42,3 +57,674 @@ CREATE INDEX IF NOT EXISTS normalized_idx ON activities USING gin(normalized_tsv
CREATE INDEX IF NOT EXISTS actors_id_idx ON actors (id); CREATE INDEX IF NOT EXISTS actors_id_idx ON actors (id);
CREATE INDEX IF NOT EXISTS activities_id_idx ON activities (id); CREATE INDEX IF NOT EXISTS activities_id_idx ON activities (id);
-- Words excluded from trending-word detection.
-- word is the primary key: it rules out duplicates and NULLs. A NULL row would
-- make a "word NOT IN (SELECT word FROM stopwords)" filter reject every word.
CREATE TABLE IF NOT EXISTS stopwords (
    word VARCHAR(20) PRIMARY KEY
);

-- Seed list. Each row is a single token; the fused entries 'keep keeps' and
-- 'sure t' are split into their individual words so that they can match.
-- ON CONFLICT is written without a conflict target on purpose: it keeps the
-- statement valid against a stopwords table that was created earlier without
-- the primary key, and makes re-running the script a no-op when the key exists.
INSERT INTO stopwords (word)
VALUES
    ('a'),
    ('able'),
    ('about'),
    ('above'),
    ('abst'),
    ('accordance'),
    ('according'),
    ('accordingly'),
    ('across'),
    ('act'),
    ('actually'),
    ('added'),
    ('adj'),
    ('affected'),
    ('affecting'),
    ('affects'),
    ('after'),
    ('afterwards'),
    ('again'),
    ('against'),
    ('ah'),
    ('all'),
    ('almost'),
    ('alone'),
    ('along'),
    ('already'),
    ('also'),
    ('although'),
    ('always'),
    ('am'),
    ('among'),
    ('amongst'),
    ('an'),
    ('and'),
    ('announce'),
    ('another'),
    ('any'),
    ('anybody'),
    ('anyhow'),
    ('anymore'),
    ('anyone'),
    ('anything'),
    ('anyway'),
    ('anyways'),
    ('anywhere'),
    ('apparently'),
    ('approximately'),
    ('are'),
    ('aren'),
    ('arent'),
    ('arise'),
    ('around'),
    ('as'),
    ('aside'),
    ('ask'),
    ('asking'),
    ('at'),
    ('auth'),
    ('available'),
    ('away'),
    ('awfully'),
    ('b'),
    ('back'),
    ('be'),
    ('became'),
    ('because'),
    ('become'),
    ('becomes'),
    ('becoming'),
    ('been'),
    ('before'),
    ('beforehand'),
    ('begin'),
    ('beginning'),
    ('beginnings'),
    ('begins'),
    ('behind'),
    ('being'),
    ('believe'),
    ('below'),
    ('beside'),
    ('besides'),
    ('between'),
    ('beyond'),
    ('biol'),
    ('both'),
    ('brief'),
    ('briefly'),
    ('but'),
    ('by'),
    ('c'),
    ('ca'),
    ('came'),
    ('can'),
    ('cannot'),
    ('can''t'),
    ('cause'),
    ('causes'),
    ('certain'),
    ('certainly'),
    ('co'),
    ('com'),
    ('come'),
    ('comes'),
    ('contain'),
    ('containing'),
    ('contains'),
    ('could'),
    ('couldnt'),
    ('d'),
    ('date'),
    ('did'),
    ('didn''t'),
    ('different'),
    ('do'),
    ('does'),
    ('doesn''t'),
    ('doing'),
    ('done'),
    ('don''t'),
    ('down'),
    ('downwards'),
    ('due'),
    ('during'),
    ('e'),
    ('each'),
    ('ed'),
    ('edu'),
    ('effect'),
    ('eg'),
    ('eight'),
    ('eighty'),
    ('either'),
    ('else'),
    ('elsewhere'),
    ('end'),
    ('ending'),
    ('enough'),
    ('especially'),
    ('et'),
    ('et-al'),
    ('etc'),
    ('even'),
    ('ever'),
    ('every'),
    ('everybody'),
    ('everyone'),
    ('everything'),
    ('everywhere'),
    ('ex'),
    ('except'),
    ('f'),
    ('far'),
    ('few'),
    ('ff'),
    ('fifth'),
    ('first'),
    ('five'),
    ('fix'),
    ('followed'),
    ('following'),
    ('follows'),
    ('for'),
    ('former'),
    ('formerly'),
    ('forth'),
    ('found'),
    ('four'),
    ('from'),
    ('further'),
    ('furthermore'),
    ('g'),
    ('gave'),
    ('get'),
    ('gets'),
    ('getting'),
    ('give'),
    ('given'),
    ('gives'),
    ('giving'),
    ('go'),
    ('goes'),
    ('gone'),
    ('got'),
    ('gotten'),
    ('h'),
    ('had'),
    ('happens'),
    ('hardly'),
    ('has'),
    ('hasn''t'),
    ('have'),
    ('haven''t'),
    ('having'),
    ('he'),
    ('hed'),
    ('hence'),
    ('her'),
    ('here'),
    ('hereafter'),
    ('hereby'),
    ('herein'),
    ('heres'),
    ('hereupon'),
    ('hers'),
    ('herself'),
    ('hes'),
    ('hi'),
    ('hid'),
    ('him'),
    ('himself'),
    ('his'),
    ('hither'),
    ('home'),
    ('how'),
    ('howbeit'),
    ('however'),
    ('hundred'),
    ('i'),
    ('id'),
    ('ie'),
    ('if'),
    ('i''ll'),
    ('im'),
    ('immediate'),
    ('immediately'),
    ('importance'),
    ('important'),
    ('in'),
    ('inc'),
    ('indeed'),
    ('index'),
    ('information'),
    ('instead'),
    ('into'),
    ('invention'),
    ('inward'),
    ('is'),
    ('isn''t'),
    ('it'),
    ('itd'),
    ('it''ll'),
    ('its'),
    ('itself'),
    ('i''ve'),
    ('j'),
    ('just'),
    ('k'),
    ('keep'),
    ('keeps'),
    ('kept'),
    ('kg'),
    ('km'),
    ('know'),
    ('known'),
    ('knows'),
    ('l'),
    ('largely'),
    ('last'),
    ('lately'),
    ('later'),
    ('latter'),
    ('latterly'),
    ('least'),
    ('less'),
    ('lest'),
    ('let'),
    ('lets'),
    ('like'),
    ('liked'),
    ('likely'),
    ('line'),
    ('little'),
    ('''ll'),
    ('look'),
    ('looking'),
    ('looks'),
    ('ltd'),
    ('m'),
    ('made'),
    ('mainly'),
    ('make'),
    ('makes'),
    ('many'),
    ('may'),
    ('maybe'),
    ('me'),
    ('mean'),
    ('means'),
    ('meantime'),
    ('meanwhile'),
    ('merely'),
    ('mg'),
    ('might'),
    ('million'),
    ('miss'),
    ('ml'),
    ('more'),
    ('moreover'),
    ('most'),
    ('mostly'),
    ('mr'),
    ('mrs'),
    ('much'),
    ('mug'),
    ('must'),
    ('my'),
    ('myself'),
    ('n'),
    ('na'),
    ('name'),
    ('namely'),
    ('nay'),
    ('nd'),
    ('near'),
    ('nearly'),
    ('necessarily'),
    ('necessary'),
    ('need'),
    ('needs'),
    ('neither'),
    ('never'),
    ('nevertheless'),
    ('new'),
    ('next'),
    ('nine'),
    ('ninety'),
    ('no'),
    ('nobody'),
    ('non'),
    ('none'),
    ('nonetheless'),
    ('noone'),
    ('nor'),
    ('normally'),
    ('nos'),
    ('not'),
    ('noted'),
    ('nothing'),
    ('now'),
    ('nowhere'),
    ('o'),
    ('obtain'),
    ('obtained'),
    ('obviously'),
    ('of'),
    ('off'),
    ('often'),
    ('oh'),
    ('ok'),
    ('okay'),
    ('old'),
    ('omitted'),
    ('on'),
    ('once'),
    ('one'),
    ('ones'),
    ('only'),
    ('onto'),
    ('or'),
    ('ord'),
    ('other'),
    ('others'),
    ('otherwise'),
    ('ought'),
    ('our'),
    ('ours'),
    ('ourselves'),
    ('out'),
    ('outside'),
    ('over'),
    ('overall'),
    ('owing'),
    ('own'),
    ('p'),
    ('page'),
    ('pages'),
    ('part'),
    ('particular'),
    ('particularly'),
    ('past'),
    ('per'),
    ('perhaps'),
    ('placed'),
    ('please'),
    ('plus'),
    ('poorly'),
    ('possible'),
    ('possibly'),
    ('potentially'),
    ('pp'),
    ('predominantly'),
    ('present'),
    ('previously'),
    ('primarily'),
    ('probably'),
    ('promptly'),
    ('proud'),
    ('provides'),
    ('put'),
    ('q'),
    ('que'),
    ('quickly'),
    ('quite'),
    ('qv'),
    ('r'),
    ('ran'),
    ('rather'),
    ('rd'),
    ('re'),
    ('readily'),
    ('really'),
    ('recent'),
    ('recently'),
    ('ref'),
    ('refs'),
    ('regarding'),
    ('regardless'),
    ('regards'),
    ('related'),
    ('relatively'),
    ('research'),
    ('respectively'),
    ('resulted'),
    ('resulting'),
    ('results'),
    ('right'),
    ('run'),
    ('s'),
    ('said'),
    ('same'),
    ('saw'),
    ('say'),
    ('saying'),
    ('says'),
    ('sec'),
    ('section'),
    ('see'),
    ('seeing'),
    ('seem'),
    ('seemed'),
    ('seeming'),
    ('seems'),
    ('seen'),
    ('self'),
    ('selves'),
    ('sent'),
    ('seven'),
    ('several'),
    ('shall'),
    ('she'),
    ('shed'),
    ('she''ll'),
    ('shes'),
    ('should'),
    ('shouldn''t'),
    ('show'),
    ('showed'),
    ('shown'),
    ('showns'),
    ('shows'),
    ('significant'),
    ('significantly'),
    ('similar'),
    ('similarly'),
    ('since'),
    ('six'),
    ('slightly'),
    ('so'),
    ('some'),
    ('somebody'),
    ('somehow'),
    ('someone'),
    ('somethan'),
    ('something'),
    ('sometime'),
    ('sometimes'),
    ('somewhat'),
    ('somewhere'),
    ('soon'),
    ('sorry'),
    ('specifically'),
    ('specified'),
    ('specify'),
    ('specifying'),
    ('still'),
    ('stop'),
    ('strongly'),
    ('sub'),
    ('substantially'),
    ('successfully'),
    ('such'),
    ('sufficiently'),
    ('suggest'),
    ('sup'),
    ('sure'),
    ('t'),
    ('take'),
    ('taken'),
    ('taking'),
    ('tell'),
    ('tends'),
    ('th'),
    ('than'),
    ('thank'),
    ('thanks'),
    ('thanx'),
    ('that'),
    ('that''ll'),
    ('thats'),
    ('that''ve'),
    ('the'),
    ('their'),
    ('theirs'),
    ('them'),
    ('themselves'),
    ('then'),
    ('thence'),
    ('there'),
    ('thereafter'),
    ('thereby'),
    ('thered'),
    ('therefore'),
    ('therein'),
    ('there''ll'),
    ('thereof'),
    ('therere'),
    ('theres'),
    ('thereto'),
    ('thereupon'),
    ('there''ve'),
    ('these'),
    ('they'),
    ('theyd'),
    ('they''ll'),
    ('theyre'),
    ('they''ve'),
    ('think'),
    ('this'),
    ('those'),
    ('thou'),
    ('though'),
    ('thoughh'),
    ('thousand'),
    ('throug'),
    ('through'),
    ('throughout'),
    ('thru'),
    ('thus'),
    ('til'),
    ('tip'),
    ('to'),
    ('together'),
    ('too'),
    ('took'),
    ('toward'),
    ('towards'),
    ('tried'),
    ('tries'),
    ('truly'),
    ('try'),
    ('trying'),
    ('ts'),
    ('twice'),
    ('two'),
    ('u'),
    ('un'),
    ('under'),
    ('unfortunately'),
    ('unless'),
    ('unlike'),
    ('unlikely'),
    ('until'),
    ('unto'),
    ('up'),
    ('upon'),
    ('ups'),
    ('us'),
    ('use'),
    ('used'),
    ('useful'),
    ('usefully'),
    ('usefulness'),
    ('uses'),
    ('using'),
    ('usually'),
    ('v'),
    ('value'),
    ('various'),
    ('''ve'),
    ('very'),
    ('via'),
    ('viz'),
    ('vol'),
    ('vols'),
    ('vs'),
    ('w'),
    ('want'),
    ('wants'),
    ('was'),
    ('wasnt'),
    ('way'),
    ('we'),
    ('wed'),
    ('welcome'),
    ('we''ll'),
    ('went'),
    ('were'),
    ('werent'),
    ('we''ve'),
    ('what'),
    ('whatever'),
    ('what''ll'),
    ('whats'),
    ('when'),
    ('whence'),
    ('whenever'),
    ('where'),
    ('whereafter'),
    ('whereas'),
    ('whereby'),
    ('wherein'),
    ('wheres'),
    ('whereupon'),
    ('wherever'),
    ('whether'),
    ('which'),
    ('while'),
    ('whim'),
    ('whither'),
    ('who'),
    ('whod'),
    ('whoever'),
    ('whole'),
    ('who''ll'),
    ('whom'),
    ('whomever'),
    ('whos'),
    ('whose'),
    ('why'),
    ('widely'),
    ('willing'),
    ('wish'),
    ('with'),
    ('within'),
    ('without'),
    ('wont'),
    ('words'),
    ('world'),
    ('would'),
    ('wouldnt'),
    ('www'),
    ('x'),
    ('y'),
    ('yes'),
    ('yet'),
    ('you'),
    ('youd'),
    ('you''ll'),
    ('your'),
    ('youre'),
    ('yours'),
    ('yourself'),
    ('yourselves'),
    ('you''ve'),
    ('z'),
    ('zero')
ON CONFLICT DO NOTHING;

View File

@ -12,26 +12,30 @@ import (
"github.com/jackc/pgx/v4" "github.com/jackc/pgx/v4"
) )
var trendsText string var metricsText string
func enableCors(w *http.ResponseWriter) { func enableCors(w *http.ResponseWriter) {
(*w).Header().Set("Access-Control-Allow-Origin", "*") (*w).Header().Set("Access-Control-Allow-Origin", "*")
} }
func runMetrics() { func runMetrics() {
hashtagtotal := runTrendingMetrics() hashtagtotal := runTrendingHashtags()
wordstotal := runTrendingWords()
totalJson := make(map[string]interface{}) totalJson := make(map[string]interface{})
totalJson["hashtags"] = hashtagtotal totalJson["hashtags"] = hashtagtotal
totalJson["words"] = wordstotal
totalJson["datetime"] = time.Now().UTC()
data, err := json.Marshal(totalJson) data, err := json.Marshal(totalJson)
if err != nil { if err != nil {
log.Fatalf("error marshaling combined activity: %v\n", err) log.Fatalf("error marshaling combined activity: %v\n", err)
} }
trendsText = string(data) metricsText = string(data)
} }
func runTrendingMetrics() map[string]interface{} { func runTrendingHashtags() map[string]interface{} {
sql := `SELECT UNNEST(activities.hashtags) as hashtags, count(actors.id) sql := `SELECT UNNEST(activities.hashtags) as hashtags, count(actors.id)
from activities from activities
LEFT JOIN actors ON activities.document->>'attributedTo'=actors.document->>'id' LEFT JOIN actors ON activities.document->>'attributedTo'=actors.document->>'id'
@ -65,16 +69,70 @@ GROUP BY hashtags ORDER BY count DESC LIMIT 20;`
hashtagtotal := make(map[string]interface{}); hashtagtotal := make(map[string]interface{});
hashtagtotal["count"] = hashcount hashtagtotal["count"] = hashcount
hashtagtotal["datetime"] = time.Now().UTC()
hashtagtotal["items"] = hashtagitems hashtagtotal["items"] = hashtagitems
return hashtagtotal return hashtagtotal
} }
// runTrendingWords returns the two-word phrases that are trending right now.
//
// The query works in two steps:
//  1. popular_words: the 100 words that appear in the most documents (ts_stat
//     ndoc) among activities identified in the last 60 minutes whose actor has
//     bot=false, keeping only words longer than 3 characters that are not
//     listed in the stopwords table.
//  2. Every ordered pair of those words is matched case-insensitively, as the
//     phrase "<word1> <word2>", against the normalized text of activities from
//     the last 60 minutes. Phrases found more than once are kept, and the 20
//     most frequent are returned.
//
// The result map holds "count" (number of phrases returned) and "items" (a
// slice of maps, each with "trending" = the phrase and "count" = its number of
// matching activities). Like the original implementation, any database error
// panics.
func runTrendingWords() map[string]interface{} {
	sql := `WITH popular_words AS (
select word FROM ts_stat(
'
SELECT to_tsvector(''simple'', normalized) FROM activities
LEFT JOIN actors ON activities.document->>''attributedTo''=actors.document->>''id''
WHERE activities.identifiedat > current_timestamp - interval ''60 minutes''
AND actors.bot=false
'
)
WHERE length(word) > 3
AND NOT word in (SELECT word FROM stopwords)
ORDER BY ndoc DESC LIMIT 100)
SELECT concat_ws(' ', a1.word, a2.word) phrase, count(*)
FROM popular_words AS a1
CROSS JOIN popular_words AS a2
CROSS JOIN activities
WHERE normalized ilike format('%%%s %s%%', a1.word, a2.word)
AND identifiedat > current_timestamp - interval '60 minutes'
GROUP BY 1
HAVING count(*) > 1
ORDER BY 2 DESC LIMIT 20;
`
	rows, err := pool.Query(context.Background(), sql)
	if err != nil {
		panic(err)
	}
	// Deferred so the rows are closed on every exit path, including the
	// panics below.
	defer rows.Close()

	trendingitems := make([]interface{}, 0)
	for rows.Next() {
		var trendingword string
		var count int
		if err := rows.Scan(&trendingword, &count); err != nil {
			panic(err)
		}
		trendingitems = append(trendingitems, map[string]interface{}{
			"trending": trendingword,
			"count":    count,
		})
	}
	// rows.Next() returns false both when the result set is exhausted and when
	// reading fails part-way; without this check a failure would be reported
	// as a short but apparently valid list.
	if err := rows.Err(); err != nil {
		panic(err)
	}

	return map[string]interface{}{
		"count": len(trendingitems),
		"items": trendingitems,
	}
}
// GET handlers // GET handlers
func getTrending(w http.ResponseWriter, r *http.Request) { func getTrending(w http.ResponseWriter, r *http.Request) {
enableCors(&w) enableCors(&w)
fmt.Fprintf(w, "%s", trendsText) fmt.Fprintf(w, "%s", metricsText)
} }
func getSearch(w http.ResponseWriter, r *http.Request) { func getSearch(w http.ResponseWriter, r *http.Request) {
@ -159,10 +217,12 @@ func getSearch(w http.ResponseWriter, r *http.Request) {
func main() { func main() {
pool = getDbPool() pool = getDbPool()
metricsText = "[]"
go func() { go func() {
for { for {
runMetrics() runMetrics()
time.Sleep(30 * time.Second) time.Sleep(10 * time.Minute)
} }
}() }()