Merge branch 'resttrendingwords' into 'master'

Updating tables, adding pre-populated data to database

See merge request khanzf/fedilogue!18
This commit is contained in:
Fikrān Mutasā'il 2021-12-20 04:43:46 +00:00
commit 12447ab3fa
2 changed files with 769 additions and 23 deletions

View File

@ -3,7 +3,7 @@ CREATE TABLE IF NOT EXISTS actors (
document JSONB,
identifiedat TIMESTAMP with time zone DEFAULT now(),
instance VARCHAR(1000) NOT NULL,
bot BOOL DEFAULT FALSE
bot BOOLEAN DEFAULT FALSE
);
CREATE TABLE IF NOT EXISTS activities (
@ -15,21 +15,36 @@ CREATE TABLE IF NOT EXISTS activities (
hashtags VARCHAR(140)[]
);
-- Known instances, keyed by endpoint, with per-instance settings and flags.
CREATE TABLE IF NOT EXISTS instances (
    -- PRIMARY KEY already enforces uniqueness; a separate UNIQUE constraint
    -- would only create a second, redundant index.
    endpoint  VARCHAR(2083) NOT NULL PRIMARY KEY,
    autostart BOOLEAN,
    state     VARCHAR(16),
    username  VARCHAR(32),
    password  VARCHAR(32),
    software  VARCHAR(50),
    -- Flags populated by the seed rows that follow this table definition.
    banned    BOOLEAN DEFAULT FALSE,
    alwaysbot BOOLEAN DEFAULT FALSE
);
-- Seed rows for the instances table. Every statement uses
-- ON CONFLICT (endpoint) DO NOTHING so the script can be re-run against an
-- already-populated database without failing on the primary key; rows that
-- already exist are left untouched.

-- Autostart mastodon.social
-- The autostart flag is set explicitly; inserting only the endpoint would
-- leave it NULL.
INSERT INTO instances (endpoint, autostart)
VALUES
    ('mastodon.social', TRUE)
ON CONFLICT (endpoint) DO NOTHING;

-- Banned Instances
INSERT INTO instances (endpoint, banned)
VALUES
    ('switter.at', TRUE),
    ('xxxtumblr.org', TRUE),
    ('sinblr.com', TRUE),
    ('twitiverse.com', TRUE),
    ('my.dirtyhobby.xyz', TRUE),
    ('bae.st', TRUE)
ON CONFLICT (endpoint) DO NOTHING;

-- Alwaysbot instances
INSERT INTO instances (endpoint, alwaysbot)
VALUES
    ('mstdn.foxfam.club', TRUE),
    ('botsin.space', TRUE),
    ('newsbots.eu', TRUE)
ON CONFLICT (endpoint) DO NOTHING;
-- Stored generated column holding the English full-text vector of
-- activities.normalized. IF NOT EXISTS keeps the script re-runnable, in line
-- with the CREATE ... IF NOT EXISTS statements around it.
ALTER TABLE activities
    ADD COLUMN IF NOT EXISTS normalized_tsvector tsvector
    GENERATED ALWAYS AS (to_tsvector('english', normalized)) STORED;
-- Unique expression indexes on the 'id' field of each JSONB document:
-- at most one actor row and one activity row per document id.
CREATE UNIQUE INDEX IF NOT EXISTS actors_uri_idx
    ON actors ((document->>'id'));

CREATE UNIQUE INDEX IF NOT EXISTS activities_uri_idx
    ON activities ((document->>'id'));
@ -42,3 +57,674 @@ CREATE INDEX IF NOT EXISTS normalized_idx ON activities USING gin(normalized_tsv
-- Plain indexes on the id column of each table.
CREATE INDEX IF NOT EXISTS actors_id_idx
    ON actors (id);

CREATE INDEX IF NOT EXISTS activities_id_idx
    ON activities (id);
-- Words filtered out of the trending-word query.
-- word is NOT NULL and the primary key: a NULL entry would make a
-- "NOT ... IN (SELECT word FROM stopwords)" filter reject every row, and the
-- key prevents duplicate entries on fresh databases.
CREATE TABLE IF NOT EXISTS stopwords (
    word VARCHAR(20) NOT NULL PRIMARY KEY
);

-- Seed list. 'keep'/'keeps' and 'sure'/'t' are separate entries; fused
-- values such as 'keep keeps' can never equal a single token.
-- ON CONFLICT DO NOTHING (no conflict target) makes re-runs harmless and is
-- also valid against a pre-existing stopwords table that has no key.
INSERT INTO stopwords (word)
VALUES
    ('a'),
    ('able'),
    ('about'),
    ('above'),
    ('abst'),
    ('accordance'),
    ('according'),
    ('accordingly'),
    ('across'),
    ('act'),
    ('actually'),
    ('added'),
    ('adj'),
    ('affected'),
    ('affecting'),
    ('affects'),
    ('after'),
    ('afterwards'),
    ('again'),
    ('against'),
    ('ah'),
    ('all'),
    ('almost'),
    ('alone'),
    ('along'),
    ('already'),
    ('also'),
    ('although'),
    ('always'),
    ('am'),
    ('among'),
    ('amongst'),
    ('an'),
    ('and'),
    ('announce'),
    ('another'),
    ('any'),
    ('anybody'),
    ('anyhow'),
    ('anymore'),
    ('anyone'),
    ('anything'),
    ('anyway'),
    ('anyways'),
    ('anywhere'),
    ('apparently'),
    ('approximately'),
    ('are'),
    ('aren'),
    ('arent'),
    ('arise'),
    ('around'),
    ('as'),
    ('aside'),
    ('ask'),
    ('asking'),
    ('at'),
    ('auth'),
    ('available'),
    ('away'),
    ('awfully'),
    ('b'),
    ('back'),
    ('be'),
    ('became'),
    ('because'),
    ('become'),
    ('becomes'),
    ('becoming'),
    ('been'),
    ('before'),
    ('beforehand'),
    ('begin'),
    ('beginning'),
    ('beginnings'),
    ('begins'),
    ('behind'),
    ('being'),
    ('believe'),
    ('below'),
    ('beside'),
    ('besides'),
    ('between'),
    ('beyond'),
    ('biol'),
    ('both'),
    ('brief'),
    ('briefly'),
    ('but'),
    ('by'),
    ('c'),
    ('ca'),
    ('came'),
    ('can'),
    ('cannot'),
    ('can''t'),
    ('cause'),
    ('causes'),
    ('certain'),
    ('certainly'),
    ('co'),
    ('com'),
    ('come'),
    ('comes'),
    ('contain'),
    ('containing'),
    ('contains'),
    ('could'),
    ('couldnt'),
    ('d'),
    ('date'),
    ('did'),
    ('didn''t'),
    ('different'),
    ('do'),
    ('does'),
    ('doesn''t'),
    ('doing'),
    ('done'),
    ('don''t'),
    ('down'),
    ('downwards'),
    ('due'),
    ('during'),
    ('e'),
    ('each'),
    ('ed'),
    ('edu'),
    ('effect'),
    ('eg'),
    ('eight'),
    ('eighty'),
    ('either'),
    ('else'),
    ('elsewhere'),
    ('end'),
    ('ending'),
    ('enough'),
    ('especially'),
    ('et'),
    ('et-al'),
    ('etc'),
    ('even'),
    ('ever'),
    ('every'),
    ('everybody'),
    ('everyone'),
    ('everything'),
    ('everywhere'),
    ('ex'),
    ('except'),
    ('f'),
    ('far'),
    ('few'),
    ('ff'),
    ('fifth'),
    ('first'),
    ('five'),
    ('fix'),
    ('followed'),
    ('following'),
    ('follows'),
    ('for'),
    ('former'),
    ('formerly'),
    ('forth'),
    ('found'),
    ('four'),
    ('from'),
    ('further'),
    ('furthermore'),
    ('g'),
    ('gave'),
    ('get'),
    ('gets'),
    ('getting'),
    ('give'),
    ('given'),
    ('gives'),
    ('giving'),
    ('go'),
    ('goes'),
    ('gone'),
    ('got'),
    ('gotten'),
    ('h'),
    ('had'),
    ('happens'),
    ('hardly'),
    ('has'),
    ('hasn''t'),
    ('have'),
    ('haven''t'),
    ('having'),
    ('he'),
    ('hed'),
    ('hence'),
    ('her'),
    ('here'),
    ('hereafter'),
    ('hereby'),
    ('herein'),
    ('heres'),
    ('hereupon'),
    ('hers'),
    ('herself'),
    ('hes'),
    ('hi'),
    ('hid'),
    ('him'),
    ('himself'),
    ('his'),
    ('hither'),
    ('home'),
    ('how'),
    ('howbeit'),
    ('however'),
    ('hundred'),
    ('i'),
    ('id'),
    ('ie'),
    ('if'),
    ('i''ll'),
    ('im'),
    ('immediate'),
    ('immediately'),
    ('importance'),
    ('important'),
    ('in'),
    ('inc'),
    ('indeed'),
    ('index'),
    ('information'),
    ('instead'),
    ('into'),
    ('invention'),
    ('inward'),
    ('is'),
    ('isn''t'),
    ('it'),
    ('itd'),
    ('it''ll'),
    ('its'),
    ('itself'),
    ('i''ve'),
    ('j'),
    ('just'),
    ('k'),
    ('keep'),
    ('keeps'),
    ('kept'),
    ('kg'),
    ('km'),
    ('know'),
    ('known'),
    ('knows'),
    ('l'),
    ('largely'),
    ('last'),
    ('lately'),
    ('later'),
    ('latter'),
    ('latterly'),
    ('least'),
    ('less'),
    ('lest'),
    ('let'),
    ('lets'),
    ('like'),
    ('liked'),
    ('likely'),
    ('line'),
    ('little'),
    ('''ll'),
    ('look'),
    ('looking'),
    ('looks'),
    ('ltd'),
    ('m'),
    ('made'),
    ('mainly'),
    ('make'),
    ('makes'),
    ('many'),
    ('may'),
    ('maybe'),
    ('me'),
    ('mean'),
    ('means'),
    ('meantime'),
    ('meanwhile'),
    ('merely'),
    ('mg'),
    ('might'),
    ('million'),
    ('miss'),
    ('ml'),
    ('more'),
    ('moreover'),
    ('most'),
    ('mostly'),
    ('mr'),
    ('mrs'),
    ('much'),
    ('mug'),
    ('must'),
    ('my'),
    ('myself'),
    ('n'),
    ('na'),
    ('name'),
    ('namely'),
    ('nay'),
    ('nd'),
    ('near'),
    ('nearly'),
    ('necessarily'),
    ('necessary'),
    ('need'),
    ('needs'),
    ('neither'),
    ('never'),
    ('nevertheless'),
    ('new'),
    ('next'),
    ('nine'),
    ('ninety'),
    ('no'),
    ('nobody'),
    ('non'),
    ('none'),
    ('nonetheless'),
    ('noone'),
    ('nor'),
    ('normally'),
    ('nos'),
    ('not'),
    ('noted'),
    ('nothing'),
    ('now'),
    ('nowhere'),
    ('o'),
    ('obtain'),
    ('obtained'),
    ('obviously'),
    ('of'),
    ('off'),
    ('often'),
    ('oh'),
    ('ok'),
    ('okay'),
    ('old'),
    ('omitted'),
    ('on'),
    ('once'),
    ('one'),
    ('ones'),
    ('only'),
    ('onto'),
    ('or'),
    ('ord'),
    ('other'),
    ('others'),
    ('otherwise'),
    ('ought'),
    ('our'),
    ('ours'),
    ('ourselves'),
    ('out'),
    ('outside'),
    ('over'),
    ('overall'),
    ('owing'),
    ('own'),
    ('p'),
    ('page'),
    ('pages'),
    ('part'),
    ('particular'),
    ('particularly'),
    ('past'),
    ('per'),
    ('perhaps'),
    ('placed'),
    ('please'),
    ('plus'),
    ('poorly'),
    ('possible'),
    ('possibly'),
    ('potentially'),
    ('pp'),
    ('predominantly'),
    ('present'),
    ('previously'),
    ('primarily'),
    ('probably'),
    ('promptly'),
    ('proud'),
    ('provides'),
    ('put'),
    ('q'),
    ('que'),
    ('quickly'),
    ('quite'),
    ('qv'),
    ('r'),
    ('ran'),
    ('rather'),
    ('rd'),
    ('re'),
    ('readily'),
    ('really'),
    ('recent'),
    ('recently'),
    ('ref'),
    ('refs'),
    ('regarding'),
    ('regardless'),
    ('regards'),
    ('related'),
    ('relatively'),
    ('research'),
    ('respectively'),
    ('resulted'),
    ('resulting'),
    ('results'),
    ('right'),
    ('run'),
    ('s'),
    ('said'),
    ('same'),
    ('saw'),
    ('say'),
    ('saying'),
    ('says'),
    ('sec'),
    ('section'),
    ('see'),
    ('seeing'),
    ('seem'),
    ('seemed'),
    ('seeming'),
    ('seems'),
    ('seen'),
    ('self'),
    ('selves'),
    ('sent'),
    ('seven'),
    ('several'),
    ('shall'),
    ('she'),
    ('shed'),
    ('she''ll'),
    ('shes'),
    ('should'),
    ('shouldn''t'),
    ('show'),
    ('showed'),
    ('shown'),
    ('showns'),
    ('shows'),
    ('significant'),
    ('significantly'),
    ('similar'),
    ('similarly'),
    ('since'),
    ('six'),
    ('slightly'),
    ('so'),
    ('some'),
    ('somebody'),
    ('somehow'),
    ('someone'),
    ('somethan'),
    ('something'),
    ('sometime'),
    ('sometimes'),
    ('somewhat'),
    ('somewhere'),
    ('soon'),
    ('sorry'),
    ('specifically'),
    ('specified'),
    ('specify'),
    ('specifying'),
    ('still'),
    ('stop'),
    ('strongly'),
    ('sub'),
    ('substantially'),
    ('successfully'),
    ('such'),
    ('sufficiently'),
    ('suggest'),
    ('sup'),
    ('sure'),
    ('t'),
    ('take'),
    ('taken'),
    ('taking'),
    ('tell'),
    ('tends'),
    ('th'),
    ('than'),
    ('thank'),
    ('thanks'),
    ('thanx'),
    ('that'),
    ('that''ll'),
    ('thats'),
    ('that''ve'),
    ('the'),
    ('their'),
    ('theirs'),
    ('them'),
    ('themselves'),
    ('then'),
    ('thence'),
    ('there'),
    ('thereafter'),
    ('thereby'),
    ('thered'),
    ('therefore'),
    ('therein'),
    ('there''ll'),
    ('thereof'),
    ('therere'),
    ('theres'),
    ('thereto'),
    ('thereupon'),
    ('there''ve'),
    ('these'),
    ('they'),
    ('theyd'),
    ('they''ll'),
    ('theyre'),
    ('they''ve'),
    ('think'),
    ('this'),
    ('those'),
    ('thou'),
    ('though'),
    ('thoughh'),
    ('thousand'),
    ('throug'),
    ('through'),
    ('throughout'),
    ('thru'),
    ('thus'),
    ('til'),
    ('tip'),
    ('to'),
    ('together'),
    ('too'),
    ('took'),
    ('toward'),
    ('towards'),
    ('tried'),
    ('tries'),
    ('truly'),
    ('try'),
    ('trying'),
    ('ts'),
    ('twice'),
    ('two'),
    ('u'),
    ('un'),
    ('under'),
    ('unfortunately'),
    ('unless'),
    ('unlike'),
    ('unlikely'),
    ('until'),
    ('unto'),
    ('up'),
    ('upon'),
    ('ups'),
    ('us'),
    ('use'),
    ('used'),
    ('useful'),
    ('usefully'),
    ('usefulness'),
    ('uses'),
    ('using'),
    ('usually'),
    ('v'),
    ('value'),
    ('various'),
    ('''ve'),
    ('very'),
    ('via'),
    ('viz'),
    ('vol'),
    ('vols'),
    ('vs'),
    ('w'),
    ('want'),
    ('wants'),
    ('was'),
    ('wasnt'),
    ('way'),
    ('we'),
    ('wed'),
    ('welcome'),
    ('we''ll'),
    ('went'),
    ('were'),
    ('werent'),
    ('we''ve'),
    ('what'),
    ('whatever'),
    ('what''ll'),
    ('whats'),
    ('when'),
    ('whence'),
    ('whenever'),
    ('where'),
    ('whereafter'),
    ('whereas'),
    ('whereby'),
    ('wherein'),
    ('wheres'),
    ('whereupon'),
    ('wherever'),
    ('whether'),
    ('which'),
    ('while'),
    ('whim'),
    ('whither'),
    ('who'),
    ('whod'),
    ('whoever'),
    ('whole'),
    ('who''ll'),
    ('whom'),
    ('whomever'),
    ('whos'),
    ('whose'),
    ('why'),
    ('widely'),
    ('willing'),
    ('wish'),
    ('with'),
    ('within'),
    ('without'),
    ('wont'),
    ('words'),
    ('world'),
    ('would'),
    ('wouldnt'),
    ('www'),
    ('x'),
    ('y'),
    ('yes'),
    ('yet'),
    ('you'),
    ('youd'),
    ('you''ll'),
    ('your'),
    ('youre'),
    ('yours'),
    ('yourself'),
    ('yourselves'),
    ('you''ve'),
    ('z'),
    ('zero')
ON CONFLICT DO NOTHING;

View File

@ -12,26 +12,30 @@ import (
"github.com/jackc/pgx/v4"
)
// NOTE(review): trendsText looks like the earlier name for the cached payload
// that is now kept in metricsText — confirm whether it is still needed.
var trendsText string
// metricsText caches the JSON text of the most recent metrics run; it is
// written by runMetrics and sent to clients by getTrending.
// NOTE(review): main refreshes it from a background goroutine while HTTP
// handlers read it, with no synchronization visible here — confirm this is
// acceptable.
var metricsText string
func enableCors(w *http.ResponseWriter) {
(*w).Header().Set("Access-Control-Allow-Origin", "*")
}
func runMetrics() {
hashtagtotal := runTrendingMetrics()
hashtagtotal := runTrendingHashtags()
wordstotal := runTrendingWords()
totalJson := make(map[string]interface{})
totalJson["hashtags"] = hashtagtotal
totalJson["words"] = wordstotal
totalJson["datetime"] = time.Now().UTC()
data, err := json.Marshal(totalJson)
if err != nil {
log.Fatalf("error marshaling combined activity: %v\n", err)
}
trendsText = string(data)
metricsText = string(data)
}
func runTrendingMetrics() map[string]interface{} {
func runTrendingHashtags() map[string]interface{} {
sql := `SELECT UNNEST(activities.hashtags) as hashtags, count(actors.id)
from activities
LEFT JOIN actors ON activities.document->>'attributedTo'=actors.document->>'id'
@ -65,16 +69,70 @@ GROUP BY hashtags ORDER BY count DESC LIMIT 20;`
hashtagtotal := make(map[string]interface{});
hashtagtotal["count"] = hashcount
hashtagtotal["datetime"] = time.Now().UTC()
hashtagtotal["items"] = hashtagitems
return hashtagtotal
}
// runTrendingWords returns the two-word phrases that are trending in recent
// activities.
//
// The query works in two steps:
//  1. popular_words: the 100 words occurring in the most documents among
//     activities identified in the last 60 minutes whose actor has bot=false,
//     keeping only words longer than three characters that are not listed in
//     the stopwords table.
//  2. Every ordered pair of those words is matched (case-insensitively)
//     against the normalized text of activities from the last 60 minutes as
//     "<first> <second>"; pairs found more than once are returned, most
//     frequent first, limited to 20.
//
// The returned map holds "count" (number of phrases) and "items" (a slice of
// maps with the keys "trending" and "count").
//
// Panics if the query fails, a row cannot be scanned, or iteration ends with
// an error.
func runTrendingWords() map[string]interface{} {
	// NOT EXISTS is used instead of NOT ... IN so that a NULL in
	// stopwords.word cannot turn the filter into "reject everything".
	query := `WITH popular_words AS (
select stat.word FROM ts_stat(
'
SELECT to_tsvector(''simple'', normalized) FROM activities
LEFT JOIN actors ON activities.document->>''attributedTo''=actors.document->>''id''
WHERE activities.identifiedat > current_timestamp - interval ''60 minutes''
AND actors.bot=false
'
) AS stat
WHERE length(stat.word) > 3
AND NOT EXISTS (SELECT 1 FROM stopwords AS sw WHERE sw.word = stat.word)
ORDER BY stat.ndoc DESC LIMIT 100)
SELECT concat_ws(' ', a1.word, a2.word) phrase, count(*)
FROM popular_words AS a1
CROSS JOIN popular_words AS a2
CROSS JOIN activities
WHERE normalized ilike format('%%%s %s%%', a1.word, a2.word)
AND identifiedat > current_timestamp - interval '60 minutes'
GROUP BY 1
HAVING count(*) > 1
ORDER BY 2 DESC LIMIT 20;
`
	rows, err := pool.Query(context.Background(), query)
	if err != nil {
		panic(err)
	}
	// Deferred so the rows are released on every exit path, including a
	// panic raised while scanning.
	defer rows.Close()

	trendingitems := make([]interface{}, 0)
	for rows.Next() {
		var trendingword string
		var count int
		if err := rows.Scan(&trendingword, &count); err != nil {
			panic(err)
		}
		trendingitems = append(trendingitems, map[string]interface{}{
			"trending": trendingword,
			"count":    count,
		})
	}
	// rows.Next also returns false when iteration fails part-way; without
	// this check a failure would be reported as a shorter result.
	if err := rows.Err(); err != nil {
		panic(err)
	}

	return map[string]interface{}{
		"count": len(trendingitems),
		"items": trendingitems,
	}
}
// GET handlers
// getTrending writes the cached metrics payload (metricsText) to the
// response, with the CORS header applied by enableCors.
func getTrending(w http.ResponseWriter, r *http.Request) {
	enableCors(&w)
	// metricsText is JSON text (json.Marshal output, or the "[]" seed set in
	// main), so label it explicitly instead of relying on content sniffing.
	w.Header().Set("Content-Type", "application/json")
	// The payload is written exactly once.
	fmt.Fprint(w, metricsText)
}
func getSearch(w http.ResponseWriter, r *http.Request) {
@ -159,10 +217,12 @@ func getSearch(w http.ResponseWriter, r *http.Request) {
func main() {
pool = getDbPool()
metricsText = "[]"
go func() {
for {
runMetrics()
time.Sleep(30 * time.Second)
time.Sleep(10 * time.Minute)
}
}()