diff --git a/fedilogue/tables.sql b/fedilogue/tables.sql
index 2399ad5..568ea2f 100644
--- a/fedilogue/tables.sql
+++ b/fedilogue/tables.sql
@@ -1,34 +1,58 @@
 CREATE TABLE IF NOT EXISTS actors (
     id SERIAL PRIMARY KEY,
     document JSONB,
-        identifiedat TIMESTAMP with time zone DEFAULT now(),
-        instance VARCHAR(1000) NOT NULL,
-        bot BOOL DEFAULT FALSE
+    identifiedat TIMESTAMP with time zone DEFAULT now(),
+    instance VARCHAR(1000) NOT NULL,
+    bot BOOLEAN DEFAULT FALSE
 );
 
 CREATE TABLE IF NOT EXISTS activities (
     id SERIAL PRIMARY KEY,
     document JSONB,
-        normalized TEXT,
-        identifiedat TIMESTAMP with time zone DEFAULT now(),
-        instance VARCHAR(1000) NOT NULL,
+    normalized TEXT,
+    identifiedat TIMESTAMP with time zone DEFAULT now(),
+    instance VARCHAR(1000) NOT NULL,
     hashtags VARCHAR(140)[]
 );
 
-
 CREATE TABLE IF NOT EXISTS instances (
-        endpoint VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
-        autostart BOOLEAN,
-        state VARCHAR(16),
-        username VARCHAR(32),
-        password VARCHAR(32),
-        software VARCHAR(50)
+    endpoint VARCHAR(2083) NOT NULL PRIMARY KEY UNIQUE,
+    state VARCHAR(16),
+    username VARCHAR(32),
+    password VARCHAR(32),
+    software VARCHAR(50),
+    banned BOOLEAN DEFAULT FALSE,
+    alwaysbot BOOLEAN DEFAULT FALSE
 );
 
-ALTER TABLE activities
-        ADD normalized_tsvector tsvector
-        GENERATED ALWAYS AS (to_tsvector('english', normalized)) STORED;
+-- Seed rows. CREATE TABLE IF NOT EXISTS above makes this script re-runnable, so
+-- each seed INSERT skips endpoints that are already present instead of failing
+-- on the instances primary key.
+-- Autostart mastodon.social
+INSERT INTO instances (endpoint) VALUES ('mastodon.social')
+    ON CONFLICT (endpoint) DO NOTHING;
+-- Banned Instances
+INSERT INTO instances (endpoint, banned) VALUES
+    ('switter.at', true),
+    ('xxxtumblr.org', true),
+    ('sinblr.com', true),
+    ('twitiverse.com', true),
+    ('my.dirtyhobby.xyz', true),
+    ('bae.st', true)
+ON CONFLICT (endpoint) DO NOTHING;
+-- Alwaysbot instances
+INSERT INTO instances (endpoint, alwaysbot) VALUES
+    ('mstdn.foxfam.club', true),
+    ('botsin.space', true),
+    ('newsbots.eu', true)
+ON CONFLICT (endpoint) DO NOTHING;
+
+-- Stored generated column holding to_tsvector('english', normalized).
+-- IF NOT EXISTS keeps a re-run from failing once the column has been added.
+ALTER TABLE activities
+    ADD COLUMN IF NOT EXISTS normalized_tsvector tsvector
+    GENERATED ALWAYS AS (to_tsvector('english', normalized)) STORED;
 
 CREATE UNIQUE INDEX IF NOT EXISTS actors_uri_idx ON actors ( (document->>'id') );
 CREATE UNIQUE INDEX IF NOT EXISTS activities_uri_idx ON activities ( (document->>'id') );
 
@@ -42,3 +66,682 @@ CREATE INDEX IF NOT EXISTS normalized_idx ON activities USING gin(normalized_tsv
 CREATE INDEX IF NOT EXISTS actors_id_idx ON actors (id);
 CREATE INDEX IF NOT EXISTS activities_id_idx ON activities (id);
 
+-- Stop-word list, one word per row. The primary key rejects duplicate words.
+-- NOTE(review): IF NOT EXISTS leaves a stopwords table from an earlier run
+-- untouched, so such a table would still lack the key.
+CREATE TABLE IF NOT EXISTS stopwords (
+    word VARCHAR(20) PRIMARY KEY
+);
+
+-- Without a conflict target, ON CONFLICT DO NOTHING skips any row that would
+-- violate a unique constraint, so re-running this seed does not raise.
+INSERT INTO stopwords (word)
+VALUES
+    ('a'),
+    ('able'),
+    ('about'),
+    ('above'),
+    ('abst'),
+    ('accordance'),
+    ('according'),
+    ('accordingly'),
+    ('across'),
+    ('act'),
+    ('actually'),
+    ('added'),
+    ('adj'),
+    ('affected'),
+    ('affecting'),
+    ('affects'),
+    ('after'),
+    ('afterwards'),
+    ('again'),
+    ('against'),
+    ('ah'),
+    ('all'),
+    ('almost'),
+    ('alone'),
+    ('along'),
+    ('already'),
+    ('also'),
+    ('although'),
+    ('always'),
+    ('am'),
+    ('among'),
+    ('amongst'),
+    ('an'),
+    ('and'),
+    ('announce'),
+    ('another'),
+    ('any'),
+    ('anybody'),
+    ('anyhow'),
+    ('anymore'),
+    ('anyone'),
+    ('anything'),
+    ('anyway'),
+    ('anyways'),
+    ('anywhere'),
+    ('apparently'),
+    ('approximately'),
+    ('are'),
+    ('aren'),
+    ('arent'),
+    ('arise'),
+    ('around'),
+    ('as'),
+    ('aside'),
+    ('ask'),
+    ('asking'),
+    ('at'),
+    ('auth'),
+    ('available'),
+    ('away'),
+    ('awfully'),
+    ('b'),
+    ('back'),
+    ('be'),
+    ('became'),
+    ('because'),
+    ('become'),
+    ('becomes'),
+    ('becoming'),
+    ('been'),
+    ('before'),
+    ('beforehand'),
+    ('begin'),
+    ('beginning'),
+    ('beginnings'),
+    ('begins'),
+    ('behind'),
+    ('being'),
+    ('believe'),
+    ('below'),
+    ('beside'),
+    ('besides'),
+    ('between'),
+    ('beyond'),
+    ('biol'),
+    ('both'),
+    ('brief'),
+    ('briefly'),
+    ('but'),
+    ('by'),
+    ('c'),
+    ('ca'),
+    ('came'),
+    ('can'),
+    ('cannot'),
+    ('can''t'),
+    ('cause'),
+    ('causes'),
+    ('certain'),
+    ('certainly'),
+    ('co'),
+    ('com'),
+    ('come'),
+    ('comes'),
+    ('contain'),
+    ('containing'),
+    ('contains'),
+    ('could'),
+    ('couldnt'),
+    ('d'),
+    ('date'),
+    ('did'),
+    ('didn''t'),
+    ('different'),
+    ('do'),
+    ('does'),
+    ('doesn''t'),
+    ('doing'),
+    ('done'),
+    ('don''t'),
+    ('down'),
+    ('downwards'),
+    ('due'),
+    ('during'),
+    ('e'),
+    ('each'),
+    ('ed'),
+    ('edu'),
+    ('effect'),
+    ('eg'),
+    ('eight'),
+    ('eighty'),
+    ('either'),
+    ('else'),
+    ('elsewhere'),
+    ('end'),
+    ('ending'),
+    ('enough'),
+    ('especially'),
+    ('et'),
+    ('et-al'),
+    ('etc'),
+    ('even'),
+    ('ever'),
+    ('every'),
+    ('everybody'),
+    ('everyone'),
+    ('everything'),
+    ('everywhere'),
+    ('ex'),
+    ('except'),
+    ('f'),
+    ('far'),
+    ('few'),
+    ('ff'),
+    ('fifth'),
+    ('first'),
+    ('five'),
+    ('fix'),
+    ('followed'),
+    ('following'),
+    ('follows'),
+    ('for'),
+    ('former'),
+    ('formerly'),
+    ('forth'),
+    ('found'),
+    ('four'),
+    ('from'),
+    ('further'),
+    ('furthermore'),
+    ('g'),
+    ('gave'),
+    ('get'),
+    ('gets'),
+    ('getting'),
+    ('give'),
+    ('given'),
+    ('gives'),
+    ('giving'),
+    ('go'),
+    ('goes'),
+    ('gone'),
+    ('got'),
+    ('gotten'),
+    ('h'),
+    ('had'),
+    ('happens'),
+    ('hardly'),
+    ('has'),
+    ('hasn''t'),
+    ('have'),
+    ('haven''t'),
+    ('having'),
+    ('he'),
+    ('hed'),
+    ('hence'),
+    ('her'),
+    ('here'),
+    ('hereafter'),
+    ('hereby'),
+    ('herein'),
+    ('heres'),
+    ('hereupon'),
+    ('hers'),
+    ('herself'),
+    ('hes'),
+    ('hi'),
+    ('hid'),
+    ('him'),
+    ('himself'),
+    ('his'),
+    ('hither'),
+    ('home'),
+    ('how'),
+    ('howbeit'),
+    ('however'),
+    ('hundred'),
+    ('i'),
+    ('id'),
+    ('ie'),
+    ('if'),
+    ('i''ll'),
+    ('im'),
+    ('immediate'),
+    ('immediately'),
+    ('importance'),
+    ('important'),
+    ('in'),
+    ('inc'),
+    ('indeed'),
+    ('index'),
+    ('information'),
+    ('instead'),
+    ('into'),
+    ('invention'),
+    ('inward'),
+    ('is'),
+    ('isn''t'),
+    ('it'),
+    ('itd'),
+    ('it''ll'),
+    ('its'),
+    ('itself'),
+    ('i''ve'),
+    ('j'),
+    ('just'),
+    ('k'),
+    ('keep'),
+    ('keeps'),
+    ('kept'),
+    ('kg'),
+    ('km'),
+    ('know'),
+    ('known'),
+    ('knows'),
+    ('l'),
+    ('largely'),
+    ('last'),
+    ('lately'),
+    ('later'),
+    ('latter'),
+    ('latterly'),
+    ('least'),
+    ('less'),
+    ('lest'),
+    ('let'),
+    ('lets'),
+    ('like'),
+    ('liked'),
+    ('likely'),
+    ('line'),
+    ('little'),
+    ('''ll'),
+    ('look'),
+    ('looking'),
+    ('looks'),
+    ('ltd'),
+    ('m'),
+    ('made'),
+    ('mainly'),
+    ('make'),
+    ('makes'),
+    ('many'),
+    ('may'),
+    ('maybe'),
+    ('me'),
+    ('mean'),
+    ('means'),
+    ('meantime'),
+    ('meanwhile'),
+    ('merely'),
+    ('mg'),
+    ('might'),
+    ('million'),
+    ('miss'),
+    ('ml'),
+    ('more'),
+    ('moreover'),
+    ('most'),
+    ('mostly'),
+    ('mr'),
+    ('mrs'),
+    ('much'),
+    ('mug'),
+    ('must'),
+    ('my'),
+    ('myself'),
+    ('n'),
+    ('na'),
+    ('name'),
+    ('namely'),
+    ('nay'),
+    ('nd'),
+    ('near'),
+    ('nearly'),
+    ('necessarily'),
+    ('necessary'),
+    ('need'),
+    ('needs'),
+    ('neither'),
+    ('never'),
+    ('nevertheless'),
+    ('new'),
+    ('next'),
+    ('nine'),
+    ('ninety'),
+    ('no'),
+    ('nobody'),
+    ('non'),
+    ('none'),
+    ('nonetheless'),
+    ('noone'),
+    ('nor'),
+    ('normally'),
+    ('nos'),
+    ('not'),
+    ('noted'),
+    ('nothing'),
+    ('now'),
+    ('nowhere'),
+    ('o'),
+    ('obtain'),
+    ('obtained'),
+    ('obviously'),
+    ('of'),
+    ('off'),
+    ('often'),
+    ('oh'),
+    ('ok'),
+    ('okay'),
+    ('old'),
+    ('omitted'),
+    ('on'),
+    ('once'),
+    ('one'),
+    ('ones'),
+    ('only'),
+    ('onto'),
+    ('or'),
+    ('ord'),
+    ('other'),
+    ('others'),
+    ('otherwise'),
+    ('ought'),
+    ('our'),
+    ('ours'),
+    ('ourselves'),
+    ('out'),
+    ('outside'),
+    ('over'),
+    ('overall'),
+    ('owing'),
+    ('own'),
+    ('p'),
+    ('page'),
+    ('pages'),
+    ('part'),
+    ('particular'),
+    ('particularly'),
+    ('past'),
+    ('per'),
+    ('perhaps'),
+    ('placed'),
+    ('please'),
+    ('plus'),
+    ('poorly'),
+    ('possible'),
+    ('possibly'),
+    ('potentially'),
+    ('pp'),
+    ('predominantly'),
+    ('present'),
+    ('previously'),
+    ('primarily'),
+    ('probably'),
+    ('promptly'),
+    ('proud'),
+    ('provides'),
+    ('put'),
+    ('q'),
+    ('que'),
+    ('quickly'),
+    ('quite'),
+    ('qv'),
+    ('r'),
+    ('ran'),
+    ('rather'),
+    ('rd'),
+    ('re'),
+    ('readily'),
+    ('really'),
+    ('recent'),
+    ('recently'),
+    ('ref'),
+    ('refs'),
+    ('regarding'),
+    ('regardless'),
+    ('regards'),
+    ('related'),
+    ('relatively'),
+    ('research'),
+    ('respectively'),
+    ('resulted'),
+    ('resulting'),
+    ('results'),
+    ('right'),
+    ('run'),
+    ('s'),
+    ('said'),
+    ('same'),
+    ('saw'),
+    ('say'),
+    ('saying'),
+    ('says'),
+    ('sec'),
+    ('section'),
+    ('see'),
+    ('seeing'),
+    ('seem'),
+    ('seemed'),
+    ('seeming'),
+    ('seems'),
+    ('seen'),
+    ('self'),
+    ('selves'),
+    ('sent'),
+    ('seven'),
+    ('several'),
+    ('shall'),
+    ('she'),
+    ('shed'),
+    ('she''ll'),
+    ('shes'),
+    ('should'),
+    ('shouldn''t'),
+    ('show'),
+    ('showed'),
+    ('shown'),
+    ('showns'),
+    ('shows'),
+    ('significant'),
+    ('significantly'),
+    ('similar'),
+    ('similarly'),
+    ('since'),
+    ('six'),
+    ('slightly'),
+    ('so'),
+    ('some'),
+    ('somebody'),
+    ('somehow'),
+    ('someone'),
+    ('somethan'),
+    ('something'),
+    ('sometime'),
+    ('sometimes'),
+    ('somewhat'),
+    ('somewhere'),
+    ('soon'),
+    ('sorry'),
+    ('specifically'),
+    ('specified'),
+    ('specify'),
+    ('specifying'),
+    ('still'),
+    ('stop'),
+    ('strongly'),
+    ('sub'),
+    ('substantially'),
+    ('successfully'),
+    ('such'),
+    ('sufficiently'),
+    ('suggest'),
+    ('sup'),
+    ('sure'),
+    ('t'),
+    ('take'),
+    ('taken'),
+    ('taking'),
+    ('tell'),
+    ('tends'),
+    ('th'),
+    ('than'),
+    ('thank'),
+    ('thanks'),
+    ('thanx'),
+    ('that'),
+    ('that''ll'),
+    ('thats'),
+    ('that''ve'),
+    ('the'),
+    ('their'),
+    ('theirs'),
+    ('them'),
+    ('themselves'),
+    ('then'),
+    ('thence'),
+    ('there'),
+    ('thereafter'),
+    ('thereby'),
+    ('thered'),
+    ('therefore'),
+    ('therein'),
+    ('there''ll'),
+    ('thereof'),
+    ('therere'),
+    ('theres'),
+    ('thereto'),
+    ('thereupon'),
+    ('there''ve'),
+    ('these'),
+    ('they'),
+    ('theyd'),
+    ('they''ll'),
+    ('theyre'),
+    ('they''ve'),
+    ('think'),
+    ('this'),
+    ('those'),
+    ('thou'),
+    ('though'),
+    ('thoughh'),
+    ('thousand'),
+    ('throug'),
+    ('through'),
+    ('throughout'),
+    ('thru'),
+    ('thus'),
+    ('til'),
+    ('tip'),
+    ('to'),
+    ('together'),
+    ('too'),
+    ('took'),
+    ('toward'),
+    ('towards'),
+    ('tried'),
+    ('tries'),
+    ('truly'),
+    ('try'),
+    ('trying'),
+    ('ts'),
+    ('twice'),
+    ('two'),
+    ('u'),
+    ('un'),
+    ('under'),
+    ('unfortunately'),
+    ('unless'),
+    ('unlike'),
+    ('unlikely'),
+    ('until'),
+    ('unto'),
+    ('up'),
+    ('upon'),
+    ('ups'),
+    ('us'),
+    ('use'),
+    ('used'),
+    ('useful'),
+    ('usefully'),
+    ('usefulness'),
+    ('uses'),
+    ('using'),
+    ('usually'),
+    ('v'),
+    ('value'),
+    ('various'),
+    ('''ve'),
+    ('very'),
+    ('via'),
+    ('viz'),
+    ('vol'),
+    ('vols'),
+    ('vs'),
+    ('w'),
+    ('want'),
+    ('wants'),
+    ('was'),
+    ('wasnt'),
+    ('way'),
+    ('we'),
+    ('wed'),
+    ('welcome'),
+    ('we''ll'),
+    ('went'),
+    ('were'),
+    ('werent'),
+    ('we''ve'),
+    ('what'),
+    ('whatever'),
+    ('what''ll'),
+    ('whats'),
+    ('when'),
+    ('whence'),
+    ('whenever'),
+    ('where'),
+    ('whereafter'),
+    ('whereas'),
+    ('whereby'),
+    ('wherein'),
+    ('wheres'),
+    ('whereupon'),
+    ('wherever'),
+    ('whether'),
+    ('which'),
+    ('while'),
+    ('whim'),
+    ('whither'),
+    ('who'),
+    ('whod'),
+    ('whoever'),
+    ('whole'),
+    ('who''ll'),
+    ('whom'),
+    ('whomever'),
+    ('whos'),
+    ('whose'),
+    ('why'),
+    ('widely'),
+    ('willing'),
+    ('wish'),
+    ('with'),
+    ('within'),
+    ('without'),
+    ('wont'),
+    ('words'),
+    ('world'),
+    ('would'),
+    ('wouldnt'),
+    ('www'),
+    ('x'),
+    ('y'),
+    ('yes'),
+    ('yet'),
+    ('you'),
+    ('youd'),
+    ('you''ll'),
+    ('your'),
+    ('youre'),
+    ('yours'),
+    ('yourself'),
+    ('yourselves'),
+    ('you''ve'),
+    ('z'),
+    ('zero')
+ON CONFLICT DO NOTHING;