Psql idioms

From DISI
Revision as of 23:00, 14 August 2015 by Frodo (talk | contribs) (asfd)
Jump to navigation Jump to search

Postgres idioms to save you time

update purchasability by catalog

begin;
update substance as s  set purchasability = substance_best_purchasability(s.sub_id) from catalog_item as ci join catalog as c on c.cat_id=ci.cat_id_fk where s.sub_id = ci.sub_id_fk and c.short_name = 'sialbb';
commit;
definition of subtance_best_purchasability: 


Curators only

cleanup note after load:

update note set sub_id_fk=replacement_fk from retired where sub_id_fk=old_id;

Table locks

select * from admin_postgresql_locks;

Table clustering status. (correlation). Very cool. Close to zero on a big table means big payoff for clustering. But be careful about substance, takes 5 days!

select tablename,attname,correlation from pg_stats where tablename like '%sub%'  order by correlation limit 20;

retired cleanup - legacy - fyi only

begin;
create temporary table blah as select min(old_id) as low_fk, replacement_fk from retired where replacement_fk > 100000000 and old_id < 100000000
group by replacement_fk; delete from retired where replacement_fk in (select replacement_fk from blah); select * from substance where sub_id=105622689;
update substance as s1 set sub_id=b.low_fk from substance as s2 join blah as b on 
s2.sub_id = b.replacement_fk where s1.sub_id=s2.sub_id
create temporary table inchioops as select inchikey, count(inchikey) from substance group by inchikey having count(inchikey) > 1;

load chembl

  • 1. grant all on database stage to test;
  • 2. sed 's/SET search_path = public,/SET search_path = chembl20,/' chembl_20.pgdump.sql >> install2

Closest purchasable metabolite

select s.sub_id, s.smiles, tanimoto_sml(f.data,    bfp_from_binary_text('\x804300004000200080400000800000008220800040000120008002002011000010040081010000002840400009d040048000020200001a404100000002000040')), short_name from fingerprints.ecfp4_data as f join fingerprints.ecfp4_id on ecfp4_id=f.ecfp4_fk join substance as s on s.ecfp4_fk=ecfp4_id join catalog_item on sub_id_fk=s.sub_id join catalog on cat_id_fk=cat_id where f.ecfp4_fk < 1000000 and purchasable > 30 order by bfp_from_binary_text('\x804300004000200080400000800000008220800040000120008002002011000010040081010000002840400009d040048000020200001a404100000002000040') <%> f.data limit 5;


select max(ecfp4_fk) from fingerprints.ecfp4_data

Sysadmins only

SELECT * from pg_stat_activity ;
SELECT * from pg_stat_activity where state = 'active';
pg_cancel_backend(pid int) to kill

SELECT pg_cancel_backend(pid) from pg_stat_activity where state = 'active' and query like '%substance.purch%' and usename ='zincread';


select count(*) from fingerprints.ecfp4.data;

select max(ecfp4_id) from fingerprints.ecfp4.data

\dt fingerprints.ecfp4.data

select min(sea_id)-1 from sea2;
alter SEQUENCE sea_sea_id_seq RESTART 1; -- Must be done before alter max value
alter SEQUENCE sea_sea_id_seq MAXVALUE 189749359;
select last_value from sea_sea_id_seq;

update traits column of substance

zinc15=> update substance as n set features=uniq(sort(n.features + intset(202^C) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.np>1 and o.sub_id=n.sub_id;
zinc15=> \timing
Timing is on.
zinc15=> update substance as n set features=uniq(sort(n.features + intset(202))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.np>1 and o.sub_id=n.sub_id;
UPDATE 23515
Time: 284042.158 ms
zinc15=> update substance as n set features=uniq(sort(n.features + intset(203))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.np>0 and o.sub_id=n.sub_id;
^[[AUPDATE 107856
Time: 672273.171 ms
zinc15=> update substance as n set features=uniq(sort(n.features + intset(211))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.drug=10 and o.sub_id=n.sub_id;
UPDATE 3270
Time: 10765.466 ms
zinc15=> update substance as n set features=uniq(sort(n.features + intset(212))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.drug>7 and o.sub_id=n.sub_id;
UPDATE 9241
Time: 397424.032 ms
zinc15=> update substance as n set features=uniq(sort(n.features + intset(213))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.drug>5 and o.sub_id=n.sub_id;

And aggregators:

update substance as n set features=uniq(sort(n.features + intset(220))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.cat_id=1 and o.sub_id=n.sub_id;

Loading Chembl

grant all privileges on table gene2 to test;
grant all privileges on table annotation2 to test; 
grant all privileges on table note2 to test;


create table target_class2 (like target_class including all);  


islikea metabolite

SELECT substance.* FROM substance JOIN catalog_item ON sub_id=sub_id_fk JOIN catalog ON cat_id_fk=cat_id where short_name='sial' AND sub_id IN (SELECT s2.sub_id FROM substance AS s1 JOIN fingerprints.ecfp4_data AS d1 ON s1.ecfp4_fk=d1.ecfp4_fk JOIN fingerprints.ecfp4_data as d2 ON d1.data % d2.data JOIN substance AS s2 ON d2.ecfp4_fk=s2.ecfp4_fk WHERE s1.features @> '{201}' AND d1.ecfp4_fk < 500000) LIMIT 50;

how to fix a text field

update pattern set name = lower(replace(replace(replace(description,'(','_'),')','_'),' ','-'));

find things by smarts directly.

psql -h samekh -U test zinc15 -c "select sub_id,smiles from substance where smiles  @> '[#15]~[#15,#7,#16]'::qmol" >! b

corrected

update pattern as p set n_biogenic = (select count(distinct(sp.sub_id_fk)) from subpat as sp joing substance as s on sp.sub_id_fk = s.sub_id where sp.pattern_fk =p.pattern_id and s.features @> intset(203} ;  same as n_endometab 201

update pattern stats

update pattern as p1  set n_endometab = (select count(distinct(sp.sub_id_fk)) from pattern as p2 join subpat as sp on sp.pattern_fk = p2.pattern_id where p1.pattern_id = p2.pattern_id and sp.sub_id_fk in (select sub_id_fk from catalog_item where cat_id_fk in (select cat_id from catalog where drug=10)));

world:

update pattern as p1  set n_world = (select count(distinct(sp.sub_id_fk)) from pattern as p2 join subpat as sp on sp.pattern_fk = p2.pattern_id where p1.pattern_id = p2.pattern_id and sp.sub_id_fk in (select sub_id_fk from catalog_item where cat_id_fk in (select cat_id from catalog where drug>=8)));

n_inman

update pattern as p1  set n_inman = (select count(distinct(sp.sub_id_fk)) from pattern as p2 join subpat as sp on sp.pattern_fk = p2.pattern_id where p1.pattern_id = p2.pattern_id and sp.sub_id_fk in (select sub_id_fk from catalog_item where cat_id_fk in (select cat_id from catalog where drug>=6)));

n_biogenic

update pattern as p1  set n_biogenic = (select count(distinct(sp.sub_id_fk)) from pattern as p2 join subpat as sp on sp.pattern_fk = p2.pattern_id where p1.pattern_id = p2.pattern_id and sp.sub_id_fk in (select sub_id_fk from catalog_item where cat_id_fk in (select cat_id from catalog where np>0)));

purchasable

update pattern as p1  set n_purchasable = (select count(distinct(sp.sub_id_fk)) from pattern as p2 join subpat as sp on sp.pattern_fk = p2.pattern_id where p1.pattern_id = p2.pattern_id and sp.sub_id_fk in (select sub_id as sub_id_fk from substance where purchasability >=10 and p1.pattern_id between 1 and 10 ));

all

update pattern as p1  set n_total = (select count(distinct(sp.sub_id_fk)) from pattern as p2 join subpat as sp on sp.pattern_fk = p2.pattern_id where p1.pattern_id = p2.pattern_id and p1.pattern_id between 1 and 10);

rings - biogenic - not attempted yet.

update rings as r1  set n_biogenic = (select count(distinct(hr.sub_id_fk)) from ring as r2 join hasring as hr on hr.ring_fk = r2.ring_id where r1.ring_id = r2.ring_id and hr.sub_id_fk in (select sub_id_fk from catalog_item where cat_id_fk in (select cat_id from catalog where np>0)));

ring statistics.

select count(hasring_id), ring_fk from hasring group by ring_fk order by count(hasring_id) desc;

free non free

alter user zincfree set search_path to  "$user",public

undo:

alter user zincfree set search_path to  "$user",free, public
source env.csh
zincserver.restart-backend.sh

create new condition sets

select * from clinical1.ct2condition where to_tsvector('english',name) @@ to_tsquery('depression|anxiety|schizophrenia');
update clinical1.ct2condition set condclass_fk=1 where condclass_fk=2;
update clinical1.ct2condition set condclass_fk = 2 where condclass_fk = 0 and to_tsvector('english',name) @@ to_tsquery('depression');