Psql idioms: Difference between revisions

From DISI
Jump to navigation Jump to search
(blah)
(asdf)
 
(24 intermediate revisions by the same user not shown)
Line 1: Line 1:
Postgres idioms to save you time
  select distinct subname.sub_id_fk,clinical2.ct2int.ct2int_id,subname.name,clinical2.ct2int.name from subname, clinical2.ct2int where subname.query@@clinical2.ct2int.terms and subname.sub_id_fk ;
# insert into join table to like CT to substances




begin; insert into synonym select nextval('synonym_synonym_id_seq'::regclass),excipient_id,'COMPOUND'::synonym_kinds,name from excipient where excipient.excipient_id not in (select excipient_id from synonym, excipient where excipient.name=synonym.value and excipient.excipient_id=synonym.excipient_fk);
'''Bold text'''Table locks?
select * from admin_postgresql_locks;
SELECT * from pg_stat_activity where state = 'active';
pg_cancel_backend(pid int) to kill
Table cluster status. (correlation).
select tablename,attname,correlation from pg_stats where tablename like '%sub%'  order by correlation limit 20;


= Curators only=  
= Curators only=  
cleanup note after load:
== update purchasability by catalog =
  update note set sub_id_fk=replacement_fk from retired where sub_id_fk=old_id;
  begin; update substance as s  set purchasability = substance_best_purchasability(s.sub_id) from catalog_item as ci join catalog as c on c.cat_id=ci.cat_id_fk where s.sub_id = ci.sub_id_fk and c.short_name = 'sialbb'; commit;


== load chembl ==
== load chembl ==
Line 14: Line 25:
  select s.sub_id, s.smiles, tanimoto_sml(f.data,    bfp_from_binary_text('\x804300004000200080400000800000008220800040000120008002002011000010040081010000002840400009d040048000020200001a404100000002000040')), short_name from fingerprints.ecfp4_data as f join fingerprints.ecfp4_id on ecfp4_id=f.ecfp4_fk join substance as s on s.ecfp4_fk=ecfp4_id join catalog_item on sub_id_fk=s.sub_id join catalog on cat_id_fk=cat_id where f.ecfp4_fk < 1000000 and purchasable > 30 order by bfp_from_binary_text('\x804300004000200080400000800000008220800040000120008002002011000010040081010000002840400009d040048000020200001a404100000002000040') <%> f.data limit 5;
  select s.sub_id, s.smiles, tanimoto_sml(f.data,    bfp_from_binary_text('\x804300004000200080400000800000008220800040000120008002002011000010040081010000002840400009d040048000020200001a404100000002000040')), short_name from fingerprints.ecfp4_data as f join fingerprints.ecfp4_id on ecfp4_id=f.ecfp4_fk join substance as s on s.ecfp4_fk=ecfp4_id join catalog_item on sub_id_fk=s.sub_id join catalog on cat_id_fk=cat_id where f.ecfp4_fk < 1000000 and purchasable > 30 order by bfp_from_binary_text('\x804300004000200080400000800000008220800040000120008002002011000010040081010000002840400009d040048000020200001a404100000002000040') <%> f.data limit 5;


Aggregators:
update substance as n set features=uniq(sort(n.features + intset(220))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.cat_id=1 and o.sub_id=n.sub_id;


select max(ecfp4_fk) from fingerprints.ecfp4_data
islikea metabolite (still to be implemented)
 
  SELECT substance.* FROM substance JOIN catalog_item ON sub_id=sub_id_fk JOIN catalog ON cat_id_fk=cat_id where short_name='sial' AND sub_id IN (SELECT s2.sub_id FROM substance AS s1 JOIN fingerprints.ecfp4_data AS d1 ON s1.ecfp4_fk=d1.ecfp4_fk JOIN fingerprints.ecfp4_data as d2 ON d1.data % d2.data JOIN substance AS s2 ON d2.ecfp4_fk=s2.ecfp4_fk WHERE s1.features @> '{201}' AND d1.ecfp4_fk < 500000) LIMIT 50;
= Sysadmins only =
  SELECT * from pg_stat_activity ;
SELECT * from pg_stat_activity where state = 'active';
pg_cancel_backend(pid int) to kill


select count(*) from fingerprints.ecfp4.data;
how to fix a text field
update pattern set name = lower(replace(replace(replace(description,'(','_'),')','_'),' ','-'));


select max(ecfp4_id) from fingerprints.ecfp4.data
SMARTS
select sub_id,smiles from substance where smiles  @> '[#15]~[#15,#7,#16]'::qmol";


\dt fingerprints.ecfp4.data
FREE
alter user zincfree set search_path to  "$user",public
undo:
alter user zincfree set search_path to  "$user",free, public


== create new condition sets ==
select * from clinical1.ct2condition where to_tsvector('english',name) @@ to_tsquery('depression|anxiety|schizophrenia');
update clinical1.ct2condition set condclass_fk=1 where condclass_fk=2;
update clinical1.ct2condition set condclass_fk = 2 where condclass_fk = 0 and to_tsvector('english',name) @@ to_tsquery('depression');


= update traits column of substance =
Maximum FP FK referenced from substance
<pre>
select max(ecfp4_fk) from substance;
zinc15=> update substance as n set features=uniq(sort(n.features + intset(202^C) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.np>1 and o.sub_id=n.sub_id;
zinc15=> \timing
Timing is on.
zinc15=> update substance as n set features=uniq(sort(n.features + intset(202))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.np>1 and o.sub_id=n.sub_id;
UPDATE 23515
Time: 284042.158 ms
zinc15=> update substance as n set features=uniq(sort(n.features + intset(203))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.np>0 and o.sub_id=n.sub_id;
^[[AUPDATE 107856
Time: 672273.171 ms
zinc15=> update substance as n set features=uniq(sort(n.features + intset(211))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.drug=10 and o.sub_id=n.sub_id;
UPDATE 3270
Time: 10765.466 ms
zinc15=> update substance as n set features=uniq(sort(n.features + intset(212))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.drug>7 and o.sub_id=n.sub_id;
UPDATE 9241
Time: 397424.032 ms
zinc15=> update substance as n set features=uniq(sort(n.features + intset(213))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.drug>5 and o.sub_id=n.sub_id;
</pre>


And aggregators:
how many protomers
  update substance as n set features=uniq(sort(n.features + intset(220))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.cat_id=1 and o.sub_id=n.sub_id;
  select count(prot_id) from protomer;


Loading Chembl
how many substances
  grant all privileges on table gene2 to test;
  select count(sub_id) from substance;
grant all privileges on table annotation2 to test;
grant all privileges on table note2 to test;


max FP reference in FP master join table:
select max(ecfp4_fk) from fingerprints.ecfp4_data


[[Category:Idioms]]
[[Category:Idioms]]

Latest revision as of 18:00, 24 August 2017

 select distinct subname.sub_id_fk,clinical2.ct2int.ct2int_id,subname.name,clinical2.ct2int.name from subname, clinical2.ct2int where subname.query@@clinical2.ct2int.terms and subname.sub_id_fk ; 
  1. insert into join table to like CT to substances


begin; insert into synonym select nextval('synonym_synonym_id_seq'::regclass),excipient_id,'COMPOUND'::synonym_kinds,name from excipient where excipient.excipient_id not in (select excipient_id from synonym, excipient where excipient.name=synonym.value and excipient.excipient_id=synonym.excipient_fk);


Bold textTable locks?

select * from admin_postgresql_locks;
SELECT * from pg_stat_activity where state = 'active';
pg_cancel_backend(pid int) to kill

Table cluster status. (correlation).

select tablename,attname,correlation from pg_stats where tablename like '%sub%'  order by correlation limit 20;

Curators only

= update purchasability by catalog

begin; update substance as s  set purchasability = substance_best_purchasability(s.sub_id) from catalog_item as ci join catalog as c on c.cat_id=ci.cat_id_fk where s.sub_id = ci.sub_id_fk and c.short_name = 'sialbb'; commit;

load chembl

  • 1. grant all on database stage to test;
  • 2. sed 's/SET search_path = public,/SET search_path = chembl20,/' chembl_20.pgdump.sql >> install2

Closest purchasable metabolite

select s.sub_id, s.smiles, tanimoto_sml(f.data,    bfp_from_binary_text('\x804300004000200080400000800000008220800040000120008002002011000010040081010000002840400009d040048000020200001a404100000002000040')), short_name from fingerprints.ecfp4_data as f join fingerprints.ecfp4_id on ecfp4_id=f.ecfp4_fk join substance as s on s.ecfp4_fk=ecfp4_id join catalog_item on sub_id_fk=s.sub_id join catalog on cat_id_fk=cat_id where f.ecfp4_fk < 1000000 and purchasable > 30 order by bfp_from_binary_text('\x804300004000200080400000800000008220800040000120008002002011000010040081010000002840400009d040048000020200001a404100000002000040') <%> f.data limit 5;

Aggregators:

update substance as n set features=uniq(sort(n.features + intset(220))) from substance as o join catalog_item on o.sub_id=sub_id_fk join catalog on cat_id_fk=cat_id where catalog.cat_id=1 and o.sub_id=n.sub_id;

islikea metabolite (still to be implemented)

SELECT substance.* FROM substance JOIN catalog_item ON sub_id=sub_id_fk JOIN catalog ON cat_id_fk=cat_id where short_name='sial' AND sub_id IN (SELECT s2.sub_id FROM substance AS s1 JOIN fingerprints.ecfp4_data AS d1 ON s1.ecfp4_fk=d1.ecfp4_fk JOIN fingerprints.ecfp4_data as d2 ON d1.data % d2.data JOIN substance AS s2 ON d2.ecfp4_fk=s2.ecfp4_fk WHERE s1.features @> '{201}' AND d1.ecfp4_fk < 500000) LIMIT 50;

how to fix a text field

update pattern set name = lower(replace(replace(replace(description,'(','_'),')','_'),' ','-'));

SMARTS

select sub_id,smiles from substance where smiles  @> '[#15]~[#15,#7,#16]'::qmol";

FREE

alter user zincfree set search_path to  "$user",public

undo:

alter user zincfree set search_path to  "$user",free, public

create new condition sets

select * from clinical1.ct2condition where to_tsvector('english',name) @@ to_tsquery('depression|anxiety|schizophrenia');
update clinical1.ct2condition set condclass_fk=1 where condclass_fk=2;
update clinical1.ct2condition set condclass_fk = 2 where condclass_fk = 0 and to_tsvector('english',name) @@ to_tsquery('depression');

Maximum FP FK referenced from substance

select max(ecfp4_fk) from substance;

how many protomers

select count(prot_id) from protomer;

how many substances

select count(sub_id) from substance;

max FP reference in FP master join table:

select max(ecfp4_fk) from fingerprints.ecfp4_data