Clinical Trials Loading: Difference between revisions
Jump to navigation
Jump to search
Enkhjargal (talk | contribs) No edit summary |
Enkhjargal (talk | contribs) No edit summary |
||
Line 7: | Line 7: | ||
zinc/SQL_statement/clinical_trial.sql | zinc/SQL_statement/clinical_trial.sql | ||
Please see below the sql_statements | |||
3. Load raw data to the database tables: | 3. Load raw data to the database tables: | ||
Line 16: | Line 18: | ||
python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_condition /nfs/db/trials/important/browse_conditions.txt | python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_condition /nfs/db/trials/important/browse_conditions.txt | ||
python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_intervention /nfs/db/trials/important/interventions.txt | python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_intervention /nfs/db/trials/important/interventions.txt | ||
------------------------ zinc/SQL_statement/clinical_trial.sql ---------------------------------------- | |||
1. Creating the scheme and give the grants | |||
BEGIN; | |||
create schema clinical2; | |||
GRANT USAGE on schema clinical2 to root; | |||
GRANT USAGE on schema clinical2 to zincread; | |||
GRANT USAGE on schema clinical2 to zincfree; | |||
GRANT USAGE on schema clinical2 to test; | |||
GRANT USAGE on schema clinical2 to admin; | |||
GRANT USAGE on schema clinical2 to adminprivate; | |||
2. Creating the tables and give grants | |||
BEGIN; | |||
create table clinical2.ctstatus (like clinical1.ctstatus including defaults including constraints including indexes); | |||
REVOKE ALL ON TABLE clinical2.ctstatus FROM PUBLIC; | |||
GRANT ALL ON TABLE clinical2.ctstatus TO root; | |||
GRANT SELECT ON TABLE clinical2.ctstatus TO zincread; | |||
GRANT SELECT ON TABLE clinical2.ctstatus TO zincfree; | |||
GRANT ALL ON TABLE clinical2.ctstatus TO test; | |||
GRANT ALL ON TABLE clinical2.ctstatus TO adminprivate; | |||
GRANT ALL ON TABLE clinical2.ctstatus TO admin; | |||
create table clinical2.ctphase (like clinical1.ctphase including defaults including constraints including indexes); | |||
REVOKE ALL ON TABLE clinical2.ctphase FROM PUBLIC; | |||
GRANT ALL ON TABLE clinical2.ctphase TO root; | |||
GRANT SELECT ON TABLE clinical2.ctphase TO zincread; | |||
GRANT SELECT ON TABLE clinical2.ctphase TO zincfree; | |||
GRANT ALL ON TABLE clinical2.ctphase TO test; | |||
GRANT ALL ON TABLE clinical2.ctphase TO adminprivate; | |||
GRANT ALL ON TABLE clinical2.ctphase TO admin; | |||
create table clinical2.ct2 (like clinical1.ct2 including defaults including constraints including indexes); | |||
alter table clinical2.ct2 add constraint ct2_ctphase_fk_fkey foreign key (ctphase_fk) references clinical2.ctphase(ctphase_id); | |||
alter table clinical2.ct2 add constraint ct2_ctstatus_fk_fkey foreign key (ctstatus_fk) references clinical2.ctstatus(ctstatus_id); | |||
alter table clinical2.ct2 add column changed_date date; | |||
REVOKE ALL ON TABLE clinical2.ct2 FROM PUBLIC; | |||
GRANT ALL ON TABLE clinical2.ct2 TO root; | |||
GRANT SELECT ON TABLE clinical2.ct2 TO zincread; | |||
GRANT SELECT ON TABLE clinical2.ct2 TO zincfree; | |||
GRANT ALL ON TABLE clinical2.ct2 TO test; | |||
GRANT ALL ON TABLE clinical2.ct2 TO adminprivate; | |||
GRANT ALL ON TABLE clinical2.ct2 TO admin; | |||
create table clinical2.ct2condclass (like clinical1.ct2condclass including defaults including constraints including indexes); | |||
REVOKE ALL ON TABLE clinical2.ct2condclass FROM PUBLIC; | |||
GRANT ALL ON TABLE clinical2.ct2condclass TO root; | |||
GRANT SELECT ON TABLE clinical2.ct2condclass TO zincread; | |||
GRANT SELECT ON TABLE clinical2.ct2condclass TO zincfree; | |||
GRANT ALL ON TABLE clinical2.ct2condclass TO test; | |||
GRANT ALL ON TABLE clinical2.ct2condclass TO adminprivate; | |||
GRANT ALL ON TABLE clinical2.ct2condclass TO admin; | |||
create table clinical2.ct2condition (like clinical1.ct2condition including defaults including constraints including indexes); | |||
alter table clinical2.ct2condition add constraint ct2condition_condclass_fk_fkey foreign key (condclass_fk) references clinical2.ct2condclass(ct2condclass_id); | |||
REVOKE ALL ON TABLE clinical2.ct2condition FROM PUBLIC; | |||
GRANT ALL ON TABLE clinical2.ct2condition TO root; | |||
GRANT SELECT ON TABLE clinical2.ct2condition TO zincread; | |||
GRANT SELECT ON TABLE clinical2.ct2condition TO zincfree; | |||
GRANT ALL ON TABLE clinical2.ct2condition TO test; | |||
GRANT ALL ON TABLE clinical2.ct2condition TO adminprivate; | |||
GRANT ALL ON TABLE clinical2.ct2condition TO admin; | |||
create table clinical2.ct2tocond (like clinical1.ct2tocond including defaults including constraints including indexes); | |||
alter table clinical2.ct2tocond add constraint ct2tocond_ct2condition_fk_fkey foreign key (ct2condition_fk) references clinical2.ct2condition(ct2condition_id); | |||
alter table clinical2.ct2tocond add constraint ct2tocond_ct2_fk_fkey foreign key (ct2_fk) references clinical2.ct2(ct2_id); | |||
REVOKE ALL ON TABLE clinical2.ct2tocond FROM PUBLIC; | |||
GRANT ALL ON TABLE clinical2.ct2tocond TO root; | |||
GRANT SELECT ON TABLE clinical2.ct2tocond TO zincread; | |||
GRANT SELECT ON TABLE clinical2.ct2tocond TO zincfree; | |||
GRANT ALL ON TABLE clinical2.ct2tocond TO test; | |||
GRANT ALL ON TABLE clinical2.ct2tocond TO adminprivate; | |||
GRANT ALL ON TABLE clinical2.ct2tocond TO admin; | |||
create table clinical2.ct2int (like clinical1.ct2int including defaults including constraints including indexes); | |||
alter tabel clinical2.ct2int drop column ct2_fk; | |||
REVOKE ALL ON TABLE clinical2.ct2int FROM PUBLIC; | |||
GRANT ALL ON TABLE clinical2.ct2int TO root; | |||
GRANT SELECT ON TABLE clinical2.ct2int TO zincread; | |||
GRANT SELECT ON TABLE clinical2.ct2int TO zincfree; | |||
GRANT ALL ON TABLE clinical2.ct2int TO test; | |||
GRANT ALL ON TABLE clinical2.ct2int TO adminprivate; | |||
GRANT ALL ON TABLE clinical2.ct2int TO admin; | |||
create table clinical2.ct2subint (like clinical1.ct2subint including defaults including constraints including indexes); | |||
alter table clinical2.ct2subint add constraint ct2subint_ct2int_fk_fkey foreign key (ct2int_fk) references clinical2.ct2int(ct2int_id); | |||
alter table clinical2.ct2subint add constraint ct2subint_sub_id_fk_fkey foreign key (sub_id_fk) references substance(sub_id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED; | |||
REVOKE ALL ON TABLE clinical2.ct2subint FROM PUBLIC; | |||
GRANT ALL ON TABLE clinical2.ct2subint TO root; | |||
GRANT SELECT ON TABLE clinical2.ct2subint TO zincread; | |||
GRANT SELECT ON TABLE clinical2.ct2subint TO zincfree; | |||
GRANT ALL ON TABLE clinical2.ct2subint TO test; | |||
GRANT ALL ON TABLE clinical2.ct2subint TO adminprivate; | |||
GRANT ALL ON TABLE clinical2.ct2subint TO admin; | |||
create table clinical2.ct2toint (ct2toint_id integer primary key serial not null, | |||
ct2_fk integer references clinical2.ct2, | |||
ct2int_fk integer references clinical2.ct2int) | |||
create sequence clinical2.ct2toint_ct2toint_seq; | |||
alter table clinical2.ct2toint alter column ct2toint_id set data type not null default nextval('clinical2.ct2toint_ct2toint_seq'::regclass); | |||
REVOKE ALL ON TABLE clinical2.ct2toint_ct2toint_seq FROM PUBLIC; | |||
GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO root; | |||
GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincread; | |||
GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincfree; | |||
GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO test; | |||
GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO adminprivate; | |||
GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO admin; | |||
REVOKE ALL ON TABLE clinical2.ct2toint FROM PUBLIC; | |||
GRANT ALL ON TABLE clinical2.ct2toint TO root; | |||
GRANT SELECT ON TABLE clinical2.ct2toint TO zincread; | |||
GRANT SELECT ON TABLE clinical2.ct2toint TO zincfree; | |||
GRANT ALL ON TABLE clinical2.ct2toint TO test; | |||
GRANT ALL ON TABLE clinical2.ct2toint TO adminprivate; | |||
GRANT ALL ON TABLE clinical2.ct2toint TO admin; | |||
3. Load data from existing tables | |||
insert into clinical2.ctstatus select * from clinical1.ctstatus; | |||
insert into clinical2.ctphase select * from clinical1.ctphase; | |||
insert into clinical2.ct2condclass select * from clinical1.ct2condclass; | |||
4. Matching the name between subname.name and intervention.name | |||
create temp table subname | |||
as select sub_id_fk as sub_id_fk, who_name as name | |||
from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20') | |||
join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id | |||
join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno | |||
join chembl20.atc_classification as ac on mac.level5=ac.level5; | |||
insert into subname | |||
select cs.sub_id_fk, sy.synonym | |||
from catalog_substance as cs join synonym as s on cs.cat_content_fk = s.cat_content_fk | |||
where not exists (select 1 from subname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym); | |||
alter table subname add column q tsquery; | |||
update table subname as s | |||
set q=plainto_tsquery(t.who_name) | |||
from subname as t | |||
where s.sub_id_fk=t.sub_id_fk and s.who_name=t.who_name; | |||
alter table clinical2.ct2int add column terms tsvector; | |||
update table clinical2.ct2int set term=to_tsvector('english', name) | |||
insert into clinical2.ct2subint (sub_id_fk, ct2int_fk) | |||
select distinct sub.sub_id_fk, int.ct2int_id | |||
from clinical2.ct2int as int join subname as sub on | |||
int.terms@@sub.query; |
Revision as of 20:18, 24 August 2017
1. Relevant Files: /nfs/db/trials/important --> clinical trials raw data /nfs/home/teague/work/Projects/trials/extract.py --> script to clean the raw data http://wiki.docking.org/index.php/Creating_clinical_name_mappings --> how to create name_mappings
2. SQL queries to create a new clinical trial schema and table are located in zinc code:
zinc/SQL_statement/clinical_trial.sql
Please see below the sql_statements
3. Load raw data to the database tables: If you want to delete all the existing data, then use --wipe Otherwise it will load data incrementally to the existing data.
source /nfs/soft/www/apps/zinc15/envs/dev/bin/activate python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_ct /nfs/db/trials/important/studies.txt python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_condition /nfs/db/trials/important/browse_conditions.txt python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_intervention /nfs/db/trials/important/interventions.txt
------------------------ zinc/SQL_statement/clinical_trial.sql ----------------------------------------
1. Creating the scheme and give the grants
BEGIN; create schema clinical2; GRANT USAGE on schema clinical2 to root; GRANT USAGE on schema clinical2 to zincread; GRANT USAGE on schema clinical2 to zincfree; GRANT USAGE on schema clinical2 to test; GRANT USAGE on schema clinical2 to admin; GRANT USAGE on schema clinical2 to adminprivate;
2. Creating the tables and give grants
BEGIN; create table clinical2.ctstatus (like clinical1.ctstatus including defaults including constraints including indexes); REVOKE ALL ON TABLE clinical2.ctstatus FROM PUBLIC; GRANT ALL ON TABLE clinical2.ctstatus TO root; GRANT SELECT ON TABLE clinical2.ctstatus TO zincread; GRANT SELECT ON TABLE clinical2.ctstatus TO zincfree; GRANT ALL ON TABLE clinical2.ctstatus TO test; GRANT ALL ON TABLE clinical2.ctstatus TO adminprivate; GRANT ALL ON TABLE clinical2.ctstatus TO admin;
create table clinical2.ctphase (like clinical1.ctphase including defaults including constraints including indexes); REVOKE ALL ON TABLE clinical2.ctphase FROM PUBLIC; GRANT ALL ON TABLE clinical2.ctphase TO root; GRANT SELECT ON TABLE clinical2.ctphase TO zincread; GRANT SELECT ON TABLE clinical2.ctphase TO zincfree; GRANT ALL ON TABLE clinical2.ctphase TO test; GRANT ALL ON TABLE clinical2.ctphase TO adminprivate; GRANT ALL ON TABLE clinical2.ctphase TO admin;
create table clinical2.ct2 (like clinical1.ct2 including defaults including constraints including indexes); alter table clinical2.ct2 add constraint ct2_ctphase_fk_fkey foreign key (ctphase_fk) references clinical2.ctphase(ctphase_id); alter table clinical2.ct2 add constraint ct2_ctstatus_fk_fkey foreign key (ctstatus_fk) references clinical2.ctstatus(ctstatus_id); alter table clinical2.ct2 add column changed_date date;
REVOKE ALL ON TABLE clinical2.ct2 FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2 TO root; GRANT SELECT ON TABLE clinical2.ct2 TO zincread; GRANT SELECT ON TABLE clinical2.ct2 TO zincfree; GRANT ALL ON TABLE clinical2.ct2 TO test; GRANT ALL ON TABLE clinical2.ct2 TO adminprivate; GRANT ALL ON TABLE clinical2.ct2 TO admin;
create table clinical2.ct2condclass (like clinical1.ct2condclass including defaults including constraints including indexes); REVOKE ALL ON TABLE clinical2.ct2condclass FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2condclass TO root; GRANT SELECT ON TABLE clinical2.ct2condclass TO zincread; GRANT SELECT ON TABLE clinical2.ct2condclass TO zincfree; GRANT ALL ON TABLE clinical2.ct2condclass TO test; GRANT ALL ON TABLE clinical2.ct2condclass TO adminprivate; GRANT ALL ON TABLE clinical2.ct2condclass TO admin;
create table clinical2.ct2condition (like clinical1.ct2condition including defaults including constraints including indexes); alter table clinical2.ct2condition add constraint ct2condition_condclass_fk_fkey foreign key (condclass_fk) references clinical2.ct2condclass(ct2condclass_id); REVOKE ALL ON TABLE clinical2.ct2condition FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2condition TO root; GRANT SELECT ON TABLE clinical2.ct2condition TO zincread; GRANT SELECT ON TABLE clinical2.ct2condition TO zincfree; GRANT ALL ON TABLE clinical2.ct2condition TO test; GRANT ALL ON TABLE clinical2.ct2condition TO adminprivate; GRANT ALL ON TABLE clinical2.ct2condition TO admin;
create table clinical2.ct2tocond (like clinical1.ct2tocond including defaults including constraints including indexes); alter table clinical2.ct2tocond add constraint ct2tocond_ct2condition_fk_fkey foreign key (ct2condition_fk) references clinical2.ct2condition(ct2condition_id); alter table clinical2.ct2tocond add constraint ct2tocond_ct2_fk_fkey foreign key (ct2_fk) references clinical2.ct2(ct2_id); REVOKE ALL ON TABLE clinical2.ct2tocond FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2tocond TO root; GRANT SELECT ON TABLE clinical2.ct2tocond TO zincread; GRANT SELECT ON TABLE clinical2.ct2tocond TO zincfree; GRANT ALL ON TABLE clinical2.ct2tocond TO test; GRANT ALL ON TABLE clinical2.ct2tocond TO adminprivate; GRANT ALL ON TABLE clinical2.ct2tocond TO admin;
create table clinical2.ct2int (like clinical1.ct2int including defaults including constraints including indexes); alter tabel clinical2.ct2int drop column ct2_fk; REVOKE ALL ON TABLE clinical2.ct2int FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2int TO root; GRANT SELECT ON TABLE clinical2.ct2int TO zincread; GRANT SELECT ON TABLE clinical2.ct2int TO zincfree; GRANT ALL ON TABLE clinical2.ct2int TO test; GRANT ALL ON TABLE clinical2.ct2int TO adminprivate; GRANT ALL ON TABLE clinical2.ct2int TO admin;
create table clinical2.ct2subint (like clinical1.ct2subint including defaults including constraints including indexes); alter table clinical2.ct2subint add constraint ct2subint_ct2int_fk_fkey foreign key (ct2int_fk) references clinical2.ct2int(ct2int_id); alter table clinical2.ct2subint add constraint ct2subint_sub_id_fk_fkey foreign key (sub_id_fk) references substance(sub_id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED; REVOKE ALL ON TABLE clinical2.ct2subint FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2subint TO root; GRANT SELECT ON TABLE clinical2.ct2subint TO zincread; GRANT SELECT ON TABLE clinical2.ct2subint TO zincfree; GRANT ALL ON TABLE clinical2.ct2subint TO test; GRANT ALL ON TABLE clinical2.ct2subint TO adminprivate; GRANT ALL ON TABLE clinical2.ct2subint TO admin;
create table clinical2.ct2toint (ct2toint_id integer primary key serial not null, ct2_fk integer references clinical2.ct2, ct2int_fk integer references clinical2.ct2int)
create sequence clinical2.ct2toint_ct2toint_seq; alter table clinical2.ct2toint alter column ct2toint_id set data type not null default nextval('clinical2.ct2toint_ct2toint_seq'::regclass); REVOKE ALL ON TABLE clinical2.ct2toint_ct2toint_seq FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO root; GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincread; GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincfree; GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO test; GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO adminprivate; GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO admin;
REVOKE ALL ON TABLE clinical2.ct2toint FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2toint TO root; GRANT SELECT ON TABLE clinical2.ct2toint TO zincread; GRANT SELECT ON TABLE clinical2.ct2toint TO zincfree; GRANT ALL ON TABLE clinical2.ct2toint TO test; GRANT ALL ON TABLE clinical2.ct2toint TO adminprivate; GRANT ALL ON TABLE clinical2.ct2toint TO admin;
3. Load data from existing tables
insert into clinical2.ctstatus select * from clinical1.ctstatus; insert into clinical2.ctphase select * from clinical1.ctphase; insert into clinical2.ct2condclass select * from clinical1.ct2condclass;
4. Matching the name between subname.name and intervention.name
create temp table subname as select sub_id_fk as sub_id_fk, who_name as name from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20') join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno join chembl20.atc_classification as ac on mac.level5=ac.level5;
insert into subname select cs.sub_id_fk, sy.synonym from catalog_substance as cs join synonym as s on cs.cat_content_fk = s.cat_content_fk where not exists (select 1 from subname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym);
alter table subname add column q tsquery;
update table subname as s set q=plainto_tsquery(t.who_name) from subname as t where s.sub_id_fk=t.sub_id_fk and s.who_name=t.who_name;
alter table clinical2.ct2int add column terms tsvector; update table clinical2.ct2int set term=to_tsvector('english', name)
insert into clinical2.ct2subint (sub_id_fk, ct2int_fk) select distinct sub.sub_id_fk, int.ct2int_id from clinical2.ct2int as int join subname as sub on int.terms@@sub.query;