Clinical Trials Loading: Difference between revisions
Jump to navigation
Jump to search
Enkhjargal (talk | contribs) No edit summary |
Enkhjargal (talk | contribs) No edit summary |
||
(One intermediate revision by the same user not shown) | |||
Line 1: | Line 1: | ||
Look at the /nfs/db/trials/HOWTO.Enkhee | |||
1. Relevant Files: | |||
/nfs/db/trials/important --> clinical trials raw data | /nfs/db/trials/important --> clinical trials raw data | ||
/nfs/home/teague/work/Projects/trials/extract.py --> script to clean the raw data | /nfs/home/teague/work/Projects/trials/extract.py --> script to clean the raw data | ||
Line 149: | Line 152: | ||
insert into clinical2.ctphase select * from clinical1.ctphase; | insert into clinical2.ctphase select * from clinical1.ctphase; | ||
insert into clinical2.ct2condclass select * from clinical1.ct2condclass; | insert into clinical2.ct2condclass select * from clinical1.ct2condclass; | ||
4. Matching the name between subname.name and intervention.name | 4. Matching the name between subname.name and intervention.name | ||
create temp table tempsubname | |||
as select sub_id_fk as sub_id_fk, who_name as name | |||
from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20') | |||
join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id | |||
join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno | |||
join chembl20.atc_classification as ac on mac.level5=ac.level5; | |||
insert into tempsubname | |||
select cs.sub_id_fk, sy.synonym | |||
from catalog_substance as cs join synonym as sy on cs.cat_content_fk = sy.cat_content_fk | |||
where not exists (select 1 from tempsubname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym); | |||
alter table tempsubname add column q tsquery; | |||
update tempsubname as s | |||
set q=plainto_tsquery(t.name) | |||
from tempsubname as t | |||
where s.sub_id_fk=t.sub_id_fk and s.name=t.name; | |||
alter table clinical2.ct2int add column terms tsvector; | alter table clinical2.ct2int add column terms tsvector; | ||
update table clinical2.ct2int set term=to_tsvector('english', name) | update table clinical2.ct2int set term=to_tsvector('english', name) | ||
insert into clinical2.ct2subint (sub_id_fk, ct2int_fk) | |||
select distinct sub.sub_id_fk, int.ct2int_id | |||
from clinical2.ct2int as int join tempsubname as sub on | |||
int.terms@@sub.q | |||
where not exists (select 1 from clinical2.ct2int as int join tempsubname as sub on int.terms@@sub.q); | |||
5. Warehousing | 5. Warehousing |
Latest revision as of 22:40, 12 December 2017
Look at the /nfs/db/trials/HOWTO.Enkhee
1. Relevant Files:
/nfs/db/trials/important --> clinical trials raw data /nfs/home/teague/work/Projects/trials/extract.py --> script to clean the raw data http://wiki.docking.org/index.php/Creating_clinical_name_mappings --> how to create name_mappings
2. SQL queries to create a new clinical trial schema and table are located in zinc code:
zinc/SQL_statement/clinical_trial.sql
Please see below the sql_statements
3. Load raw data to the database tables: If you want to delete all the existing data, then use --wipe Otherwise it will load data incrementally to the existing data.
source /nfs/soft/www/apps/zinc15/envs/dev/bin/activate python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_ct /nfs/db/trials/important/studies.txt python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_condition /nfs/db/trials/important/browse_conditions.txt python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_intervention /nfs/db/trials/important/interventions.txt
------------------------ zinc/SQL_statement/clinical_trial.sql ----------------------------------------
1. Creating the scheme and give the grants
BEGIN; create schema clinical2; GRANT USAGE on schema clinical2 to root; GRANT USAGE on schema clinical2 to zincread; GRANT USAGE on schema clinical2 to zincfree; GRANT USAGE on schema clinical2 to test; GRANT USAGE on schema clinical2 to admin; GRANT USAGE on schema clinical2 to adminprivate;
2. Creating the tables and give grants
BEGIN; create table clinical2.ctstatus (like clinical1.ctstatus including defaults including constraints including indexes); REVOKE ALL ON TABLE clinical2.ctstatus FROM PUBLIC; GRANT ALL ON TABLE clinical2.ctstatus TO root; GRANT SELECT ON TABLE clinical2.ctstatus TO zincread; GRANT SELECT ON TABLE clinical2.ctstatus TO zincfree; GRANT ALL ON TABLE clinical2.ctstatus TO test; GRANT ALL ON TABLE clinical2.ctstatus TO adminprivate; GRANT ALL ON TABLE clinical2.ctstatus TO admin;
create table clinical2.ctphase (like clinical1.ctphase including defaults including constraints including indexes); REVOKE ALL ON TABLE clinical2.ctphase FROM PUBLIC; GRANT ALL ON TABLE clinical2.ctphase TO root; GRANT SELECT ON TABLE clinical2.ctphase TO zincread; GRANT SELECT ON TABLE clinical2.ctphase TO zincfree; GRANT ALL ON TABLE clinical2.ctphase TO test; GRANT ALL ON TABLE clinical2.ctphase TO adminprivate; GRANT ALL ON TABLE clinical2.ctphase TO admin;
create table clinical2.ct2 (like clinical1.ct2 including defaults including constraints including indexes); alter table clinical2.ct2 add constraint ct2_ctphase_fk_fkey foreign key (ctphase_fk) references clinical2.ctphase(ctphase_id); alter table clinical2.ct2 add constraint ct2_ctstatus_fk_fkey foreign key (ctstatus_fk) references clinical2.ctstatus(ctstatus_id); alter table clinical2.ct2 add column changed_date date;
REVOKE ALL ON TABLE clinical2.ct2 FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2 TO root; GRANT SELECT ON TABLE clinical2.ct2 TO zincread; GRANT SELECT ON TABLE clinical2.ct2 TO zincfree; GRANT ALL ON TABLE clinical2.ct2 TO test; GRANT ALL ON TABLE clinical2.ct2 TO adminprivate; GRANT ALL ON TABLE clinical2.ct2 TO admin;
create table clinical2.ct2condclass (like clinical1.ct2condclass including defaults including constraints including indexes); REVOKE ALL ON TABLE clinical2.ct2condclass FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2condclass TO root; GRANT SELECT ON TABLE clinical2.ct2condclass TO zincread; GRANT SELECT ON TABLE clinical2.ct2condclass TO zincfree; GRANT ALL ON TABLE clinical2.ct2condclass TO test; GRANT ALL ON TABLE clinical2.ct2condclass TO adminprivate; GRANT ALL ON TABLE clinical2.ct2condclass TO admin;
create table clinical2.ct2condition (like clinical1.ct2condition including defaults including constraints including indexes); alter table clinical2.ct2condition add constraint ct2condition_condclass_fk_fkey foreign key (condclass_fk) references clinical2.ct2condclass(ct2condclass_id); REVOKE ALL ON TABLE clinical2.ct2condition FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2condition TO root; GRANT SELECT ON TABLE clinical2.ct2condition TO zincread; GRANT SELECT ON TABLE clinical2.ct2condition TO zincfree; GRANT ALL ON TABLE clinical2.ct2condition TO test; GRANT ALL ON TABLE clinical2.ct2condition TO adminprivate; GRANT ALL ON TABLE clinical2.ct2condition TO admin;
create table clinical2.ct2tocond (like clinical1.ct2tocond including defaults including constraints including indexes); alter table clinical2.ct2tocond add constraint ct2tocond_ct2condition_fk_fkey foreign key (ct2condition_fk) references clinical2.ct2condition(ct2condition_id); alter table clinical2.ct2tocond add constraint ct2tocond_ct2_fk_fkey foreign key (ct2_fk) references clinical2.ct2(ct2_id); REVOKE ALL ON TABLE clinical2.ct2tocond FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2tocond TO root; GRANT SELECT ON TABLE clinical2.ct2tocond TO zincread; GRANT SELECT ON TABLE clinical2.ct2tocond TO zincfree; GRANT ALL ON TABLE clinical2.ct2tocond TO test; GRANT ALL ON TABLE clinical2.ct2tocond TO adminprivate; GRANT ALL ON TABLE clinical2.ct2tocond TO admin;
create table clinical2.ct2int (like clinical1.ct2int including defaults including constraints including indexes); alter tabel clinical2.ct2int drop column ct2_fk; REVOKE ALL ON TABLE clinical2.ct2int FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2int TO root; GRANT SELECT ON TABLE clinical2.ct2int TO zincread; GRANT SELECT ON TABLE clinical2.ct2int TO zincfree; GRANT ALL ON TABLE clinical2.ct2int TO test; GRANT ALL ON TABLE clinical2.ct2int TO adminprivate; GRANT ALL ON TABLE clinical2.ct2int TO admin;
create table clinical2.ct2subint (like clinical1.ct2subint including defaults including constraints including indexes); alter table clinical2.ct2subint add constraint ct2subint_ct2int_fk_fkey foreign key (ct2int_fk) references clinical2.ct2int(ct2int_id); alter table clinical2.ct2subint add constraint ct2subint_sub_id_fk_fkey foreign key (sub_id_fk) references substance(sub_id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED; REVOKE ALL ON TABLE clinical2.ct2subint FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2subint TO root; GRANT SELECT ON TABLE clinical2.ct2subint TO zincread; GRANT SELECT ON TABLE clinical2.ct2subint TO zincfree; GRANT ALL ON TABLE clinical2.ct2subint TO test; GRANT ALL ON TABLE clinical2.ct2subint TO adminprivate; GRANT ALL ON TABLE clinical2.ct2subint TO admin;
create table clinical2.ct2toint (ct2toint_id integer primary key serial not null, ct2_fk integer references clinical2.ct2, ct2int_fk integer references clinical2.ct2int)
create sequence clinical2.ct2toint_ct2toint_seq; alter table clinical2.ct2toint alter column ct2toint_id set data type not null default nextval('clinical2.ct2toint_ct2toint_seq'::regclass); REVOKE ALL ON TABLE clinical2.ct2toint_ct2toint_seq FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO root; GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincread; GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincfree; GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO test; GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO adminprivate; GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO admin;
REVOKE ALL ON TABLE clinical2.ct2toint FROM PUBLIC; GRANT ALL ON TABLE clinical2.ct2toint TO root; GRANT SELECT ON TABLE clinical2.ct2toint TO zincread; GRANT SELECT ON TABLE clinical2.ct2toint TO zincfree; GRANT ALL ON TABLE clinical2.ct2toint TO test; GRANT ALL ON TABLE clinical2.ct2toint TO adminprivate; GRANT ALL ON TABLE clinical2.ct2toint TO admin;
3. Load data from existing tables
insert into clinical2.ctstatus select * from clinical1.ctstatus; insert into clinical2.ctphase select * from clinical1.ctphase; insert into clinical2.ct2condclass select * from clinical1.ct2condclass;
4. Matching the name between subname.name and intervention.name create temp table tempsubname as select sub_id_fk as sub_id_fk, who_name as name from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20') join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno join chembl20.atc_classification as ac on mac.level5=ac.level5;
insert into tempsubname select cs.sub_id_fk, sy.synonym from catalog_substance as cs join synonym as sy on cs.cat_content_fk = sy.cat_content_fk where not exists (select 1 from tempsubname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym); alter table tempsubname add column q tsquery;
update tempsubname as s set q=plainto_tsquery(t.name) from tempsubname as t where s.sub_id_fk=t.sub_id_fk and s.name=t.name;
alter table clinical2.ct2int add column terms tsvector; update table clinical2.ct2int set term=to_tsvector('english', name)
insert into clinical2.ct2subint (sub_id_fk, ct2int_fk) select distinct sub.sub_id_fk, int.ct2int_id from clinical2.ct2int as int join tempsubname as sub on int.terms@@sub.q where not exists (select 1 from clinical2.ct2int as int join tempsubname as sub on int.terms@@sub.q);
5. Warehousing
update clinical2.ct2int as ct2int set num_trials = (select count(*) from clinical2.ct2toint as ct2toint where ct2int.ct2int_id = ct2toint.ct2int_fk);
update clinical2.ct2int as ct2int set num_substances = (select count(*) from clinical2.ct2subint as ct2subint where ct2int.ct2int_id = ct2subint.ct2int_fk);
update clinical2.ct2condition as ct2cond set num_trials = (select count(*) from clinical2.ct2tocond as ct2tocond where ct2cond.ct2condition_id = ct2tocond.ct2condition_fk);
update clinical2.ct2condition as ct2cond set num_substances = (select count(distinct(ct2subint.sub_id_fk)) from clinical2.ct2tocond as ct2tocond, clinical2.ct2toint as ct2toint, clinical2.ct2subint as ct2subint where ct2cond.ct2condition_id = ct2tocond.ct2condition_fk and ct2tocond.ct2_fk = ct2toint.ct2_fk and ct2toint.ct2int_fk=ct2subint.ct2int_fk);