Clinical Trials Loading: Difference between revisions

From DISI
Jump to navigation Jump to search
No edit summary
No edit summary
 
(2 intermediate revisions by the same user not shown)
Line 1: Line 1:
1. Relevant Files:
Look at the /nfs/db/trials/HOWTO.Enkhee
 
 
1. Relevant Files:
     /nfs/db/trials/important --> clinical trials raw data  
     /nfs/db/trials/important --> clinical trials raw data  
     /nfs/home/teague/work/Projects/trials/extract.py --> script to clean the raw data
     /nfs/home/teague/work/Projects/trials/extract.py --> script to clean the raw data
Line 150: Line 153:
   insert into clinical2.ct2condclass select * from clinical1.ct2condclass;
   insert into clinical2.ct2condclass select * from clinical1.ct2condclass;


4. Matching the name between subname.name and intervention.name
  create temp table tempsubname
  as select sub_id_fk as sub_id_fk, who_name as name
      from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20')
                        join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id
                        join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno
                        join chembl20.atc_classification as ac on mac.level5=ac.level5;
insert into tempsubname
  select cs.sub_id_fk, sy.synonym
  from catalog_substance as cs join synonym as sy on cs.cat_content_fk = sy.cat_content_fk
  where not exists (select 1 from tempsubname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym);
alter table tempsubname add column q tsquery;
update tempsubname as s
  set q=plainto_tsquery(t.name)
  from tempsubname as t
  where s.sub_id_fk=t.sub_id_fk and s.name=t.name;




4. Matching the name between subname.name and intervention.name
  alter table clinical2.ct2int add column terms tsvector;
  update table clinical2.ct2int set term=to_tsvector('english', name)


  create temp table subname
insert into clinical2.ct2subint (sub_id_fk, ct2int_fk)
    as select sub_id_fk as sub_id_fk, who_name as name
  select distinct sub.sub_id_fk, int.ct2int_id
      from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20')
  from clinical2.ct2int as int join tempsubname as sub on
                        join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id
                        int.terms@@sub.q
                        join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno
  where not exists (select 1 from clinical2.ct2int as int join tempsubname as sub on int.terms@@sub.q);
                        join chembl20.atc_classification as ac on mac.level5=ac.level5;


  insert into subname
    select cs.sub_id_fk, sy.synonym
    from catalog_substance as cs join synonym as s on cs.cat_content_fk = s.cat_content_fk
    where not exists (select 1 from subname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym);


  alter table subname add column q tsquery;
5. Warehousing


   update table subname as s
   update clinical2.ct2int as ct2int set num_trials = (select
  set q=plainto_tsquery(t.who_name)
    count(*) from clinical2.ct2toint as ct2toint where
  from subname as t
    ct2int.ct2int_id = ct2toint.ct2int_fk);
  where s.sub_id_fk=t.sub_id_fk and s.who_name=t.who_name;


   alter table clinical2.ct2int add column terms tsvector;
   update clinical2.ct2int as ct2int set num_substances = (select
  update table clinical2.ct2int set term=to_tsvector('english', name)
    count(*) from clinical2.ct2subint as ct2subint where
    ct2int.ct2int_id = ct2subint.ct2int_fk);


  update clinical2.ct2condition as ct2cond set num_trials = (select
    count(*) from clinical2.ct2tocond as ct2tocond where
    ct2cond.ct2condition_id = ct2tocond.ct2condition_fk);


   insert into clinical2.ct2subint (sub_id_fk, ct2int_fk)
   update clinical2.ct2condition as ct2cond set num_substances = (select
     select distinct sub.sub_id_fk, int.ct2int_id
     count(distinct(ct2subint.sub_id_fk)) from clinical2.ct2tocond as ct2tocond, clinical2.ct2toint as ct2toint, clinical2.ct2subint as ct2subint where
    from clinical2.ct2int as int join subname as sub on
    ct2cond.ct2condition_id = ct2tocond.ct2condition_fk and ct2tocond.ct2_fk = ct2toint.ct2_fk and ct2toint.ct2int_fk=ct2subint.ct2int_fk);
                        int.terms@@sub.query;

Latest revision as of 22:40, 12 December 2017

Look at the /nfs/db/trials/HOWTO.Enkhee


1. Relevant Files:

   /nfs/db/trials/important --> clinical trials raw data 
   /nfs/home/teague/work/Projects/trials/extract.py --> script to clean the raw data
   http://wiki.docking.org/index.php/Creating_clinical_name_mappings --> how to create name_mappings
2. SQL queries to create a new clinical trial schema and table are located in zinc code:
   zinc/SQL_statement/clinical_trial.sql
   Please see below the sql_statements
3. Load raw data to the database tables:
   If you want to delete all the existing data, then use --wipe
   Otherwise it will load data incrementally to the existing data.
   source /nfs/soft/www/apps/zinc15/envs/dev/bin/activate
   python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_ct /nfs/db/trials/important/studies.txt 
   python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_condition /nfs/db/trials/important/browse_conditions.txt 
   python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_intervention /nfs/db/trials/important/interventions.txt


 ------------------------   zinc/SQL_statement/clinical_trial.sql  ----------------------------------------
1. Creating the scheme and give the grants
 BEGIN;
 create schema clinical2;
 GRANT USAGE on schema clinical2 to root;
 GRANT USAGE on schema clinical2 to zincread;
 GRANT USAGE on schema clinical2 to zincfree;
 GRANT USAGE on schema clinical2 to test;
 GRANT USAGE on schema clinical2 to admin;
 GRANT USAGE on schema clinical2 to adminprivate;
2. Creating the tables and give grants
 BEGIN;
 create table clinical2.ctstatus (like clinical1.ctstatus including defaults including constraints including indexes);
 REVOKE ALL ON TABLE clinical2.ctstatus FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ctstatus TO root;
 GRANT SELECT ON TABLE clinical2.ctstatus TO zincread;
 GRANT SELECT ON TABLE clinical2.ctstatus TO zincfree;
 GRANT ALL ON TABLE clinical2.ctstatus TO test;
 GRANT ALL ON TABLE clinical2.ctstatus TO adminprivate;
 GRANT ALL ON TABLE clinical2.ctstatus TO admin;
 create table clinical2.ctphase (like clinical1.ctphase including defaults including constraints including indexes);
 REVOKE ALL ON TABLE clinical2.ctphase FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ctphase TO root;
 GRANT SELECT ON TABLE clinical2.ctphase TO zincread;
 GRANT SELECT ON TABLE clinical2.ctphase TO zincfree;
 GRANT ALL ON TABLE clinical2.ctphase TO test;
 GRANT ALL ON TABLE clinical2.ctphase TO adminprivate;
 GRANT ALL ON TABLE clinical2.ctphase TO admin;
 create table clinical2.ct2 (like clinical1.ct2 including defaults including constraints including indexes);
 alter table clinical2.ct2 add constraint ct2_ctphase_fk_fkey foreign key (ctphase_fk) references clinical2.ctphase(ctphase_id);
 alter table clinical2.ct2 add constraint ct2_ctstatus_fk_fkey foreign key (ctstatus_fk) references clinical2.ctstatus(ctstatus_id);
 alter table clinical2.ct2 add column changed_date date;
 REVOKE ALL ON TABLE clinical2.ct2 FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2 TO root;
 GRANT SELECT ON TABLE clinical2.ct2 TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2 TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2 TO test;
 GRANT ALL ON TABLE clinical2.ct2 TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2 TO admin;
 create table clinical2.ct2condclass (like clinical1.ct2condclass including defaults including constraints including indexes);
 REVOKE ALL ON TABLE clinical2.ct2condclass FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2condclass TO root;
 GRANT SELECT ON TABLE clinical2.ct2condclass TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2condclass TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2condclass TO test;
 GRANT ALL ON TABLE clinical2.ct2condclass TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2condclass TO admin;
 create table clinical2.ct2condition (like clinical1.ct2condition including defaults including constraints including indexes);
 alter table clinical2.ct2condition add constraint ct2condition_condclass_fk_fkey foreign key (condclass_fk) references clinical2.ct2condclass(ct2condclass_id);
 REVOKE ALL ON TABLE clinical2.ct2condition FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2condition TO root;
 GRANT SELECT ON TABLE clinical2.ct2condition TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2condition TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2condition TO test;
 GRANT ALL ON TABLE clinical2.ct2condition TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2condition TO admin;
 create table clinical2.ct2tocond (like clinical1.ct2tocond including defaults including constraints including indexes);
 alter table clinical2.ct2tocond add constraint ct2tocond_ct2condition_fk_fkey foreign key (ct2condition_fk) references clinical2.ct2condition(ct2condition_id);
 alter table clinical2.ct2tocond add constraint ct2tocond_ct2_fk_fkey foreign key (ct2_fk) references clinical2.ct2(ct2_id);
 REVOKE ALL ON TABLE clinical2.ct2tocond FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2tocond TO root;
 GRANT SELECT ON TABLE clinical2.ct2tocond TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2tocond TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2tocond TO test;
 GRANT ALL ON TABLE clinical2.ct2tocond TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2tocond TO admin;


 create table clinical2.ct2int (like clinical1.ct2int including defaults including constraints including indexes);
 alter tabel clinical2.ct2int drop column ct2_fk;
 REVOKE ALL ON TABLE clinical2.ct2int FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2int TO root;
 GRANT SELECT ON TABLE clinical2.ct2int TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2int TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2int TO test;
 GRANT ALL ON TABLE clinical2.ct2int TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2int TO admin;


 create table clinical2.ct2subint (like clinical1.ct2subint including defaults including constraints including indexes);
 alter table clinical2.ct2subint add constraint ct2subint_ct2int_fk_fkey foreign key (ct2int_fk) references clinical2.ct2int(ct2int_id);
 alter table clinical2.ct2subint add constraint ct2subint_sub_id_fk_fkey foreign key (sub_id_fk) references substance(sub_id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
 REVOKE ALL ON TABLE clinical2.ct2subint FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2subint TO root;
 GRANT SELECT ON TABLE clinical2.ct2subint TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2subint TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2subint TO test;
 GRANT ALL ON TABLE clinical2.ct2subint TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2subint TO admin;


 create table clinical2.ct2toint (ct2toint_id integer primary key serial not null,
                                ct2_fk integer references clinical2.ct2,
                                ct2int_fk integer references clinical2.ct2int)
 create sequence clinical2.ct2toint_ct2toint_seq;
 alter table clinical2.ct2toint alter column ct2toint_id set data type not null default nextval('clinical2.ct2toint_ct2toint_seq'::regclass);
 REVOKE ALL ON TABLE clinical2.ct2toint_ct2toint_seq FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO root;
 GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO test;
 GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO admin;


 REVOKE ALL ON TABLE clinical2.ct2toint FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2toint TO root;
 GRANT SELECT ON TABLE clinical2.ct2toint TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2toint TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2toint TO test;
 GRANT ALL ON TABLE clinical2.ct2toint TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2toint TO admin;
3. Load data from existing tables
 insert into clinical2.ctstatus select * from clinical1.ctstatus;
 insert into clinical2.ctphase select * from clinical1.ctphase;
 insert into clinical2.ct2condclass select * from clinical1.ct2condclass;
4. Matching the name between subname.name and intervention.name

 create temp table tempsubname
  as select sub_id_fk as sub_id_fk, who_name as name
     from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20')
                       join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id
                       join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno
                       join chembl20.atc_classification as ac on mac.level5=ac.level5;
insert into tempsubname
  select cs.sub_id_fk, sy.synonym
  from catalog_substance as cs join synonym as sy on cs.cat_content_fk = sy.cat_content_fk
  where not exists (select 1 from tempsubname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym);

alter table tempsubname add column q tsquery;
update tempsubname as s
 set q=plainto_tsquery(t.name)
 from tempsubname as t
 where s.sub_id_fk=t.sub_id_fk and s.name=t.name;


 alter table clinical2.ct2int add column terms tsvector;
 update table clinical2.ct2int set term=to_tsvector('english', name)
insert into clinical2.ct2subint (sub_id_fk, ct2int_fk)
  select distinct sub.sub_id_fk, int.ct2int_id
  from clinical2.ct2int as int join tempsubname as sub on
                       int.terms@@sub.q
  where not exists (select 1 from clinical2.ct2int as int join tempsubname as sub on int.terms@@sub.q);


5. Warehousing
 update clinical2.ct2int as ct2int set num_trials = (select
   count(*) from clinical2.ct2toint as ct2toint where
    ct2int.ct2int_id = ct2toint.ct2int_fk);
 update clinical2.ct2int as ct2int set num_substances = (select
   count(*) from clinical2.ct2subint as ct2subint where
    ct2int.ct2int_id = ct2subint.ct2int_fk);
 update clinical2.ct2condition as ct2cond set num_trials = (select
   count(*) from clinical2.ct2tocond as ct2tocond where
    ct2cond.ct2condition_id = ct2tocond.ct2condition_fk);
 update clinical2.ct2condition as ct2cond set num_substances = (select
   count(distinct(ct2subint.sub_id_fk)) from clinical2.ct2tocond as ct2tocond, clinical2.ct2toint as ct2toint, clinical2.ct2subint as ct2subint where
    ct2cond.ct2condition_id = ct2tocond.ct2condition_fk and ct2tocond.ct2_fk = ct2toint.ct2_fk and ct2toint.ct2int_fk=ct2subint.ct2int_fk);