Clinical Trials Loading: Difference between revisions

From DISI
Jump to navigation Jump to search
No edit summary
No edit summary
Line 7: Line 7:


     zinc/SQL_statement/clinical_trial.sql
     zinc/SQL_statement/clinical_trial.sql
    Please see below the sql_statements


  3. Load raw data to the database tables:
  3. Load raw data to the database tables:
Line 16: Line 18:
     python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_condition /nfs/db/trials/important/browse_conditions.txt  
     python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_condition /nfs/db/trials/important/browse_conditions.txt  
     python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_intervention /nfs/db/trials/important/interventions.txt
     python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_intervention /nfs/db/trials/important/interventions.txt
  ------------------------  zinc/SQL_statement/clinical_trial.sql  ----------------------------------------
1. Creating the scheme and give the grants
  BEGIN;
  create schema clinical2;
  GRANT USAGE on schema clinical2 to root;
  GRANT USAGE on schema clinical2 to zincread;
  GRANT USAGE on schema clinical2 to zincfree;
  GRANT USAGE on schema clinical2 to test;
  GRANT USAGE on schema clinical2 to admin;
  GRANT USAGE on schema clinical2 to adminprivate;
2. Creating the tables and give grants
  BEGIN;
  create table clinical2.ctstatus (like clinical1.ctstatus including defaults including constraints including indexes);
  REVOKE ALL ON TABLE clinical2.ctstatus FROM PUBLIC;
  GRANT ALL ON TABLE clinical2.ctstatus TO root;
  GRANT SELECT ON TABLE clinical2.ctstatus TO zincread;
  GRANT SELECT ON TABLE clinical2.ctstatus TO zincfree;
  GRANT ALL ON TABLE clinical2.ctstatus TO test;
  GRANT ALL ON TABLE clinical2.ctstatus TO adminprivate;
  GRANT ALL ON TABLE clinical2.ctstatus TO admin;
  create table clinical2.ctphase (like clinical1.ctphase including defaults including constraints including indexes);
  REVOKE ALL ON TABLE clinical2.ctphase FROM PUBLIC;
  GRANT ALL ON TABLE clinical2.ctphase TO root;
  GRANT SELECT ON TABLE clinical2.ctphase TO zincread;
  GRANT SELECT ON TABLE clinical2.ctphase TO zincfree;
  GRANT ALL ON TABLE clinical2.ctphase TO test;
  GRANT ALL ON TABLE clinical2.ctphase TO adminprivate;
  GRANT ALL ON TABLE clinical2.ctphase TO admin;
  create table clinical2.ct2 (like clinical1.ct2 including defaults including constraints including indexes);
  alter table clinical2.ct2 add constraint ct2_ctphase_fk_fkey foreign key (ctphase_fk) references clinical2.ctphase(ctphase_id);
  alter table clinical2.ct2 add constraint ct2_ctstatus_fk_fkey foreign key (ctstatus_fk) references clinical2.ctstatus(ctstatus_id);
  alter table clinical2.ct2 add column changed_date date;
  REVOKE ALL ON TABLE clinical2.ct2 FROM PUBLIC;
  GRANT ALL ON TABLE clinical2.ct2 TO root;
  GRANT SELECT ON TABLE clinical2.ct2 TO zincread;
  GRANT SELECT ON TABLE clinical2.ct2 TO zincfree;
  GRANT ALL ON TABLE clinical2.ct2 TO test;
  GRANT ALL ON TABLE clinical2.ct2 TO adminprivate;
  GRANT ALL ON TABLE clinical2.ct2 TO admin;
  create table clinical2.ct2condclass (like clinical1.ct2condclass including defaults including constraints including indexes);
  REVOKE ALL ON TABLE clinical2.ct2condclass FROM PUBLIC;
  GRANT ALL ON TABLE clinical2.ct2condclass TO root;
  GRANT SELECT ON TABLE clinical2.ct2condclass TO zincread;
  GRANT SELECT ON TABLE clinical2.ct2condclass TO zincfree;
  GRANT ALL ON TABLE clinical2.ct2condclass TO test;
  GRANT ALL ON TABLE clinical2.ct2condclass TO adminprivate;
  GRANT ALL ON TABLE clinical2.ct2condclass TO admin;
  create table clinical2.ct2condition (like clinical1.ct2condition including defaults including constraints including indexes);
  alter table clinical2.ct2condition add constraint ct2condition_condclass_fk_fkey foreign key (condclass_fk) references clinical2.ct2condclass(ct2condclass_id);
  REVOKE ALL ON TABLE clinical2.ct2condition FROM PUBLIC;
  GRANT ALL ON TABLE clinical2.ct2condition TO root;
  GRANT SELECT ON TABLE clinical2.ct2condition TO zincread;
  GRANT SELECT ON TABLE clinical2.ct2condition TO zincfree;
  GRANT ALL ON TABLE clinical2.ct2condition TO test;
  GRANT ALL ON TABLE clinical2.ct2condition TO adminprivate;
  GRANT ALL ON TABLE clinical2.ct2condition TO admin;
  create table clinical2.ct2tocond (like clinical1.ct2tocond including defaults including constraints including indexes);
  alter table clinical2.ct2tocond add constraint ct2tocond_ct2condition_fk_fkey foreign key (ct2condition_fk) references clinical2.ct2condition(ct2condition_id);
  alter table clinical2.ct2tocond add constraint ct2tocond_ct2_fk_fkey foreign key (ct2_fk) references clinical2.ct2(ct2_id);
  REVOKE ALL ON TABLE clinical2.ct2tocond FROM PUBLIC;
  GRANT ALL ON TABLE clinical2.ct2tocond TO root;
  GRANT SELECT ON TABLE clinical2.ct2tocond TO zincread;
  GRANT SELECT ON TABLE clinical2.ct2tocond TO zincfree;
  GRANT ALL ON TABLE clinical2.ct2tocond TO test;
  GRANT ALL ON TABLE clinical2.ct2tocond TO adminprivate;
  GRANT ALL ON TABLE clinical2.ct2tocond TO admin;
  create table clinical2.ct2int (like clinical1.ct2int including defaults including constraints including indexes);
  alter tabel clinical2.ct2int drop column ct2_fk;
  REVOKE ALL ON TABLE clinical2.ct2int FROM PUBLIC;
  GRANT ALL ON TABLE clinical2.ct2int TO root;
  GRANT SELECT ON TABLE clinical2.ct2int TO zincread;
  GRANT SELECT ON TABLE clinical2.ct2int TO zincfree;
  GRANT ALL ON TABLE clinical2.ct2int TO test;
  GRANT ALL ON TABLE clinical2.ct2int TO adminprivate;
  GRANT ALL ON TABLE clinical2.ct2int TO admin;
  create table clinical2.ct2subint (like clinical1.ct2subint including defaults including constraints including indexes);
  alter table clinical2.ct2subint add constraint ct2subint_ct2int_fk_fkey foreign key (ct2int_fk) references clinical2.ct2int(ct2int_id);
  alter table clinical2.ct2subint add constraint ct2subint_sub_id_fk_fkey foreign key (sub_id_fk) references substance(sub_id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
  REVOKE ALL ON TABLE clinical2.ct2subint FROM PUBLIC;
  GRANT ALL ON TABLE clinical2.ct2subint TO root;
  GRANT SELECT ON TABLE clinical2.ct2subint TO zincread;
  GRANT SELECT ON TABLE clinical2.ct2subint TO zincfree;
  GRANT ALL ON TABLE clinical2.ct2subint TO test;
  GRANT ALL ON TABLE clinical2.ct2subint TO adminprivate;
  GRANT ALL ON TABLE clinical2.ct2subint TO admin;
  create table clinical2.ct2toint (ct2toint_id integer primary key serial not null,
                                ct2_fk integer references clinical2.ct2,
                                ct2int_fk integer references clinical2.ct2int)
  create sequence clinical2.ct2toint_ct2toint_seq;
  alter table clinical2.ct2toint alter column ct2toint_id set data type not null default nextval('clinical2.ct2toint_ct2toint_seq'::regclass);
  REVOKE ALL ON TABLE clinical2.ct2toint_ct2toint_seq FROM PUBLIC;
  GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO root;
  GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincread;
  GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincfree;
  GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO test;
  GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO adminprivate;
  GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO admin;
  REVOKE ALL ON TABLE clinical2.ct2toint FROM PUBLIC;
  GRANT ALL ON TABLE clinical2.ct2toint TO root;
  GRANT SELECT ON TABLE clinical2.ct2toint TO zincread;
  GRANT SELECT ON TABLE clinical2.ct2toint TO zincfree;
  GRANT ALL ON TABLE clinical2.ct2toint TO test;
  GRANT ALL ON TABLE clinical2.ct2toint TO adminprivate;
  GRANT ALL ON TABLE clinical2.ct2toint TO admin;
3. Load data from existing tables
  insert into clinical2.ctstatus select * from clinical1.ctstatus;
  insert into clinical2.ctphase select * from clinical1.ctphase;
  insert into clinical2.ct2condclass select * from clinical1.ct2condclass;
4. Matching the name between subname.name and intervention.name
  create temp table subname
    as select sub_id_fk as sub_id_fk, who_name as name
      from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20')
                        join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id
                        join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno
                        join chembl20.atc_classification as ac on mac.level5=ac.level5;
  insert into subname
    select cs.sub_id_fk, sy.synonym
    from catalog_substance as cs join synonym as s on cs.cat_content_fk = s.cat_content_fk
    where not exists (select 1 from subname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym);
  alter table subname add column q tsquery;
  update table subname as s
  set q=plainto_tsquery(t.who_name)
  from subname as t
  where s.sub_id_fk=t.sub_id_fk and s.who_name=t.who_name;
  alter table clinical2.ct2int add column terms tsvector;
  update table clinical2.ct2int set term=to_tsvector('english', name)
  insert into clinical2.ct2subint (sub_id_fk, ct2int_fk)
    select distinct sub.sub_id_fk, int.ct2int_id
    from clinical2.ct2int as int join subname as sub on
                        int.terms@@sub.query;

Revision as of 20:18, 24 August 2017

1. Relevant Files:
   /nfs/db/trials/important --> clinical trials raw data 
   /nfs/home/teague/work/Projects/trials/extract.py --> script to clean the raw data
   http://wiki.docking.org/index.php/Creating_clinical_name_mappings --> how to create name_mappings
2. SQL queries to create a new clinical trial schema and table are located in zinc code:
   zinc/SQL_statement/clinical_trial.sql
   Please see below the sql_statements
3. Load raw data to the database tables:
   If you want to delete all the existing data, then use --wipe
   Otherwise it will load data incrementally to the existing data.
   source /nfs/soft/www/apps/zinc15/envs/dev/bin/activate
   python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_ct /nfs/db/trials/important/studies.txt 
   python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_condition /nfs/db/trials/important/browse_conditions.txt 
   python /nfs/soft/www/apps/zinc15/envs/dev/bin/zinc-manage utils clinical-trials load_intervention /nfs/db/trials/important/interventions.txt


 ------------------------   zinc/SQL_statement/clinical_trial.sql  ----------------------------------------
1. Creating the scheme and give the grants
 BEGIN;
 create schema clinical2;
 GRANT USAGE on schema clinical2 to root;
 GRANT USAGE on schema clinical2 to zincread;
 GRANT USAGE on schema clinical2 to zincfree;
 GRANT USAGE on schema clinical2 to test;
 GRANT USAGE on schema clinical2 to admin;
 GRANT USAGE on schema clinical2 to adminprivate;
2. Creating the tables and give grants
 BEGIN;
 create table clinical2.ctstatus (like clinical1.ctstatus including defaults including constraints including indexes);
 REVOKE ALL ON TABLE clinical2.ctstatus FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ctstatus TO root;
 GRANT SELECT ON TABLE clinical2.ctstatus TO zincread;
 GRANT SELECT ON TABLE clinical2.ctstatus TO zincfree;
 GRANT ALL ON TABLE clinical2.ctstatus TO test;
 GRANT ALL ON TABLE clinical2.ctstatus TO adminprivate;
 GRANT ALL ON TABLE clinical2.ctstatus TO admin;
 create table clinical2.ctphase (like clinical1.ctphase including defaults including constraints including indexes);
 REVOKE ALL ON TABLE clinical2.ctphase FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ctphase TO root;
 GRANT SELECT ON TABLE clinical2.ctphase TO zincread;
 GRANT SELECT ON TABLE clinical2.ctphase TO zincfree;
 GRANT ALL ON TABLE clinical2.ctphase TO test;
 GRANT ALL ON TABLE clinical2.ctphase TO adminprivate;
 GRANT ALL ON TABLE clinical2.ctphase TO admin;
 create table clinical2.ct2 (like clinical1.ct2 including defaults including constraints including indexes);
 alter table clinical2.ct2 add constraint ct2_ctphase_fk_fkey foreign key (ctphase_fk) references clinical2.ctphase(ctphase_id);
 alter table clinical2.ct2 add constraint ct2_ctstatus_fk_fkey foreign key (ctstatus_fk) references clinical2.ctstatus(ctstatus_id);
 alter table clinical2.ct2 add column changed_date date;
 REVOKE ALL ON TABLE clinical2.ct2 FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2 TO root;
 GRANT SELECT ON TABLE clinical2.ct2 TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2 TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2 TO test;
 GRANT ALL ON TABLE clinical2.ct2 TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2 TO admin;
 create table clinical2.ct2condclass (like clinical1.ct2condclass including defaults including constraints including indexes);
 REVOKE ALL ON TABLE clinical2.ct2condclass FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2condclass TO root;
 GRANT SELECT ON TABLE clinical2.ct2condclass TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2condclass TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2condclass TO test;
 GRANT ALL ON TABLE clinical2.ct2condclass TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2condclass TO admin;
 create table clinical2.ct2condition (like clinical1.ct2condition including defaults including constraints including indexes);
 alter table clinical2.ct2condition add constraint ct2condition_condclass_fk_fkey foreign key (condclass_fk) references clinical2.ct2condclass(ct2condclass_id);
 REVOKE ALL ON TABLE clinical2.ct2condition FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2condition TO root;
 GRANT SELECT ON TABLE clinical2.ct2condition TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2condition TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2condition TO test;
 GRANT ALL ON TABLE clinical2.ct2condition TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2condition TO admin;
 create table clinical2.ct2tocond (like clinical1.ct2tocond including defaults including constraints including indexes);
 alter table clinical2.ct2tocond add constraint ct2tocond_ct2condition_fk_fkey foreign key (ct2condition_fk) references clinical2.ct2condition(ct2condition_id);
 alter table clinical2.ct2tocond add constraint ct2tocond_ct2_fk_fkey foreign key (ct2_fk) references clinical2.ct2(ct2_id);
 REVOKE ALL ON TABLE clinical2.ct2tocond FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2tocond TO root;
 GRANT SELECT ON TABLE clinical2.ct2tocond TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2tocond TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2tocond TO test;
 GRANT ALL ON TABLE clinical2.ct2tocond TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2tocond TO admin;


 create table clinical2.ct2int (like clinical1.ct2int including defaults including constraints including indexes);
 alter tabel clinical2.ct2int drop column ct2_fk;
 REVOKE ALL ON TABLE clinical2.ct2int FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2int TO root;
 GRANT SELECT ON TABLE clinical2.ct2int TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2int TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2int TO test;
 GRANT ALL ON TABLE clinical2.ct2int TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2int TO admin;


 create table clinical2.ct2subint (like clinical1.ct2subint including defaults including constraints including indexes);
 alter table clinical2.ct2subint add constraint ct2subint_ct2int_fk_fkey foreign key (ct2int_fk) references clinical2.ct2int(ct2int_id);
 alter table clinical2.ct2subint add constraint ct2subint_sub_id_fk_fkey foreign key (sub_id_fk) references substance(sub_id) ON UPDATE CASCADE ON DELETE CASCADE DEFERRABLE INITIALLY DEFERRED;
 REVOKE ALL ON TABLE clinical2.ct2subint FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2subint TO root;
 GRANT SELECT ON TABLE clinical2.ct2subint TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2subint TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2subint TO test;
 GRANT ALL ON TABLE clinical2.ct2subint TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2subint TO admin;


 create table clinical2.ct2toint (ct2toint_id integer primary key serial not null,
                                ct2_fk integer references clinical2.ct2,
                                ct2int_fk integer references clinical2.ct2int)
 create sequence clinical2.ct2toint_ct2toint_seq;
 alter table clinical2.ct2toint alter column ct2toint_id set data type not null default nextval('clinical2.ct2toint_ct2toint_seq'::regclass);
 REVOKE ALL ON TABLE clinical2.ct2toint_ct2toint_seq FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO root;
 GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2toint_ct2toint_seq TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO test;
 GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2toint_ct2toint_seq TO admin;


 REVOKE ALL ON TABLE clinical2.ct2toint FROM PUBLIC;
 GRANT ALL ON TABLE clinical2.ct2toint TO root;
 GRANT SELECT ON TABLE clinical2.ct2toint TO zincread;
 GRANT SELECT ON TABLE clinical2.ct2toint TO zincfree;
 GRANT ALL ON TABLE clinical2.ct2toint TO test;
 GRANT ALL ON TABLE clinical2.ct2toint TO adminprivate;
 GRANT ALL ON TABLE clinical2.ct2toint TO admin;
3. Load data from existing tables
 insert into clinical2.ctstatus select * from clinical1.ctstatus;
 insert into clinical2.ctphase select * from clinical1.ctphase;
 insert into clinical2.ct2condclass select * from clinical1.ct2condclass;


4. Matching the name between subname.name and intervention.name
 create temp table subname
   as select sub_id_fk as sub_id_fk, who_name as name
      from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20')
                        join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id
                        join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno
                        join chembl20.atc_classification as ac on mac.level5=ac.level5;
 insert into subname
   select cs.sub_id_fk, sy.synonym
   from catalog_substance as cs join synonym as s on cs.cat_content_fk = s.cat_content_fk
   where not exists (select 1 from subname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym);
 alter table subname add column q tsquery;
 update table subname as s
  set q=plainto_tsquery(t.who_name)
  from subname as t
  where s.sub_id_fk=t.sub_id_fk and s.who_name=t.who_name;
 alter table clinical2.ct2int add column terms tsvector;
 update table clinical2.ct2int set term=to_tsvector('english', name)


 insert into clinical2.ct2subint (sub_id_fk, ct2int_fk)
   select distinct sub.sub_id_fk, int.ct2int_id
   from clinical2.ct2int as int join subname as sub on
                        int.terms@@sub.query;