Creating clinical name mappings

From DISI
Revision as of 21:09, 12 August 2015 by Frodo (talk | contribs) (asdf)
Jump to navigation Jump to search

This is part of the curation of clinical trial data.

Day One

create temp table subname as select sub_id_fk as sub_id_fk, who_name as name from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20') join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno join chembl20.atc_classification as ac on mac.level5=ac.level5;

insert into subname select cs.sub_id_fk, sy.synonym from catalog_substance as cs join synonym as s on cs.cat_content_fk = s.cat_content_fk where not exists (select 1 from subname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym);

alter table subname add column q tsquery;

update table subname as s set q=plainto_tsquery(t.who_name) from subname as t where s.sub_id_fk=t.sub_id_fk and s.who_name=t.who_name;

\copy ctinter from druginfo.txt

alter table  ctinter add column terms tsvector;

update ctinter as x set terms = to_tsvector('english', y.name) from ctinter as y where x.id=y.id;

create index subanme_idx on subname using gist(q);
create index ctinter_idx on ctinter using gin(terms);

vacuum analyze verbose ctinter;
vacuum analyze verbose subname;

select * from ctinter join subname on terms@@q where sub_id_fk=53;
select * from subname join ctinter on q @@ terms where to_tsvector('english', 'biotin') @@ q ;

Day Two


Keeping track of a bit more here:

(Assuming ctstatus is already loaded)

create temporary table cttemp (nct varchar, status varchar, start_date varchar, title varchar);

\copy cttemp from /nfs/work/teague/Projects/trials/extracted/trials.txt

update cttemp set "date"=NULL where "date"='';

insert into clinical1.ct2 (ct_code, description, ctstatus_fk, start_date) select nct as code, status as description, ctstatus_id as ctstatus_fk , cast(start_date as date) as start_date from cttemp join clinical1.ctstatus on title=name;

create temporary table ctinttemp (code varchar, kind varchar, name varchar, description varchar);

\copy ctinttemp from /nfs/work/teague/Projects/trials/extracted/trial_drugs.txt

insert into clinical1.ct2int  (ct2_fk, name) select ct2_id, name from ctinttemp join clinical1.ct2 on code=ct_code;

create index ix_ct2int_name on clinical1.ct2int (name);
create index ix_ct2int_ct2_fk on clinical1.ct2int (ct2_fk);

alter table ctinttemp add column terms tsvector;
update ctinttemp set terms=to_tsvector('english', name), query=plainto_tsquery(name); 

create index ix_ctinttemp_terms on ctinttemp using gist(terms);

vacuum analyze verbose ctinttemp;

insert into clinical1.ct2subint (sub_id_fk, ct2int_fk) select distinct sub_id_fk, ct2int_id from ctinttemp as ct join clinical1.ct2 on ct.code=ct_code join clinical1.ct2int on ct2_id=ct2_fk and clinical1.ct2int.name=ct.name join subname on ct.terms@@subname.query;

insert into clinical1.ct2subint (sub_id_fk, ct2int_fk) select distinct sub_id_fk, ct2int_id from ctinttemp as ct join clinical1.ct2 on ct.code=ct_code join clinical1.ct2int as cti on ct2_id=cti.ct2_fk and cti.name=ct.name join subname on ct.terms@@subname.query where not exists(select 1 from clinical1.ct2subint as e where e.sub_id_fk=subname.sub_id_fk and e.ct2int_fk=cti.ct2int_id);