Creating clinical name mappings

From DISI
Jump to navigation Jump to search

This is part of the curation of clinical trial data.

Day One

create temp table subname as select sub_id_fk as sub_id_fk, who_name as name from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20') join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno join chembl20.atc_classification as ac on mac.level5=ac.level5;

insert into subname select cs.sub_id_fk, sy.synonym from catalog_substance as cs join synonym as s on cs.cat_content_fk = s.cat_content_fk where not exists (select 1 from subname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym);

alter table subname add column q tsquery;

update table subname as s set q=plainto_tsquery(t.who_name) from subname as t where s.sub_id_fk=t.sub_id_fk and s.who_name=t.who_name;

\copy ctinter from druginfo.txt

alter table  ctinter add column terms tsvector;

update ctinter as x set terms = to_tsvector('english', y.name) from ctinter as y where x.id=y.id;

create index subanme_idx on subname using gist(q);
create index ctinter_idx on ctinter using gin(terms);

vacuum analyze verbose ctinter;
vacuum analyze verbose subname;

select * from ctinter join subname on terms@@q where sub_id_fk=53;
select * from subname join ctinter on q @@ terms where to_tsvector('english', 'biotin') @@ q ;

Day Two


Keeping track of a bit more here:

(Assuming ctstatus is already loaded)

create temporary table cttemp (nct varchar, title varchar, start_date varchar, phase varchar, status varchar);

\copy cttemp from /nfs/work/teague/Projects/trials/extracted/trials.txt

update cttemp set start_date = null where start_date='';

insert into ct2 (ct_code, description, start_date, ctphase_fk, ctstatus_fk) select nct, title, start_date::date, ctphase_id, ctstatus_id from cttemp join ctphase on phase=ctphase.name join ctstatus on status=ctstatus.name;

create temporary table ct2inttemp (code varchar, kind varchar, name varchar, description varchar);

\copy ctinttemp from /nfs/work/teague/Projects/trials/extracted/trial_drugs.txt

insert into clinical1.ct2int  (ct2_fk, name) select ct2_id, name from ctinttemp join clinical1.ct2 on code=ct_code;

create index ix_ct2int_name on clinical1.ct2int (name);
create index ix_ct2int_ct2_fk on clinical1.ct2int (ct2_fk);

alter table ctinttemp add column terms tsvector;
update ctinttemp set terms=to_tsvector('english', name); 

create index ix_ct2inttemp_terms on ctinttemp using gin (terms);

vacuum analyze verbose ct2inttemp;

select sub_id_fk, ct.ct2int_id, ctemp.name as intname, subname.name as subname into temp table ctsubinttemp from subname join ct2inttemp as ctemp on ctemp.terms @@ subname.query join (select ct_code as code, ct2int.name as name, ct2int_id from ct2 join ct2int on ct2_fk=ct2_id) as ct on (ct.name=ctemp.name and ct.code=ctemp.code) where ctemp.kind in ('Drug', 'Nutritional Suppliement');

-- insert into clinical1.ct2subint (sub_id_fk, ct2int_fk) select distinct sub_id_fk, ct2int_id from ctinttemp as ct join clinical1.ct2 on ct.code=ct_code join clinical1.ct2int on ct2_id=ct2_fk and clinical1.ct2int.name=ct.name join subname on ct.terms@@subname.query;

-- insert into clinical1.ct2subint (sub_id_fk, ct2int_fk) select distinct sub_id_fk, ct2int_id from ctinttemp as ct join clinical1.ct2 on ct.code=ct_code join clinical1.ct2int as cti on ct2_id=cti.ct2_fk and cti.name=ct.name join subname on ct.terms@@subname.query where not exists(select 1 from clinical1.ct2subint as e where e.sub_id_fk=subname.sub_id_fk and e.ct2int_fk=cti.ct2int_id);

Day Three

create temporary table ctcondtmp (code varchar, name varchar, slug varchar);

\copy ctcondtemp from /nfs/work/teague/Projects/trials/extracted/trial_conditions.txt

insert into clinical1.ct2condition (short_name, name) select distinct on (slug) slug, name from ctcondtemp order by slug, name;

insert into clinical1.ct2tocond (ct2_fk, ct2condition_fk) select ct2_id, ct2condition_id from ctcondtemp join clinical1.ct2 on code=ct_code join clinical1.ct2condition on slug=short_name;

alter table ctcondtemp add column terms tsvector;
update ctcondtemp set terms=to_tsvector('english', name);
create index ix_ctcondtemp_terms on ctcondtemp using gist(terms);
vacuum analyze verbose ctcondtemp;

insert into clinical1.ct2condclass (name, description) values ('cancer', 'All classes of cancer') returning ct2condclass_id;

# returns 1

update clinical1.ct2condition set condclass_fk=1 where ct2condition_id in (select ct2condition_id from ctcondtemp join clinical1.ct2condition on slug=short_name where terms @@ to_tsquery('cancer | neoplasm | oncology | leukemia | melanoma | carcinoma '));