Creating clinical name mappings: Difference between revisions
Jump to navigation
Jump to search
(asdf) |
m (Updating) |
||
(2 intermediate revisions by one other user not shown) | |||
Line 1: | Line 1: | ||
This is part of the curation of clinical trial data. | This is part of the curation of clinical trial data. | ||
= Day One = | |||
<pre> | <pre> | ||
create temp table subname as select sub_id_fk as sub_id_fk, who_name as name from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20') join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno join chembl20.atc_classification as ac on mac.level5=ac.level5; | create temp table subname as select sub_id_fk as sub_id_fk, who_name as name from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20') join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno join chembl20.atc_classification as ac on mac.level5=ac.level5; | ||
Line 24: | Line 25: | ||
select * from ctinter join subname on terms@@q where sub_id_fk=53; | select * from ctinter join subname on terms@@q where sub_id_fk=53; | ||
select * from subname join ctinter on q @@ terms where to_tsvector('english', 'biotin') @@ q ; | select * from subname join ctinter on q @@ terms where to_tsvector('english', 'biotin') @@ q ; | ||
</pre> | |||
= Day Two = | |||
<pre> | |||
Keeping track of a bit more here: | |||
(Assuming ctstatus is already loaded) | |||
create temporary table cttemp (nct varchar, title varchar, start_date varchar, phase varchar, status varchar); | |||
\copy cttemp from /nfs/work/teague/Projects/trials/extracted/trials.txt | |||
update cttemp set start_date = null where start_date=''; | |||
insert into ct2 (ct_code, description, start_date, ctphase_fk, ctstatus_fk) select nct, title, start_date::date, ctphase_id, ctstatus_id from cttemp join ctphase on phase=ctphase.name join ctstatus on status=ctstatus.name; | |||
create temporary table ct2inttemp (code varchar, kind varchar, name varchar, description varchar); | |||
\copy ctinttemp from /nfs/work/teague/Projects/trials/extracted/trial_drugs.txt | |||
insert into clinical1.ct2int (ct2_fk, name) select ct2_id, name from ctinttemp join clinical1.ct2 on code=ct_code; | |||
create index ix_ct2int_name on clinical1.ct2int (name); | |||
create index ix_ct2int_ct2_fk on clinical1.ct2int (ct2_fk); | |||
alter table ctinttemp add column terms tsvector; | |||
update ctinttemp set terms=to_tsvector('english', name); | |||
create index ix_ct2inttemp_terms on ctinttemp using gin (terms); | |||
vacuum analyze verbose ct2inttemp; | |||
select sub_id_fk, ct.ct2int_id, ctemp.name as intname, subname.name as subname into temp table ctsubinttemp from subname join ct2inttemp as ctemp on ctemp.terms @@ subname.query join (select ct_code as code, ct2int.name as name, ct2int_id from ct2 join ct2int on ct2_fk=ct2_id) as ct on (ct.name=ctemp.name and ct.code=ctemp.code) where ctemp.kind in ('Drug', 'Nutritional Suppliement'); | |||
-- insert into clinical1.ct2subint (sub_id_fk, ct2int_fk) select distinct sub_id_fk, ct2int_id from ctinttemp as ct join clinical1.ct2 on ct.code=ct_code join clinical1.ct2int on ct2_id=ct2_fk and clinical1.ct2int.name=ct.name join subname on ct.terms@@subname.query; | |||
-- insert into clinical1.ct2subint (sub_id_fk, ct2int_fk) select distinct sub_id_fk, ct2int_id from ctinttemp as ct join clinical1.ct2 on ct.code=ct_code join clinical1.ct2int as cti on ct2_id=cti.ct2_fk and cti.name=ct.name join subname on ct.terms@@subname.query where not exists(select 1 from clinical1.ct2subint as e where e.sub_id_fk=subname.sub_id_fk and e.ct2int_fk=cti.ct2int_id); | |||
</pre> | |||
= Day Three = | |||
<pre> | |||
create temporary table ctcondtmp (code varchar, name varchar, slug varchar); | |||
\copy ctcondtemp from /nfs/work/teague/Projects/trials/extracted/trial_conditions.txt | |||
insert into clinical1.ct2condition (short_name, name) select distinct on (slug) slug, name from ctcondtemp order by slug, name; | |||
insert into clinical1.ct2tocond (ct2_fk, ct2condition_fk) select ct2_id, ct2condition_id from ctcondtemp join clinical1.ct2 on code=ct_code join clinical1.ct2condition on slug=short_name; | |||
alter table ctcondtemp add column terms tsvector; | |||
update ctcondtemp set terms=to_tsvector('english', name); | |||
create index ix_ctcondtemp_terms on ctcondtemp using gist(terms); | |||
vacuum analyze verbose ctcondtemp; | |||
insert into clinical1.ct2condclass (name, description) values ('cancer', 'All classes of cancer') returning ct2condclass_id; | |||
# returns 1 | |||
update clinical1.ct2condition set condclass_fk=1 where ct2condition_id in (select ct2condition_id from ctcondtemp join clinical1.ct2condition on slug=short_name where terms @@ to_tsquery('cancer | neoplasm | oncology | leukemia | melanoma | carcinoma ')); | |||
</pre> | </pre> | ||
[[Category:Curator]] | [[Category:Curator]] |
Latest revision as of 22:03, 13 August 2015
This is part of the curation of clinical trial data.
Day One
create temp table subname as select sub_id_fk as sub_id_fk, who_name as name from catalog_item join catalog on (cat_id_fk=cat_id and short_name='chembl20') join chembl20.molecule_dictionary as md on supplier_code=md.chembl_id join chembl20.molecule_atc_classification as mac on md.molregno=mac.molregno join chembl20.atc_classification as ac on mac.level5=ac.level5; insert into subname select cs.sub_id_fk, sy.synonym from catalog_substance as cs join synonym as s on cs.cat_content_fk = s.cat_content_fk where not exists (select 1 from subname as sn where sn.sub_id_fk = cs.sub_id_fk and sn.name = sy.synonym); alter table subname add column q tsquery; update table subname as s set q=plainto_tsquery(t.who_name) from subname as t where s.sub_id_fk=t.sub_id_fk and s.who_name=t.who_name; \copy ctinter from druginfo.txt alter table ctinter add column terms tsvector; update ctinter as x set terms = to_tsvector('english', y.name) from ctinter as y where x.id=y.id; create index subanme_idx on subname using gist(q); create index ctinter_idx on ctinter using gin(terms); vacuum analyze verbose ctinter; vacuum analyze verbose subname; select * from ctinter join subname on terms@@q where sub_id_fk=53; select * from subname join ctinter on q @@ terms where to_tsvector('english', 'biotin') @@ q ;
Day Two
Keeping track of a bit more here: (Assuming ctstatus is already loaded) create temporary table cttemp (nct varchar, title varchar, start_date varchar, phase varchar, status varchar); \copy cttemp from /nfs/work/teague/Projects/trials/extracted/trials.txt update cttemp set start_date = null where start_date=''; insert into ct2 (ct_code, description, start_date, ctphase_fk, ctstatus_fk) select nct, title, start_date::date, ctphase_id, ctstatus_id from cttemp join ctphase on phase=ctphase.name join ctstatus on status=ctstatus.name; create temporary table ct2inttemp (code varchar, kind varchar, name varchar, description varchar); \copy ctinttemp from /nfs/work/teague/Projects/trials/extracted/trial_drugs.txt insert into clinical1.ct2int (ct2_fk, name) select ct2_id, name from ctinttemp join clinical1.ct2 on code=ct_code; create index ix_ct2int_name on clinical1.ct2int (name); create index ix_ct2int_ct2_fk on clinical1.ct2int (ct2_fk); alter table ctinttemp add column terms tsvector; update ctinttemp set terms=to_tsvector('english', name); create index ix_ct2inttemp_terms on ctinttemp using gin (terms); vacuum analyze verbose ct2inttemp; select sub_id_fk, ct.ct2int_id, ctemp.name as intname, subname.name as subname into temp table ctsubinttemp from subname join ct2inttemp as ctemp on ctemp.terms @@ subname.query join (select ct_code as code, ct2int.name as name, ct2int_id from ct2 join ct2int on ct2_fk=ct2_id) as ct on (ct.name=ctemp.name and ct.code=ctemp.code) where ctemp.kind in ('Drug', 'Nutritional Suppliement'); -- insert into clinical1.ct2subint (sub_id_fk, ct2int_fk) select distinct sub_id_fk, ct2int_id from ctinttemp as ct join clinical1.ct2 on ct.code=ct_code join clinical1.ct2int on ct2_id=ct2_fk and clinical1.ct2int.name=ct.name join subname on ct.terms@@subname.query; -- insert into clinical1.ct2subint (sub_id_fk, ct2int_fk) select distinct sub_id_fk, ct2int_id from ctinttemp as ct join clinical1.ct2 on ct.code=ct_code join clinical1.ct2int as cti on ct2_id=cti.ct2_fk and cti.name=ct.name join subname on ct.terms@@subname.query where not exists(select 1 from clinical1.ct2subint as e where e.sub_id_fk=subname.sub_id_fk and e.ct2int_fk=cti.ct2int_id);
Day Three
create temporary table ctcondtmp (code varchar, name varchar, slug varchar); \copy ctcondtemp from /nfs/work/teague/Projects/trials/extracted/trial_conditions.txt insert into clinical1.ct2condition (short_name, name) select distinct on (slug) slug, name from ctcondtemp order by slug, name; insert into clinical1.ct2tocond (ct2_fk, ct2condition_fk) select ct2_id, ct2condition_id from ctcondtemp join clinical1.ct2 on code=ct_code join clinical1.ct2condition on slug=short_name; alter table ctcondtemp add column terms tsvector; update ctcondtemp set terms=to_tsvector('english', name); create index ix_ctcondtemp_terms on ctcondtemp using gist(terms); vacuum analyze verbose ctcondtemp; insert into clinical1.ct2condclass (name, description) values ('cancer', 'All classes of cancer') returning ct2condclass_id; # returns 1 update clinical1.ct2condition set condclass_fk=1 where ct2condition_id in (select ct2condition_id from ctcondtemp join clinical1.ct2condition on slug=short_name where terms @@ to_tsquery('cancer | neoplasm | oncology | leukemia | melanoma | carcinoma '));