forked from OHDSI/Vocabulary-v5.0
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'master' of https://github.com/OHDSI/Vocabulary-v5.0
# Conflicts: # Gemscript/load_stage.sql
- Loading branch information
Showing
1 changed file
with
368 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,368 @@ | ||
/************************************************************************** | ||
* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI) | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
* | ||
* Authors: Christian Reich, Dmitry Dymshyts, Anna Ostropolets | ||
* Date: 2016 | ||
**************************************************************************/ | ||
--this algorithm shows you concept_code and an error type related to this code, | ||
--for ds_stage it gets drug_concept_code | ||
--for relationship_to_concept it gives concept_code_1 | ||
--for internal_relationship it gives concpept_code_1 | ||
--for drug_concept_stage it gives concept_code | ||
-- 1. relationship_to_concept | ||
--incorrect mapping to concept | ||
--different classes in concept_code_1 and concept_id_2 | ||
select error_type, count (1) from ( | ||
select a.concept_code, 'different classes in concept_code_1 and concept_id_2' as error_type from relationship_to_concept r | ||
join drug_concept_stage a on a.concept_code= r.concept_code_1 | ||
join devv5.concept c on c.concept_id = r.concept_id_2 and c.vocabulary_id = 'RxNorm' | ||
where a.concept_class_id != c.concept_class_id | ||
union | ||
--concept_id's that don't exist | ||
select a.concept_code, 'concept_id_2 exists but doesnt belong to any concept' from relationship_to_concept r | ||
join drug_concept_stage a on a.concept_code= r.concept_code_1 | ||
left join devv5.concept c on c.concept_id = r.concept_id_2 | ||
where c.concept_name is null | ||
union | ||
-- 2. ds_stage | ||
--wrong units | ||
select distinct drug_concept_code, 'unit doesnt exist in concept_table' from ds_stage where | ||
( amount_unit not in (select concept_code from drug_concept_stage where concept_class_id ='Unit') | ||
or numerator_unit not in (select concept_code from drug_concept_stage where concept_class_id ='Unit') | ||
or denominator_unit not in (select concept_code from drug_concept_stage where concept_class_id ='Unit') | ||
) | ||
union | ||
--0 in ds_stage values | ||
select drug_concept_code,'0 in values' | ||
from ds_stage where 0 in (numerator_value,amount_value,denominator_value) | ||
|
||
union | ||
-- drug codes are not exist in a drug_concept_stage but present in ds_stage | ||
select distinct s.drug_concept_code, 'ds_stage has drug_codes absent in drug_concept_stage' from ds_stage s | ||
left join drug_concept_stage a on a.concept_code = s.drug_concept_code and a.concept_class_id='Drug Product' | ||
left join drug_concept_stage b on b.concept_code = s.INGREDIENT_CONCEPT_CODE and b.concept_class_id = 'Ingredient' | ||
where a.concept_code is null | ||
union | ||
-- ingredient codes not exist in a drug_concept_stage but present in ds_stage | ||
select distinct s.drug_concept_code, 'ds_stage has ingredient_codes absent in drug_concept_stage' from ds_stage s | ||
left join drug_concept_stage a on a.concept_code = s.drug_concept_code and a.concept_class_id='Drug Product' | ||
left join drug_concept_stage b on b.concept_code = s.INGREDIENT_CONCEPT_CODE and b.concept_class_id = 'Ingredient' | ||
where b.concept_code is null | ||
union | ||
--impossible entries combinations in ds_stage table | ||
select distinct s.drug_concept_code, 'impossible combination of values and units in ds_stage' from ds_stage s where AMOUNT_VALUE is not null and AMOUNT_UNIT is null or | ||
(denominator_VALUE is not null and denominator_UNIT is null) or (NUMERATOR_VALUE is not null and denominator_UNIT is null and DENOMINATOR_VALUE is null and NUMERATOR_UNIT !='%') | ||
or (AMOUNT_VALUE is null and AMOUNT_UNIT is not null) | ||
union | ||
--Different DENOMINATOR_VALUE or DENOMINATOR_unit in the same drug | ||
select distinct a.drug_concept_code, 'Different DENOMINATOR_VALUE or DENOMINATOR_unit in the same drug' | ||
from ds_stage a join ds_stage b on a.drug_concept_code = b.drug_concept_code | ||
and (a.DENOMINATOR_VALUE is null and b.DENOMINATOR_VALUE is not null | ||
or a.DENOMINATOR_VALUE != b.DENOMINATOR_VALUE | ||
or a.DENOMINATOR_unit != b.DENOMINATOR_unit) | ||
union | ||
--ds_stage dublicates | ||
select drug_concept_code, 'ds_stage dublicates' from ( | ||
select drug_concept_code, ingredient_concept_code from ds_stage group by drug_concept_code, ingredient_concept_code having count (1) > 1 | ||
) | ||
union | ||
select drug_concept_code,'missing unit' | ||
from ds_stage where (numerator_value is not null and numerator_unit is null) | ||
OR (denominator_value is not null and denominator_unit is null) | ||
OR (amount_value is not null and amount_unit is null) | ||
union | ||
--3. internal_relationship_dublicates | ||
select concept_code_1, 'internal_relationship_dublicates' from ( | ||
select concept_code_1, concept_code_2 from internal_relationship_stage group by concept_code_1, concept_code_2 having count (1) > 1 | ||
) | ||
union | ||
--4.drug_concept_stage | ||
--duplicates in drug_concept_stage table | ||
select distinct concept_code,'Duplicate concept code' from drug_concept_stage | ||
where concept_code in ( | ||
select concept_code from drug_concept_stage group by concept_code having count(1)>1) | ||
union | ||
--same names for different drug classes | ||
select concept_code, 'same names for basic drug classes' from drug_concept_stage where trim(lower(concept_name)) in ( | ||
select trim(lower(concept_name)) as n from drug_concept_stage where concept_class_id in ('Brand Name', 'Dose Form', 'Unit', 'Ingredient', 'Supplier') and standard_concept='S' group by trim(lower(concept_name)) having count(8)>1) | ||
union | ||
--short names but not a Unit | ||
select concept_code, 'short names but not a Unit' from drug_concept_stage where length(concept_name)<3 and concept_class_id not in ('Unit') | ||
union | ||
--concept_name is null | ||
select concept_code,'concept_name is null' from drug_concept_stage where concept_name is null | ||
union | ||
--relationship_to_concept | ||
--relationship_to_concept concept_code_1_2 duplicates | ||
select concept_code_1, 'relationship_to_concept concept_code_1_2 duplicates' from ( | ||
select concept_code_1, concept_id_2 from relationship_to_concept group by concept_code_1, concept_id_2 having count (1) >1) | ||
union | ||
--relationship_to_concept concept_code_1_precedence duplicates | ||
select concept_code_1, 'relationship_to_concept concept_code_1_2 duplicates' from ( | ||
select concept_code_1, precedence from relationship_to_concept group by concept_code_1, precedence having count (1) >1) | ||
union | ||
--Brand Name doesnt relate to any drug | ||
select distinct a.concept_code, 'Brand Name doesnt relate to any drug' from drug_concept_stage a left join internal_relationship_stage b on a.concept_code = b.concept_code_2 | ||
where a.concept_class_id= 'Brand Name' and b.concept_code_1 is null | ||
union | ||
--Dose Form doesnt relate to any drug | ||
select distinct a.concept_code, 'Dose Form doesnt relate to any drug' from drug_concept_stage a left join internal_relationship_stage b on a.concept_code = b.concept_code_2 | ||
where a.concept_class_id= 'Dose Form' and b.concept_code_1 is null | ||
union | ||
--duplicates in ds_stage | ||
--Concept_code_1 - Precedence duplicates | ||
select concept_code_1, 'Concept_code_1 - precedence duplicates' from ( | ||
select concept_code_1,precedence from relationship_to_concept group by concept_code_1,precedence having count (1) >1 ) | ||
union | ||
----Concept_code_1 - Ingredient duplicates | ||
select concept_code_1, 'Concept_code_1 - precedence duplicates' from ( | ||
select concept_code_1,concept_id_2 from relationship_to_concept group by concept_code_1,concept_id_2 having count (1) >1 ) | ||
union | ||
--Unit without mapping | ||
select CONCEPT_CODE, 'Unit without mapping' from | ||
drug_concept_Stage a left join relationship_to_concept b on a.concept_code = b.concept_code_1 | ||
where concept_class_id in('Unit') and b.concept_code_1 is null | ||
union | ||
--Dose Form without mapping | ||
select CONCEPT_CODE, 'Dose Form without mapping' from | ||
drug_concept_Stage a | ||
left join relationship_to_concept b on a.concept_code = b.concept_code_1 | ||
where concept_class_id in('Dose Form') and b.concept_code_1 is null | ||
union | ||
--duplicates will be present in drug_concept_stage, unable to summarize values | ||
select distinct a.drug_concept_code, 'concept overlaps with other one by target concept, please look also onto rigth sight of query result' from ( | ||
select distinct a.amount_unit, a.numerator_unit ,cs.concept_code, cs.concept_name as canada_name, rc.concept_name as RxName , a.drug_concept_code from ds_stage a join relationship_to_concept b on a.ingredient_concept_code = b.concept_code_1 | ||
join drug_Concept_stage cs on cs.concept_code = a.ingredient_concept_code | ||
join devv5.concept rc on rc.concept_id = b.concept_id_2 | ||
join drug_Concept_stage rd on rd.concept_code = a.drug_concept_code | ||
join ( | ||
select a.drug_concept_code, b.concept_id_2 from ds_stage a join relationship_to_concept b on a.ingredient_concept_code = b.concept_code_1 | ||
group by a.drug_concept_code, b.concept_id_2 having count (1) > 1) c | ||
on c.DRUG_CONCEPT_CODE= a.DRUG_CONCEPT_CODE and c.CONCEPT_ID_2 = b.CONCEPT_ID_2 | ||
where precedence = 1) a | ||
join | ||
( | ||
select distinct a.amount_unit, a.numerator_unit , cs.concept_name as canada_name, rc.concept_name as RxName,a.drug_concept_code from ds_stage a join relationship_to_concept b on a.ingredient_concept_code = b.concept_code_1 | ||
join drug_Concept_stage cs on cs.concept_code = a.ingredient_concept_code | ||
join devv5.concept rc on rc.concept_id = b.concept_id_2 | ||
join drug_Concept_stage rd on rd.concept_code = a.drug_concept_code | ||
join ( | ||
select a.drug_concept_code, b.concept_id_2 from ds_stage a join relationship_to_concept b on a.ingredient_concept_code = b.concept_code_1 | ||
group by a.drug_concept_code, b.concept_id_2 having count (1) > 1) c | ||
on c.DRUG_CONCEPT_CODE= a.DRUG_CONCEPT_CODE and c.CONCEPT_ID_2 = b.CONCEPT_ID_2 | ||
where precedence = 1) b on a.RxName = b.RxName and a.drug_concept_code = b.drug_concept_code and (a.AMOUNT_UNIT !=b.amount_unit or a.NUMERATOR_UNIT != b.NUMERATOR_UNIT or a.NUMERATOR_UNIT is null and b.NUMERATOR_UNIT is not null | ||
or a.AMOUNT_UNIT is null and b.amount_unit is not null) | ||
union | ||
--Improper valid_end_date | ||
select concept_code, 'Improper valid_end_date' from drug_concept_stage where concept_code not in ( | ||
select concept_code from drug_concept_stage | ||
where valid_end_date <=SYSDATE or valid_end_date = to_date ('2099-12-31', 'YYYY-MM-DD') ) | ||
union | ||
--Improper valid_start_date | ||
select concept_code, 'Improper valid_start_date' from drug_concept_stage where valid_start_date > SYSDATE | ||
union | ||
--Wrong vocabulary mapping | ||
select concept_code_1,'Wrong vocabulary mapping' from relationship_to_concept a | ||
join devv5.concept b on a.concept_id_2=b.concept_id | ||
where b.VOCABULARY_ID not in ('ATC','UCUM','RxNorm','RxNorm Extension') | ||
union | ||
--"<=0" in ds_stage values | ||
select drug_concept_code,'0 in values' | ||
from ds_stage where amount_value<=0 or denominator_value<=0 or numerator_value<=0 | ||
union | ||
--pc_stage issues | ||
--pc_stage duplicates | ||
select PACK_CONCEPT_CODE, 'pc_stage duplicates' from ( | ||
select PACK_CONCEPT_CODE, DRUG_CONCEPT_CODE from pc_stage group by DRUG_CONCEPT_CODE, PACK_CONCEPT_CODE having count (1) > 1 ) | ||
union | ||
--non drug as a pack component | ||
select DRUG_CONCEPT_CODE, 'non drug as a pack component' from pc_stage | ||
join drug_concept_stage on DRUG_CONCEPT_CODE = concept_code and concept_class_id !='Drug Product' | ||
union | ||
--wrong drug classes | ||
select concept_code, 'wrong drug classes' from drug_concept_stage where concept_class_id not in ('Ingredient', 'Unit', 'Drug Product', 'Dose Form', 'Supplier', 'Brand Name', 'Device') | ||
union | ||
--wrong domains | ||
select concept_code, 'wrong domain_id' from drug_concept_stage where domain_id not in ('Drug', 'Device') | ||
union | ||
--wrong dosages ,> 1000 | ||
select drug_concept_code, 'wrong dosages > 1000' from ds_stage | ||
|
||
where ( | ||
lower (numerator_unit) in ('mg') | ||
and lower (denominator_unit) in ('ml','g') | ||
or | ||
lower (numerator_unit) in ('g') | ||
and lower (denominator_unit) in ('l') | ||
) | ||
and numerator_value / denominator_value > 1000 | ||
union | ||
select drug_concept_code,'mg/mg >1' from ds_stage where numerator_unit='mg' and denominator_unit='mg' and (numerator_value/denominator_value>1 | ||
or (numerator_value>1 and denominator_vALUE IS NULL)) | ||
union | ||
--wrong dosages ,> 1 | ||
select drug_concept_code, 'wrong dosages > 1' from ds_stage | ||
|
||
where lower (numerator_unit) in ('g') | ||
and lower (denominator_unit) in ('ml') | ||
|
||
and numerator_value / denominator_value > 1 | ||
union | ||
--% in ds_stage | ||
select drug_concept_code, '% in ds_stage' from ds_stage | ||
where numerator_unit='%' or amount_unit ='%' or denominator_unit ='%' | ||
union | ||
-- as we don't have the mapping all the decives should be standard | ||
--nobody cares here in Gemscript | ||
--select concept_code , 'non-standard devices' from drug_concept_stage where domain_id = 'Device' and standard_concept is null | ||
--union | ||
--several attributes but should be the only one | ||
select concept_code_1, 'several attributes but should be the only one' from ( | ||
select concept_code_1,b.concept_class_id from internal_relationship_stage a | ||
join drug_concept_stage b on concept_code = concept_code_2 | ||
where b.concept_class_id in ('Supplier', 'Dose Form', 'Brand Name') | ||
group by concept_code_1,b.concept_class_id having count (1) >1 | ||
) | ||
union | ||
--replacement mappings to several concepts | ||
select concept_code_1, 'several attributes but should be the only one' from ( | ||
select concept_code_1,b.concept_class_id from internal_relationship_stage a | ||
join drug_concept_stage z on z.concept_code = concept_code_1 | ||
join drug_concept_stage b on b.concept_code = concept_code_2 | ||
where b.concept_class_id = z.concept_class_id | ||
group by concept_code_1,b.concept_class_id having count (1) >1 | ||
) | ||
union | ||
--sequence intersection | ||
select a.concept_code, 'sequence intersection' from drug_concept_stage a | ||
join concept b on a.concept_code = b.concept_code | ||
where a.concept_code like 'OMOP%' | ||
union | ||
--invalid_concept_id_2 | ||
select concept_code_1,'invalid_concept_id_2' from relationship_to_concept | ||
join concept on concept_id = concept_id_2 | ||
where invalid_reason is not null | ||
union | ||
--map to non-stand_ingredient | ||
select concept_code_1,'map to non-stand_ingredient' from relationship_to_concept | ||
join drug_concept_stage s on s.concept_code = concept_code_1 | ||
join concept c on c.concept_id = concept_id_2 | ||
where c.standard_concept is null and s.concept_class_id = 'Ingredient' | ||
union | ||
--map to unit that doesn't exist in RxNorm | ||
select a.concept_code_1,'map to unit that doesn''t exist in RxNorm' from relationship_to_concept a | ||
join drug_concept_stage b on concept_code_1=concept_code | ||
join concept c on concept_id_2=c.concept_id | ||
where b.concept_class_id='Unit' and concept_id_2 not in ( | ||
select distinct nvl(AMOUNT_UNIT_CONCEPT_ID,NUMERATOR_UNIT_CONCEPT_ID) from drug_strength a | ||
join concept b on drug_concept_id=concept_id and vocabulary_id='RxNorm' | ||
where nvl(AMOUNT_UNIT_CONCEPT_ID,NUMERATOR_UNIT_CONCEPT_ID) is not null | ||
union select distinct DENOMINATOR_UNIT_CONCEPT_ID from drug_strength a | ||
join concept b on drug_concept_id=concept_id and vocabulary_id='RxNorm' | ||
where DENOMINATOR_UNIT_CONCEPT_ID is not null) | ||
union | ||
select concept_code_1,'Empty conversion factor' | ||
from relationship_to_concept a join drug_concept_stage b on concept_code_1=concept_code and concept_class_id='Unit' | ||
where conversion_factor is null | ||
union | ||
--replacement with invalid concept | ||
select concept_code_1, 'replacement with invalid concept' from internal_relationship_stage | ||
join drug_concept_stage on concept_code = concept_code_2 | ||
where invalid_reason is not null | ||
union | ||
--standard but invalid concept | ||
select concept_code,'standard but invalid concept' from drug_concept_stage where standard_concept='S' and invalid_reason is not null | ||
union | ||
--standard ingredients have replacemt mapping | ||
select concept_code,'standard ingredients have replacemt mapping' from drug_concept_stage | ||
join internal_relationship_stage on concept_code_1 = concept_code | ||
where concept_class_id ='Ingredient' and standard_concept is not null | ||
union | ||
--non-standard ingredients don't have replacemt mapping | ||
--not sure about this, what happens to those non-standard ingedients in CNDV | ||
select concept_code,'non-standard ingredients dont have replacemt mapping ' from drug_concept_stage | ||
left join internal_relationship_stage on concept_code_1 = concept_code | ||
where concept_class_id ='Ingredient' and standard_concept is null and concept_code_2 is null | ||
union | ||
--wrong dosages ,> 1000, with conversion | ||
select drug_concept_code, 'wrong dosages > 1000, with conversion' from ds_stage ds | ||
join relationship_to_concept n on numerator_unit = n.concept_code_1 and n.concept_id_2 = 8576 | ||
join relationship_to_concept d on denominator_unit = d.concept_code_1 and d.concept_id_2 = 8587 | ||
where numerator_value*n.conversion_factor / (denominator_value*d.conversion_factor) > 1000 | ||
) group by error_type | ||
; | ||
--5min 40 sec to proceed | ||
; | ||
select distinct numerator_unit, 'unit doesnt exist in concept_table' from ds_stage where | ||
( amount_unit not in (select concept_code from drug_concept_stage where concept_class_id ='Unit') | ||
or numerator_unit not in (select concept_code from drug_concept_stage where concept_class_id ='Unit') | ||
or denominator_unit not in (select concept_code from drug_concept_stage where concept_class_id ='Unit') | ||
) | ||
union | ||
select distinct amount_unit, 'unit doesnt exist in concept_table' from ds_stage where | ||
( amount_unit not in (select concept_code from drug_concept_stage where concept_class_id ='Unit') | ||
or numerator_unit not in (select concept_code from drug_concept_stage where concept_class_id ='Unit') | ||
or denominator_unit not in (select concept_code from drug_concept_stage where concept_class_id ='Unit') | ||
) | ||
; | ||
|
||
select concept_code_1,b.concept_class_id from internal_relationship_stage a | ||
join drug_concept_stage b on concept_code = concept_code_2 | ||
where b.concept_class_id in ('Supplier', 'Dose Form', 'Brand Name') | ||
group by concept_code_1,b.concept_class_id having count (1) >1 | ||
) zz on concept | ||
; | ||
select * from concept where concept_name like '%synalar N%' | ||
; | ||
select * from ds_stage ds | ||
join drug_concept_stage dc on dc.concept_code = ds.drug_concept_code | ||
where numerator_unit='mg' and denominator_unit='mg' and (numerator_value/denominator_value>1 | ||
or (numerator_value>1 and denominator_vALUE IS NULL)) | ||
; | ||
--wrong dosages ,> 1 | ||
select * from ds_stage ds | ||
join drug_concept_stage dc on dc.concept_code = ds.drug_concept_code | ||
where lower (numerator_unit) in ('g') | ||
and lower (denominator_unit) in ('ml') | ||
and numerator_value / denominator_value > 1 | ||
; | ||
select * from DS_all_tmp where concept_code= '89838998' | ||
; | ||
select * from DS_all where concept_code= '89838998' | ||
; | ||
select * from ds_all | ||
where concept_code in (select concept_code from thin_need_to_map where regexp_like (thin_name , '[[:digit:]\.]+.*/ml.*[[:digit:]\.]+ml')) | ||
and numerator_value is not null and denominator_value is not null and numerator_unit !='%' | ||
; | ||
|
||
update ds_all a | ||
set numerator_value =numerator_value*denominator_value | ||
; | ||
select * from ds_all where regexp_like (concept_name, '[[:digit:]\.]+(litre|ml)') | ||
and not regexp_like (concept_name, '/[[:digit:]\.]+(litre|ml)') and amount_value is not null and AMOUNT_UNIT in ('litre', 'ml') | ||
; | ||
-- need to comment | ||
update ds_all set denominator_value = regexp_substr (regexp_substr(concept_name, ' [[:digit:]\.]+(litre(s?)|ml)') , '[[:digit:]\.]+'), | ||
denominator_unit = regexp_substr (regexp_substr(concept_name, ' [[:digit:]\.]+(litre(s?)|ml)') , '(litre(s?)|ml)') | ||
; | ||
select regexp_substr (regexp_substr(concept_name, ' [[:digit:]\.]+(litre(s?)|ml)') , '[[:digit:]\.]+') as new_denom_value, | ||
regexp_substr (regexp_substr(concept_name, ' [[:digit:]\.]+(litre(s?)|ml)') , '(litre(s?)|ml)') as new_denom_unit, a.* from ds_all a | ||
where regexp_like (concept_name, '\d+(litre(s?)|ml)') and not regexp_like (concept_name, '/[[:digit:]\.]+(litre(s?)|ml)') | ||
and denominator_value is null | ||
; | ||
select * from thin_need_to_map where regexp_Like (thin_name,' [[:digit:]\.]+(litre(s?)|ml)') and domain_id = 'Drug' | ||
; | ||
select * From thin_comp where volume is not null | ||
; |