forked from OHDSI/Vocabulary-v5.0
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathload_stage.sql
352 lines (319 loc) · 9.4 KB
/
load_stage.sql
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
/**************************************************************************
* Copyright 2016 Observational Health Data Sciences and Informatics (OHDSI)
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
* Authors: Dmitry Dymshyts, Timur Vakhitov
* Date: 2020
**************************************************************************/
--1. Update latest_update field to new date
DO $_$
BEGIN
PERFORM VOCABULARY_PACK.SetLatestUpdate(
pVocabularyName => 'CCAM',
pVocabularyDate => (SELECT vocabulary_date FROM sources.ccam_version LIMIT 1),
pVocabularyVersion => (SELECT vocabulary_version FROM sources.ccam_version LIMIT 1),
pVocabularyDevSchema => 'DEV_CCAM'
);
END $_$;
--2. Truncate all working tables
TRUNCATE TABLE concept_stage;
TRUNCATE TABLE concept_relationship_stage;
TRUNCATE TABLE concept_synonym_stage;
TRUNCATE TABLE pack_content_stage;
TRUNCATE TABLE drug_strength_stage;
--1. Prepare hierarchy
DROP TABLE IF EXISTS ccam_hierarchy;
CREATE UNLOGGED TABLE ccam_hierarchy AS
WITH RECURSIVE hierarchy_concepts AS (
SELECT cod_pere,
cod_menu,
NULL AS an_chapter,
rang::TEXT AS chapter,
1 AS LEVEL
FROM sources.ccam_r_menu
WHERE cod_pere = 1
UNION ALL
SELECT c.cod_pere,
c.cod_menu,
hc.chapter AS an_chapter,
CASE
WHEN hc.LEVEL = 1
THEN LPAD(hc.chapter, 2, '0')
ELSE hc.chapter
END || '.' || LPAD(c.rang::TEXT, 2, '0') AS chapter,
hc.LEVEL + 1
FROM sources.ccam_r_menu c
JOIN hierarchy_concepts hc ON hc.cod_menu = c.cod_pere
)
SELECT hc.an_chapter,
hc.chapter AS de_chapter,
de.libelle AS descendant_libelle,
l.cod_acte
FROM hierarchy_concepts hc
JOIN sources.ccam_r_menu de ON de.cod_menu = hc.cod_menu
LEFT JOIN LATERAL(SELECT DISTINCT a.cod_acte FROM sources.ccam_r_acte a WHERE a.menu_cod = de.cod_menu) l ON TRUE;
--2. Fill concept_stage with codes and without names
INSERT INTO concept_stage (
concept_name,
domain_id,
vocabulary_id,
concept_class_id,
standard_concept,
concept_code,
valid_start_date,
valid_end_date,
invalid_reason
)
--load main codes
SELECT concept_name,
domain_id,
vocabulary_id,
concept_class_id,
standard_concept,
concept_code,
valid_start_date,
valid_end_date,
CASE
WHEN valid_end_date = TO_DATE('20991231', 'yyyymmdd')
THEN NULL
ELSE 'D'
END AS invalid_reason
FROM (
SELECT NULL AS concept_name,
'Procedure' AS domain_id,
'CCAM' AS vocabulary_id,
'Procedure' AS concept_class_id,
NULL AS standard_concept,
cod_acte AS concept_code,
MIN(dt_creatio) AS valid_start_date,
COALESCE(MAX(dt_fin), TO_DATE('20991231', 'yyyymmdd')) AS valid_end_date
FROM sources.ccam_r_acte
GROUP BY cod_acte
) AS s0
UNION ALL
--load chapters
SELECT DISTINCT NULL AS concept_name,
'Procedure' AS domain_id,
'CCAM' AS vocabulary_id,
'Proc Hierarchy' AS concept_class_id,
NULL AS standard_concept,
de_chapter AS concept_code,
TO_DATE('19700101', 'yyyymmdd') AS valid_start_date, --chapters have no real date
TO_DATE('20991231', 'yyyymmdd') AS valid_end_date,
NULL AS invalid_reason
FROM ccam_hierarchy
UNION ALL
--load groups
SELECT NULL AS concept_name,
'Procedure' AS domain_id,
'CCAM' AS vocabulary_id,
'Proc Group' AS concept_class_id,
NULL AS standard_concept,
cod_regrou AS concept_code,
TO_DATE('19700101', 'yyyymmdd') AS valid_start_date, --groups have no real date
TO_DATE('20991231', 'yyyymmdd') AS valid_end_date,
NULL AS invalid_reason
FROM sources.ccam_r_regroupement;
--3. Fix invalid_reason for upgraded concepts
UPDATE concept_stage cs
SET invalid_reason = 'U'
FROM (
SELECT FIRST_VALUE(precedent) OVER (
PARTITION BY cod_acte ORDER BY dt_fin DESC
) AS prev_code
FROM sources.ccam_r_acte
WHERE precedent <> ''
) a
WHERE cs.concept_code = a.prev_code;
--4. Append English names
DO $_$
BEGIN
PERFORM VOCABULARY_PACK.ProcessManualConcepts();
END $_$;
--5. Fill concept_relationship_stage with upgraded concepts
INSERT INTO concept_relationship_stage (
concept_code_1,
concept_code_2,
vocabulary_id_1,
vocabulary_id_2,
relationship_id,
valid_start_date,
valid_end_date
)
SELECT s0.concept_code_1,
s0.concept_code_2,
'CCAM' AS vocabulary_id_1,
'CCAM' AS vocabulary_id_2,
'Concept replaced by',
cs.valid_end_date,
TO_DATE('20991231', 'yyyymmdd')
FROM (
SELECT DISTINCT FIRST_VALUE(precedent) OVER (
PARTITION BY cod_acte ORDER BY dt_fin DESC
) AS concept_code_1,
cod_acte AS concept_code_2
FROM sources.ccam_r_acte
WHERE precedent <> ''
) AS s0
JOIN concept_stage cs ON cs.concept_code = s0.concept_code_1;
--6. Fill concept_synonym_stage
INSERT INTO concept_synonym_stage (
synonym_concept_code,
synonym_vocabulary_id,
synonym_name,
language_concept_id
)
--load main codes
SELECT synonym_concept_code,
'CCAM' AS synonym_vocabulary_id,
synonym_name,
4180190 AS language_concept_id --French
FROM (
--there are duplicates here, so "union"
SELECT cod_acte AS synonym_concept_code,
FIRST_VALUE(TRIM(nom_court)) OVER (
PARTITION BY cod_acte ORDER BY dt_effet DESC --get the "last" name
) AS synonym_name
FROM sources.ccam_r_acte
UNION
SELECT cod_acte,
FIRST_VALUE(TRIM(nom_long || nom_long0)) OVER (
PARTITION BY cod_acte ORDER BY dt_effet DESC --get the "last" name
) AS synonym_name
FROM sources.ccam_r_acte
) AS s0
UNION ALL
--load chapters
SELECT DISTINCT de_chapter AS synonym_concept_code,
'CCAM' AS synonym_vocabulary_id,
descendant_libelle AS synonym_name,
4180190 AS language_concept_id --French
FROM ccam_hierarchy
UNION ALL
--load groups
SELECT cod_regrou AS synonym_concept_code,
'CCAM' AS synonym_vocabulary_id,
libelle AS synonym_name,
4180190 AS language_concept_id --French
FROM sources.ccam_r_regroupement;
--7. Load hierarchy to the concept_relationship_stage
INSERT INTO concept_relationship_stage (
concept_code_1,
concept_code_2,
vocabulary_id_1,
vocabulary_id_2,
relationship_id,
valid_start_date,
valid_end_date
)
--hierarchy between chapters
SELECT DISTINCT an_chapter AS concept_code_1,
de_chapter AS concept_code_2,
'CCAM' AS vocabulary_id_1,
'CCAM' AS vocabulary_id_2,
'Subsumes' AS relationship_id,
TO_DATE('19700101', 'yyyymmdd') AS valid_start_date,
TO_DATE('20991231', 'yyyymmdd') AS valid_end_date
FROM ccam_hierarchy
WHERE an_chapter IS NOT NULL --the first element in the source is technical root "ARBORESCENCE CCAM", so skipped
UNION ALL
--hierarchy between chapters and main codes
SELECT de_chapter AS concept_code_1,
cod_acte AS concept_code_2,
'CCAM' AS vocabulary_id_1,
'CCAM' AS vocabulary_id_2,
'Subsumes' AS relationship_id,
TO_DATE('19700101', 'yyyymmdd') AS valid_start_date,
TO_DATE('20991231', 'yyyymmdd') AS valid_end_date
FROM ccam_hierarchy
WHERE cod_acte IS NOT NULL --some chapters have no hierarchy
UNION ALL
--hierarchy between main codes and groups
SELECT i.regrou_cod AS concept_code_1,
cs.concept_code AS concept_code_2,
'CCAM' AS vocabulary_id_1,
'CCAM' AS vocabulary_id_2,
'Subsumes' AS relationship_id,
i.dt_modif AS valid_start_date,
TO_DATE('20991231', 'yyyymmdd') AS valid_end_date
FROM concept_stage cs
JOIN (
SELECT DISTINCT acte_cod,
LAST_VALUE(regrou_cod) OVER (
PARTITION BY acte_cod,
activ_cod ORDER BY dt_modif ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING
) AS regrou_cod, --get the "last" group code
FIRST_VALUE(dt_modif) OVER (
PARTITION BY acte_cod,
activ_cod ORDER BY dt_modif
) AS dt_modif --get the min date (dt_modif=acdt_modif, so it doesn't matter which field we take)
FROM sources.ccam_r_acte_ivite
) i ON i.acte_cod = cs.concept_code;
--8. Append manual relationships
DO $_$
BEGIN
PERFORM VOCABULARY_PACK.ProcessManualRelationships();
END $_$;
--9. Working with replacement mappings
DO $_$
BEGIN
PERFORM VOCABULARY_PACK.CheckReplacementMappings();
END $_$;
--10. Add mapping from deprecated to fresh concepts
DO $_$
BEGIN
PERFORM VOCABULARY_PACK.AddFreshMAPSTO();
END $_$;
--11. Deprecate 'Maps to' mappings to deprecated and upgraded concepts
DO $_$
BEGIN
PERFORM VOCABULARY_PACK.DeprecateWrongMAPSTO();
END $_$;
--12. Delete ambiguous 'Maps to' mappings
DO $_$
BEGIN
PERFORM VOCABULARY_PACK.DeleteAmbiguousMAPSTO();
END $_$;
--13. Update domain_id from SNOMED
UPDATE concept_stage cs
SET domain_id = i.domain_id
FROM (
SELECT DISTINCT crs.concept_code_1,
FIRST_VALUE(c2.domain_id) OVER (
PARTITION BY crs.concept_code_1 ORDER BY CASE c2.domain_id
WHEN 'Condition'
THEN 1
WHEN 'Observation'
THEN 2
WHEN 'Procedure'
THEN 3
WHEN 'Measurement'
THEN 4
WHEN 'Device'
THEN 5
ELSE 6
END
) AS domain_id
FROM concept_relationship_stage crs
JOIN concept c2 ON c2.concept_code = crs.concept_code_2
AND c2.vocabulary_id = crs.vocabulary_id_2
AND c2.vocabulary_id = 'SNOMED'
WHERE crs.relationship_id = 'Maps to'
AND crs.invalid_reason IS NULL
AND crs.vocabulary_id_1 = 'CCAM'
) i
WHERE i.concept_code_1 = cs.concept_code;
--14. Cleaning
DROP TABLE ccam_hierarchy;
-- At the end, the three tables concept_stage, concept_relationship_stage and concept_synonym_stage should be ready to be fed into the generic_update.sql script