1
2 """LOINC handling code.
3
4 http://loinc.org
5
6 license: GPL
7 """
8
9
10
11 __version__ = "$Revision: 1.7 $"
12 __author__ = "K.Hilbert <Karsten.Hilbert@gmx.net>"
13
14 import sys, codecs, logging, csv
15
16
17 if __name__ == '__main__':
18 sys.path.insert(0, '../../')
19 from Gnumed.pycommon import gmPG2, gmTools
20
21
# Module logger; the module revision string is logged once at import time.
_log = logging.getLogger('gm.loinc')
_log.info(__version__)

# Provenance and display names recorded with the imported data source.
origin_url = u'http://loinc.org'
name_short = u'LOINC'
name_long = u'LOINC® (Logical Observation Identifiers Names and Codes)'

# Encoding used when opening the LOINC source file as distributed.
file_encoding = 'latin1'
# A source line containing this text is the last line of the license part;
# everything after it is treated as data.
license_delimiter = u'Clip Here for Data'
# A license line starting with this text carries the version string.
version_tag = u'LOINC(R) Database Version'

# Column names of the LOINC data table, in file order. The number of entries
# determines how many placeholders the staging-table INSERT uses.
loinc_fields = (
    u"LOINC_NUM COMPONENT PROPERTY TIME_ASPCT SYSTEM SCALE_TYP METHOD_TYP "
    u"RELAT_NMS CLASS SOURCE DT_LAST_CH CHNG_TYPE COMMENTS ANSWERLIST STATUS "
    u"MAP_TO SCOPE NORM_RANGE IPCC_UNITS REFERENCE EXACT_CMP_SY MOLAR_MASS "
    u"CLASSTYPE FORMULA SPECIES EXMPL_ANSWERS ACSSYM BASE_NAME FINAL NAACCR_ID "
    u"CODE_TABLE SETROOT PANELELEMENTS SURVEY_QUEST_TEXT SURVEY_QUEST_SRC "
    u"UNITSREQUIRED SUBMITTED_UNITS RELATEDNAMES2 SHORTNAME ORDER_OBS "
    u"CDISC_COMMON_TESTS HL7_FIELD_SUBFIELD_ID EXTERNAL_COPYRIGHT_NOTICE "
    u"EXAMPLE_UNITS INPC_PERCENTAGE LONG_COMMON_NAME"
).split()
33
34
36
37 cmd = u"""
38 select coalesce (
39 (select term
40 from ref.v_coded_terms
41 where
42 coding_system = 'LOINC'
43 and
44 code = %(loinc)s
45 and
46 lang = i18n.get_curr_lang()
47 ),
48 (select term
49 from ref.v_coded_terms
50 where
51 coding_system = 'LOINC'
52 and
53 code = %(loinc)s
54 and
55 lang = 'en_EN'
56 ),
57 (select term
58 from ref.v_coded_terms
59 where
60 coding_system = 'LOINC'
61 and
62 code = %(loinc)s
63 )
64 )
65 """
66 args = {'loinc': loinc}
67 rows, idx = gmPG2.run_ro_queries(queries = [{'cmd': cmd, 'args': args}])
68
69 return [ r[0] for r in rows ]
70
72
73 _log.debug('splitting LOINC source file [%s]', input_fname)
74
75 if license_fname is None:
76 license_fname = gmTools.get_unique_filename(prefix = 'loinc_license', suffix = '.txt')
77 _log.debug('LOINC header: %s', license_fname)
78
79 if data_fname is None:
80 data_fname = gmTools.get_unique_filename(prefix = 'loinc_data', suffix = '.csv')
81 _log.debug('LOINC data: %s', data_fname)
82
83 loinc_file = codecs.open(input_fname, 'rU', encoding = file_encoding, errors = 'replace')
84 out_file = codecs.open(license_fname, 'w', encoding = 'utf8', errors = 'replace')
85
86 for line in loinc_file:
87
88 if license_delimiter in line:
89 out_file.write(line)
90 out_file.close()
91 out_file = codecs.open(data_fname, 'w', encoding = 'utf8', errors = 'replace')
92 continue
93
94 out_file.write(line)
95
96 out_file.close()
97
98 return data_fname, license_fname
99
101
102 csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
103 first_line = csv_file.readline()
104 sniffer = csv.Sniffer()
105 if sniffer.has_header(first_line):
106 pass
107
109
110 in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
111
112 version = None
113 for line in in_file:
114 if line.startswith(version_tag):
115 version = line[len(version_tag):].strip()
116 break
117
118 in_file.close()
119 return version
120
# SQL creating the ref.data_source row which describes the imported release.
_SQL_INSERT_DATA_SOURCE = u"""
insert into ref.data_source (name_long, name_short, version, description, lang, source) values (
%(name_long)s,
%(name_short)s,
%(ver)s,
%(desc)s,
%(lang)s,
%(url)s
)"""

# SQL copying all staged rows into ref.loinc. Empty strings become NULL and
# "term" falls back to the colon-joined name parts when long_common_name is
# empty.
_SQL_STAGING_TO_LOINC = u"""
insert into ref.loinc (
fk_data_source,

term,

code,
comment,
component,
property,
time_aspect,
system,
scale_type,
method_type,
related_names_1_old,
grouping_class,
loinc_internal_source,
dt_last_change,
change_type,
answer_list,
code_status,
maps_to,
scope,
normal_range,
ipcc_units,
reference,
exact_component_synonym,
molar_mass,
grouping_class_type,
formula,
species,
example_answers,
acs_synonyms,
base_name,
final,
naa_ccr_id,
code_table,
is_set_root,
panel_elements,
survey_question_text,
survey_question_source,
units_required,
submitted_units,
related_names_2,
short_name,
order_obs,
cdisc_common_tests,
hl7_field_subfield_id,
external_copyright_notice,
example_units,
inpc_percentage,
long_common_name
)

select

%(src_pk)s,

coalesce (
nullif(long_common_name, ''),
(
coalesce(nullif(component, '') || ':', '') ||
coalesce(nullif(property, '') || ':', '') ||
coalesce(nullif(time_aspect, '') || ':', '') ||
coalesce(nullif(system, '') || ':', '') ||
coalesce(nullif(scale_type, '') || ':', '') ||
coalesce(nullif(method_type, '') || ':', '')
)
),

nullif(loinc_num, ''),
nullif(comments, ''),
nullif(component, ''),
nullif(property, ''),
nullif(time_aspect, ''),
nullif(system, ''),
nullif(scale_type, ''),
nullif(method_type, ''),
nullif(related_names_1_old, ''),
nullif(class, ''),
nullif(source, ''),
nullif(dt_last_change, ''),
nullif(change_type, ''),
nullif(answer_list, ''),
nullif(status, ''),
nullif(map_to, ''),
nullif(scope, ''),
nullif(normal_range, ''),
nullif(ipcc_units, ''),
nullif(reference, ''),
nullif(exact_component_synonym, ''),
nullif(molar_mass, ''),
nullif(class_type, '')::smallint,
nullif(formula, ''),
nullif(species, ''),
nullif(example_answers, ''),
nullif(acs_synonyms, ''),
nullif(base_name, ''),
nullif(final, ''),
nullif(naa_ccr_id, ''),
nullif(code_table, ''),
nullif(is_set_root, '')::boolean,
nullif(panel_elements, ''),
nullif(survey_question_text, ''),
nullif(survey_question_source, ''),
nullif(units_required, ''),
nullif(submitted_units, ''),
nullif(related_names_2, ''),
nullif(short_name, ''),
nullif(order_obs, ''),
nullif(cdisc_common_tests, ''),
nullif(hl7_field_subfield_id, ''),
nullif(external_copyright_notice, ''),
nullif(example_units, ''),
nullif(inpc_percentage, ''),
nullif(long_common_name, '')

from
ref.loinc_staging
"""


def _empty_loinc_staging(conn):
    """Delete all rows from ref.loinc_staging via *conn* and commit."""
    curs = conn.cursor()
    try:
        cmd = u"""delete from ref.loinc_staging"""
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd}])
    finally:
        # release the cursor even if the delete failed
        curs.close()
    conn.commit()
    _log.debug('staging table emptied')


def loinc_import(data_fname=None, license_fname=None, version=None, conn=None, lang='en_EN'):
    """Import a LOINC release into the database.

    Steps, in order:

    1. (Re)create the ref.data_source row for this LOINC version, using the
       complete content of the license file as its description. These
       queries are run through gmPG2.run_rw_queries() without passing
       *conn*.
    2. Empty ref.loinc_staging, then insert every row of the data file
       except the first one (presumably the column header line) into it.
    3. Copy the staged rows into ref.loinc, linked to the new data source.
    4. Empty ref.loinc_staging again.

    Steps 2-4 each commit on *conn*.

    data_fname -- name of the data file; it is read as UTF-8 and parsed as
        tab-delimited with '"' as the quote character.
    license_fname -- name of the license/header text file, read as UTF-8.
    version -- LOINC version string; when None it is obtained from
        get_version(license_fname = license_fname).
    conn -- database connection providing cursor() and commit(). Although
        it defaults to None it is used unconditionally, so callers must
        pass a real connection.
    lang -- language code stored with the data source row.

    Returns True on completion.

    Raises ValueError when no version was passed in and none could be
    detected. Errors raised by file access or by the database layer
    propagate unchanged; the files and cursors opened here are closed
    before they do.
    """
    if version is None:
        version = get_version(license_fname = license_fname)

    if version is None:
        raise ValueError('cannot detect LOINC version')

    _log.debug('importing LOINC version [%s]', version)

    # the whole license text is stored as the data source description
    in_file = codecs.open(license_fname, 'rU', encoding = 'utf8', errors = 'replace')
    try:
        desc = in_file.read()
    finally:
        in_file.close()

    args = {'ver': version, 'desc': desc, 'url': origin_url, 'name_long': name_long, 'name_short': name_short, 'lang': lang}

    # create data source record: drop any previous row for this version,
    # insert the new one and read back its primary key
    queries = [{
        'cmd': u"""delete from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
    }, {
        'cmd': _SQL_INSERT_DATA_SOURCE,
        'args': args
    }, {
        'cmd': u"""select pk from ref.data_source where name_short = %(name_short)s and version = %(ver)s""",
        'args': args
    }]
    rows, idx = gmPG2.run_rw_queries(queries = queries, return_data = True)
    data_src_pk = rows[0][0]
    _log.debug('data source record created, pk is #%s', data_src_pk)

    # import data into the staging table
    csv_file = codecs.open(data_fname, 'rU', 'utf8', 'replace')
    try:
        loinc_reader = gmTools.unicode_csv_reader(csv_file, delimiter = "\t", quotechar = '"')

        _empty_loinc_staging(conn)

        # one positional placeholder per LOINC column
        cmd = u"""insert into ref.loinc_staging values (%s%%s)""" % (u'%s, ' * (len(loinc_fields) - 1))
        curs = conn.cursor()
        try:
            first_row_skipped = False
            for loinc_line in loinc_reader:
                if not first_row_skipped:
                    # the first row is not data and is not loaded
                    first_row_skipped = True
                    continue
                gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': cmd, 'args': loinc_line}])
        finally:
            curs.close()
        conn.commit()
    finally:
        csv_file.close()
    _log.debug('staging table loaded')

    # move the staged rows into the real table
    curs = conn.cursor()
    try:
        args = {'src_pk': data_src_pk}
        gmPG2.run_rw_queries(link_obj = curs, queries = [{'cmd': _SQL_STAGING_TO_LOINC, 'args': args}])
    finally:
        curs.close()
    conn.commit()
    _log.debug('transfer from staging table to real table done')

    _empty_loinc_staging(conn)

    return True
325
326
327
328 if __name__ == "__main__":
329
330 from Gnumed.pycommon import gmLog2
331 from Gnumed.pycommon import gmI18N
332
333 gmI18N.activate_locale()
334
335
336
339
342
343 if (len(sys.argv)) > 1 and (sys.argv[1] == 'test'):
344
345 test_loinc_import()
346
347
348