Mapping ICD10CM Codes with Their Corresponding UMLS Codes

Description

This pretrained model maps ICD-10-CM codes to their corresponding codes in the Unified Medical Language System (UMLS).

Predicted Entities

umls_code

Open in Colab Copy S3 URI

How to use

# Python example: resolve free text to an ICD-10-CM code, then map that code to UMLS.

# Wrap the raw "text" column as a document annotation; the output column is
# named "ner_chunk" so the downstream stages can consume it directly.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("ner_chunk")
)

# Sentence-level BERT embeddings that the entity resolver takes as input.
sbert_embedder = (
    BertSentenceEmbeddings
    .pretrained("sbiobert_base_cased_mli","en","clinical/models")
    .setInputCols(["ner_chunk"])
    .setOutputCol("sbert_embeddings")
)

# Resolve each chunk to an ICD-10-CM code using Euclidean distance.
icd10cm_resolver = (
    SentenceEntityResolverModel
    .pretrained("sbiobertresolve_icd10cm","en", "clinical/models")
    .setInputCols(["ner_chunk", "sbert_embeddings"])
    .setOutputCol("icd10cm_code")
    .setDistanceFunction("EUCLIDEAN")
)

# Map the resolved ICD-10-CM code to its UMLS code via the "umls_code" relation.
chunkerMapper = (
    ChunkMapperModel
    .pretrained("icd10cm_umls_mapper", "en", "clinical/models")
    .setInputCols(["icd10cm_code"])
    .setOutputCol("umls_mappings")
    .setRels(["umls_code"])
)

pipeline = Pipeline(stages = [
    documentAssembler,
    sbert_embedder,
    icd10cm_resolver,
    chunkerMapper,
])

# All stages are pretrained, so a one-row frame holding an empty string is
# enough to "fit" the pipeline and obtain a PipelineModel.
empty_df = spark.createDataFrame([[""]]).toDF("text")
model = pipeline.fit(empty_df)

# LightPipeline annotates plain Python strings with the fitted model.
light_pipeline = LightPipeline(model)

result = light_pipeline.fullAnnotate("Neonatal skin infection")
// Scala example: resolve free text to an ICD-10-CM code, then map that code to UMLS.

// Wrap the raw "text" column as a document annotation; the output column is
// named "ner_chunk" so the downstream stages can consume it directly.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("ner_chunk")

// Sentence-level BERT embeddings that the entity resolver takes as input.
val sbert_embedder = BertSentenceEmbeddings
  .pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
  .setInputCols(Array("ner_chunk"))
  .setOutputCol("sbert_embeddings")

// Resolve each chunk to an ICD-10-CM code using Euclidean distance.
// The output column is "icd10cm_code" (not "rxnorm_code"): this resolver emits
// ICD-10-CM codes, and the name matches the Python example and the model's
// documented input label.
val icd10cm_resolver = SentenceEntityResolverModel
  .pretrained("sbiobertresolve_icd10cm", "en", "clinical/models")
  .setInputCols(Array("ner_chunk", "sbert_embeddings"))
  .setOutputCol("icd10cm_code")
  .setDistanceFunction("EUCLIDEAN")

// Map the resolved ICD-10-CM code to its UMLS code via the "umls_code" relation.
val chunkerMapper = ChunkMapperModel
  .pretrained("icd10cm_umls_mapper", "en", "clinical/models")
  .setInputCols(Array("icd10cm_code"))
  .setOutputCol("umls_mappings")
  .setRels(Array("umls_code"))

// Spark ML's Pipeline has no `stages` constructor parameter; stages are
// supplied through setStages.
val pipeline = new Pipeline().setStages(Array(
  documentAssembler,
  sbert_embedder,
  icd10cm_resolver,
  chunkerMapper
))

// Brings the encoders required by `.toDS` into scope (spark-shell imports
// these automatically; standalone applications do not).
import spark.implicits._

val data = Seq("Neonatal skin infection").toDS.toDF("text")

val result = pipeline.fit(data).transform(data)
# Python (NLU) example: load the component referenced as "en.icd10cm_to_umls"
# and run prediction on the sample text in a single call.
# NOTE(review): presumably this reference wraps the same resolver + mapper
# stages shown in the Spark NLP examples - confirm against the NLU namespace.
import nlu
nlu.load("en.icd10cm_to_umls").predict("""Neonatal skin infection""")

Results

|    | ner_chunk               | icd10cm_code   | umls_mappings   |
|---:|:------------------------|:---------------|:----------------|
|  0 | Neonatal skin infection | P394           | C0456111        |

Model Information

Model Name: icd10cm_umls_mapper
Compatibility: Healthcare NLP 3.5.3+
License: Licensed
Edition: Official
Input Labels: [icd10cm_code]
Output Labels: [mappings]
Language: en
Size: 942.9 KB