Mapping SNOMED Codes with Their Corresponding UMLS Codes

Description

This pretrained model maps SNOMED codes to corresponding UMLS codes.

Predicted Entities

umls_code

Copy S3 URI

How to use


# Read the raw "text" column and expose it under the "ner_chunk" column name,
# which is the column the sentence embedder below is wired to.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("ner_chunk")
)

# Sentence-level BERT embeddings computed over the "ner_chunk" column.
sbert_embedder = (
    BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
    .setInputCols(["ner_chunk"])
    .setOutputCol("sbert_embeddings")
    .setCaseSensitive(False)
)

# Resolve each embedding to a SNOMED code with the pretrained drug resolver.
snomed_resolver = (
    SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_drug", "en", "clinical/models")
    .setInputCols(["sbert_embeddings"])
    .setOutputCol("snomed_code")
    .setDistanceFunction("EUCLIDEAN")
)

# Turn the resolver output into chunk annotations so the mapper can consume it.
resolver2chunk = (
    Resolution2Chunk()
    .setInputCols(["snomed_code"])
    .setOutputCol("snomed2chunk")
)

# Map the SNOMED chunks through the pretrained mapper, requesting the
# "umls_code" relation.
chunkerMapper = (
    ChunkMapperModel.pretrained("snomed_umls_mapper", "en", "clinical/models")
    .setInputCols(["snomed2chunk"])
    .setOutputCol("mappings")
    .setRels(["umls_code"])
)

# Stage order matters: every stage reads the output column of the one before it.
pipeline_stages = [
    documentAssembler,
    sbert_embedder,
    snomed_resolver,
    resolver2chunk,
    chunkerMapper,
]
mapper_pipeline = Pipeline(stages=pipeline_stages)

# Two single-column rows used as sample input.
sample_rows = [["acebutolol"], ["aspirin"]]
data = spark.createDataFrame(sample_rows).toDF("text")

# Fit on the sample data, then run the fitted pipeline over the same data.
fitted_pipeline = mapper_pipeline.fit(data)
result = fitted_pipeline.transform(data)


# Read the raw "text" column and expose it under the "ner_chunk" column name,
# which is the column the sentence embedder below is wired to.
documentAssembler = (
    nlp.DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("ner_chunk")
)

# Sentence-level BERT embeddings computed over the "ner_chunk" column.
sbert_embedder = (
    nlp.BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
    .setInputCols(["ner_chunk"])
    .setOutputCol("sbert_embeddings")
    .setCaseSensitive(False)
)

# Resolve each embedding to a SNOMED code with the pretrained drug resolver.
snomed_resolver = (
    medical.SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_drug", "en", "clinical/models")
    .setInputCols(["sbert_embeddings"])
    .setOutputCol("snomed_code")
    .setDistanceFunction("EUCLIDEAN")
)

# Turn the resolver output into chunk annotations so the mapper can consume it.
resolver2chunk = (
    medical.Resolution2Chunk()
    .setInputCols(["snomed_code"])
    .setOutputCol("snomed2chunk")
)

# Map the SNOMED chunks through the pretrained mapper, requesting the
# "umls_code" relation.
chunkerMapper = (
    medical.ChunkMapperModel.pretrained("snomed_umls_mapper", "en", "clinical/models")
    .setInputCols(["snomed2chunk"])
    .setOutputCol("mappings")
    .setRels(["umls_code"])
)

# Stage order matters: every stage reads the output column of the one before it.
pipeline_stages = [
    documentAssembler,
    sbert_embedder,
    snomed_resolver,
    resolver2chunk,
    chunkerMapper,
]
mapper_pipeline = nlp.Pipeline(stages=pipeline_stages)

# Two single-column rows used as sample input.
sample_rows = [["acebutolol"], ["aspirin"]]
data = spark.createDataFrame(sample_rows).toDF("text")

# Fit on the sample data, then run the fitted pipeline over the same data.
fitted_pipeline = mapper_pipeline.fit(data)
result = fitted_pipeline.transform(data)


// Read the raw "text" column and expose it under the "ner_chunk" column name,
// which is the column the sentence embedder below is wired to.
val documentAssembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("ner_chunk")

// Sentence-level BERT embeddings computed over the "ner_chunk" column.
val sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli","en","clinical/models")
    .setInputCols(Array("ner_chunk"))
    .setOutputCol("sbert_embeddings")
    .setCaseSensitive(false)

// Resolve each embedding to a SNOMED code with the pretrained drug resolver.
val snomed_resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_drug","en","clinical/models")
    .setInputCols(Array("sbert_embeddings"))
    .setOutputCol("snomed_code")
    .setDistanceFunction("EUCLIDEAN")

// Turn the resolver output into chunk annotations so the mapper can consume it.
val resolver2chunk = new Resolution2Chunk()
    .setInputCols(Array("snomed_code"))
    .setOutputCol("snomed2chunk")

// Map the SNOMED chunks through the pretrained mapper, requesting the
// "umls_code" relation. The relation list is a Scala Array, matching the
// other setters in this snippet (a Python-style [...] literal does not compile).
val chunkerMapper = ChunkMapperModel.pretrained("snomed_umls_mapper","en","clinical/models")
    .setInputCols(Array("snomed2chunk"))
    .setOutputCol("mappings")
    .setRels(Array("umls_code"))

// Stage order matters: every stage reads the output column of the one before it.
val mapper_pipeline = new Pipeline().setStages(Array(
     documentAssembler,
     sbert_embedder,
     snomed_resolver,
     resolver2chunk,
     chunkerMapper))

// Sample input with a single "text" column.
// NOTE(review): Seq(...).toDF presumably relies on `import spark.implicits._`
// being in scope (as in spark-shell) — confirm for standalone use.
val data = Seq("acebutolol", "aspirin").toDF("text")

// Fit on the sample data, then run the fitted pipeline over the same data.
val result = mapper_pipeline.fit(data).transform(data)

Results


+----------+-----------+---------+
|chunk     |snomed_code|umls_code|
+----------+-----------+---------+
|acebutolol|68088000   |C0000946 |
|aspirin   |7947003    |C0004057 |
+----------+-----------+---------+

Model Information

Model Name: snomed_umls_mapper
Compatibility: Healthcare NLP 5.5.1+
License: Licensed
Edition: Official
Input Labels: [ner_chunk]
Output Labels: [mappings]
Language: en
Size: 7.7 MB

References

Trained on concepts from SNOMED for the 2024AB release of the Unified Medical Language System® (UMLS) Knowledge Sources: https://www.nlm.nih.gov/research/umls/index.html