Mapping UMLS Codes with Their Corresponding RxNorm Codes

Description

This pretrained model maps UMLS codes to corresponding RxNorm codes.

Predicted Entities

rxnorm_code

Copy S3 URI

How to use


documentAssembler = DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("ner_chunk")

sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")\
    .setInputCols(["ner_chunk"])\
    .setOutputCol("sbert_embeddings")\
    .setCaseSensitive(False)

umls_resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_umls_drug_substance", "en", "clinical/models")\
    .setInputCols(["sbert_embeddings"]) \
    .setOutputCol("umls_code")\
    .setDistanceFunction("EUCLIDEAN")

resolver2chunk = Resolution2Chunk()\
    .setInputCols(["umls_code"])\
    .setOutputCol("umls2chunk")

chunkerMapper = ChunkMapperModel.pretrained("umls_rxnorm_mapper", "en", "clinical/models")\
    .setInputCols(["umls2chunk"])\
    .setOutputCol("mappings")

mapper_pipeline = Pipeline(stages = [
    documentAssembler,
    sbert_embedder,
    umls_resolver,
    resolver2chunk,
    chunkerMapper])

data = spark.createDataFrame([['Hydrogen peroxide 30 mg'], ['magnesium hydroxide 100 MG'], ['metformin 1000 MG'], ['dilaudid']]).toDF("text")

result = mapper_pipeline.fit(data).transform(data)


documentAssembler = nlp.DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("ner_chunk")

sbert_embedder = nlp.BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")\
    .setInputCols(["ner_chunk"])\
    .setOutputCol("sbert_embeddings")\
    .setCaseSensitive(False)

umls_resolver = medical.SentenceEntityResolverModel.pretrained("sbiobertresolve_umls_drug_substance", "en", "clinical/models")\
    .setInputCols(["sbert_embeddings"]) \
    .setOutputCol("umls_code")\
    .setDistanceFunction("EUCLIDEAN")

resolver2chunk = medical.Resolution2Chunk()\
    .setInputCols(["umls_code"])\
    .setOutputCol("umls2chunk")

chunkerMapper = medical.ChunkMapperModel.pretrained("umls_rxnorm_mapper", "en", "clinical/models")\
    .setInputCols(["umls2chunk"])\
    .setOutputCol("mappings")

mapper_pipeline = nlp.Pipeline(stages = [
    documentAssembler,
    sbert_embedder,
    umls_resolver,
    resolver2chunk,
    chunkerMapper])

data = spark.createDataFrame([['Hydrogen peroxide 30 mg'], ['magnesium hydroxide 100 MG'], ['metformin 1000 MG'], ['dilaudid']]).toDF("text")

result = mapper_pipeline.fit(data).transform(data)


val documentAssembler = DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("ner_chunk")

val sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
    .setInputCols(Array("ner_chunk"))
    .setOutputCol("sbert_embeddings")
    .setCaseSensitive(False)

val umls_resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_umls_drug_substance", "en", "clinical/models")
    .setInputCols(Array("sbert_embeddings"))
    .setOutputCol("umls_code")
    .setDistanceFunction("EUCLIDEAN")

val resolver2chunk = Resolution2Chunk()
    .setInputCols(Array("umls_code"))
    .setOutputCol("umls2chunk")

val chunkerMapper = ChunkMapperModel.pretrained("umls_rxnorm_mapper", "en", "clinical/models")    .setInputCols(Array("umls2chunk"))
    .setOutputCol("mappings")
    	
val mapper_pipeline = new Pipeline().setStages(Array( 
    document_assembler,
    sbert_embedder,
    umls_resolver,
    resolver2chunk,
    chunkerMapper))

val data = Seq(
  ("amlodipine 5 MG"),
  ("magnesium hydroxide 100 MG"),
  ("metformin 1000 MG"),
  ("dilaudid")
).toDF("text")

val result = mapper_pipeline.fit(data).transform(data)

Results


+--------------------------+---------+-----------+
|chunk                     |umls_code|rxnorm_code|
+--------------------------+---------+-----------+
|Hydrogen peroxide 30 mg   |C1126248 |330565     |
|magnesium hydroxide 100 MG|C1134402 |337012     |
|metformin 1000 MG         |C0987664 |316255     |
|dilaudid                  |C0728755 |224913     |
+--------------------------+---------+-----------+

Model Information

Model Name: umls_rxnorm_mapper
Compatibility: Healthcare NLP 5.5.1+
License: Licensed
Edition: Official
Input Labels: [ner_chunk]
Output Labels: [mappings]
Language: en
Size: 3.0 MB

References

Trained on concepts from RXNORM for the 2024AB release of the Unified Medical Language System® (UMLS) Knowledge Sources: https://www.nlm.nih.gov/research/umls/index.html