Mapping Entities (Clinical Drugs) with Corresponding UMLS CUI Codes

Description

This pretrained model maps entities (Clinical Drugs) with their corresponding UMLS CUI codes.

Predicted Entities

umls_code

Copy S3 URI

How to use


document_assembler = DocumentAssembler()\
      .setInputCol('text')\
      .setOutputCol('document')

sentence_detector = SentenceDetector()\
      .setInputCols(["document"])\
      .setOutputCol("sentence")

tokenizer = Tokenizer()\
      .setInputCols("sentence")\
      .setOutputCol("token")

word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
      .setInputCols(["sentence", "token"])\
      .setOutputCol("embeddings")

ner_model = MedicalNerModel.pretrained("ner_posology_greedy", "en", "clinical/models")\
    .setInputCols(["sentence", "token", "embeddings"])\
    .setOutputCol("clinical_ner")

ner_model_converter = NerConverterInternal()\
    .setInputCols(["sentence", "token", "clinical_ner"])\
    .setOutputCol("ner_chunk")

chunkerMapper = ChunkMapperModel.pretrained("umls_clinical_drugs_mapper", "en", "clinical/models")\
      .setInputCols(["ner_chunk"])\
      .setOutputCol("mappings")\
      .setRels(["umls_code"])\
      .setLowerCase(True)

mapper_pipeline = Pipeline().setStages([
        document_assembler,
        sentence_detector,
        tokenizer, 
        word_embeddings,
        ner_model, 
        ner_model_converter, 
        chunkerMapper])

data = spark.createDataFrame([["""The patient was prescribed Neosporin Cream to be applied externally to the infected area, metformin 1000 mg for diabetes management, and acetaminophen 500 mg oral capsule for pain relief."""]]).toDF("text")

result = mapper_pipeline.fit(data).transform(data)


document_assembler = nlp.DocumentAssembler()\
      .setInputCol('text')\
      .setOutputCol('document')

sentence_detector = nlp.SentenceDetector()\
      .setInputCols(["document"])\
      .setOutputCol("sentence")

tokenizer = nlp.Tokenizer()\
      .setInputCols("sentence")\
      .setOutputCol("token")

word_embeddings = nlp.WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
      .setInputCols(["sentence", "token"])\
      .setOutputCol("embeddings")

ner_model = medical.NerModel.pretrained("ner_posology_greedy", "en", "clinical/models")\
    .setInputCols(["sentence", "token", "embeddings"])\
    .setOutputCol("clinical_ner")

ner_model_converter = medical.NerConverterInternal()\
    .setInputCols(["sentence", "token", "clinical_ner"])\
    .setOutputCol("ner_chunk")

chunkerMapper = medical.ChunkMapperModel.pretrained("umls_clinical_drugs_mapper", "en", "clinical/models")\
      .setInputCols(["ner_chunk"])\
      .setOutputCol("mappings")\
      .setRels(["umls_code"])\
      .setLowerCase(True)

mapper_pipeline = Pipeline().setStages([
        document_assembler,
        sentence_detector,
        tokenizer, 
        word_embeddings,
        ner_model, 
        ner_model_converter, 
        chunkerMapper])

data = spark.createDataFrame([["""The patient was prescribed Neosporin Cream to be applied externally to the infected area, metformin 1000 mg for diabetes management, and acetaminophen 500 mg oral capsule for pain relief."""]]).toDF("text")

result = mapper_pipeline.fit(data).transform(data)


val document_assembler = new DocumentAssembler()
      .setInputCol("text")
      .setOutputCol("document")

val sentence_detector = new SentenceDetector()
      .setInputCols(Array("document"))
      .setOutputCol("sentence")

val tokenizer = new Tokenizer()
      .setInputCols("sentence")
      .setOutputCol("token")

val word_embeddings = WordEmbeddingsModel
      .pretrained("embeddings_clinical", "en", "clinical/models")
      .setInputCols(Array("sentence", "token"))
      .setOutputCol("embeddings")

val ner_model = MedicalNerModel
      .pretrained("ner_posology_greedy", "en", "clinical/models")
      .setInputCols(Array("sentence", "token", "embeddings"))
      .setOutputCol("clinical_ner")

val ner_model_converter = new NerConverterInternal()
      .setInputCols(Array("sentence", "token", "clinical_ner"))
      .setOutputCol("ner_chunk")

val chunkerMapper = ChunkMapperModel
      .pretrained("umls_clinical_drugs_mapper", "en", "clinical/models")
      .setInputCols(Array("ner_chunk"))
      .setOutputCol("mappings")
      .setRels(Array("umls_code")) 

val mapper_pipeline = new Pipeline().setStages(Array(
                                                  document_assembler,
                                                  sentence_detector,
                                                  tokenizer, 
                                                  word_embeddings,
                                                  ner_model, 
                                                  ner_model_converter, 
                                                  chunkerMapper))

val data = Seq("""The patient was prescribed Neosporin Cream to be applied externally to the infected area, metformin 1000 mg for diabetes management, and acetaminophen 500 mg oral capsule for pain relief.""").toDF("text")

val result = mapper_pipeline.fit(data).transform(data)

Results


+---------------------------------+---------+
|ner_chunk                        |umls_code|
+---------------------------------+---------+
|Neosporin Cream                  |C0132149 |
|metformin 1000 mg                |C0987664 |
|acetaminophen 500 mg oral capsule|C0691088 |
+---------------------------------+---------+

Model Information

Model Name: umls_clinical_drugs_mapper
Compatibility: Healthcare NLP 5.5.1+
License: Licensed
Edition: Official
Input Labels: [ner_chunk]
Output Labels: [mappings]
Language: en
Size: 31.1 MB

References

Trained on concepts from clinical drugs for the 2024AB release of the Unified Medical Language System® (UMLS) Knowledge Sources: https://www.nlm.nih.gov/research/umls/index.html