Mapping Entities (Drug Substances) with Corresponding UMLS CUI Codes

Description

This pretrained model maps drug substance entities to their corresponding UMLS CUI codes.

Predicted Entities

umls_code

Open in Colab Copy S3 URI

How to use

# Wrap the raw "text" column into document annotations.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Split documents into sentences.
sentence_detector = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Split sentences into tokens.
tokenizer = (
    Tokenizer()
    .setInputCols("sentence")
    .setOutputCol("token")
)

# Pretrained clinical word embeddings consumed by the NER model.
word_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Pretrained `ner_posology_greedy` NER model.
ner_model = (
    MedicalNerModel.pretrained("ner_posology_greedy", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("clinical_ner")
)

# Convert the NER output into `ner_chunk` annotations for the mapper.
ner_model_converter = (
    NerConverterInternal()
    .setInputCols("sentence", "token", "clinical_ner")
    .setOutputCol("ner_chunk")
)

# Map each chunk to its UMLS code via the `umls_code` relation.
chunkerMapper = (
    ChunkMapperModel.pretrained("umls_drug_substance_mapper", "en", "clinical/models")
    .setInputCols(["ner_chunk"])
    .setOutputCol("mappings")
    .setRels(["umls_code"])
    .setLowerCase(True)
)

# Stages run in this order; each one reads columns produced by earlier stages.
pipeline_stages = [
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    ner_model,
    ner_model_converter,
    chunkerMapper,
]
mapper_pipeline = Pipeline().setStages(pipeline_stages)

# Single-row DataFrame with one "text" column holding the sample sentence.
sample_rows = [["The patient was given  metformin, lenvatinib and lavender 700 ml/ml"]]
test_data = spark.createDataFrame(sample_rows).toDF("text")

# Fit on the sample data, then annotate that same data.
result = mapper_pipeline.fit(test_data).transform(test_data)

// Wrap the raw "text" column into document annotations.
val document_assembler = new DocumentAssembler()
       .setInputCol("text")
       .setOutputCol("document")

// Split documents into sentences.
val sentence_detector = new SentenceDetector()
       .setInputCols(Array("document"))
       .setOutputCol("sentence")

// Split sentences into tokens.
val tokenizer = new Tokenizer()
       .setInputCols("sentence")
       .setOutputCol("token")

// Pretrained clinical word embeddings consumed by the NER model.
val word_embeddings = WordEmbeddingsModel
       .pretrained("embeddings_clinical", "en", "clinical/models")
       .setInputCols(Array("sentence", "token"))
       .setOutputCol("embeddings")

// Pretrained `ner_posology_greedy` NER model.
val ner_model = MedicalNerModel
       .pretrained("ner_posology_greedy", "en", "clinical/models")
       .setInputCols(Array("sentence", "token", "embeddings"))
       .setOutputCol("clinical_ner")

// Convert the NER output into `ner_chunk` annotations for the mapper.
val ner_model_converter = new NerConverterInternal()
       .setInputCols("sentence", "token", "clinical_ner")
       .setOutputCol("ner_chunk")

// Map each chunk to its UMLS code via the `umls_code` relation.
// `setLowerCase(true)` keeps this example consistent with the Python example,
// which sets the same option on the mapper.
val chunkerMapper = ChunkMapperModel
       .pretrained("umls_drug_substance_mapper", "en", "clinical/models")
       .setInputCols(Array("ner_chunk"))
       .setOutputCol("mappings")
       .setRels(Array("umls_code"))
       .setLowerCase(true)

// Stages run in this order; each one reads columns produced by earlier stages.
val mapper_pipeline = new Pipeline().setStages(Array(
                                                   document_assembler,
                                                   sentence_detector,
                                                   tokenizer,
                                                   word_embeddings,
                                                   ner_model,
                                                   ner_model_converter,
                                                   chunkerMapper))

// NOTE: `toDF` on a Seq needs `spark.implicits._` in scope; spark-shell imports
// it automatically, a standalone application must import it explicitly.
val test_data = Seq("The patient was given  metformin, lenvatinib and lavender 700 ml/ml").toDF("text")

// Fit on the sample data, then annotate that same data.
val result = mapper_pipeline.fit(test_data).transform(test_data)
import nlu

# Load the mapper through its NLU reference, then run it on the sample sentence.
umls_mapper = nlu.load("en.map_entity.umls_drug_substance_mapper")
umls_mapper.predict("""The patient was given  metformin, lenvatinib and lavender 700 ml/ml""")

Results

+------------------+---------+
|ner_chunk         |umls_code|
+------------------+---------+
|metformin         |C0025598 |
|lenvatinib        |C2986924 |
|lavender 700 ml/ml|C0772360 |
+------------------+---------+

Model Information

Model Name: umls_drug_substance_mapper
Compatibility: Healthcare NLP 4.0.0+
License: Licensed
Edition: Official
Input Labels: [ner_chunk]
Output Labels: [mappings]
Language: en
Size: 30.1 MB

References

This model is based on the Clinical Drug, Pharmacologic Substance, Antibiotic, and Hazardous or Poisonous Substance categories of the 2022AA UMLS dataset. https://www.nlm.nih.gov/research/umls/index.html