Description
This pretrained model maps drug substance entities to their corresponding UMLS CUI codes.
Predicted Entities
umls_code
How to use
# Reads the raw 'text' column and emits 'document' annotations.
document_assembler = (
    DocumentAssembler()
    .setInputCol('text')
    .setOutputCol('document')
)

# Splits each document into sentences.
sentence_detector = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Splits each sentence into tokens.
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Pretrained "embeddings_clinical" word embeddings, consumed by the NER model below.
word_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Pretrained "ner_posology_greedy" NER model; its tags land in "clinical_ner".
ner_model = (
    MedicalNerModel.pretrained("ner_posology_greedy", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("clinical_ner")
)

# Turns the NER tags into chunks that the mapper can look up.
ner_model_converter = (
    NerConverterInternal()
    .setInputCols(["sentence", "token", "clinical_ner"])
    .setOutputCol("ner_chunk")
)

# The mapper itself: requests the "umls_code" relation for every chunk,
# with lower-casing enabled.
chunkerMapper = (
    ChunkMapperModel.pretrained("umls_drug_substance_mapper", "en", "clinical/models")
    .setInputCols(["ner_chunk"])
    .setOutputCol("mappings")
    .setRels(["umls_code"])
    .setLowerCase(True)
)

# Stage order matters: each stage consumes columns produced by earlier ones.
pipeline_stages = [
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    ner_model,
    ner_model_converter,
    chunkerMapper,
]
mapper_pipeline = Pipeline().setStages(pipeline_stages)

# Single-row DataFrame with one "text" column holding the sample sentence.
sample_rows = [["The patient was given metformin, lenvatinib and lavender 700 ml/ml"]]
test_data = spark.createDataFrame(sample_rows).toDF("text")

# Fit on the sample data, then transform that same data.
fitted_pipeline = mapper_pipeline.fit(test_data)
result = fitted_pipeline.transform(test_data)
// Brings the Seq(...).toDF(...) conversion used at the bottom of this snippet
// into scope. Assumes a SparkSession value named `spark` is available;
// spark-shell already provides this import, so it is harmless there.
import spark.implicits._

// Reads the raw "text" column and emits "document" annotations.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Splits each document into sentences.
val sentence_detector = new SentenceDetector()
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

// Splits each sentence into tokens.
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentence"))
  .setOutputCol("token")

// Pretrained "embeddings_clinical" word embeddings, consumed by the NER model below.
val word_embeddings = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")

// Pretrained "ner_posology_greedy" NER model; its tags land in "clinical_ner".
val ner_model = MedicalNerModel
  .pretrained("ner_posology_greedy", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("clinical_ner")

// Turns the NER tags into chunks that the mapper can look up.
val ner_model_converter = new NerConverterInternal()
  .setInputCols(Array("sentence", "token", "clinical_ner"))
  .setOutputCol("ner_chunk")

// The mapper itself: requests the "umls_code" relation for every chunk.
// setLowerCase(true) keeps this snippet in line with the Python example,
// which enables the same option.
val chunkerMapper = ChunkMapperModel
  .pretrained("umls_drug_substance_mapper", "en", "clinical/models")
  .setInputCols(Array("ner_chunk"))
  .setOutputCol("mappings")
  .setRels(Array("umls_code"))
  .setLowerCase(true)

// Stage order matters: each stage consumes columns produced by earlier ones.
val mapper_pipeline = new Pipeline().setStages(Array(
  document_assembler,
  sentence_detector,
  tokenizer,
  word_embeddings,
  ner_model,
  ner_model_converter,
  chunkerMapper))

// Single-row DataFrame with one "text" column holding the sample sentence.
val test_data = Seq("The patient was given metformin, lenvatinib and lavender 700 ml/ml").toDF("text")

// Fit on the sample data, then transform that same data.
val result = mapper_pipeline.fit(test_data).transform(test_data)
import nlu
# Load the mapper through its NLU reference, then run it on one sample sentence.
sample_text = """The patient was given metformin, lenvatinib and lavender 700 ml/ml"""
umls_mapper = nlu.load("en.map_entity.umls_drug_substance_mapper")
umls_mapper.predict(sample_text)
Results
+------------------+---------+
|ner_chunk         |umls_code|
+------------------+---------+
|metformin         |C0025598 |
|lenvatinib        |C2986924 |
|lavender 700 ml/ml|C0772360 |
+------------------+---------+
Model Information
Model Name: | umls_drug_substance_mapper |
Compatibility: | Healthcare NLP 4.0.0+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [ner_chunk] |
Output Labels: | [mappings] |
Language: | en |
Size: | 30.1 MB |
References
The Clinical Drug, Pharmacologic Substance, Antibiotic, and Hazardous or Poisonous Substance categories of the 2022AA UMLS dataset. https://www.nlm.nih.gov/research/umls/index.html