Description
This pretrained model maps clinical drug entities to their corresponding UMLS CUI codes.
Predicted Entities
umls_code
How to use
# Example pipeline: run clinical NER over raw text, then map each detected
# chunk to a UMLS code with the pretrained "umls_clinical_drugs_mapper" model.

# Reads the 'text' column and writes the 'document' column.
document_assembler = (
    DocumentAssembler()
    .setInputCol('text')
    .setOutputCol('document')
)

# document -> sentence
sentence_detector = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# sentence -> token
tokenizer = (
    Tokenizer()
    .setInputCols("sentence")
    .setOutputCol("token")
)

# Pretrained clinical word embeddings, consumed by the NER model below.
word_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Pretrained clinical NER model; its output column is "clinical_ner".
ner_model = (
    MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("clinical_ner")
)

# Converts the NER output into the "ner_chunk" column used by the mapper.
ner_model_converter = (
    NerConverterInternal()
    .setInputCols(["sentence", "token", "clinical_ner"])
    .setOutputCol("ner_chunk")
)

# Chunk mapper restricted to the "umls_code" relation, with lower-casing enabled.
chunkerMapper = (
    ChunkMapperModel.pretrained("umls_clinical_drugs_mapper", "en", "clinical/models")
    .setInputCols(["ner_chunk"])
    .setOutputCol("mappings")
    .setRels(["umls_code"])
    .setLowerCase(True)
)

# Stage order matters: each stage reads columns written by earlier stages.
pipeline_stages = [
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    ner_model,
    ner_model_converter,
    chunkerMapper,
]
mapper_pipeline = Pipeline().setStages(pipeline_stages)

# Two-sentence sample; the line break inside the literal is part of the text.
sample_text="""She was immediately given hydrogen peroxide 30 mg, and has been advised Neosporin Cream for 5 days.
She has a history of taking magnesium hydroxide 100mg/1ml and metformin 1000 mg."""

# Single-row DataFrame with one column named "text".
test_data = spark.createDataFrame([[sample_text]]).toDF("text")

# Fit on the sample data, then transform the same data.
fitted_pipeline = mapper_pipeline.fit(test_data)
result = fitted_pipeline.transform(test_data)
// Example pipeline: run clinical NER over raw text, then map each detected
// chunk to a UMLS code with the pretrained "umls_clinical_drugs_mapper" model.

// Reads the "text" column and writes the "document" column.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// document -> sentence
val sentence_detector = new SentenceDetector()
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

// sentence -> token
val tokenizer = new Tokenizer()
  .setInputCols("sentence")
  .setOutputCol("token")

// Pretrained clinical word embeddings, consumed by the NER model below.
val word_embeddings = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")

// Pretrained clinical NER model; its output column is "clinical_ner".
val ner_model = MedicalNerModel
  .pretrained("ner_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("clinical_ner")

// Converts the NER output into the "ner_chunk" column used by the mapper.
val ner_model_converter = new NerConverterInternal()
  .setInputCols(Array("sentence", "token", "clinical_ner"))
  .setOutputCol("ner_chunk")

// Chunk mapper restricted to the "umls_code" relation.
val chunkerMapper = ChunkMapperModel
  .pretrained("umls_clinical_drugs_mapper", "en", "clinical/models")
  .setInputCols(Array("ner_chunk"))
  .setOutputCol("mappings")
  .setRels(Array("umls_code"))

// Stage order matters: each stage reads columns written by earlier stages.
val mapper_pipeline = new Pipeline().setStages(Array(
  document_assembler,
  sentence_detector,
  tokenizer,
  word_embeddings,
  ner_model,
  ner_model_converter,
  chunkerMapper))

// Single-row DataFrame with one column named "text".
// NOTE(review): `toDF` on a Seq presumably needs `spark.implicits._` in scope — confirm.
val test_data = Seq("She was immediately given hydrogen peroxide 30 mg, and has been advised Neosporin Cream for 5 days. She has a history of taking magnesium hydroxide 100mg/1ml and metformin 1000 mg.").toDF("text")

// Fit on the sample data, then transform the same data.
// Fixed: the original called `pipeline.fit(...)`, but the only pipeline value
// declared in this snippet is `mapper_pipeline`.
val result = mapper_pipeline.fit(test_data).transform(test_data)
import nlu

# Load the mapper by its NLU reference, then run it on the two-sentence sample.
umls_drug_mapper = nlu.load("en.map_entity.umls_clinical_drugs_mapper")
umls_drug_mapper.predict("""She was immediately given hydrogen peroxide 30 mg, and has been advised Neosporin Cream for 5 days.
She has a history of taking magnesium hydroxide 100mg/1ml and metformin 1000 mg.""")
Results
+-------------------+---------+
|ner_chunk |umls_code|
+-------------------+---------+
|hydrogen peroxide |C0020281 |
|Neosporin Cream |C0132149 |
|magnesium hydroxide|C0024476 |
|metformin |C0025598 |
+-------------------+---------+
Model Information
Model Name: | umls_clinical_drugs_mapper |
Compatibility: | Healthcare NLP 4.0.0+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [ner_chunk] |
Output Labels: | [mappings] |
Language: | en |
Size: | 23.3 MB |
References
The Clinical Drug category of the 2022AA UMLS dataset. https://www.nlm.nih.gov/research/umls/index.html