Description
This pretrained model maps entities (Disease or Syndrome) to their corresponding UMLS CUI codes.
Predicted Entities
umls_code
How to use
# Example pipeline: raw text -> sentences -> tokens -> clinical embeddings ->
# NER chunks -> UMLS code mappings. Each stage reads the column(s) written by
# the stage(s) before it.

# Reads the 'text' column and writes 'document'.
document_assembler = (
    DocumentAssembler()
    .setInputCol('text')
    .setOutputCol('document')
)

# Reads 'document' and writes 'sentence'.
sentence_detector = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Reads 'sentence' and writes 'token'.
tokenizer = (
    Tokenizer()
    .setInputCols("sentence")
    .setOutputCol("token")
)

# Pretrained "embeddings_clinical" model; writes 'embeddings'.
word_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Pretrained "ner_clinical_large" model; writes token-level tags to 'clinical_ner'.
ner_model = (
    MedicalNerModel.pretrained("ner_clinical_large", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("clinical_ner")
)

# Turns the 'clinical_ner' tags into chunks in 'ner_chunk'.
ner_model_converter = (
    NerConverterInternal()
    .setInputCols("sentence", "token", "clinical_ner")
    .setOutputCol("ner_chunk")
)

# The mapper this card documents: reads 'ner_chunk' and writes the requested
# "umls_code" relation to 'mappings'.
chunkerMapper = (
    ChunkMapperModel.pretrained("umls_disease_syndrome_mapper", "en", "clinical/models")
    .setInputCols(["ner_chunk"])
    .setOutputCol("mappings")
    .setRels(["umls_code"])
    .setLowerCase(True)
)

# Stage order matters: every stage consumes columns produced earlier in the list.
mapper_pipeline = Pipeline().setStages([
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    ner_model,
    ner_model_converter,
    chunkerMapper,
])

# Single-row DataFrame holding the sample sentence in a 'text' column.
data = spark.createDataFrame(
    [["""A 35-year-old male with a history of obesity and gestational diabetes mellitus and acyclovir allergy."""]]
).toDF("text")

# Fit on the sample data, then run the fitted pipeline over the same data.
mapper_model = mapper_pipeline.fit(data)
result = mapper_model.transform(data)
# Same example pipeline, written against the `nlp` / `medical` namespaces:
# raw text -> sentences -> tokens -> clinical embeddings -> NER chunks ->
# UMLS code mappings.

# Reads the 'text' column and writes 'document'.
document_assembler = (
    nlp.DocumentAssembler()
    .setInputCol('text')
    .setOutputCol('document')
)

# Reads 'document' and writes 'sentence'.
sentence_detector = (
    nlp.SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Reads 'sentence' and writes 'token'.
tokenizer = (
    nlp.Tokenizer()
    .setInputCols("sentence")
    .setOutputCol("token")
)

# Pretrained "embeddings_clinical" model; writes 'embeddings'.
word_embeddings = (
    nlp.WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Pretrained "ner_clinical_large" model; writes token-level tags to 'clinical_ner'.
ner_model = (
    medical.NerModel.pretrained("ner_clinical_large", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("clinical_ner")
)

# Turns the 'clinical_ner' tags into chunks in 'ner_chunk'.
ner_model_converter = (
    medical.NerConverterInternal()
    .setInputCols("sentence", "token", "clinical_ner")
    .setOutputCol("ner_chunk")
)

# The mapper this card documents: reads 'ner_chunk' and writes the requested
# "umls_code" relation to 'mappings'.
chunkerMapper = (
    medical.ChunkMapperModel.pretrained("umls_disease_syndrome_mapper", "en", "clinical/models")
    .setInputCols(["ner_chunk"])
    .setOutputCol("mappings")
    .setRels(["umls_code"])
    .setLowerCase(True)
)

# Stage order matters: every stage consumes columns produced earlier in the list.
mapper_pipeline = nlp.Pipeline().setStages([
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    ner_model,
    ner_model_converter,
    chunkerMapper,
])

# Single-row DataFrame holding the sample sentence in a 'text' column.
data = spark.createDataFrame(
    [["""A 35-year-old male with a history of obesity and gestational diabetes mellitus and acyclovir allergy."""]]
).toDF("text")

# Fit on the sample data, then run the fitted pipeline over the same data.
mapper_model = mapper_pipeline.fit(data)
result = mapper_model.transform(data)
// Example pipeline: raw text -> sentences -> tokens -> clinical embeddings ->
// NER chunks -> UMLS code mappings. Each stage reads the column(s) written by
// the stage(s) before it.

// Needed for `Seq(...).toDF(...)` below; assumes a SparkSession value named
// `spark` is in scope (spark-shell imports this automatically).
import spark.implicits._

// Reads the "text" column and writes "document".
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Reads "document" and writes "sentence".
val sentence_detector = new SentenceDetector()
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

// Reads "sentence" and writes "token".
val tokenizer = new Tokenizer()
  .setInputCols("sentence")
  .setOutputCol("token")

// Pretrained "embeddings_clinical" model; writes "embeddings".
val word_embeddings = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")

// Pretrained "ner_clinical_large" model; writes token-level tags to "clinical_ner".
val ner_model = MedicalNerModel
  .pretrained("ner_clinical_large", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("clinical_ner")

// Turns the "clinical_ner" tags into chunks in "ner_chunk".
val ner_model_converter = new NerConverterInternal()
  .setInputCols("sentence", "token", "clinical_ner")
  .setOutputCol("ner_chunk")

// The mapper this card documents: reads "ner_chunk" and writes the requested
// "umls_code" relation to "mappings". `setLowerCase(true)` mirrors the Python
// examples so all three snippets configure the mapper identically.
val chunkerMapper = ChunkMapperModel
  .pretrained("umls_disease_syndrome_mapper", "en", "clinical/models")
  .setInputCols(Array("ner_chunk"))
  .setOutputCol("mappings")
  .setRels(Array("umls_code"))
  .setLowerCase(true)

// Stage order matters: every stage consumes columns produced earlier in the array.
val mapper_pipeline = new Pipeline().setStages(Array(
  document_assembler,
  sentence_detector,
  tokenizer,
  word_embeddings,
  ner_model,
  ner_model_converter,
  chunkerMapper))

// Single-row DataFrame holding the sample sentence in a "text" column.
val data = Seq("""A 35-year-old male with a history of obesity and gestational diabetes mellitus and acyclovir allergy.""").toDF("text")

// Fit on the sample data, then run the fitted pipeline over the same data.
val result = mapper_pipeline.fit(data).transform(data)
Results
+-----------------------------+---------+
|ner_chunk |umls_code|
+-----------------------------+---------+
|obesity |C0028754 |
|gestational diabetes mellitus|C0085207 |
|acyclovir allergy |C0571297 |
+-----------------------------+---------+
Model Information
Model Name: | umls_disease_syndrome_mapper |
Compatibility: | Healthcare NLP 6.0.2+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [ner_chunk] |
Output Labels: | [mappings] |
Language: | en |
Size: | 14.8 MB |
References
Trained on Disease or Syndrome concepts from the 2025AA release of the Unified Medical Language System® (UMLS) Knowledge Sources: https://www.nlm.nih.gov/research/umls/index.html