Description
This model maps clinical conditions to their corresponding SNOMED codes (domain: Conditions) using `sbiobert_base_cased_mli` Sentence BERT embeddings.
How to use
# Spark NLP pipeline that resolves clinical condition mentions to SNOMED codes.
# Stages are chained through the input/output column names configured below.

# Raw "text" column -> "document" annotations.
document_assembler = DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Named `sentence_detector` so it matches the reference in the stage list below.
sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")\
    .setInputCols(["document"])\
    .setOutputCol("sentence")

tokenizer = Tokenizer()\
    .setInputCols(["sentence"])\
    .setOutputCol("token")

# Token-level embeddings consumed by the NER model.
word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token"])\
    .setOutputCol("embeddings")

ner_jsl = MedicalNerModel.pretrained("ner_jsl", "en", "clinical/models")\
    .setInputCols(["sentence", "token", "embeddings"])\
    .setOutputCol("ner")

# Keep only the whitelisted entity labels as chunks; each label is listed once.
ner_jsl_converter = NerConverterInternal()\
    .setInputCols(["sentence", "token", "ner"])\
    .setOutputCol("ner_chunk")\
    .setWhiteList(["Kidney_Disease", "Cerebrovascular_Disease", "Heart_Disease", "Disease_Syndrome_Disorder",
                   "ImagingFindings", "Symptom", "VS_Finding", "EKG_Findings", "Communicable_Disease", "Pregnancy",
                   "Obesity", "Hypertension", "Overweight", "Hyperlipidemia", "Triglycerides", "Diabetes", "Oncological",
                   "Psychological_Condition", "Injury_or_Poisoning"])

# "ner_chunk" -> "ner_chunk_doc", the column the sentence embedder reads.
chunk2doc = Chunk2Doc()\
    .setInputCols("ner_chunk")\
    .setOutputCol("ner_chunk_doc")

sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")\
    .setInputCols(["ner_chunk_doc"])\
    .setOutputCol("sbert_embeddings")\
    .setCaseSensitive(False)

# Distance function set explicitly, matching the other usage examples of this model.
snomed_resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_conditions", "en", "clinical/models")\
    .setInputCols(["sbert_embeddings"])\
    .setOutputCol("snomed_code")\
    .setDistanceFunction("EUCLIDEAN")

snomed_pipeline = Pipeline(stages=[
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    ner_jsl,
    ner_jsl_converter,
    chunk2doc,
    sbert_embedder,
    snomed_resolver
])

sample_text = """Medical professionals rushed in the bustling emergency room to attend to the patient with alarming symptoms.
The attending physician immediately noted signs of respiratory distress, including stridor, a high-pitched sound indicative of upper respiratory tract obstruction.
The patient, struggling to breathe, exhibited dyspnea. Concern heightened when they began experiencing syncope,
a sudden loss of consciousness likely stemming from inadequate oxygenation. Further examination revealed a respiratory tract hemorrhage."""

# Single-row DataFrame with the "text" column expected by document_assembler.
df = spark.createDataFrame([[sample_text]]).toDF("text")

# Fit and apply the pipeline defined above (it is named `snomed_pipeline` in this example).
result = snomed_pipeline.fit(df).transform(df)
# Same SNOMED-conditions resolution pipeline, written against the
# `nlp` / `medical` namespaces used throughout this example.

# Raw "text" column -> "document" annotations.
documentAssembler = nlp.DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

sentenceDetector = nlp.SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")\
    .setInputCols(["document"])\
    .setOutputCol("sentence")

tokenizer = nlp.Tokenizer()\
    .setInputCols(["sentence"])\
    .setOutputCol("token")

# Token-level embeddings consumed by the NER model.
word_embeddings = nlp.WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token"])\
    .setOutputCol("embeddings")

ner_jsl = medical.NerModel.pretrained("ner_jsl", "en", "clinical/models")\
    .setInputCols(["sentence", "token", "embeddings"])\
    .setOutputCol("ner")

# Keep only the whitelisted entity labels as chunks; each label is listed once.
ner_jsl_converter = medical.NerConverterInternal()\
    .setInputCols(["sentence", "token", "ner"])\
    .setOutputCol("ner_chunk")\
    .setWhiteList(["Kidney_Disease", "Cerebrovascular_Disease", "Heart_Disease", "Disease_Syndrome_Disorder",
                   "ImagingFindings", "Symptom", "VS_Finding", "EKG_Findings", "Communicable_Disease", "Pregnancy",
                   "Obesity", "Hypertension", "Overweight", "Hyperlipidemia", "Triglycerides", "Diabetes", "Oncological",
                   "Psychological_Condition", "Injury_or_Poisoning"])

# "ner_chunk" -> "ner_chunk_doc", the column the sentence embedder reads.
chunk2doc = nlp.Chunk2Doc()\
    .setInputCols("ner_chunk")\
    .setOutputCol("ner_chunk_doc")

sbert_embedder = nlp.BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")\
    .setInputCols(["ner_chunk_doc"])\
    .setOutputCol("sbert_embeddings")\
    .setCaseSensitive(False)

snomed_resolver = medical.SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_conditions", "en", "clinical/models")\
    .setInputCols(["sbert_embeddings"])\
    .setOutputCol("snomed_code")\
    .setDistanceFunction("EUCLIDEAN")

nlpPipeline = nlp.Pipeline(stages=[
    documentAssembler,
    sentenceDetector,
    tokenizer,
    word_embeddings,
    ner_jsl,
    ner_jsl_converter,
    chunk2doc,
    sbert_embedder,
    snomed_resolver
])

# Opened with exactly three quotes so no stray '"' character is prepended to the input text.
sample_text = """Medical professionals rushed in the bustling emergency room to attend to the patient with alarming symptoms.
The attending physician immediately noted signs of respiratory distress, including stridor, a high-pitched sound indicative of upper respiratory tract obstruction.
The patient, struggling to breathe, exhibited dyspnea. Concern heightened when they began experiencing syncope,
a sudden loss of consciousness likely stemming from inadequate oxygenation. Further examination revealed a respiratory tract hemorrhage."""

# Single-row DataFrame with the "text" column expected by documentAssembler.
df = spark.createDataFrame([[sample_text]]).toDF("text")

result = nlpPipeline.fit(df).transform(df)
// Scala version of the SNOMED-conditions resolution pipeline.
// Stages are chained through the input/output column names configured below.

// Raw "text" column -> "document" annotations.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

val sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

val tokenizer = new Tokenizer()
  .setInputCols(Array("sentence"))
  .setOutputCol("token")

// Token-level embeddings consumed by the NER model.
val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")

val ner_jsl = MedicalNerModel.pretrained("ner_jsl", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("ner")

// Named `ner_jsl_converter` so it matches the reference in the stage array below.
// Keeps only the whitelisted entity labels as chunks; each label is listed once.
val ner_jsl_converter = new NerConverter()
  .setInputCols(Array("sentence", "token", "ner"))
  .setOutputCol("ner_chunk")
  .setWhiteList(Array(
    "Kidney_Disease", "Cerebrovascular_Disease", "Heart_Disease", "Disease_Syndrome_Disorder",
    "ImagingFindings", "Symptom", "VS_Finding", "EKG_Findings", "Communicable_Disease", "Pregnancy",
    "Obesity", "Hypertension", "Overweight", "Hyperlipidemia", "Triglycerides", "Diabetes", "Oncological",
    "Psychological_Condition", "Injury_or_Poisoning"))

// "ner_chunk" -> "ner_chunk_doc", the column the sentence embedder reads.
val chunk2doc = new Chunk2Doc()
  .setInputCols("ner_chunk")
  .setOutputCol("ner_chunk_doc")

// Scala boolean literal is lowercase `false` (`False` is Python and does not compile here).
val sbert_embedder = BertSentenceEmbeddings
  .pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
  .setInputCols(Array("ner_chunk_doc"))
  .setOutputCol("sbert_embeddings")
  .setCaseSensitive(false)

// Output column is "snomed_code", matching the Python examples and the results table.
val snomed_resolver = SentenceEntityResolverModel
  .pretrained("sbiobertresolve_snomed_conditions", "en", "clinical/models")
  .setInputCols(Array("sbert_embeddings"))
  .setOutputCol("snomed_code")
  .setDistanceFunction("EUCLIDEAN")

val nlpPipeline = new Pipeline().setStages(Array(
  document_assembler,
  sentenceDetectorDL,
  tokenizer,
  word_embeddings,
  ner_jsl,
  ner_jsl_converter,
  chunk2doc,
  sbert_embedder,
  snomed_resolver))

val sample_text = """The patient is a 30-year-old female with a long history of insulin-dependent diabetes, type 2; coronary artery disease; chronic renal insufficiency; peripheral vascular disease, also secondary to diabetes; who was originally admitted to an outside hospital for what appeared to be acute paraplegia, lower extremities. She did receive a course of Bactrim for 14 days for UTI."""

// NOTE(review): `.toDF` on a Seq presumably relies on `spark.implicits._` being in scope
// (as in spark-shell) — confirm for standalone use.
val df = Seq(sample_text).toDF("text")

val result = nlpPipeline.fit(df).transform(df)
Results
| sent_id | ner_chunk | entity | snomed_code | resolutions | all_codes | all_resolutions |
|---------|-------------------------------------|---------------------------|-------------|-------------------------------------|----------------------------------------------------|----------------------------------------------------|
| 1 | respiratory distress | VS_Finding | 271825005 | respiratory distress | ['271825005', '418092006', '75483001', '3738950... | ['respiratory distress', 'respiratory tract con... |
| 1 | stridor | Symptom | 70407001 | stridor | ['70407001', '301826004', '58596002', '30128700... | ['stridor', 'intermittent stridor', 'inhalatory... |
| 1 | high-pitched sound | Symptom | 271661003 | heart sounds exaggerated | ['271661003', '405495005', '300211002', '248615... | ['heart sounds exaggerated', 'high airway press... |
| 1 | upper respiratory tract obstruction | Disease_Syndrome_Disorder | 68372009 | upper respiratory tract obstruction | ['68372009', '79688008', '73342002', '301252002... | ['upper respiratory tract obstruction', 'respir... |
| 2 | struggling to breathe | Symptom | 230145002 | difficulty breathing | ['230145002', '289116005', '386813002', '271825... | ['difficulty breathing', 'difficulty coughing',... |
| 2 | dyspnea | Symptom | 267036007 | dyspnea | ['267036007', '60845006', '25209001', '34560001... | ['dyspnea', 'exertional dyspnea', 'inspiratory ... |
| 3 | syncope | Symptom | 271594007 | syncope | ['271594007', '234167006', '90129003', '4455350... | ['syncope', 'situational syncope', 'tussive syn... |
| 3 | loss of consciousness | Symptom | 44077006 | loss of sensation | ['44077006', '249845008', '68158006', '24656200... | ['loss of sensation', 'loss of sense of positio... |
| 3 | inadequate oxygenation | Symptom | 238161004 | impaired oxygen delivery | ['238161004', '70944005', '238162006', '1238260... | ['impaired oxygen delivery', 'impaired gas exch... |
| 4 | respiratory tract hemorrhage | Disease_Syndrome_Disorder | 95431003 | respiratory tract hemorrhage | ['95431003', '233783005', '15238002', '78144005... | ['respiratory tract hemorrhage', 'tracheal hemo... |
Model Information
| Model Name: | sbiobertresolve_snomed_conditions |
| Compatibility: | Healthcare NLP 6.3.0+ |
| License: | Licensed |
| Edition: | Official |
| Input Labels: | [sentence_embeddings] |
| Output Labels: | [snomed_code] |
| Language: | en |
| Size: | 593.3 MB |
| Case sensitive: | false |