Sentence Entity Resolver for SNOMED (sbiobertresolve_snomed_conditions)

Description

This model maps clinical conditions to their corresponding SNOMED codes (domain: Conditions) using `sbiobert_base_cased_mli` Sentence BERT embeddings.

Predicted Entities

Copy S3 URI

How to use

# Python (PySpark) example.
# Assumes the Spark NLP / Healthcare NLP annotator classes, `Pipeline`, and an
# active `spark` session are already available (imports are not shown here).

# Wrap the raw "text" column into document annotations.
document_assembler = DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Split documents into sentences.
sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")\
    .setInputCols(["document"])\
    .setOutputCol("sentence")

# Split sentences into tokens.
tokenizer = Tokenizer()\
    .setInputCols(["sentence"])\
    .setOutputCol("token")

# Token-level embeddings consumed by the NER model.
word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token"])\
    .setOutputCol("embeddings")

# Tag clinical entities.
ner_jsl = MedicalNerModel.pretrained("ner_jsl", "en", "clinical/models")\
    .setInputCols(["sentence", "token", "embeddings"])\
    .setOutputCol("ner")

# Merge NER tags into chunks, restricted to the condition-related labels below.
ner_jsl_converter = NerConverterInternal()\
    .setInputCols(["sentence", "token", "ner"])\
    .setOutputCol("ner_chunk")\
    .setWhiteList(["Kidney_Disease", "Cerebrovascular_Disease", "Heart_Disease", "Disease_Syndrome_Disorder",
                   "ImagingFindings", "Symptom", "VS_Finding", "EKG_Findings", "Communicable_Disease", "Pregnancy",
                   "Obesity", "Hypertension", "Overweight", "Hyperlipidemia", "Triglycerides", "Diabetes", "Oncological",
                   "Psychological_Condition", "Injury_or_Poisoning"])

# Turn each chunk into a document so it can be embedded on its own.
c2doc = Chunk2Doc()\
    .setInputCols("ner_chunk")\
    .setOutputCol("ner_chunk_doc")

# Sentence-BERT embeddings of each chunk; these feed the resolver.
sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")\
    .setInputCols(["ner_chunk_doc"])\
    .setOutputCol("sbert_embeddings")\
    .setCaseSensitive(False)

# Map each chunk embedding to SNOMED condition codes.
resolver = SentenceEntityResolverModel\
    .pretrained("sbiobertresolve_snomed_conditions", "en", "clinical/models")\
    .setInputCols(["sbert_embeddings"])\
    .setOutputCol("resolution")\
    .setDistanceFunction("EUCLIDEAN")

# Stage order matters: every stage reads columns produced by earlier ones.
resolver_pipeline = Pipeline(
    stages=[
        document_assembler,
        sentenceDetectorDL,
        tokenizer,
        word_embeddings,
        ner_jsl,
        ner_jsl_converter,
        c2doc,
        sbert_embedder,
        resolver,
    ])


text = [["""Medical professionals rushed in the bustling emergency room to attend to the patient with alarming symptoms.
            The attending physician immediately noted signs of respiratory distress, including stridor, a high-pitched sound indicative of upper respiratory tract obstruction.
            The patient, struggling to breathe, exhibited dyspnea. Concern heightened when they began experiencing syncope,
            a sudden loss of consciousness likely stemming from inadequate oxygenation. Further examination revealed a respiratory tract hemorrhage."""]]


# Single-row DataFrame with the sample note in a "text" column.
data = spark.createDataFrame(text).toDF('text')
model = resolver_pipeline.fit(data)
result = model.transform(data)
// Scala example of the same pipeline.
// Assumes the Spark NLP / Healthcare NLP annotator classes, `Pipeline`, an active
// `spark` session and `spark.implicits._` (needed for `.toDF`) are already in scope;
// imports are not shown here.

// Wrap the raw "text" column into document annotations.
val document_assembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

// Split documents into sentences.
val sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")
    .setInputCols(Array("document"))
    .setOutputCol("sentence")

// Split sentences into tokens.
val tokenizer = new Tokenizer()
    .setInputCols(Array("sentence"))
    .setOutputCol("token")

// Token-level embeddings consumed by the NER model.
val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(Array("sentence","token"))
    .setOutputCol("embeddings")

// Tag clinical entities.
val ner_jsl = MedicalNerModel.pretrained("ner_jsl", "en", "clinical/models")
    .setInputCols(Array("sentence","token","embeddings"))
    .setOutputCol("ner")

// Merge NER tags into chunks, restricted to the condition-related labels below.
// Named `ner_jsl_converter` so that it matches the reference in the pipeline stages.
val ner_jsl_converter = new NerConverter()
    .setInputCols(Array("sentence","token","ner"))
    .setOutputCol("ner_chunk")
    .setWhiteList(Array("Kidney_Disease", "Cerebrovascular_Disease", "Heart_Disease", "Disease_Syndrome_Disorder",
                  "ImagingFindings", "Symptom", "VS_Finding", "EKG_Findings", "Communicable_Disease", "Pregnancy",
                  "Obesity", "Hypertension", "Overweight", "Hyperlipidemia", "Triglycerides", "Diabetes", "Oncological",
                  "Psychological_Condition", "Injury_or_Poisoning"))

// Turn each chunk into a document so it can be embedded on its own.
val chunk2doc = new Chunk2Doc()
    .setInputCols("ner_chunk")
    .setOutputCol("ner_chunk_doc")

// Sentence-BERT embeddings of each chunk; these feed the resolver.
val sbert_embedder = BertSentenceEmbeddings
    .pretrained("sbiobert_base_cased_mli","en","clinical/models")
    .setInputCols(Array("ner_chunk_doc"))
    .setOutputCol("sbert_embeddings")
    .setCaseSensitive(false)

// Map each chunk embedding to SNOMED condition codes.
val resolver = SentenceEntityResolverModel
    .pretrained("sbiobertresolve_snomed_conditions", "en", "clinical/models")
    .setInputCols(Array("sbert_embeddings"))
    .setOutputCol("resolution")
    .setDistanceFunction("EUCLIDEAN")

// Stage order matters: every stage reads columns produced by earlier ones.
val nlpPipeline = new Pipeline().setStages(Array(
    document_assembler,
    sentenceDetectorDL,
    tokenizer,
    word_embeddings,
    ner_jsl,
    ner_jsl_converter,
    chunk2doc,
    sbert_embedder,
    resolver))

// Single-row DataFrame with the sample note in a "text" column.
val data = Seq("Medical professionals rushed in the bustling emergency room to attend to the patient with alarming symptoms. The attending physician immediately noted signs of respiratory distress, including stridor, a high-pitched sound indicative of upper respiratory tract obstruction. The patient, struggling to breathe, exhibited dyspnea. Concern heightened when they began experiencing syncope, a sudden loss of consciousness likely stemming from inadequate oxygenation. Further examination revealed a respiratory tract hemorrhage.").toDF("text")

val result = nlpPipeline.fit(data).transform(data)

Results

+-----------------------------------+-------------------------+-----------+-----------------------------------+--------------------------------------------------+--------------------------------------------------+
|                              chunk|                    label|snomed_code|                         resolution|                                         all_codes|                                   all_resolutions|
+-----------------------------------+-------------------------+-----------+-----------------------------------+--------------------------------------------------+--------------------------------------------------+
|               respiratory distress|               VS_Finding|  271825005|               respiratory distress|271825005:::418092006:::75483001:::373895009:::...|respiratory distress:::respiratory tract conges...|
|                            stridor|                  Symptom|   70407001|                            stridor|70407001:::301826004:::58596002:::301287002:::3...|stridor:::intermittent stridor:::inhalatory str...|
|                 high-pitched sound|                  Symptom|   51406002|                 high pitched voice|51406002:::271661003:::405495005:::23292001:::3...|high pitched voice:::heart sounds exaggerated::...|
|upper respiratory tract obstruction|Disease_Syndrome_Disorder|   68372009|upper respiratory tract obstruction|68372009:::79688008:::73342002:::301252002:::20...|upper respiratory tract obstruction:::respirato...|
|              struggling to breathe|                  Symptom|  289105003|   difficulty controlling breathing|289105003:::230145002:::289116005:::386813002::...|difficulty controlling breathing:::difficulty b...|
|                            dyspnea|                  Symptom|  267036007|                            dyspnea|267036007:::60845006:::25209001:::34560001:::59...|dyspnea:::exertional dyspnea:::inspiratory dysp...|
|                            syncope|                  Symptom|  271594007|                            syncope|271594007:::234167006:::90129003:::445535007:::...|syncope:::situational syncope:::tussive syncope...|
|              loss of consciousness|                  Symptom|  419045004|              loss of consciousness|419045004:::44077006:::44564008:::443371007:::1...|loss of consciousness:::loss of sensation:::los...|
|             inadequate oxygenation|                  Symptom|  238161004|           impaired oxygen delivery|238161004:::70944005:::238162006:::123826004:::...|impaired oxygen delivery:::impaired gas exchang...|
|       respiratory tract hemorrhage|Disease_Syndrome_Disorder|   95431003|       respiratory tract hemorrhage|95431003:::233783005:::15238002:::78144005:::32...|respiratory tract hemorrhage:::tracheal hemorrh...|
+-----------------------------------+-------------------------+-----------+-----------------------------------+--------------------------------------------------+--------------------------------------------------+

Model Information

Model Name: sbiobertresolve_snomed_conditions
Compatibility: Healthcare NLP 5.3.0+
License: Licensed
Edition: Official
Input Labels: [sentence_embeddings]
Output Labels: [snomed_code]
Language: en
Size: 592.3 MB
Case sensitive: false

References

This model is trained on an augmented version of the NIH September 2023 release of the SNOMED CT United States (US) Edition.