Description
The model maps extracted medical entities to their corresponding Snomed codes (Clinical Findings) using sbiobert_base_cased_mli
BERT sentence embeddings.
Predicted Entities
How to use
document_assembler = DocumentAssembler()\
.setInputCol("text")\
.setOutputCol("document")
sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")\
.setInputCols(["document"])\
.setOutputCol("sentence")
tokenizer = Tokenizer()\
.setInputCols(["sentence"])\
.setOutputCol("token")\
word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
.setInputCols(["sentence", "token"])\
.setOutputCol("embeddings")
ner_jsl = MedicalNerModel.pretrained("ner_jsl", "en", "clinical/models") \
.setInputCols(["sentence", "token", "embeddings"]) \
.setOutputCol("ner_jsl")
ner_jsl_converter = NerConverterInternal() \
.setInputCols(["sentence", "token", "ner_jsl"]) \
.setOutputCol("ner_jsl_chunk")\
.setWhiteList(["Kidney_Disease", "Cerebrovascular_Disease", "Heart_Disease",
"Disease_Syndrome_Disorder", "ImagingFindings", "Symptom", "VS_Finding",
"EKG_Findings", "Communicable_Disease"])\
chunk2doc = Chunk2Doc()\
.setInputCols("ner_jsl_chunk")\
.setOutputCol("ner_chunk_doc")
sbert_embeddings = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli","en","clinical/models")\
.setInputCols(["ner_chunk_doc"])\
.setOutputCol("sbert_embeddings")\
.setCaseSensitive(False)
snomed_resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_findings", "en", "clinical/models") \
.setInputCols(["sbert_embeddings"]) \
.setOutputCol("snomed_code")\
snomed_pipeline = Pipeline(stages = [
document_assembler,
sentence_detector,
tokenizer,
word_embeddings,
ner_jsl,
ner_jsl_converter,
chunk2doc,
sbert_embeddings,
snomed_resolver
])
data = spark.createDataFrame([["""The patient exhibited recurrent upper respiratory tract infections, subjective fevers, weight loss, and night sweats. Clinically, they appeared cachectic and with hepatosplenomegaly. Laboratory results confirmed pancytopenia."""]]).toDF("text")
model = snomed_pipeline.fit(data)
result = model.transform(data)
val documentAssembler = new DocumentAssembler()
.setInputCol("text")
.setOutputCol("document")
val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
.setInputCols(Array("document"))
.setOutputCol("sentence")
val tokenizer = new Tokenizer()
.setInputCols(Array("sentence"))
.setOutputCol("token")
val wordEmbeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
.setInputCols(Array("sentence", "token"))
.setOutputCol("embeddings")
val nerJsl = MedicalNerModel.pretrained("ner_jsl", "en", "clinical/models")
.setInputCols(Array("sentence", "token", "embeddings"))
.setOutputCol("ner_jsl")
val nerJslConverter = new NerConverterInternal()
.setInputCols(Array("sentence", "token", "ner_jsl"))
.setOutputCol("ner_jsl_chunk")
.setWhiteList(Array("Kidney_Disease", "Cerebrovascular_Disease", "Heart_Disease",
"Disease_Syndrome_Disorder", "ImagingFindings", "Symptom", "VS_Finding",
"EKG_Findings", "Communicable_Disease"))
val chunk2doc = new Chunk2Doc()
.setInputCols("ner_jsl_chunk")
.setOutputCol("ner_chunk_doc")
val sbertEmbeddings = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli","en","clinical/models")
.setInputCols(Array("ner_chunk_doc"))
.setOutputCol("sbert_embeddings")
.setCaseSensitive(false)
val snomedResolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_findings", "en", "clinical/models")
.setInputCols(Array("sbert_embeddings"))
.setOutputCol("snomed_code")
val snomedPipeline = new Pipeline().setStages(Array(
documentAssembler,
sentenceDetector,
tokenizer,
wordEmbeddings,
nerJsl,
nerJslConverter,
chunk2doc,
sbertEmbeddings,
snomedResolver
))
val data = Seq("""The patient exhibited recurrent upper respiratory tract infections, subjective fevers, weight loss, and night sweats. Clinically, they appeared cachectic and with hepatosplenomegaly. Laboratory results confirmed pancytopenia.""").toDF("text")
val model = snomedPipeline.fit(data)
val result = model.transform(data)
Results
+----------------------------------+-------------------------+-----------+---------------------------------+--------------------------------------------------+--------------------------------------------------+
| chunk| label|snomed_code| resolution| all_codes| all_resolutions|
+----------------------------------+-------------------------+-----------+---------------------------------+--------------------------------------------------+--------------------------------------------------+
|upper respiratory tract infections|Disease_Syndrome_Disorder| 54150009|upper respiratory tract infection|54150009:::312118003:::54398005:::275498002:::1...|upper respiratory tract infection:::upper respi...|
| fevers| VS_Finding| 386661006| fever|386661006:::77957000:::52715007:::12579009:::27...|fever:::intermittent fever:::cyclic fever:::per...|
| weight loss| Symptom| 89362005| weight loss|89362005:::416528001:::161832001:::426977000:::...|weight loss:::intentional weight loss:::losing ...|
| night sweats| Symptom| 42984000| night sweats|42984000:::67233009:::423052008:::36163009:::89...|night sweats:::night waking:::frequent night wa...|
| cachectic| Symptom| 238108007| cachectic|238108007:::422003001:::284529003:::788876001::...|cachectic:::cachexia associated with aids:::car...|
| hepatosplenomegaly| Symptom| 36760000| hepatosplenomegaly|36760000:::16294009:::19058002:::191382009:::80...|hepatosplenomegaly:::splenomegaly:::congestive ...|
| pancytopenia| Symptom| 127034005| pancytopenia|127034005:::736024007:::5876000:::124961001:::4...|pancytopenia:::drug induced pancytopenia:::panc...|
+----------------------------------+-------------------------+-----------+---------------------------------+--------------------------------------------------+--------------------------------------------------+
Model Information
Model Name: | sbiobertresolve_snomed_findings |
Compatibility: | Healthcare NLP 5.2.1+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [sentence_embeddings] |
Output Labels: | [snomed_code] |
Language: | en |
Size: | 641.0 MB |
Case sensitive: | false |