Description
Assigns an assertion status to clinical entities extracted by NER, based on their context in the text. The model is trained on a list of clinical and biomedical datasets curated in-house.
Predicted Entities
Present
, Absent
, Possible
, Planned
, Past
, Family
, Hypothetical
, SomeoneElse
How to use
# Assertion-status pipeline: clinical NER chunks are turned into assertion
# sentences, embedded with E5, and classified by the few-shot assertion model.

# "text" column -> "document" annotations.
document_assembler = DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# "document" -> "sentence", using the pretrained healthcare sentence detector.
sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")\
    .setInputCols(["document"])\
    .setOutputCol("sentence")

# "sentence" -> "token". The second split char is the two-character string
# backslash + slash; the backslash is written as "\\" because "\/" is not a
# valid Python escape sequence (it only works by accident and emits a warning).
tokenizer = Tokenizer()\
    .setInputCols(["sentence"])\
    .setOutputCol("token")\
    .setSplitChars(["-", "\\/"])

# "sentence" + "token" -> "embeddings" (input required by the NER model below).
word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token"])\
    .setOutputCol("embeddings")

# ner_jsl
clinical_ner = MedicalNerModel.pretrained("ner_jsl", "en", "clinical/models")\
    .setInputCols(["sentence", "token", "embeddings"])\
    .setOutputCol("ner")

# NER tags -> "ner_jsl_chunk"; RelativeDate and Gender entities are black-listed.
ner_converter = NerConverterInternal()\
    .setInputCols(["sentence", "token", "ner"])\
    .setOutputCol("ner_jsl_chunk")\
    .setBlackList(["RelativeDate", "Gender"])

# Chunks + their sentences -> "assertion_sentence" (input of the E5 embeddings).
few_shot_assertion_converter = FewShotAssertionSentenceConverter()\
    .setInputCols(["sentence", "token", "ner_jsl_chunk"])\
    .setOutputCol("assertion_sentence")

# "assertion_sentence" -> "assertion_embedding".
e5_embeddings = E5Embeddings.pretrained("e5_base_v2_embeddings_medical_assertion_jsl", "en", "clinical/models")\
    .setInputCols(["assertion_sentence"])\
    .setOutputCol("assertion_embedding")

# "assertion_embedding" -> "assertion". `pretrained` is called on the class,
# like every other pretrained stage above; no throwaway instance is needed.
few_shot_assertion_classifier = FewShotAssertionClassifierModel.pretrained("fewhot_assertion_jsl_e5_base_v2_jsl", "en", "clinical/models")\
    .setInputCols(["assertion_embedding"])\
    .setOutputCol("assertion")

# Stage order matters: each stage consumes columns produced by earlier ones.
assertion_pipeline = Pipeline(stages=[
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    clinical_ner,
    ner_converter,
    few_shot_assertion_converter,
    e5_embeddings,
    few_shot_assertion_classifier
])

# Single-row example DataFrame with one string column named "text".
data = spark.createDataFrame([["""Patient had a headache for the last 2 weeks, and appears anxious when she walks fast. No alopecia noted. She denies pain. Her father is paralyzed and it is a stressor for her. She was bullied by her boss and got antidepressant. We prescribed sleeping pills for her current insomnia."""]]).toDF("text")

result = assertion_pipeline.fit(data).transform(data)
// Assertion-status pipeline: clinical NER chunks are turned into assertion
// sentences, embedded with E5, and classified by the few-shot assertion model.

// "text" column -> "document" annotations.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// "document" -> "sentence", using the pretrained healthcare sentence detector.
val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

// "sentence" -> "token". The second split char is backslash + slash; the
// backslash must be written as "\\" because "\/" is an invalid escape in a
// Scala string literal and does not compile.
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentence"))
  .setOutputCol("token")
  .setSplitChars(Array("-", "\\/"))

// "sentence" + "token" -> "embeddings" (input required by the NER model below).
val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")

// ner_jsl
val clinical_ner = MedicalNerModel.pretrained("ner_jsl", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("ner")

// NER tags -> "ner_jsl_chunk"; RelativeDate and Gender entities are black-listed.
val ner_converter = new NerConverterInternal()
  .setInputCols(Array("sentence", "token", "ner"))
  .setOutputCol("ner_jsl_chunk")
  .setBlackList(Array("RelativeDate", "Gender"))

// Chunks + their sentences -> "assertion_sentence" (input of the E5 embeddings).
val few_shot_assertion_converter = new FewShotAssertionSentenceConverter()
  .setInputCols(Array("sentence", "token", "ner_jsl_chunk"))
  .setOutputCol("assertion_sentence")

// "assertion_sentence" -> "assertion_embedding".
val e5_embeddings = E5Embeddings.pretrained("e5_base_v2_embeddings_medical_assertion_jsl", "en", "clinical/models")
  .setInputCols(Array("assertion_sentence"))
  .setOutputCol("assertion_embedding")

// "assertion_embedding" -> "assertion". `pretrained` is invoked on the
// companion object, exactly like the other pretrained stages above.
val few_shot_assertion_classifier = FewShotAssertionClassifierModel.pretrained("fewhot_assertion_jsl_e5_base_v2_jsl", "en", "clinical/models")
  .setInputCols(Array("assertion_embedding"))
  .setOutputCol("assertion")

// Stage order matters: each stage consumes columns produced by earlier ones.
val pipeline = new Pipeline().setStages(Array(
  document_assembler,
  sentence_detector,
  tokenizer,
  word_embeddings,
  clinical_ner,
  ner_converter,
  few_shot_assertion_converter,
  e5_embeddings,
  few_shot_assertion_classifier))
// Single-row example DataFrame with one String column named "text", which is
// the column the DocumentAssembler reads. The sample is passed as a plain
// String (not wrapped in an Array) so the column type is string rather than
// array<string>.
// NOTE: `toDF` on a Seq requires `import spark.implicits._` to be in scope.
val data = Seq("""Patient had a headache for the last 2 weeks, and appears anxious when she walks fast. No alopecia noted. She denies pain. Her father is paralyzed and it is a stressor for her. She was bullied by her boss and got antidepressant. We prescribed sleeping pills for her current insomnia.""").toDF("text")

// Fit the pipeline on the example data and annotate it.
val result = pipeline.fit(data).transform(data)
Results
| | chunks | begin | end | entities | assertion | confidence |
|---:|:---------------------|--------:|------:|:----------------|:------------|-------------:|
| 0 | headache | 14 | 21 | Symptom | Past | 0.905649 |
| 1 | for the last 2 weeks | 23 | 42 | Duration | Past | 0.904228 |
| 2 | anxious | 57 | 63 | Symptom | Possible | 0.872409 |
| 3 | alopecia | 89 | 96 | Symptom | Absent | 0.907129 |
| 4 | pain | 116 | 119 | Symptom | Absent | 0.907316 |
| 5 | paralyzed | 136 | 144 | Symptom | Family | 0.889557 |
| 6 | stressor | 158 | 165 | Symptom | Family | 0.890123 |
| 7 | bullied by her boss | 184 | 202 | Symptom | Past | 0.870923 |
| 8 | antidepressant | 212 | 225 | Drug_Ingredient | Present | 0.89228 |
| 9 | sleeping pills | 242 | 255 | Drug_Ingredient | Planned | 0.849468 |
| 10 | insomnia | 273 | 280 | Symptom | Planned | 0.818986 |
Model Information
Model Name: | fewhot_assertion_jsl_e5_base_v2_jsl |
Compatibility: | Healthcare NLP 5.3.3+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [assertion_embedding] |
Output Labels: | [assertion] |
Language: | en |
Size: | 32.1 KB |
Benchmarking
label precision recall f1-score support
Absent 0.97 0.96 0.97 707
Family 0.92 0.91 0.92 283
Hypothetical 0.88 0.83 0.85 386
Past 0.91 0.90 0.91 717
Planned 0.75 0.91 0.82 159
Possible 0.77 0.93 0.84 289
Present 0.94 0.89 0.92 1058
SomeoneElse 0.84 0.87 0.85 148
accuracy - - 0.90 3747
macro-avg 0.87 0.90 0.88 3747
weighted-avg 0.91 0.90 0.91 3747