Description
This model identifies contextual cues in text to detect entities whose assertion status is `absent`. It annotates text chunks with that assertion using configurable rules: prefix and suffix patterns, and exception patterns.
Predicted Entities
absent
How to use
# Upstream stages. Each comment shows input column(s) -> output column.

# "text" -> "document"
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# "document" -> "sentence"
sentence_detector = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# "sentence" -> "token"
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Pretrained "embeddings_clinical": "sentence", "token" -> "embeddings"
word_embeddings = (
    WordEmbeddingsModel
    .pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Pretrained "ner_clinical": "sentence", "token", "embeddings" -> "ner"
clinical_ner = (
    MedicalNerModel
    .pretrained("ner_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner")
)

# "sentence", "token", "ner" -> "ner_chunk"
ner_converter = (
    NerConverter()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
)
# Absent
# Pretrained contextual assertion stage: reads "sentence", "token" and
# "ner_chunk", and writes its annotations to "assertionAbsent".
# The final call in the chain must NOT end with a backslash, otherwise the
# next statement is spliced onto this expression and the snippet fails to parse.
contextual_assertion_absent = ContextualAssertion \
    .pretrained("contextual_assertion_absent", "en", "clinical/models") \
    .setInputCols("sentence", "token", "ner_chunk") \
    .setAssertion("absent") \
    .setOutputCol("assertionAbsent")

# Flattens the "assertionAbsent" annotations into plain columns; the selected
# fields below become the columns of the final table.
flattener = Flattener() \
    .setInputCols("assertionAbsent") \
    .setExplodeSelectedFields({"assertionAbsent": ["metadata.ner_chunk as ner_chunk",
                                                   "begin as begin",
                                                   "end as end",
                                                   "metadata.ner_label as ner_label",
                                                   "result"]})
# Assemble the stages in the order their input columns become available.
pipeline = Pipeline(stages=[
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    clinical_ner,
    ner_converter,
    contextual_assertion_absent,
    flattener
])

# Fit on a one-row frame with empty text; the sample text is only passed to transform().
empty_data = spark.createDataFrame([[""]]).toDF("text")
model = pipeline.fit(empty_data)

# Sample note. The last two sentences match the Scala example and contain the
# `pain` and `tylenol` mentions listed in the Results table.
text = """Patient resting in bed. Patient given azithromycin without any difficulty. Patient has audible wheezing, states chest tightness.
No evidence of hypertension. Patient denies nausea at this time. zofran declined. Patient is also having intermittent sweating
associated with pneumonia. Patient refused pain but tylenol still given. Neither substance abuse nor alcohol use however cocaine once used in the last year."""

data = spark.createDataFrame([[text]]).toDF('text')
result = model.transform(data)
result.show(truncate=False)
// Upstream stages. Each comment shows input column(s) -> output column.

// "text" -> "document"
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// "document" -> "sentences"
val sentenceDetector = new SentenceDetector()
  .setInputCols(Array("document"))
  .setOutputCol("sentences")

// "sentences" -> "tokens"
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentences"))
  .setOutputCol("tokens")

// Pretrained "embeddings_clinical": "sentences", "tokens" -> "embeddings"
val embedder = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens"))
  .setOutputCol("embeddings")

// Pretrained "ner_clinical": "sentences", "tokens", "embeddings" -> "nerTags"
val nerTagger = MedicalNerModel
  .pretrained("ner_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens", "embeddings"))
  .setOutputCol("nerTags")

// "sentences", "tokens", "nerTags" -> "nerChunks"
val nerConverter = new NerConverterInternal()
  .setInputCols(Array("sentences", "tokens", "nerTags"))
  .setOutputCol("nerChunks")
// Pretrained contextual assertion stage: reads "sentences", "tokens" and
// "nerChunks", and writes its annotations to "assertionAbsent".
val contextualAssertionAbsent = ContextualAssertion
  .pretrained("contextual_assertion_absent", "en", "clinical/models")
  .setInputCols("sentences", "tokens", "nerChunks")
  .setOutputCol("assertionAbsent")

// Flattens the "assertionAbsent" annotations into plain columns; each entry
// below selects one field (optionally renamed with `as`).
val flattener = new Flattener()
  .setInputCols("assertionAbsent")
  .setExplodeSelectedFields(
    Map(
      "assertionAbsent" -> Array(
        "metadata.ner_chunk as ner_chunk",
        "metadata.entity",
        "begin as begin",
        "end as end",
        "metadata.ner_label as ner_label",
        "result as result"
      )
    )
  )
// One-row Dataset with empty text, passed to fit() below.
val emptyDataSet = Seq("").toDS().toDF("text")

// NOTE: `pipeline` holds the value returned by fit(), i.e. the fitted model,
// not the unfitted Pipeline definition.
val pipeline = new Pipeline()
  .setStages(
    Array(
      documentAssembler,
      sentenceDetector,
      tokenizer,
      embedder,
      nerTagger,
      nerConverter,
      contextualAssertionAbsent,
      flattener
    )
  )
  .fit(emptyDataSet)
// Sample note, built from concatenated fragments. Every sentence boundary
// carries a space (including "tightness. No evidence"), so sentences are not
// fused together in the resulting string.
val text = Seq(
  "Patient resting in bed. Patient given azithromycin without any difficulty." +
    " Patient has audible wheezing, states chest tightness. No evidence of hypertension. Patient denies" +
    " nausea at this time. zofran declined. Patient is also having intermittent sweating associated " +
    "with pneumonia. Patient refused pain but tylenol still given. Neither substance abuse nor alcohol" +
    " use however cocaine once used in the last year."
).toDS.toDF("text")

// Run the fitted pipeline on the sample and print the flattened columns untruncated.
val dataSetResult = pipeline.transform(text)
dataSetResult.show(false)
Results
+--------------+-----+---+---------+-----------+
|ner_chunk |begin|end|ner_label|result |
+--------------+-----+---+---------+-----------+
|any difficulty|59 |72 |PROBLEM |absent |
|hypertension |149 |160|PROBLEM |absent |
|nausea |178 |183|PROBLEM |absent |
|zofran |199 |204|TREATMENT|absent |
|pain |309 |312|PROBLEM |absent |
|tylenol |318 |324|TREATMENT|absent |
+--------------+-----+---+---------+-----------+
Model Information
Model Name: | contextual_assertion_absent |
Compatibility: | Healthcare NLP 5.4.0+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [sentence, token, ner_chunk] |
Output Labels: | [assertionAbsent] |
Language: | en |
Size: | 1.3 KB |
Case sensitive: | false |
Benchmarking
label precision recall f1-score support
absent 1.00 0.97 0.98 2594
accuracy - - 0.97 2594
macro_avg 0.50 0.48 0.49 2594
weighted_avg 1.00 0.97 0.98 2594