Detect Possible Assertion Status with Contextual Assertion

Description

This model identifies contextual cues within text data to detect possible assertions. It annotates text chunks with assertions using configurable rules, prefix and suffix patterns, and exception patterns.

Predicted Entities

Copy S3 URI

How to use

# Python example: run the pretrained "contextual_assertion_possible" model on
# the NER chunks produced by a clinical NER pipeline.
# Assumes an active SparkSession named `spark` and that the Spark NLP /
# Healthcare NLP annotator classes used below are already imported.

# Reads the raw "text" column and writes the "document" column.
document_assembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

# document -> sentence
sentence_detector = SentenceDetector() \
    .setInputCols(["document"]) \
    .setOutputCol("sentence")

# sentence -> token
tokenizer = Tokenizer() \
    .setInputCols(["sentence"]) \
    .setOutputCol("token")

# Pretrained embeddings consumed by the NER model below.
word_embeddings = WordEmbeddingsModel \
    .pretrained("embeddings_clinical", "en", "clinical/models") \
    .setInputCols(["sentence", "token"]) \
    .setOutputCol("embeddings")

# Pretrained NER model; writes per-token tags to "ner".
clinical_ner = MedicalNerModel \
    .pretrained("ner_clinical", "en", "clinical/models") \
    .setInputCols(["sentence", "token", "embeddings"]) \
    .setOutputCol("ner")

# Converts the token-level tags in "ner" into the "ner_chunk" column.
ner_converter = NerConverter() \
    .setInputCols(["sentence", "token", "ner"]) \
    .setOutputCol("ner_chunk")

# The model this page documents. Input columns are passed as a list, like every
# other stage, and the chain ends without a trailing line continuation.
contextual_assertion_possible = ContextualAssertion \
    .pretrained("contextual_assertion_possible", "en", "clinical/models") \
    .setInputCols(["sentence", "token", "ner_chunk"]) \
    .setOutputCol("assertion_possible")

pipeline = Pipeline(
    stages=[
        document_assembler,
        sentence_detector,
        tokenizer,
        word_embeddings,
        clinical_ner,
        ner_converter,
        contextual_assertion_possible,
    ]
)

# Every model stage above is loaded with pretrained(), so the pipeline is
# fitted on an empty single-column DataFrame just to obtain a PipelineModel.
empty_data = spark.createDataFrame([[""]]).toDF("text")

model = pipeline.fit(empty_data)

# The line break inside the literal is part of the input text.
text = """The patient presents with symptoms suggestive of pneumonia, including fever, productive cough, and mild dyspnea.
Chest X-ray findings are compatible with a possible early-stage infection, though bacterial pneumonia cannot be entirely excluded."""

data = spark.createDataFrame([[text]]).toDF('text')

result = model.transform(data)

# One output row per annotation in "assertion_possible", shown untruncated.
result.selectExpr("explode(assertion_possible) as assertion").show(truncate=False)
// Scala example: run the pretrained "contextual_assertion_possible" model on
// the NER chunks produced by a clinical NER pipeline.
// Assumes a SparkSession named `spark` is in scope (as in spark-shell) and
// that the Spark NLP / Healthcare NLP annotator classes are already imported.

// Required for Seq(...).toDS() below.
import spark.implicits._

// Reads the raw "text" column and writes the "document" column.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// document -> sentences
val sentenceDetector = new SentenceDetector()
  .setInputCols(Array("document"))
  .setOutputCol("sentences")

// sentences -> tokens
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentences"))
  .setOutputCol("tokens")

// Pretrained embeddings consumed by the NER model below.
val embedder = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens"))
  .setOutputCol("embeddings")

// Pretrained NER model; writes per-token tags to "nerTags".
val nerTagger = MedicalNerModel
  .pretrained("ner_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens", "embeddings"))
  .setOutputCol("nerTags")

// Converts the token-level tags in "nerTags" into the "nerChunks" column.
val nerConverter = new NerConverterInternal()
  .setInputCols(Array("sentences", "tokens", "nerTags"))
  .setOutputCol("nerChunks")

// The model this page documents. Input columns are passed as an Array, like
// every other stage in this pipeline.
val contextualAssertionPossible = ContextualAssertion
  .pretrained("contextual_assertion_possible", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens", "nerChunks"))
  .setOutputCol("assertion_possible")

// No trailing comma after the last stage, so this also parses on Scala
// versions older than 2.12.2.
val pipeline = new Pipeline()
  .setStages(
    Array(
      documentAssembler,
      sentenceDetector,
      tokenizer,
      embedder,
      nerTagger,
      nerConverter,
      contextualAssertionPossible
    )
  )

// Every model stage above is loaded with pretrained(), so the pipeline is
// fitted on an empty single-column DataFrame just to obtain a PipelineModel.
val emptyDataSet = Seq("").toDS().toDF("text")

val model = pipeline.fit(emptyDataSet)

// The line break inside the literal is part of the input text.
val text = """The patient presents with symptoms suggestive of pneumonia, including fever, productive cough, and mild dyspnea.
Chest X-ray findings are compatible with a possible early-stage infection, though bacterial pneumonia cannot be entirely excluded."""

val data = Seq(text).toDS().toDF("text")

val dataSetResult = model.transform(data)

// One output row per annotation in "assertion_possible", shown untruncated
// (up to 100 rows), matching the Python example.
dataSetResult
  .selectExpr("explode(assertion_possible) as assertion")
  .show(100, truncate = false)

Results

+---------------------+-----+---+--------+
|ner_chunk            |begin|end|result  |
+---------------------+-----+---+--------+
|symptoms             |26   |33 |possible|
|pneumonia            |49   |57 |possible|
|Chest X-ray          |113  |123|possible|
|early-stage infection|165  |185|possible|
+---------------------+-----+---+--------+

Model Information

Model Name: contextual_assertion_possible
Compatibility: Healthcare NLP 5.5.3+
License: Licensed
Edition: Official
Input Labels: [sentence, token, ner_chunk]
Output Labels: [assertion_possible]
Language: en
Size: 1.7 KB
Case sensitive: false