Detect Conditional Assertion Status with Contextual Assertion

Description

This model identifies contextual cues within text data to detect conditional assertions. It annotates text chunks with assertions using configurable rules, prefix and suffix patterns, and exception patterns.

Predicted Entities

conditional

Copy S3 URI

How to use

# Python example.
# Assumes an active `spark` session and that Pipeline and the annotator classes
# used below are already imported in the calling environment.
# Builder chains are parenthesized rather than joined with trailing backslashes,
# so no line can accidentally continue into the statement that follows it.

# Reads the "text" column and writes "document".
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# "document" -> "sentence"
sentence_detector = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# "sentence" -> "token"
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Pretrained "embeddings_clinical" model; consumes sentences and tokens.
word_embeddings = (
    WordEmbeddingsModel
    .pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Pretrained "ner_clinical" tagger; its output column is "ner".
clinical_ner = (
    MedicalNerModel
    .pretrained("ner_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner")
)

# Converts the "ner" tags into the "ner_chunk" column used by the assertion stage.
ner_converter = (
    NerConverter()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
)

# The model described by this card; writes "assertion_conditional".
contextual_assertion_conditional = (
    ContextualAssertion
    .pretrained("contextual_assertion_conditional", "en", "clinical/models")
    .setInputCols("sentence", "token", "ner_chunk")
    .setOutputCol("assertion_conditional")
)

pipeline = Pipeline(
    stages=[
        document_assembler,
        sentence_detector,
        tokenizer,
        word_embeddings,
        clinical_ner,
        ner_converter,
        contextual_assertion_conditional,
    ]
)

# Every stage here is pretrained or rule-based, so the pipeline is fitted on a
# single empty row just to obtain a model object that can call transform().
empty_data = spark.createDataFrame([[""]]).toDF("text")
model = pipeline.fit(empty_data)

text = """The patient reports intermittent chest pain when engaging in physical activity, particularly on exertion. Symptoms appear to be contingent upon increased stress levels and heavy meals."""

data = spark.createDataFrame([[text]]).toDF("text")

result = model.transform(data)

# One output row per annotation in "assertion_conditional", printed untruncated.
result.selectExpr("explode(assertion_conditional) as result").show(truncate=False)
// Scala example.
// `toDS` on a Seq needs the session's implicits in scope
// (`import spark.implicits._`; spark-shell brings them in automatically).
// Pipeline and the annotator classes are assumed to be imported already.

// Reads the "text" column and writes "document".
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// "document" -> "sentences"
val sentenceDetector = new SentenceDetector()
  .setInputCols(Array("document"))
  .setOutputCol("sentences")

// "sentences" -> "tokens"
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentences"))
  .setOutputCol("tokens")

// Pretrained "embeddings_clinical" model; consumes sentences and tokens.
val embedder = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens"))
  .setOutputCol("embeddings")

// Pretrained "ner_clinical" tagger; its output column is "nerTags".
val nerTagger = MedicalNerModel
  .pretrained("ner_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens", "embeddings"))
  .setOutputCol("nerTags")

// Converts "nerTags" into the "nerChunks" column used by the assertion stage.
val nerConverter = new NerConverterInternal()
  .setInputCols(Array("sentences", "tokens", "nerTags"))
  .setOutputCol("nerChunks")

// The model described by this card; writes "assertion_conditional".
val contextualAssertionConditional = ContextualAssertion
  .pretrained("contextual_assertion_conditional", "en", "clinical/models")
  .setInputCols("sentences", "tokens", "nerChunks")
  .setOutputCol("assertion_conditional")

// Single empty row used only to fit the pipeline below.
val emptyDataSet = Seq("").toDS().toDF("text")

// Note: `pipeline` holds the *fitted* model, because fit() is chained onto the
// Pipeline definition.
val pipeline = new Pipeline()
  .setStages(
    Array(
      documentAssembler,
      sentenceDetector,
      tokenizer,
      embedder,
      nerTagger,
      nerConverter,
      contextualAssertionConditional
    )
  )
  .fit(emptyDataSet)

// One-row DataFrame holding the example sentence in the "text" column.
val text = Seq("The patient reports intermittent chest pain when engaging in physical activity, particularly on exertion. Symptoms appear to be contingent upon increased stress levels and heavy meals.").toDS().toDF("text")

val dataSetResult = pipeline.transform(text)

// Show only the assertion annotations, one per row and untruncated, matching
// the Python example; a plain show(100) cuts every cell to 20 characters.
dataSetResult
  .selectExpr("explode(assertion_conditional) as result")
  .show(100, truncate = false)

Results

+-----------------------+-----+---+-----------+
|ner_chunk              |begin|end|result     |
+-----------------------+-----+---+-----------+
|intermittent chest pain|21   |43 |conditional|
|Symptoms               |107  |114|conditional|
+-----------------------+-----+---+-----------+

Model Information

Model Name: contextual_assertion_conditional
Compatibility: Healthcare NLP 5.5.3+
License: Licensed
Edition: Official
Input Labels: [sentence, token, ner_chunk]
Output Labels: [assertion_conditional]
Language: en
Size: 1.4 KB
Case sensitive: false