Description
This model identifies contextual cues within text data to detect assertions about past events. It annotates text chunks with assertion labels using configurable rules, prefix and suffix patterns, and exception patterns.
Predicted Entities
past
How to use
# Example pipeline: detect "past" assertions on clinical NER chunks.
# Stages: document -> sentence -> token -> clinical embeddings -> NER ->
# chunk conversion -> contextual assertion -> flattened output columns.
document_assembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")
sentence_detector = SentenceDetector() \
    .setInputCols(["document"]) \
    .setOutputCol("sentence")
tokenizer = Tokenizer() \
    .setInputCols(["sentence"]) \
    .setOutputCol("token")
word_embeddings = WordEmbeddingsModel \
    .pretrained("embeddings_clinical", "en", "clinical/models") \
    .setInputCols(["sentence", "token"]) \
    .setOutputCol("embeddings")
clinical_ner = MedicalNerModel \
    .pretrained("ner_jsl", "en", "clinical/models") \
    .setInputCols(["sentence", "token", "embeddings"]) \
    .setOutputCol("ner")
ner_converter = NerConverter() \
    .setInputCols(["sentence", "token", "ner"]) \
    .setOutputCol("ner_chunk")
# The pretrained rule-based model that flags chunks asserted in the past.
contextual_assertion_past = ContextualAssertion.pretrained("contextual_assertion_past", "en", "clinical/models") \
    .setInputCols("sentence", "token", "ner_chunk") \
    .setOutputCol("assertionPast")
# Flatten the assertion annotations into plain DataFrame columns.
flattener = Flattener() \
    .setInputCols("assertionPast") \
    .setExplodeSelectedFields({"assertionPast": ["metadata.ner_chunk as ner_chunk",
                                                 "begin as begin",
                                                 "end as end",
                                                 "metadata.ner_label as ner_label",
                                                 "result as result"]})
pipeline = Pipeline(
    stages=[
        document_assembler,
        sentence_detector,
        tokenizer,
        word_embeddings,
        clinical_ner,
        ner_converter,
        contextual_assertion_past,
        flattener
    ])
# Fit on an empty DataFrame: all stages are pretrained, so fit only wires the pipeline.
empty_data = spark.createDataFrame([[""]]).toDF("text")
model = pipeline.fit(empty_data)
text = """The patient had no history of smoking or alcohol consumption and there was no family history of any types of tumor. """
data = spark.createDataFrame([[text]]).toDF('text')
result = model.transform(data)
result.show(truncate=False)
// Example pipeline: detect "past" assertions on clinical NER chunks.
// Stages: document -> sentence -> token -> clinical embeddings -> NER ->
// chunk conversion -> contextual assertion -> flattened output columns.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")
val sentenceDetector = new SentenceDetector()
  .setInputCols(Array("document"))
  .setOutputCol("sentences")
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentences"))
  .setOutputCol("tokens")
val embedder = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens"))
  .setOutputCol("embeddings")
val nerTagger = MedicalNerModel
  .pretrained("ner_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens", "embeddings"))
  .setOutputCol("nerTags")
val nerConverter = new NerConverterInternal()
  .setInputCols(Array("sentences", "tokens", "nerTags"))
  .setOutputCol("nerChunks")
// Pretrained rule-based model that flags chunks asserted in the past.
// (Renamed from "contextualAssertionAbsent" — the variable previously carried
// a misleading name copied from the "absent" model example.)
val contextualAssertionPast = ContextualAssertion.pretrained("contextual_assertion_past", "en", "clinical/models")
  .setInputCols("sentences", "tokens", "nerChunks")
  .setOutputCol("assertionPast")
// Flatten the assertion annotations into plain DataFrame columns.
val flattener = new Flattener()
  .setInputCols("assertionPast")
  .setExplodeSelectedFields(Map("assertionPast" -> Array("metadata.ner_chunk as ner_chunk",
    "metadata.entity",
    "begin as begin",
    "end as end",
    "metadata.ner_label as ner_label",
    "result as result")
  ))
// Fit on an empty dataset: all stages are pretrained, so fit only wires the pipeline.
val emptyDataSet = Seq("").toDS().toDF("text")
val pipeline = new Pipeline()
  .setStages(
    Array(documentAssembler,
      sentenceDetector,
      tokenizer,
      embedder,
      nerTagger,
      nerConverter,
      contextualAssertionPast,
      flattener
    )).fit(emptyDataSet)
val text = Seq("The patient had no history of smoking or alcohol consumption and there was no family history of any types of tumor.").toDS.toDF("text")
val dataSetResult = pipeline.transform(text)
dataSetResult.show(100)
Results
+---------+-----+---+-----------+------+
|ner_chunk|begin|end|ner_label |result|
+---------+-----+---+-----------+------+
|smoking |30 |36 |Smoking |past |
|alcohol |41 |47 |Alcohol |past |
|tumor |109 |113|Oncological|past |
+---------+-----+---+-----------+------+
Model Information
Model Name: | contextual_assertion_past |
Compatibility: | Healthcare NLP 5.4.0+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [sentence, token, chunk] |
Output Labels: | [assertionPast] |
Language: | en |
Size: | 1.5 KB |
Case sensitive: | false |
Benchmarking
label precision recall f1-score support
Past 1.00 0.92 0.96 92
accuracy - - 0.92 92
macro_avg 0.50 0.46 0.48 92
weighted_avg 1.00 0.92 0.96 92