Detect Family Assertion Status with Contextual Assertion

Description

This model identifies contextual cues within text data to detect family assertions. It annotates text chunks with assertions using configurable rules, prefix and suffix patterns, and exception patterns.

Predicted Entities

family

Copy S3 URI

How to use

# Python example: build a pipeline that tags NER chunks with the "family" assertion.

# Wrap the raw "text" column into a "document" annotation column.
document_assembler = DocumentAssembler() \
    .setInputCol("text") \
    .setOutputCol("document")

# Split each document into sentences.
sentence_detector = SentenceDetector() \
    .setInputCols(["document"]) \
    .setOutputCol("sentence")

# Split each sentence into tokens.
tokenizer = Tokenizer() \
    .setInputCols(["sentence"]) \
    .setOutputCol("token")

# Pretrained "embeddings_clinical" word embeddings, consumed by the NER model below.
word_embeddings = WordEmbeddingsModel \
    .pretrained("embeddings_clinical", "en", "clinical/models") \
    .setInputCols(["sentence", "token"]) \
    .setOutputCol("embeddings")

# Pretrained "ner_clinical" tagger; writes token-level tags to "ner".
clinical_ner = MedicalNerModel \
    .pretrained("ner_clinical", "en", "clinical/models") \
    .setInputCols(["sentence", "token", "embeddings"]) \
    .setOutputCol("ner")

# Convert the token-level tags in "ner" into chunks in "ner_chunk".
ner_converter = NerConverter() \
    .setInputCols(["sentence", "token", "ner"]) \
    .setOutputCol("ner_chunk")

# The model documented on this page: annotates each NER chunk and writes the
# result to "assertion_family". No trailing backslash after the last call --
# a dangling line continuation only parses while the next line stays blank.
contextual_assertion_family = ContextualAssertion.pretrained("contextual_assertion_family", "en", "clinical/models") \
    .setInputCols("sentence", "token", "ner_chunk") \
    .setOutputCol("assertion_family")

# Stage order matters: every stage reads columns produced by earlier stages.
pipeline = Pipeline(
    stages=[
        document_assembler,
        sentence_detector,
        tokenizer,
        word_embeddings,
        clinical_ner,
        ner_converter,
        contextual_assertion_family
    ])

# All stages are pretrained or rule-based, so fitting on an empty frame is
# enough to obtain a PipelineModel.
empty_data = spark.createDataFrame([[""]]).toDF("text")

model = pipeline.fit(empty_data)
text = """Schizophrenia has affected multiple generations in his family. The family has a high prevalence of asthma and allergies."""

data = spark.createDataFrame([[text]]).toDF('text')

# Run the pipeline on the sample text and print the assertion annotations.
result = model.transform(data)
result.select("assertion_family").show(truncate=False)
// Scala example: build a pipeline that tags NER chunks with the "family" assertion.
// NOTE(review): `Seq(...).toDS()` presumably relies on `import spark.implicits._`
// being in scope -- confirm against the surrounding setup.

// Wrap the raw "text" column into a "document" annotation column.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Split each document into sentences.
val sentenceDetector = new SentenceDetector()
  .setInputCols(Array("document"))
  .setOutputCol("sentences")

// Split each sentence into tokens.
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentences"))
  .setOutputCol("tokens")

// Pretrained "embeddings_clinical" word embeddings, consumed by the NER model below.
val embedder = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens"))
  .setOutputCol("embeddings")

// Pretrained "ner_clinical" tagger; writes token-level tags to "nerTags".
val nerTagger = MedicalNerModel
  .pretrained("ner_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens", "embeddings"))
  .setOutputCol("nerTags")

// Convert the token-level tags in "nerTags" into chunks in "nerChunks".
val nerConverter = new NerConverterInternal()
  .setInputCols(Array("sentences", "tokens", "nerTags"))
  .setOutputCol("nerChunks")

// The model documented on this page: annotates each NER chunk and writes the
// result to "assertion_family".
val contextualAssertion = ContextualAssertion.pretrained("contextual_assertion_family", "en", "clinical/models")
  .setInputCols("sentences", "tokens", "nerChunks")
  .setOutputCol("assertion_family")

val emptyDataSet = Seq("").toDS().toDF("text")

// Stage order matters: every stage reads columns produced by earlier stages.
// The previously listed `flattener` stage was never defined in this snippet
// (and is absent from the Python pipeline), so it is not included here.
// `pipeline` holds the fitted model returned by `fit`.
val pipeline = new Pipeline()
  .setStages(
    Array(
      documentAssembler,
      sentenceDetector,
      tokenizer,
      embedder,
      nerTagger,
      nerConverter,
      contextualAssertion
    )
  ).fit(emptyDataSet)

// Note: this sample sentence differs from the one used in the Python example,
// so its output will not match the Results table, whose offsets correspond to
// the Python text.
val text = Seq("Diabetes runs in her family, affecting both her parents and grandparents.").toDS().toDF("text")

// Run the fitted pipeline on the sample text and print every output column.
val dataSetResult = pipeline.transform(text)
dataSetResult.show(truncate=false)

Results

+-------------+-----+---+---------+------+
|ner_chunk    |begin|end|ner_label|result|
+-------------+-----+---+---------+------+
|Schizophrenia|0    |12 |PROBLEM  |family|
|asthma       |99   |104|PROBLEM  |family|
+-------------+-----+---+---------+------+

Model Information

Model Name: contextual_assertion_family
Compatibility: Healthcare NLP 5.5.0+
License: Licensed
Edition: Official
Input Labels: [sentence, token, ner_chunk]
Output Labels: [assertion_family]
Language: en
Size: 1.5 KB
Case sensitive: false