Detect Someone Else Assertion Status with Contextual Assertion

Description

This model identifies contextual cues within text data, such as negation, uncertainty, and assertion. It is used for clinical assertion detection. It annotates text chunks with assertions based on configurable rules, prefix and suffix patterns, and exception patterns.

Predicted Entities

someone_else

Copy S3 URI

How to use

# Turn the raw "text" column into a "document" annotation column.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Split each document into sentences.
sentence_detector = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Tokenize every sentence.
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Pretrained clinical word embeddings consumed by the NER stage below.
word_embeddings = (
    WordEmbeddingsModel
    .pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Pretrained clinical NER model producing token-level tags.
clinical_ner = (
    MedicalNerModel
    .pretrained("ner_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner")
)

# Group the NER tags into chunks that the assertion stage will annotate.
ner_converter = (
    NerConverter()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
)

# Pretrained contextual assertion model; it reads sentences, tokens and NER chunks.
contextual_assertion_someoneElse = (
    ContextualAssertion
    .pretrained("contextual_assertion_someone_else" ,"en" ,"clinical/models")
    .setInputCols("sentence", "token", "ner_chunk")
    .setOutputCol("assertionSomeoneElse")
)

# Fields of the assertion annotations to explode into plain columns ("<field> as <alias>").
explode_fields = {
    "assertionSomeoneElse": [
        "result as result",
        "begin as begin ",
        "end as end",
        "metadata.ner_chunk as ner_chunk",
        "metadata.ner_label as ner_label",
    ]
}

flattener = (
    Flattener()
    .setInputCols("assertionSomeoneElse")
    .setExplodeSelectedFields(explode_fields)
)

# Stages are listed in dependency order: each one consumes columns produced earlier.
stages = [
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    clinical_ner,
    ner_converter,
    contextual_assertion_someoneElse,
    flattener,
]
pipeline = Pipeline(stages=stages)

# Fit on a single empty row to obtain a PipelineModel usable with transform().
empty_data = spark.createDataFrame([[""]]).toDF("text")
model = pipeline.fit(empty_data)

# The line break and the indentation inside the literal are part of the input text,
# so they contribute to the begin/end character offsets reported in the results.
text = """Patient has a family history of diabetes. Father diagnosed with heart failure last year. Sister and brother both have asthma.
          Grandfather had cancer in his late 70s. No known family history of substance abuse. Family history of autoimmune diseases is also noted."""

data = spark.createDataFrame([[text]]).toDF('text')

result = model.transform(data)
result.show(truncate=False)


    // Turn the raw "text" column into a "document" annotation column.
    val documentAssembler = new DocumentAssembler()
      .setInputCol("text")
      .setOutputCol("document")

    // Split each document into sentences.
    val sentenceDetector = new SentenceDetector()
      .setInputCols(Array("document"))
      .setOutputCol("sentences")

    // Tokenize every sentence.
    val tokenizer = new Tokenizer()
      .setInputCols(Array("sentences"))
      .setOutputCol("tokens")

    // Pretrained clinical word embeddings consumed by the NER stage below.
    val embedder = WordEmbeddingsModel
      .pretrained("embeddings_clinical", "en", "clinical/models")
      .setInputCols(Array("sentences", "tokens"))
      .setOutputCol("embeddings")

    // Pretrained clinical NER model producing token-level tags.
    val nerTagger = MedicalNerModel
      .pretrained("ner_clinical", "en", "clinical/models")
      .setInputCols(Array("sentences", "tokens", "embeddings"))
      .setOutputCol("nerTags")

    // Group the NER tags into chunks that the assertion stage will annotate.
    val nerConverter = new NerConverterInternal()
      .setInputCols(Array("sentences", "tokens", "nerTags"))
      .setOutputCol("nerChunks")

    // Pretrained contextual assertion model; it reads sentences, tokens and NER chunks.
    val contextualAssertionSomeoneElse = ContextualAssertion.pretrained("contextual_assertion_someone_else" ,"en" ,"clinical/models")
      .setInputCols("sentences", "tokens", "nerChunks")
      .setOutputCol("assertionSomeoneElse")

    // Flatten the assertion annotations into plain columns.
    val flattener = new Flattener()
      .setInputCols("assertionSomeoneElse")

    // Stages are listed in dependency order: each one consumes columns produced earlier.
    val pipeline = new Pipeline()
      .setStages(Array(documentAssembler,
        sentenceDetector,
        tokenizer,
        embedder,
        nerTagger,
        nerConverter,
        contextualAssertionSomeoneElse,
        flattener
      ))

    // Same input as the Python example. The line break and the 10 leading spaces on the
    // second line are part of the text and contribute to the begin/end character offsets.
    val text = """Patient has a family history of diabetes. Father diagnosed with heart failure last year. Sister and brother both have asthma.
          Grandfather had cancer in his late 70s. No known family history of substance abuse. Family history of autoimmune diseases is also noted."""

    // Single-row DataFrame with one "text" column, the column documentAssembler reads.
    // Assumes an active SparkSession named `spark`, as in the Python example.
    val data = spark.createDataFrame(Seq(Tuple1(text))).toDF("text")

    // Fit the pipeline, then annotate the same DataFrame with the fitted model.
    val model = pipeline.fit(data)

    val dataSetResult = model.transform(data)
    dataSetResult.show(false)

Results

+-------------------+-----+---+---------+------------+
|ner_chunk          |begin|end|ner_label|result      |
+-------------------+-----+---+---------+------------+
|diabetes           |32   |39 |PROBLEM  |someone_else|
|heart failure      |64   |76 |PROBLEM  |someone_else|
|asthma             |118  |123|PROBLEM  |someone_else|
|cancer             |152  |157|PROBLEM  |someone_else|
|substance abuse    |203  |217|PROBLEM  |someone_else|
|autoimmune diseases|238  |256|PROBLEM  |someone_else|
+-------------------+-----+---+---------+------------+

Model Information

Model Name: contextual_assertion_someone_else
Compatibility: Healthcare NLP 5.4.0+
License: Licensed
Edition: Official
Input Labels: [sentence, token, ner_chunk]
Output Labels: [assertionSomeoneElse]
Language: en
Size: 1.5 KB
Case sensitive: false

Benchmarking

       label  precision    recall  f1-score   support
someone_else       1.00      0.81      0.89       131
    accuracy         -         -       0.81       131
   macro_avg       0.50      0.40      0.45       131
weighted_avg       1.00      0.81      0.89       131