Detect Assertion Status (assertion_bert_classification_jsl)

Description

Assign assertion status to clinical entities.

Predicted Entities

Present, Planned, SomeoneElse, Past, Family, Absent, Hypothetical, Possible

Copy S3 URI

How to use

# Wrap the raw "text" column into document annotations.
document_assembler = DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Split each document into sentences.
sentence_detector = SentenceDetector()\
    .setInputCols("document")\
    .setOutputCol("sentence")

# Tokenize the sentences (not the whole document) so that the tokens line up
# with the "sentence" column every downstream stage consumes.
tokenizer = Tokenizer()\
    .setInputCols(["sentence"])\
    .setOutputCol("token")

# Clinical word embeddings consumed by the NER model.
embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token"])\
    .setOutputCol("embeddings")\
    .setCaseSensitive(False)

# Token-level clinical NER tags.
ner = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token", "embeddings"])\
    .setOutputCol("ner")

# Merge NER tags into chunks, keeping only PROBLEM entities.
ner_converter = NerConverterInternal()\
    .setInputCols(["sentence", "token", "ner"])\
    .setOutputCol("ner_chunk")\
    .setWhiteList(["PROBLEM"])

# Assign an assertion status to every kept chunk.
assertion_classifier = BertForAssertionClassification.pretrained("assertion_bert_classification_jsl", "en", "clinical/models")\
    .setInputCols(["sentence", "ner_chunk"])\
    .setOutputCol("assertion_class")

pipeline = Pipeline(stages=[
    document_assembler,
    sentence_detector,
    tokenizer,
    embeddings,
    ner,
    ner_converter,
    assertion_classifier
])

text = """Patient with severe fever and sore throat.
He shows no stomach pain and he maintained on an epidural and PCA for pain control.
He also became short of breath with climbing a flight of stairs.
After CT, lung tumor located at the right lower lobe. Father with Alzheimer.
"""

data = spark.createDataFrame([[text]]).toDF("text")
result = pipeline.fit(data).transform(data)

# show results: one row per chunk with its span, NER label and assertion status
result.selectExpr("explode(assertion_class) as result")\
      .selectExpr("result.metadata['ner_chunk'] as ner_chunk",
                  "result.begin as begin",
                  "result.end as end",
                  "result.metadata['ner_label'] as ner_label",
                  "result.result as assertion").show(truncate=False)

# Test classifier in Spark NLP pipeline
# Wrap the raw "text" column into document annotations.
document_assembler = nlp.DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Split each document into sentences.
sentence_detector = nlp.SentenceDetector()\
    .setInputCols("document")\
    .setOutputCol("sentence")

# Tokenize sentence by sentence.
tokenizer = nlp.Tokenizer()\
    .setInputCols(["sentence"])\
    .setOutputCol("token")

# Clinical word embeddings consumed by the NER model.
embeddings = nlp.WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token"])\
    .setOutputCol("embeddings")\
    .setCaseSensitive(False)

# Token-level clinical NER tags.
ner = medical.NerModel.pretrained("ner_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token", "embeddings"])\
    .setOutputCol("ner")

# Merge NER tags into chunks, keeping only PROBLEM entities.
ner_converter = medical.NerConverterInternal()\
    .setInputCols(["sentence", "token", "ner"])\
    .setOutputCol("ner_chunk")\
    .setWhiteList(["PROBLEM"])

# Assign an assertion status to every kept chunk.
assertion_classifier = medical.BertForAssertionClassification.pretrained("assertion_bert_classification_jsl", "en", "clinical/models")\
    .setInputCols(["sentence", "ner_chunk"])\
    .setOutputCol("assertion_class")

pipeline = nlp.Pipeline(stages=[
    document_assembler,
    sentence_detector,
    tokenizer,
    embeddings,
    ner,
    ner_converter,
    assertion_classifier
])

text = """Patient with severe fever and sore throat.
He shows no stomach pain and he maintained on an epidural and PCA for pain control.
He also became short of breath with climbing a flight of stairs.
After CT, lung tumor located at the right lower lobe. Father with Alzheimer.
"""

data = spark.createDataFrame([[text]]).toDF("text")
result = pipeline.fit(data).transform(data)

# show results: one row per chunk with its span, NER label and assertion status
result.selectExpr("explode(assertion_class) as result")\
      .selectExpr("result.metadata['ner_chunk'] as ner_chunk",
                  "result.begin as begin",
                  "result.end as end",
                  "result.metadata['ner_label'] as ner_label",
                  "result.result as assertion").show(truncate=False)

// Wrap the raw "text" column into document annotations.
val document_assembler = new DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")

// Split each document into sentences.
val sentence_detector = new SentenceDetector()
    .setInputCols("document")
    .setOutputCol("sentence")

// Tokenize the "sentence" column produced by the sentence detector.
val tokenizer = new Tokenizer()
    .setInputCols("sentence")
    .setOutputCol("token")

// Clinical word embeddings consumed by the NER model.
val embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(Array("sentence", "token"))
    .setOutputCol("embeddings")
    .setCaseSensitive(false)

// Token-level clinical NER tags.
val ner = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")
    .setInputCols(Array("sentence", "token", "embeddings"))
    .setOutputCol("ner")

// Merge NER tags into chunks, keeping only PROBLEM entities.
val ner_converter = new NerConverterInternal()
    .setInputCols(Array("sentence", "token", "ner"))
    .setOutputCol("ner_chunk")
    .setWhiteList(Array("PROBLEM"))

// Assign an assertion status to every kept chunk, using the same
// sentence-level context that the NER stages operate on.
val assertion_classifier = BertForAssertionClassification.pretrained("assertion_bert_classification_jsl", "en", "clinical/models")
    .setInputCols(Array("sentence", "ner_chunk"))
    .setOutputCol("assertion_class")

val pipeline = new Pipeline().setStages(
    Array(
        document_assembler,
        sentence_detector,
        tokenizer,
        embeddings,
        ner,
        ner_converter,
        assertion_classifier
))

val text = """Patient with severe fever and sore throat.
He shows no stomach pain and he maintained on an epidural and PCA for pain control.
He also became short of breath with climbing a flight of stairs.
After CT, lung tumor located at the right lower lobe. Father with Alzheimer.
"""

// Build a single-row DataFrame with a plain string "text" column.
// NOTE: toDF on a local Seq needs `import spark.implicits._` in scope
// (spark-shell imports it automatically).
val data = Seq(text).toDF("text")
val result = pipeline.fit(data).transform(data)

Results

|    | ner_chunk       |   begin |   end | ner_label   | assertion    |
|---:|:----------------|--------:|------:|:------------|:-------------|
|  0 | severe fever    |      13 |    24 | PROBLEM     | Present      |
|  1 | sore throat     |      30 |    40 | PROBLEM     | Present      |
|  2 | stomach pain    |      55 |    66 | PROBLEM     | Absent       |
|  3 | pain control    |     113 |   124 | PROBLEM     | Hypothetical |
|  4 | short of breath |     142 |   156 | PROBLEM     | Present      |
|  5 | lung tumor      |     202 |   211 | PROBLEM     | Present      |
|  6 | Alzheimer       |     258 |   266 | PROBLEM     | SomeoneElse  |

Model Information

Model Name: assertion_bert_classification_jsl
Compatibility: Healthcare NLP 5.5.3+
License: Licensed
Edition: Official
Input Labels: [document, token]
Output Labels: [assertion_class]
Language: en
Size: 406.3 MB
Case sensitive: true

Benchmarking

       label  precision    recall  f1-score   support
      Absent      0.953     0.975     0.964      1436
      Family      0.930     0.925     0.927       615
Hypothetical      0.898     0.900     0.899       841
        Past      0.936     0.920     0.928      1490
     Planned      0.870     0.840     0.855       326
    Possible      0.887     0.887     0.887       593
     Present      0.949     0.960     0.955      2171
 SomeoneElse      0.891     0.837     0.863       313
    accuracy       -          -       0.930      7785
   macro-avg      0.914     0.906     0.910      7785
weighted-avg      0.930     0.930     0.930      7785