Description
Assign assertion status to clinical entities.
Predicted Entities
`Present`, `Planned`, `SomeoneElse`, `Past`, `Family`, `Absent`, `Hypothetical`, `Possible`
How to use
# Assertion-status pipeline: raw text -> sentences -> tokens -> clinical
# embeddings -> NER -> PROBLEM chunks -> assertion label per chunk.

# Wrap the raw "text" column into a document annotation.
document_assembler = DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Split each document into sentences.
sentence_detector = SentenceDetector()\
    .setInputCols("document")\
    .setOutputCol("sentence")

# Tokenize per sentence so tokens line up with the sentence-level
# embeddings, NER and assertion stages that follow.
tokenizer = Tokenizer()\
    .setInputCols(["sentence"])\
    .setOutputCol("token")

embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token"])\
    .setOutputCol("embeddings")\
    .setCaseSensitive(False)

ner = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token", "embeddings"])\
    .setOutputCol("ner")

# Merge token-level NER tags into chunks and keep only PROBLEM entities.
ner_converter = NerConverterInternal()\
    .setInputCols(["sentence", "token", "ner"])\
    .setOutputCol("ner_chunk")\
    .setWhiteList(["PROBLEM"])

assertion_classifier = BertForAssertionClassification.pretrained("assertion_bert_classification_jsl", "en", "clinical/models")\
    .setInputCols(["sentence", "ner_chunk"])\
    .setOutputCol("assertion_class")

pipeline = Pipeline(stages=[
    document_assembler,
    sentence_detector,
    tokenizer,
    embeddings,
    ner,
    ner_converter,
    assertion_classifier
])

text = """Patient with severe fever and sore throat.
He shows no stomach pain and he maintained on an epidural and PCA for pain control.
He also became short of breath with climbing a flight of stairs.
After CT, lung tumor located at the right lower lobe. Father with Alzheimer.
"""

data = spark.createDataFrame([[text]]).toDF("text")
result = pipeline.fit(data).transform(data)

# show results: one row per chunk with its character span, NER label and
# predicted assertion status.
result.selectExpr("explode(assertion_class) as result")\
    .selectExpr("result.metadata['ner_chunk'] as ner_chunk",
                "result.begin as begin",
                "result.end as end",
                "result.metadata['ner_label'] as ner_label",
                "result.result as assertion").show(truncate=False)
# Test classifier in Spark NLP pipeline
# Same assertion-status pipeline, written against the `nlp` / `medical`
# namespaces: raw text -> sentences -> tokens -> clinical embeddings ->
# NER -> PROBLEM chunks -> assertion label per chunk.

# Wrap the raw "text" column into a document annotation.
document_assembler = nlp.DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Split each document into sentences.
sentence_detector = nlp.SentenceDetector()\
    .setInputCols("document")\
    .setOutputCol("sentence")

tokenizer = nlp.Tokenizer()\
    .setInputCols(["sentence"])\
    .setOutputCol("token")

embeddings = nlp.WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token"])\
    .setOutputCol("embeddings")\
    .setCaseSensitive(False)

ner = medical.NerModel.pretrained("ner_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token", "embeddings"])\
    .setOutputCol("ner")

# Merge token-level NER tags into chunks and keep only PROBLEM entities.
ner_converter = medical.NerConverterInternal()\
    .setInputCols(["sentence", "token", "ner"])\
    .setOutputCol("ner_chunk")\
    .setWhiteList(["PROBLEM"])

assertion_classifier = medical.BertForAssertionClassification.pretrained("assertion_bert_classification_jsl", "en", "clinical/models")\
    .setInputCols(["sentence", "ner_chunk"])\
    .setOutputCol("assertion_class")

pipeline = nlp.Pipeline(stages=[
    document_assembler,
    sentence_detector,
    tokenizer,
    embeddings,
    ner,
    ner_converter,
    assertion_classifier
])

text = """Patient with severe fever and sore throat.
He shows no stomach pain and he maintained on an epidural and PCA for pain control.
He also became short of breath with climbing a flight of stairs.
After CT, lung tumor located at the right lower lobe. Father with Alzheimer.
"""

data = spark.createDataFrame([[text]]).toDF("text")
result = pipeline.fit(data).transform(data)

# show results: one row per chunk with its character span, NER label and
# predicted assertion status.
result.selectExpr("explode(assertion_class) as result")\
    .selectExpr("result.metadata['ner_chunk'] as ner_chunk",
                "result.begin as begin",
                "result.end as end",
                "result.metadata['ner_label'] as ner_label",
                "result.result as assertion").show(truncate=False)
// Assertion-status pipeline: raw text -> sentences -> tokens -> clinical
// embeddings -> NER -> PROBLEM chunks -> assertion label per chunk.

// Wrap the raw "text" column into a document annotation.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Split each document into sentences.
val sentence_detector = new SentenceDetector()
  .setInputCols("document")
  .setOutputCol("sentence")

// Tokenize the "sentence" column produced by the sentence detector.
val tokenizer = new Tokenizer()
  .setInputCols("sentence")
  .setOutputCol("token")

val embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")
  .setCaseSensitive(false)

val ner = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("ner")

// Merge token-level NER tags into chunks and keep only PROBLEM entities.
val ner_converter = new NerConverterInternal()
  .setInputCols(Array("sentence", "token", "ner"))
  .setOutputCol("ner_chunk")
  .setWhiteList(Array("PROBLEM"))

// Classify each chunk in the context of its sentence, matching the Python examples.
val assertion_classifier = BertForAssertionClassification.pretrained("assertion_bert_classification_jsl", "en", "clinical/models")
  .setInputCols(Array("sentence", "ner_chunk"))
  .setOutputCol("assertion_class")

val pipeline = new Pipeline().setStages(
  Array(
    document_assembler,
    sentence_detector,
    tokenizer,
    embeddings,
    ner,
    ner_converter,
    assertion_classifier
  ))

val text = """Patient with severe fever and sore throat.
He shows no stomach pain and he maintained on an epidural and PCA for pain control.
He also became short of breath with climbing a flight of stairs.
After CT, lung tumor located at the right lower lobe. Father with Alzheimer.
"""

// One row with a plain string "text" column, which is what DocumentAssembler reads.
// NOTE(review): toDF on a Seq needs `spark.implicits._` in scope (it is by
// default in spark-shell) — confirm for other environments.
val data = Seq(text).toDF("text")

val result = pipeline.fit(data).transform(data)
Results
| | ner_chunk | begin | end | ner_label | assertion |
|---:|:----------------|--------:|------:|:------------|:-------------|
| 0 | severe fever | 13 | 24 | PROBLEM | Present |
| 1 | sore throat | 30 | 40 | PROBLEM | Present |
| 2 | stomach pain | 55 | 66 | PROBLEM | Absent |
| 3 | pain control | 113 | 124 | PROBLEM | Hypothetical |
| 4 | short of breath | 142 | 156 | PROBLEM | Present |
| 5 | lung tumor | 202 | 211 | PROBLEM | Present |
| 6 | Alzheimer | 258 | 266 | PROBLEM | SomeoneElse |
Model Information
Model Name: | assertion_bert_classification_jsl |
Compatibility: | Healthcare NLP 5.5.3+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [document, token] |
Output Labels: | [assertion_class] |
Language: | en |
Size: | 406.3 MB |
Case sensitive: | true |
Benchmarking
label precision recall f1-score support
Absent 0.953 0.975 0.964 1436
Family 0.930 0.925 0.927 615
Hypothetical 0.898 0.900 0.899 841
Past 0.936 0.920 0.928 1490
Planned 0.870 0.840 0.855 326
Possible 0.887 0.887 0.887 593
Present 0.949 0.960 0.955 2171
SomeoneElse 0.891 0.837 0.863 313
accuracy - - 0.930 7785
macro-avg 0.914 0.906 0.910 7785
weighted-avg 0.930 0.930 0.930 7785