Description
Assign assertion status to clinical entities.
Predicted Entities
Present, Past, Family, Absent, Hypothetical, Possible
How to use
# Assertion-status pipeline: raw text -> sentences -> tokens -> embeddings
# -> NER chunks -> assertion label per chunk.

# Reads the raw "text" column and writes the "document" column.
document_assembler = DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Splits "document" into the "sentence" column.
sentence_detector = SentenceDetector()\
    .setInputCols("document")\
    .setOutputCol("sentence")

# Tokenize per sentence (not per document) so that "token" lines up with the
# "sentence" column that the embeddings, NER and converter stages pair it with.
tokenizer = Tokenizer()\
    .setInputCols(["sentence"])\
    .setOutputCol("token")

# Pretrained "embeddings_clinical" word embeddings over sentence/token pairs.
embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token"])\
    .setOutputCol("embeddings")\
    .setCaseSensitive(False)

# Pretrained "ner_clinical" tagger; writes token-level tags to "ner".
ner = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")\
    .setInputCols(["sentence", "token", "embeddings"])\
    .setOutputCol("ner")

# Turns the tags into "ner_chunk" entries, keeping only PROBLEM entities.
ner_converter = NerConverterInternal()\
    .setInputCols(["sentence", "token", "ner"])\
    .setOutputCol("ner_chunk")\
    .setWhiteList(["PROBLEM"])

# Assigns an assertion label to each chunk, using its sentence as context.
assertion_classifier = BertForAssertionClassification.pretrained("assertion_bert_classification_oncology_onnx", "en", "clinical/models")\
    .setInputCols(["sentence", "ner_chunk"])\
    .setOutputCol("assertion_class")\
    .setCaseSensitive(False)

# Stage order matters: each stage consumes columns produced by earlier ones.
pipeline = Pipeline(stages=[
    document_assembler,
    sentence_detector,
    tokenizer,
    embeddings,
    ner,
    ner_converter,
    assertion_classifier
])

# Generating example
text = """
GENERAL: He is an elderly gentleman in no acute distress. He is sitting up in bed eating his breakfast. He is alert and oriented and answering questions appropriately.
HEENT: Sclerae showed mild arcus senilis in the right. Left was clear. Pupils are equally round and reactive to light. Extraocular movements are intact. Oropharynx is clear.
NECK: Supple. Trachea is midline. No jugular venous pressure distention is noted. No adenopathy in the cervical, supraclavicular, or axillary areas.
ABDOMEN: Soft and not tender. There may be some fullness in the left upper quadrant, although I do not appreciate a true spleen with inspiration.
EXTREMITIES: There is some edema, but no cyanosis and clubbing ."""

# Single-row DataFrame with one "text" column, matching the assembler's input.
data = spark.createDataFrame([[text]]).toDF("text")
result = pipeline.fit(data).transform(data)

# Checking results
result.select("text", "assertion_class.result").show(truncate=False)
# The assertion pipeline again, with every annotator reached through the
# `nlp` / `medical` prefixes instead of bare class names.

# "text" -> "document"
document_assembler = (
    nlp.DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# "document" -> "sentence"
sentence_detector = (
    nlp.SentenceDetector()
    .setInputCols("document")
    .setOutputCol("sentence")
)

# "sentence" -> "token"
tokenizer = (
    nlp.Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# ("sentence", "token") -> "embeddings", using the pretrained clinical vectors
embeddings = (
    nlp.WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
    .setCaseSensitive(False)
)

# ("sentence", "token", "embeddings") -> "ner"
ner = (
    medical.NerModel.pretrained("ner_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner")
)

# ("sentence", "token", "ner") -> "ner_chunk", restricted to PROBLEM entities
ner_converter = (
    medical.NerConverterInternal()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
    .setWhiteList(["PROBLEM"])
)

# ("sentence", "ner_chunk") -> "assertion_class"
assertion_classifier = (
    medical.BertForAssertionClassification.pretrained("assertion_bert_classification_oncology_onnx", "en", "clinical/models")
    .setInputCols(["sentence", "ner_chunk"])
    .setOutputCol("assertion_class")
    .setCaseSensitive(False)
)

# Stages are listed in the order their input columns become available.
pipeline = nlp.Pipeline(
    stages=[
        document_assembler,
        sentence_detector,
        tokenizer,
        embeddings,
        ner,
        ner_converter,
        assertion_classifier,
    ]
)

# Sample clinical note used as the only input row.
text = """
GENERAL: He is an elderly gentleman in no acute distress. He is sitting up in bed eating his breakfast. He is alert and oriented and answering questions appropriately.
HEENT: Sclerae showed mild arcus senilis in the right. Left was clear. Pupils are equally round and reactive to light. Extraocular movements are intact. Oropharynx is clear.
NECK: Supple. Trachea is midline. No jugular venous pressure distention is noted. No adenopathy in the cervical, supraclavicular, or axillary areas.
ABDOMEN: Soft and not tender. There may be some fullness in the left upper quadrant, although I do not appreciate a true spleen with inspiration.
EXTREMITIES: There is some edema, but no cyanosis and clubbing ."""

# Fit on, then transform, the same one-row DataFrame.
data = spark.createDataFrame([[text]]).toDF("text")
result = pipeline.fit(data).transform(data)
// Assertion-status pipeline: raw text -> sentences -> tokens -> embeddings
// -> NER chunks -> assertion label per chunk.

// Reads the raw "text" column and writes the "document" column.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Splits "document" into the "sentence" column.
val sentence_detector = new SentenceDetector()
  .setInputCols("document")
  .setOutputCol("sentence")

// Must read "sentence" -- the exact column name the sentence detector writes.
val tokenizer = new Tokenizer()
  .setInputCols("sentence")
  .setOutputCol("token")

// Pretrained "embeddings_clinical" word embeddings over sentence/token pairs.
val embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")
  .setCaseSensitive(false)

// Pretrained "ner_clinical" tagger; writes token-level tags to "ner".
val ner = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("ner")

// Turns the tags into "ner_chunk" entries, keeping only PROBLEM entities.
// Instantiated with `new`, like the other non-pretrained annotators here.
val ner_converter = new NerConverterInternal()
  .setInputCols(Array("sentence", "token", "ner"))
  .setOutputCol("ner_chunk")
  .setWhiteList(Array("PROBLEM"))

// Assigns an assertion label to each chunk; fed the "sentence" column so it
// matches the Python examples for this model.
val assertion_classifier = BertForAssertionClassification.pretrained("assertion_bert_classification_oncology_onnx", "en", "clinical/models")
  .setInputCols(Array("sentence", "ner_chunk"))
  .setOutputCol("assertion_class")
  .setCaseSensitive(false)

// Stage order matters: each stage consumes columns produced by earlier ones.
val pipeline = new Pipeline().setStages(
  Array(
    document_assembler,
    sentence_detector,
    tokenizer,
    embeddings,
    ner,
    ner_converter,
    assertion_classifier
  ))

val text = """
GENERAL: He is an elderly gentleman in no acute distress. He is sitting up in bed eating his breakfast. He is alert and oriented and answering questions appropriately.
HEENT: Sclerae showed mild arcus senilis in the right. Left was clear. Pupils are equally round and reactive to light. Extraocular movements are intact. Oropharynx is clear.
NECK: Supple. Trachea is midline. No jugular venous pressure distention is noted. No adenopathy in the cervical, supraclavicular, or axillary areas.
ABDOMEN: Soft and not tender. There may be some fullness in the left upper quadrant, although I do not appreciate a true spleen with inspiration.
EXTREMITIES: There is some edema, but no cyanosis and clubbing ."""

// NOTE: `toDF` on a Seq needs `spark.implicits._` in scope (spark-shell
// brings it in automatically; standalone code must import it first).
val data = Seq(text).toDF("text")
val result = pipeline.fit(data).transform(data)
Results
| | chunks | begin | end | entities | assertion | confidence |
|---:|:---------------------------------------------------------------|--------:|------:|:-----------|:------------|-------------:|
| 0 | acute distress | 43 | 56 | PROBLEM | Absent | 0.992191 |
| 1 | mild arcus senilis in the right | 191 | 221 | PROBLEM | Present | 0.99537 |
| 2 | jugular venous pressure distention | 380 | 413 | PROBLEM | Absent | 0.997313 |
| 3 | adenopathy in the cervical, supraclavicular, or axillary areas | 428 | 489 | PROBLEM | Absent | 0.996413 |
| 4 | tender | 514 | 519 | PROBLEM | Absent | 0.995015 |
| 5 | some fullness in the left upper quadrant | 535 | 574 | PROBLEM | Possible | 0.524748 |
| 6 | some edema | 660 | 669 | PROBLEM | Present | 0.987595 |
| 7 | cyanosis | 679 | 686 | PROBLEM | Absent | 0.996593 |
| 8 | clubbing | 692 | 699 | PROBLEM | Absent | 0.996629 |
Model Information
Model Name: | assertion_bert_classification_oncology_onnx |
Compatibility: | Healthcare NLP 6.0.2+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [document, ner_chunk] |
Output Labels: | [assertion_onnx] |
Language: | en |
Size: | 405.6 MB |
Case sensitive: | true |