Normalizing Section Headers in Clinical Notes

Description

This pretrained pipeline normalizes the section headers in clinical notes. It returns two levels of normalization called level_1 and level_2.

Important Note: Mappers extract additional information, such as extended descriptions and categories, related to concept codes (RxNorm, ICD10, CPT, MESH, NDC, UMLS, etc.). They generally take concept codes, which are the outputs of EntityResolvers, as input, so a pipeline that contains a ‘Mapper’ usually needs to place the ChunkMapperModel after an EntityResolverModel. In the example below, however, the mapper takes the NER chunks (section headers) directly as its input.

Predicted Entities

Live Demo Open in Colab Copy S3 URI

How to use

# Turn the raw "text" column into "document" annotations.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Split each document into sentences.
sentence_detector = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Tokenize each sentence.
tokenizer = (
    Tokenizer()
    .setInputCols("sentence")
    .setOutputCol("token")
)

# Pretrained clinical word embeddings consumed by the NER model below.
embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("word_embeddings")
)

# Pretrained NER model; its tags are converted to chunks in the next stage.
clinical_ner = (
    MedicalNerModel.pretrained("ner_jsl_slim", "en", "clinical/models")
    .setInputCols(["sentence", "token", "word_embeddings"])
    .setOutputCol("ner")
)

# Build chunks from the NER tags, keeping only the "Header" entity.
ner_converter = (
    NerConverter()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
    .setWhiteList(["Header"])
)

# Map each header chunk to its normalized form; the relation can be
# "level_1" or "level_2".
chunkerMapper = (
    ChunkMapperModel.pretrained("normalized_section_header_mapper", "en", "clinical/models")
    .setInputCols("ner_chunk")
    .setOutputCol("mappings")
    .setRel("level_1")
)

# Stages run in the order listed.
pipeline = Pipeline().setStages(
    [
        document_assembler,
        sentence_detector,
        tokenizer,
        embeddings,
        clinical_ner,
        ner_converter,
        chunkerMapper,
    ]
)

# Sample note: each line begins with a section header.
sentences = """ADMISSION DIAGNOSIS Right pleural effusion and suspected malignant mesothelioma.
PRINCIPAL DIAGNOSIS Right pleural effusion, suspected malignant mesothelioma.
GENERAL REVIEW Right pleural effusion, firm nodules, diffuse scattered throughout the right pleura and diaphragmatic surface."""

# One-row DataFrame with a single "text" column.
test_data = spark.createDataFrame([[sentences]]).toDF("text")

# Fit the pipeline, then apply the fitted model to the same data.
fitted_pipeline = pipeline.fit(test_data)
result = fitted_pipeline.transform(test_data)
// Turn the raw "text" column into "document" annotations.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Split each document into sentences.
val sentence_detector = new SentenceDetector()
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

// Tokenize each sentence.
val tokenizer = new Tokenizer()
  .setInputCols("sentence")
  .setOutputCol("token")

// Pretrained clinical word embeddings consumed by the NER model below.
val embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("word_embeddings")

// Pretrained NER model; its tags are converted to chunks in the next stage.
val clinical_ner = MedicalNerModel.pretrained("ner_jsl_slim", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "word_embeddings"))
  .setOutputCol("ner")

// Build chunks from the NER tags, keeping only the "Header" entity.
val ner_converter = new NerConverter()
  .setInputCols(Array("sentence", "token", "ner"))
  .setOutputCol("ner_chunk")
  .setWhiteList(Array("Header"))

// Map each header chunk to its normalized form.
val chunkerMapper = ChunkMapperModel.pretrained("normalized_section_header_mapper", "en", "clinical/models")
  .setInputCols("ner_chunk")
  .setOutputCol("mappings")
  .setRel("level_1") // or "level_2"

// Stages run in the order listed.
val pipeline = new Pipeline().setStages(Array(
  document_assembler,
  sentence_detector,
  tokenizer,
  embeddings,
  clinical_ner,
  ner_converter,
  chunkerMapper))

// Sample note: each line begins with a section header.
val test_sentence = """ADMISSION DIAGNOSIS Right pleural effusion and suspected malignant mesothelioma.
PRINCIPAL DIAGNOSIS Right pleural effusion, suspected malignant mesothelioma.
GENERAL REVIEW Right pleural effusion, firm nodules, diffuse scattered throughout the right pleura and diaphragmatic surface."""

// NOTE(review): `.toDS` on a Seq needs the SparkSession implicits in scope
// (e.g. `import spark.implicits._`; spark-shell imports them automatically) - confirm for your environment.
val test_data = Seq(test_sentence).toDS.toDF("text")

// Fit the pipeline and apply the fitted model to the same data.
val result = pipeline.fit(test_data).transform(test_data)
import nlu

# Load the NLU pipeline by its reference name, then run it on the sample note.
section_header_pipe = nlu.load("en.map_entity.section_headers_normalized")
section_header_pipe.predict("""ADMISSION DIAGNOSIS Right pleural effusion and suspected malignant mesothelioma.
PRINCIPAL DIAGNOSIS Right pleural effusion, suspected malignant mesothelioma.
GENERAL REVIEW Right pleural effusion, firm nodules, diffuse scattered throughout the right pleura and diaphragmatic surface.""")

Results

+-------------------+------------------+
|section            |normalized_section|
+-------------------+------------------+
|ADMISSION DIAGNOSIS|DIAGNOSIS         |
|PRINCIPAL DIAGNOSIS|DIAGNOSIS         |
|GENERAL REVIEW     |REVIEW TYPE       |
+-------------------+------------------+

Model Information

| | |
|---|---|
|Model Name:|normalized_section_header_mapper|
|Compatibility:|Healthcare NLP 3.4.2+|
|License:|Licensed|
|Edition:|Official|
|Input Labels:|[ner_chunk]|
|Output Labels:|[mappings]|
|Language:|en|
|Size:|14.2 KB|