Mapping Entities with Corresponding RxNorm Codes According to the National Institutes of Health (NIH) Database

Description

This pretrained model maps entities to their corresponding RxNorm codes according to the National Institutes of Health (NIH) database. It returns RxNorm codes with their NIH RxNorm Term Types in parentheses.

Predicted Entities

rxnorm_code

Open in Colab Copy S3 URI

How to use

# Turn the raw "text" column into a "document" annotation column.
document_assembler = (
    DocumentAssembler()
    .setInputCol('text')
    .setOutputCol('document')
)

# Produce "sentence" annotations from each document.
sentence_detector = (
    SentenceDetector()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Produce "token" annotations from each sentence.
tokenizer = (
    Tokenizer()
    .setInputCols("sentence")
    .setOutputCol("token")
)

# Pretrained clinical word embeddings consumed by the NER model below.
word_embeddings = (
    WordEmbeddingsModel
    .pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Pretrained posology NER model; its tags are written to "posology_ner".
posology_ner_model = (
    MedicalNerModel
    .pretrained("ner_posology_greedy", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("posology_ner")
)

# Convert the NER tags into "ner_chunk" annotations.
posology_ner_converter = (
    NerConverterInternal()
    .setInputCols("sentence", "token", "posology_ner")
    .setOutputCol("ner_chunk")
)

# Map every chunk to its RxNorm code using the pretrained NIH mapper.
chunkerMapper = (
    ChunkMapperModel
    .pretrained("rxnorm_nih_mapper", "en", "clinical/models")
    .setInputCols(["ner_chunk"])
    .setOutputCol("mappings")
    .setRels(["rxnorm_code"])
)

# Stages run in this order; each one consumes columns produced by earlier ones.
pipeline_stages = [
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    posology_ner_model,
    posology_ner_converter,
    chunkerMapper,
]
mapper_pipeline = Pipeline().setStages(pipeline_stages)

# Single-row DataFrame with one "text" column holding the example sentence.
test_data = spark.createDataFrame(
    [["The patient was given Adapin 10 MG Oral Capsule, acetohexamide and Parlodel"]]
).toDF("text")

# Fit the pipeline, then annotate the same data with the fitted model.
mapper_model = mapper_pipeline.fit(test_data)
result = mapper_model.transform(test_data)
// Scala usage example. Scala has no backslash line continuation: a line that
// starts with `.` continues the previous expression on its own.

// Turn the raw "text" column into a "document" annotation column.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Produce "sentence" annotations from each document.
val sentence_detector = new SentenceDetector()
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

// Produce "token" annotations from each sentence.
val tokenizer = new Tokenizer()
  .setInputCols("sentence")
  .setOutputCol("token")

// Pretrained clinical word embeddings consumed by the NER model below.
val word_embeddings = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")

// Pretrained posology NER model; its tags are written to "posology_ner".
val posology_ner_model = MedicalNerModel
  .pretrained("ner_posology_greedy", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("posology_ner")

// Convert the NER tags into "ner_chunk" annotations.
val posology_ner_converter = new NerConverterInternal()
  .setInputCols("sentence", "token", "posology_ner")
  .setOutputCol("ner_chunk")

// Map every chunk to its RxNorm code using the pretrained NIH mapper.
val chunkerMapper = ChunkMapperModel
  .pretrained("rxnorm_nih_mapper", "en", "clinical/models")
  .setInputCols(Array("ner_chunk"))
  .setOutputCol("mappings")
  .setRels(Array("rxnorm_code"))

// Stages run in this order; each one consumes columns produced by earlier ones.
val mapper_pipeline = new Pipeline().setStages(Array(
  document_assembler,
  sentence_detector,
  tokenizer,
  word_embeddings,
  posology_ner_model,
  posology_ner_converter,
  chunkerMapper))

// NOTE: `.toDS` needs `import spark.implicits._` in scope
// (spark-shell and most notebooks import it automatically).
val data = Seq("The patient was given Adapin 10 MG Oral Capsule, acetohexamide and Parlodel").toDS.toDF("text")

// Fit and apply the pipeline defined above (the name is `mapper_pipeline`, not `pipeline`).
val result = mapper_pipeline.fit(data).transform(data)

Results

+-------------------------+-------------+-----------+
|ner_chunk                |mappings     |relation   |
+-------------------------+-------------+-----------+
|Adapin 10 MG Oral Capsule|1911002 (SY) |rxnorm_code|
|acetohexamide            |12250421 (IN)|rxnorm_code|
|Parlodel                 |829 (BN)     |rxnorm_code|
+-------------------------+-------------+-----------+

Model Information

Model Name: rxnorm_nih_mapper
Compatibility: Healthcare NLP 4.3.0+
License: Licensed
Edition: Official
Input Labels: [chunk]
Output Labels: [mappings]
Language: en
Size: 10.3 MB

References

Trained in February 2023 on NIH data: https://www.nlm.nih.gov/research/umls/rxnorm/docs/rxnormfiles.html