Description
This model maps drug entities to RxNorm codes using `bge_base_en_v1_5_onnx` embeddings. It leverages these contextual embeddings to improve the accuracy of code resolution for drug concepts.
Predicted Entities
RxNorm Codes
How to use
# End-to-end example: extract DRUG chunks from raw text and resolve them to
# RxNorm codes with BGE embeddings and the `bgeresolve_rxnorm` resolver.

# Raw "text" column -> "document" annotations.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# "document" -> "sentence" using a pretrained sentence detector.
sentence_detector = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# "sentence" -> "token".
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Word embeddings consumed by the NER model below.
word_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("word_embeddings")
)

# Posology NER tagger; its output is turned into chunks by the converter.
ner_posology = (
    MedicalNerModel.pretrained("ner_posology_greedy", "en", "clinical/models")
    .setInputCols(["sentence", "token", "word_embeddings"])
    .setOutputCol("ner")
)

# NER tags -> chunks, with the whitelist restricted to the DRUG label.
ner_posology_converter = (
    NerConverterInternal()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
    .setWhiteList(["DRUG"])
)

# Chunks -> documents, the input column expected by the BGE embeddings stage.
chunk2doc = (
    Chunk2Doc()
    .setInputCols("ner_chunk")
    .setOutputCol("ner_chunk_doc")
)

# BGE embeddings computed over each chunk document.
bge_embeddings = (
    BGEEmbeddings.pretrained("bge_base_en_v1_5_onnx", "en")
    .setInputCols(["ner_chunk_doc"])
    .setOutputCol("bge_embeddings")
)

# Resolver from BGE embeddings to RxNorm codes, using EUCLIDEAN distance.
rxnorm_resolver = (
    SentenceEntityResolverModel.pretrained("bgeresolve_rxnorm", "en", "clinical/models")
    .setInputCols(["bge_embeddings"])
    .setOutputCol("rxnorm_code")
    .setDistanceFunction("EUCLIDEAN")
)

# Stage order matters: each stage reads columns produced by earlier stages.
stages = [
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    ner_posology,
    ner_posology_converter,
    chunk2doc,
    bge_embeddings,
    rxnorm_resolver,
]
pipeline = Pipeline().setStages(stages)

text = """The patient was prescribed aspirin and an Albuterol inhaler for respiratory issues. She also takes Apixaban 5 mg, Metformin 1000 mg for diabetes, and Lisinopril 10 mg for blood pressure."""

# Single-row DataFrame with a "text" column, matching the assembler's input.
data = spark.createDataFrame([[text]]).toDF("text")

fitted_pipeline = pipeline.fit(data)
result = fitted_pipeline.transform(data)
# Same example written against the `nlp` / `medical` namespaces: extract DRUG
# chunks from raw text and resolve them to RxNorm codes with BGE embeddings.

# Raw "text" column -> "document" annotations.
document_assembler = (
    nlp.DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# "document" -> "sentence" using a pretrained sentence detector.
sentence_detector = (
    nlp.SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# "sentence" -> "token".
tokenizer = (
    nlp.Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Word embeddings consumed by the NER model below.
word_embeddings = (
    nlp.WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("word_embeddings")
)

# Posology NER tagger; its output is turned into chunks by the converter.
ner_posology = (
    medical.NerModel.pretrained("ner_posology_greedy", "en", "clinical/models")
    .setInputCols(["sentence", "token", "word_embeddings"])
    .setOutputCol("ner")
)

# NER tags -> chunks, with the whitelist restricted to the DRUG label.
ner_posology_converter = (
    medical.NerConverter()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
    .setWhiteList(["DRUG"])
)

# Chunks -> documents, the input column expected by the BGE embeddings stage.
chunk2doc = (
    nlp.Chunk2Doc()
    .setInputCols("ner_chunk")
    .setOutputCol("ner_chunk_doc")
)

# BGE embeddings computed over each chunk document.
bge_embeddings = (
    nlp.BGEEmbeddings.pretrained("bge_base_en_v1_5_onnx", "en")
    .setInputCols(["ner_chunk_doc"])
    .setOutputCol("bge_embeddings")
)

# Resolver from BGE embeddings to RxNorm codes, using EUCLIDEAN distance.
rxnorm_resolver = (
    medical.SentenceEntityResolverModel.pretrained("bgeresolve_rxnorm", "en", "clinical/models")
    .setInputCols(["bge_embeddings"])
    .setOutputCol("rxnorm_code")
    .setDistanceFunction("EUCLIDEAN")
)

# Stage order matters: each stage reads columns produced by earlier stages.
stages = [
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    ner_posology,
    ner_posology_converter,
    chunk2doc,
    bge_embeddings,
    rxnorm_resolver,
]
pipeline = nlp.Pipeline().setStages(stages)

text = """The patient was prescribed aspirin and an Albuterol inhaler for respiratory issues. She also takes Apixaban 5 mg, Metformin 1000 mg for diabetes, and Lisinopril 10 mg for blood pressure."""

# Single-row DataFrame with a "text" column, matching the assembler's input.
data = spark.createDataFrame([[text]]).toDF("text")

fitted_pipeline = pipeline.fit(data)
result = fitted_pipeline.transform(data)
// End-to-end example: extract DRUG chunks from raw text and resolve them to
// RxNorm codes with BGE embeddings and the `bgeresolve_rxnorm` resolver.

// `Seq(...).toDF` below relies on the session's implicit conversions.
// Assumes a SparkSession named `spark` is in scope (as in spark-shell, where
// this import is already applied and repeating it is harmless).
import spark.implicits._

// Raw "text" column -> "document" annotations.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// "document" -> "sentence" using a pretrained sentence detector.
val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

// "sentence" -> "token".
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentence"))
  .setOutputCol("token")

// Word embeddings consumed by the NER model below.
val wordEmbeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("word_embeddings")

// Posology NER tagger; its output is turned into chunks by the converter.
val nerPosology = MedicalNerModel.pretrained("ner_posology_greedy", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "word_embeddings"))
  .setOutputCol("ner")

// NER tags -> chunks, with the whitelist restricted to the DRUG label.
val nerPosologyConverter = new NerConverterInternal()
  .setInputCols(Array("sentence", "token", "ner"))
  .setOutputCol("ner_chunk")
  .setWhiteList(Array("DRUG"))

// Chunks -> documents, the input column expected by the BGE embeddings stage.
// Uses the Array form of setInputCols for consistency with the other stages.
val chunk2doc = new Chunk2Doc()
  .setInputCols(Array("ner_chunk"))
  .setOutputCol("ner_chunk_doc")

// BGE embeddings computed over each chunk document.
val bgeEmbeddings = BGEEmbeddings.pretrained("bge_base_en_v1_5_onnx", "en")
  .setInputCols(Array("ner_chunk_doc"))
  .setOutputCol("bge_embeddings")

// Resolver from BGE embeddings to RxNorm codes, using EUCLIDEAN distance.
val rxnormResolver = SentenceEntityResolverModel.pretrained("bgeresolve_rxnorm", "en", "clinical/models")
  .setInputCols(Array("bge_embeddings"))
  .setOutputCol("rxnorm_code")
  .setDistanceFunction("EUCLIDEAN")

// Stage order matters: each stage reads columns produced by earlier stages.
val pipeline = new Pipeline().setStages(Array(
  documentAssembler,
  sentenceDetector,
  tokenizer,
  wordEmbeddings,
  nerPosology,
  nerPosologyConverter,
  chunk2doc,
  bgeEmbeddings,
  rxnormResolver
))

// Single-row DataFrame with a "text" column, matching the assembler's input.
val data = Seq("""The patient was prescribed aspirin and an Albuterol inhaler for respiratory issues. She also takes Apixaban 5 mg, Metformin 1000 mg for diabetes, and Lisinopril 10 mg for blood pressure.""").toDF("text")

val result = pipeline.fit(data).transform(data)
Results
|sent_id|ner_chunk |entity|rxnorm_code|resolutions |all_codes |all_resolutions |
|-------|-----------------|------|-----------|-------------------------|-----------------------------------------------|-----------------------------------------------|
|0 |aspirin |DRUG |1191 |aspirin |[1191, 1295740, 1154070, 1001473, 218266, ...] |[aspirin, aspirin product, aspirin pill, ecpi...]|
|0 |Albuterol inhaler|DRUG |1154602 |albuterol inhalant product|[1154602, 745678, 435, 1154606, 1649559, ...] |[albuterol inhalant product, albuterol metered...]|
|1 |Apixaban 5 mg |DRUG |1364444 |apixaban 5 mg |[1364444, 1364431, 1364446, 1364445, 1364447, ...]|[apixaban 5 mg, apixaban 2.5 mg, apixaban 5 mg...]|
|1 |Metformin 1000 mg|DRUG |316255 |metformin 1000 mg |[316255, 860995, 330861, 860997, 429841, ...] |[metformin 1000 mg, metformin hydrochloride 10...]|
|1 |Lisinopril 10 mg |DRUG |316151 |lisinopril 10 mg |[316151, 314076, 563611, 565846, 567576, ...] |[lisinopril 10 mg, lisinopril 10mg 10 mg, lisi...]|
Model Information
| Model Name: | bgeresolve_rxnorm |
| Compatibility: | Healthcare NLP 6.2.0+ |
| License: | Licensed |
| Edition: | Official |
| Input Labels: | [bge_embeddings] |
| Output Labels: | [rxnorm_code] |
| Language: | en |
| Size: | 1.1 GB |
| Case sensitive: | false |