Sentence Entity Resolver for RxCUI (sbiobert_base_cased_mli embeddings)

Description

This model maps extracted medical entities to RxCUI codes using chunk embeddings.

Predicted Entities

RxCUI Codes and their normalized definition with sbiobert_base_cased_mli embeddings.

Open in Colab Copy S3 URI

How to use

sbiobertresolve_rxcui resolver model must be used with sbiobert_base_cased_mli as embeddings ner_posology_greedy as NER model. DRUG set in .setWhiteList().

document_assembler = DocumentAssembler()\
		.setInputCol("text")\
		.setOutputCol("document")

sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models") \
		.setInputCols(["document"]) \
		.setOutputCol("sentence")

tokenizer = Tokenizer()\
		.setInputCols(["sentence"])\
		.setOutputCol("token")

word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
		.setInputCols(["sentence", "token"])\
		.setOutputCol("embeddings")

ner_model = MedicalNerModel.pretrained("ner_posology_greedy", "en", "clinical/models") \
		.setInputCols(["sentence", "token", "embeddings"]) \
		.setOutputCol("ner")

ner_converter = NerConverterInternal() \
    .setInputCols(["sentence", "token", "ner"]) \
    .setOutputCol("ner_chunk")\
    .setWhiteList(["DRUG"])\
    .setPreservePosition(False)

chunk2doc = Chunk2Doc()\
    .setInputCols("ner_chunk")\
    .setOutputCol("ner_chunk_doc")

sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli","en","clinical/models")\
    .setInputCols(["ner_chunk_doc"])\
    .setOutputCol("sbert_embeddings")\
    .setCaseSensitive(False)

resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_rxcui","en", "clinical/models") \
    .setInputCols(["sbert_embeddings"]) \
    .setOutputCol("resolution")\
    .setDistanceFunction("EUCLIDEAN")


pipeline = Pipeline(stages=[
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    ner_model,
    ner_converter,
    chunk2doc,
    sbert_embedder,
    resolver])

data = spark.createDataFrame([['''He was seen by the endocrinology service and she was discharged on 50 mg of eltrombopag oral at night, 5 mg amlodipine with meals, and metformin 1000 mg two times a day.''']]).toDF("text")

result = pipeline.fit(data).transform(data)
val document_assembler = new DocumentAssembler()
	.setInputCol("text")
	.setOutputCol("document")
	
val sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")
	.setInputCols(Array("document"))
	.setOutputCol("sentence")
	
val tokenizer = new Tokenizer()
	.setInputCols(Array("sentence"))
	.setOutputCol("token")
	
val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical","en","clinical/models")
	.setInputCols(Array("sentence","token"))
	.setOutputCol("embeddings")
	
val ner_model = MedicalNerModel.pretrained("ner_posology_greedy","en","clinical/models")
	.setInputCols(Array("sentence","token","embeddings"))
	.setOutputCol("ner")
	
val ner_converter = new NerConverterInternal()
	.setInputCols(Array("sentence","token","ner"))
	.setOutputCol("ner_chunk")
	.setWhiteList(Array("DRUG"))
	.setPreservePosition(false)
	
val chunk2doc = new Chunk2Doc()
	.setInputCols("ner_chunk")
	.setOutputCol("ner_chunk_doc")
	
val sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli","en","clinical/models")
	.setInputCols(Array("ner_chunk_doc"))
	.setOutputCol("sbert_embeddings")
	.setCaseSensitive(false)
	
val resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_rxcui","en","clinical/models")
	.setInputCols(Array("sbert_embeddings"))
	.setOutputCol("resolution")
	.setDistanceFunction("EUCLIDEAN")
	
val pipeline = new Pipeline().setStages(Array(
    document_assembler, 
    sentence_detector, 
    tokenizer, 
    word_embeddings, 
    ner_model, 
    ner_converter, 
    chunk2doc, 
    sbert_embedder, 
    resolver))
	
val data = Seq("""He was seen by the endocrinology service and she was discharged on 50 mg of eltrombopag oral at night,5 mg amlodipine with meals,and metformin 1000 mg two times a day.""").toDF("text")
	
val result = pipeline.fit(data).transform(data) 

Results

+-------------------------+-----+---+---------+------+-------------------------+------------------------------------------------------------+
|                    chunk|begin|end|ner_label|  Code|              description|                                                 resolutions|
+-------------------------+-----+---+---------+------+-------------------------+------------------------------------------------------------+
|50 mg of eltrombopag oral|   67| 91|     DRUG|825427|50 mg of eltrombopag oral|eltrombopag 50 MG Oral Tablet:::alpelisib 50 MG Oral Tabl...|
|          5 mg amlodipine|  103|117|     DRUG|197361|          5 mg amlodipine|amlodipine 5 MG Oral Tablet:::levamlodipine 5 MG Oral Tab...|
|        metformin 1000 mg|  135|151|     DRUG|861004|        metformin 1000 mg|metformin hydrochloride 1000 MG Oral Tablet:::cefepime 10...|
+-------------------------+-----+---+---------+------+-------------------------+------------------------------------------------------------+

Model Information

Name: sbiobertresolve_rxcui
Type: SentenceEntityResolverModel
Compatibility: Spark NLP 2.6.5 +
License: Licensed
Edition: Official
Input labels: [ner_chunk, chunk_embeddings]
Output labels: [resolution]
Language: en
Dependencies: sbiobert_base_cased_mli

Data Source

Trained on November 2020 RxNorm Clinical Drugs ontology graph with sbiobert_base_cased_mli embeddings. https://www.nlm.nih.gov/pubs/techbull/nd20/brief/nd20_rx_norm_november_release.html. Sample Content.