Description
This model maps clinical entities and concepts (such as drugs and ingredients) to National Drug Codes (NDC) using `sbiobert_base_cased_mli`
Sentence BERT embeddings. It also returns package options and alternative drugs in the `all_k_aux_labels` field of the output metadata.
Predicted Entities
Live Demo Open in Colab Copy S3 URI
How to use
# Wrap the raw "text" column into a "document" annotation.
documentAssembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Split each document into sentences with the default pretrained detector.
sentenceDetector = (
    SentenceDetectorDLModel.pretrained()
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Tokenize every sentence.
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Token-level embeddings consumed by the NER stage below.
word_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Posology NER model; produces the "ner" tags.
posology_ner = (
    MedicalNerModel.pretrained("ner_posology_greedy", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner")
)

# Turn NER tags into chunks, whitelisting only the DRUG label.
ner_converter = (
    NerConverter()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
    .setWhiteList(["DRUG"])
)

# Re-wrap each chunk as a document so it can be fed to the sentence embedder.
c2doc = (
    Chunk2Doc()
    .setInputCols("ner_chunk")
    .setOutputCol("ner_chunk_doc")
)

# Sentence-level BERT embeddings computed over the chunk documents.
sbert_embedder = (
    BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
    .setInputCols(["ner_chunk_doc"])
    .setOutputCol("sentence_embeddings")
)

# NDC resolver: reads the chunks plus their embeddings and writes the "ndc" column.
ndc_resolver = (
    SentenceEntityResolverModel.pretrained("sbiobertresolve_ndc", "en", "clinical/models")
    .setInputCols(["ner_chunk", "sentence_embeddings"])
    .setOutputCol("ndc")
    .setDistanceFunction("EUCLIDEAN")
    .setCaseSensitive(False)
)

# Assemble the stages in the order their input columns become available.
resolver_pipeline = Pipeline(
    stages=[
        documentAssembler,
        sentenceDetector,
        tokenizer,
        word_embeddings,
        posology_ner,
        ner_converter,
        c2doc,
        sbert_embedder,
        ndc_resolver,
    ]
)

# Single-row sample DataFrame with a "text" column, then fit and transform on it.
data = spark.createDataFrame(
    [["""The patient was given aspirin 81 mg and metformin 500 mg"""]]
).toDF("text")
result = resolver_pipeline.fit(data).transform(data)
// Needed for `.toDS` on a local Seq when running outside spark-shell
// (assumes a SparkSession named `spark` is in scope, as in the Python example).
import spark.implicits._

// Wrap the raw "text" column into a "document" annotation.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Split each document into sentences with the default pretrained detector.
val sentenceDetector = SentenceDetectorDLModel.pretrained()
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

// Tokenize every sentence.
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentence"))
  .setOutputCol("token")

// Token-level embeddings consumed by the NER stage below.
val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")

// Posology NER model; produces the "ner" tags.
val clinical_ner = MedicalNerModel.pretrained("ner_posology_greedy", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("ner")

// Turn NER tags into chunks, whitelisting only the DRUG label.
val ner_converter = new NerConverter()
  .setInputCols(Array("sentence", "token", "ner"))
  .setOutputCol("ner_chunk")
  .setWhiteList(Array("DRUG"))

// Re-wrap each chunk as a document so it can be fed to the sentence embedder.
val c2doc = new Chunk2Doc()
  .setInputCols(Array("ner_chunk"))
  .setOutputCol("ner_chunk_doc")

// Sentence-level BERT embeddings computed over the chunk documents.
val sbert_embedder = BertSentenceEmbeddings
  .pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
  .setInputCols(Array("ner_chunk_doc"))
  .setOutputCol("sentence_embeddings")

// NDC resolver: reads the chunks plus their embeddings and writes the "ndc" column.
// Scala's boolean literal is lowercase `false` (the Python-style `False` does not compile).
val ndc_resolver = SentenceEntityResolverModel
  .pretrained("sbiobertresolve_ndc", "en", "clinical/models")
  .setInputCols(Array("ner_chunk", "sentence_embeddings"))
  .setOutputCol("ndc")
  .setDistanceFunction("EUCLIDEAN")
  .setCaseSensitive(false)

// Assemble the stages in the order their input columns become available.
// The NER stage is referenced by the name it is declared with above (`clinical_ner`).
val resolver_pipeline = new Pipeline().setStages(Array(
  documentAssembler,
  sentenceDetector,
  tokenizer,
  word_embeddings,
  clinical_ner,
  ner_converter,
  c2doc,
  sbert_embedder,
  ndc_resolver
))

// Single-row sample DataFrame with a "text" column, then fit and transform on it.
val clinical_note = Seq("The patient was given aspirin 81 mg and metformin 500 mg").toDS.toDF("text")
val results = resolver_pipeline.fit(clinical_note).transform(clinical_note)
# NLU one-liner: load the "en.resolve.ndc" reference and run prediction on the sample sentence.
import nlu
nlu.load("en.resolve.ndc").predict("""The patient was given aspirin 81 mg and metformin 500 mg""")
Results
+----------------+----------+----------------------------------------------------------------------------------------------------+
| ner_chunk| ndc_code| aux_labels|
+----------------+----------+----------------------------------------------------------------------------------------------------+
| aspirin 81 mg|41250-0780|{'packages': "['1 BOTTLE, PLASTIC in 1 PACKAGE (41250-780-01) > 120 TABLET, DELAYED RELEASE in 1...|
|metformin 500 mg|62207-0491|{'packages': "['5000 TABLET in 1 POUCH (62207-491-31)', '25000 TABLET in 1 CARTON (62207-491-35)'...|
+----------------+----------+----------------------------------------------------------------------------------------------------+
Model Information
Model Name: | sbiobertresolve_ndc |
Compatibility: | Healthcare NLP 3.3.2+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [sentence_embeddings] |
Output Labels: | [ndc] |
Language: | en |
Size: | 932.2 MB |
Case sensitive: | false |
References
This model is trained on the U.S. FDA 2022 NDC codes dataset.