Description
This model maps clinical entities and concepts (like drugs/ingredients) to RxNorm codes without specifying the relations between the entities (relations are calculated on the fly inside the annotator) using sbiobert_base_cased_mli Sentence BERT embeddings (EntityChunkEmbeddings). The embeddings used in this model are calculated with the following weights: {"DRUG": 0.8, "STRENGTH": 0.2, "ROUTE": 0.2, "FORM": 0.2}. EntityChunkEmbeddings with those weights are required in the pipeline to get the best results.
Predicted Entities
RxNorm Codes, Concept Classes
How to use
# Stage 1: wrap the raw "text" column as document annotations.
documenter = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("documents")
)

# Stage 2: produce "sentences" from "documents".
sentence_detector = (
    SentenceDetector()
    .setInputCols("documents")
    .setOutputCol("sentences")
)

# Stage 3: produce "tokens" from "sentences".
tokenizer = (
    Tokenizer()
    .setInputCols("sentences")
    .setOutputCol("tokens")
)

# Stage 4: pretrained clinical word embeddings, consumed by the NER model below.
embeddings = (
    WordEmbeddingsModel()
    .pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentences", "tokens"])
    .setOutputCol("embeddings")
)

# Stage 5: pretrained posology NER model.
posology_ner_model = (
    MedicalNerModel()
    .pretrained("ner_posology_large", "en", "clinical/models")
    .setInputCols(["sentences", "tokens", "embeddings"])
    .setOutputCol("ner")
)

# Stage 6: convert the NER output into "ner_chunks".
ner_converter = (
    NerConverterInternal()
    .setInputCols(["sentences", "tokens", "ner"])
    .setOutputCol("ner_chunks")
)

# Stage 7: part-of-speech tags, required as input by the dependency parser.
pos_tager = (
    PerceptronModel()
    .pretrained("pos_clinical", "en", "clinical/models")
    .setInputCols(["sentences", "tokens"])
    .setOutputCol("pos_tags")
)

# Stage 8: dependency parse, required as input by EntityChunkEmbeddings.
dependency_parser = (
    DependencyParserModel()
    .pretrained("dependency_conllu", "en")
    .setInputCols(["sentences", "pos_tags", "tokens"])
    .setOutputCol("dependencies")
)

# Stage 9: weighted chunk embeddings. The entity weights are the ones the
# resolver model was built with (see the Description section), so keep them
# unchanged.
drug_chunk_embeddings = (
    EntityChunkEmbeddings()
    .pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
    .setInputCols(["ner_chunks", "dependencies"])
    .setOutputCol("drug_chunk_embeddings")
    .setMaxSyntacticDistance(3)
    .setTargetEntities({"DRUG": ["STRENGTH", "ROUTE", "FORM"]})
    .setEntityWeights({"DRUG": 0.8, "STRENGTH": 0.2, "ROUTE": 0.2, "FORM": 0.2})
)

# Stage 10: resolve the chunk embeddings to RxNorm codes.
rxnorm_resolver = (
    SentenceEntityResolverModel
    .pretrained("sbiobertresolve_rxnorm_augmented_re", "en", "clinical/models")
    .setInputCols(["drug_chunk_embeddings"])
    .setOutputCol("rxnorm_code")
    .setDistanceFunction("EUCLIDEAN")
)

# Assemble the stages in dependency order.
rxnorm_weighted_pipeline_re = Pipeline(
    stages=[
        documenter,
        sentence_detector,
        tokenizer,
        embeddings,
        posology_ner_model,
        ner_converter,
        pos_tager,
        dependency_parser,
        drug_chunk_embeddings,
        rxnorm_resolver,
    ]
)
# Two sample clinical sentences; each one becomes a row of the input DataFrame.
sampleText = ["The patient was given metformin 500 mg, 2.5 mg of coumadin and then ibuprofen.",
              "The patient was given metformin 400 mg, coumadin 5 mg, coumadin, amlodipine 10 MG"]

# Build a single-column DataFrame named "text", the column the DocumentAssembler
# reads. (The original passed an undefined `sample_df` and never used sampleText.)
data_df = spark.createDataFrame([[text] for text in sampleText]).toDF("text")

# Fit the pipeline and annotate the sample sentences.
results = rxnorm_weighted_pipeline_re.fit(data_df).transform(data_df)
// Stage 1: wrap the raw "text" column as document annotations.
val documenter = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("documents")

// Stage 2: produce "sentences" from "documents".
val sentence_detector = new SentenceDetector()
  .setInputCols("documents")
  .setOutputCol("sentences")

// Stage 3: produce "tokens" from "sentences".
val tokenizer = new Tokenizer()
  .setInputCols("sentences")
  .setOutputCol("tokens")

// Stage 4: pretrained clinical word embeddings, consumed by the NER model below.
// In Scala, `pretrained` is called on the companion object, not on an instance.
val embeddings = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens"))
  .setOutputCol("embeddings")

// Stage 5: pretrained posology NER model.
val posology_ner_model = MedicalNerModel
  .pretrained("ner_posology_large", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens", "embeddings"))
  .setOutputCol("ner")

// Stage 6: convert the NER output into "ner_chunks".
val ner_converter = new NerConverterInternal()
  .setInputCols(Array("sentences", "tokens", "ner"))
  .setOutputCol("ner_chunks")

// Stage 7: part-of-speech tags, required as input by the dependency parser.
val pos_tager = PerceptronModel
  .pretrained("pos_clinical", "en", "clinical/models")
  .setInputCols(Array("sentences", "tokens"))
  .setOutputCol("pos_tags")

// Stage 8: dependency parse, required as input by EntityChunkEmbeddings.
val dependency_parser = DependencyParserModel
  .pretrained("dependency_conllu", "en")
  .setInputCols(Array("sentences", "pos_tags", "tokens"))
  .setOutputCol("dependencies")

// Stage 9: weighted chunk embeddings. The entity weights are the ones the
// resolver model was built with (see the Description section), so keep them
// unchanged. Scala Map/List literals replace the Python dict/list syntax that
// the original snippet used here (which does not compile as Scala).
val drug_chunk_embeddings = EntityChunkEmbeddings
  .pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
  .setInputCols(Array("ner_chunks", "dependencies"))
  .setOutputCol("drug_chunk_embeddings")
  .setMaxSyntacticDistance(3)
  .setTargetEntities(Map("DRUG" -> List("STRENGTH", "ROUTE", "FORM")))
  .setEntityWeights(Map[String, Float]("DRUG" -> 0.8f, "STRENGTH" -> 0.2f, "ROUTE" -> 0.2f, "FORM" -> 0.2f))

// Stage 10: resolve the chunk embeddings to RxNorm codes.
val rxnorm_resolver = SentenceEntityResolverModel
  .pretrained("sbiobertresolve_rxnorm_augmented_re", "en", "clinical/models")
  .setInputCols(Array("drug_chunk_embeddings"))
  .setOutputCol("rxnorm_code")
  .setDistanceFunction("EUCLIDEAN")
// Assemble the (unfitted) pipeline in dependency order. `Pipeline` is the
// estimator that exposes `fit`; the final stage is `rxnorm_resolver`.
val rxnorm_weighted_pipeline_re = new Pipeline().setStages(Array(
  documenter, sentence_detector, tokenizer, embeddings, posology_ner_model,
  ner_converter, pos_tager, dependency_parser, drug_chunk_embeddings, rxnorm_resolver))

// Two sample clinical sentences; each one becomes a row of the input DataFrame.
val sampleText = Array(
  "The patient was given metformin 500 mg, 2.5 mg of coumadin and then ibuprofen.",
  "The patient was given metformin 400 mg, coumadin 5 mg, coumadin, amlodipine 10 MG")

// `toDF` on a local collection needs the session implicits; `spark` is the
// active SparkSession. The column is named "text" because that is what the
// DocumentAssembler reads.
import spark.implicits._
val data_df = sampleText.toSeq.toDF("text")

// Fit once and reuse the fitted model for both the DataFrame transform and the
// LightPipeline wrapper (LightPipeline takes a fitted PipelineModel).
val rxnorm_weighted_model_re = rxnorm_weighted_pipeline_re.fit(data_df)
val light_model = new LightPipeline(rxnorm_weighted_model_re)
val results = rxnorm_weighted_model_re.transform(data_df)
import nlu

# Load the resolver by its NLU reference and run it on one sample sentence.
nlu.load("en.resolve.rxnorm.augmented_re").predict(
    """The patient was given metformin 400 mg, coumadin 5 mg, coumadin, amlodipine 10 MG"""
)
Results
+-----+----------------+--------------------------+--------------------------------------------------+
|index| chunk|rxnorm_code_weighted_08_re| Concept_Name|
+-----+----------------+--------------------------+--------------------------------------------------+
| 0|metformin 500 mg| 860974|metformin hydrochloride 500 MG:::metformin 500 ...|
| 0| 2.5 mg coumadin| 855313|warfarin sodium 2.5 MG [Coumadin]:::warfarin so...|
| 0| ibuprofen| 1747293|ibuprofen Injection:::ibuprofen Pill:::ibuprofe...|
| 1|metformin 400 mg| 332809|metformin 400 MG:::metformin 250 MG Oral Tablet...|
| 1| coumadin 5 mg| 855333|warfarin sodium 5 MG [Coumadin]:::warfarin sodi...|
| 1| coumadin| 202421|Coumadin:::warfarin sodium 2 MG/ML Injectable S...|
| 1|amlodipine 10 MG| 308135|amlodipine 10 MG Oral Tablet:::amlodipine 10 MG...|
+-----+----------------+--------------------------+--------------------------------------------------+
Model Information
Model Name: | sbiobertresolve_rxnorm_augmented_re |
Compatibility: | Healthcare NLP 3.4.0+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [sentence_embeddings] |
Output Labels: | [rxnorm_code] |
Language: | en |
Size: | 759.7 MB |
Case sensitive: | false |