Description
This model maps clinical conditions to their corresponding SNOMED (domain: Drugs) codes using sbiobert_base_cased_mli Sentence Bert Embeddings.
How to use
document_assembler = DocumentAssembler()\
.setInputCol("text")\
.setOutputCol("document")
sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")\
.setInputCols(["document"])\
.setOutputCol("sentence")
tokenizer = Tokenizer()\
.setInputCols(["sentence"])\
.setOutputCol("token")
word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
.setInputCols(["sentence", "token"])\
.setOutputCol("embeddings")
clinical_ner = MedicalNerModel.pretrained("ner_posology", "en", "clinical/models")\
.setInputCols(["sentence", "token", "embeddings"])\
.setOutputCol("ner")
ner_converter = NerConverterInternal()\
.setInputCols(["sentence", "token", "ner"])\
.setOutputCol("ner_chunk")\
.setWhiteList(['DRUG'])
chunk2doc = Chunk2Doc()\
.setInputCols("ner_chunk")\
.setOutputCol("ner_chunk_doc")
sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")\
.setInputCols(["ner_chunk_doc"])\
.setOutputCol("sbert_embeddings")\
.setCaseSensitive(False)
snomed_resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_drug", "en", "clinical/models") \
.setInputCols(["sbert_embeddings"]) \
.setOutputCol("snomed_code")
snomed_pipeline = Pipeline(stages = [
document_assembler,
sentence_detector,
tokenizer,
word_embeddings,
clinical_ner,
ner_converter,
chunk2doc,
sbert_embedder,
snomed_resolver
])
sample_text = """John's doctor prescribed aspirin for his heart condition, along with paracetamol for his fever and headache, amoxicillin for his tonsilitis and lansoprazole for his GORD on 2023-12-01."""
df= spark.createDataFrame([[sample_text]]).toDF("text")
result= nlpPipeline.fit(df).transform(df)
documentAssembler = nlp.DocumentAssembler()\
.setInputCol("text")\
.setOutputCol("document")
sentenceDetector = nlp.SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")\
.setInputCols(["document"])\
.setOutputCol("sentence")
tokenizer = nlp.Tokenizer() \
.setInputCols(["sentence"]) \
.setOutputCol("token")
word_embeddings = nlp.WordEmbeddingsModel.pretrained("embeddings_clinical","en", "clinical/models")\
.setInputCols(["sentence", "token"])\
.setOutputCol("embeddings")
clinical_ner = medical.NerModel.pretrained("ner_posology", "en", "clinical/models") \
.setInputCols(["sentence", "token", "embeddings"]) \
.setOutputCol("ner")
ner_converter = medical.NerConverterInternal()\
.setInputCols(["sentence", "token", "ner"]) \
.setOutputCol("ner_chunk")\
.setWhiteList(['DRUG'])
chunk2doc = nlp.Chunk2Doc() \
.setInputCols("ner_chunk") \
.setOutputCol("ner_chunk_doc")
sbert_embedder = nlp.BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli","en","clinical/models")\
.setInputCols(["ner_chunk_doc"])\
.setOutputCol("sbert_embeddings")\
.setCaseSensitive(False)
snomed_resolver = medical.SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_drug", "en", "clinical/models") \
.setInputCols(["sbert_embeddings"]) \
.setOutputCol("snomed_code")\
.setDistanceFunction("EUCLIDEAN")
nlpPipeline= nlp.Pipeline(stages = [
documentAssembler,
sentenceDetector,
tokenizer,
word_embeddings,
clinical_ner,
ner_converter,
chunk2doc,
sbert_embedder,
snomed_resolver
])
sample_text = """John's doctor prescribed aspirin for his heart condition, along with paracetamol for his fever and headache, amoxicillin for his tonsilitis and lansoprazole for his GORD on 2023-12-01."""
df= spark.createDataFrame([[sample_text]]).toDF("text")
result= nlpPipeline.fit(df).transform(df)
val document_assembler = DocumentAssembler()
.setInputCol("text")
.setOutputCol("document")
val sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
.setInputCols(Array("document"))
.setOutputCol("sentence")
val tokenizer = Tokenizer()
.setInputCols(Array("sentence"))
.setOutputCol("token")
val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
.setInputCols(Array("sentence", "token"))
.setOutputCol("embeddings")
val clinical_ner = MedicalNerModel.pretrained("ner_posology", "en", "clinical/models")
.setInputCols(Array("sentence", "token", "embeddings"))
.setOutputCol("ner")
val ner_converter = NerConverterInternal()
.setInputCols(Array("sentence", "token", "ner"))
.setOutputCol("ner_chunk")
.setWhiteList(Array("DRUG"))
val c2doc = Chunk2Doc()
.setInputCols("ner_chunk")
.setOutputCol("ner_chunk_doc")
val sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
.setInputCols(Array("ner_chunk_doc"))
.setOutputCol("sentence_embeddings")
val snomed_resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_drug", "en", "clinical/models")
.setInputCols(Array("sentence_embeddings"))
.setOutputCol("snomed_code")
.setDistanceFunction("EUCLIDEAN")
resolver_pipeline = new Pipeline().setStages(
document_assembler,
sentenceDetectorDL,
tokenizer,
word_embeddings,
clinical_ner,
ner_converter,
c2doc,
sbert_embedder,
snomed_resolver)
val sample_text = """John's doctor prescribed aspirin for his heart condition, along with paracetamol for his fever and headache, amoxicillin for his tonsilitis and lansoprazole for his GORD on 2023-12-01."""
val df= Seq(sample_text).toDF("text")
val result= nlpPipeline.fit(df).transform(df)
Results
| sent_id | ner_chunk | entity | snomed_code | resolutions | all_codes | all_resolutions |
|---------|--------------|--------|-------------|--------------|----------------------------------------------------|----------------------------------------------------|
| 0 | aspirin | DRUG | 7947003 | aspirin | ['7947003', '358427004', '426365001', '41256600... | ['aspirin', 'oral aspirin', 'aspirin, buffered'... |
| 0 | paracetamol | DRUG | 387517004 | paracetamol | ['387517004', '90332006', '1285524005', '437876... | ['paracetamol', 'paracetamol product', 'propace... |
| 0 | amoxicillin | DRUG | 27658006 | amoxicillin | ['27658006', '350162003', '372687004', '4274830... | ['amoxicillin', 'oral amoxicillin', 'amoxycilli... |
| 0 | lansoprazole | DRUG | 108666007 | lansoprazole | ['108666007', '437961004', '441863009', '716069... | ['lansoprazole', 'oral form lansoprazole', 'dex... |
Model Information
| Model Name: | sbiobertresolve_snomed_drug |
| Compatibility: | Healthcare NLP 6.3.0+ |
| License: | Licensed |
| Edition: | Official |
| Input Labels: | [sentence_embeddings] |
| Output Labels: | [snomed_code] |
| Language: | en |
| Size: | 325.7 MB |
| Case sensitive: | false |