Description
This model maps clinical abbreviations and acronyms to their meanings using sbiobert_base_cased_mli
Sentence Bert Embeddings. It is an improved version of the base model and includes a wider variety of training data.
Predicted Entities
Abbreviation Meanings
How to use
# Spark NLP pipeline that detects clinical abbreviations with an NER model and
# resolves every detected abbreviation chunk to a meaning.

# Wrap the raw "text" column into a "document" annotation.
document_assembler = DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Split each document into tokens.
tokenizer = Tokenizer()\
    .setInputCols(["document"])\
    .setOutputCol("token")

# Pretrained clinical word embeddings consumed by the NER stage.
word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
    .setInputCols(["document", "token"])\
    .setOutputCol("word_embeddings")

# Pretrained NER model that tags abbreviations.
clinical_ner = MedicalNerModel.pretrained("ner_abbreviation_clinical", "en", "clinical/models")\
    .setInputCols(["document", "token", "word_embeddings"])\
    .setOutputCol("ner")

# Turn NER tags into chunks, whitelisting only the 'ABBR' label.
ner_converter = NerConverterInternal()\
    .setInputCols(["document", "token", "ner"])\
    .setOutputCol("ner_chunk")\
    .setWhiteList(['ABBR'])

# Embed each abbreviation chunk together with its document context; the
# resolver below reads these embeddings.
sentence_chunk_embeddings = BertSentenceChunkEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")\
    .setInputCols(["document", "ner_chunk"])\
    .setOutputCol("sentence_embeddings")\
    .setChunkWeight(0.5)\
    .setCaseSensitive(True)

# Resolve each embedded chunk to an abbreviation meaning.
# NOTE: the last call in this chain must NOT end with a backslash, otherwise
# the next statement is joined onto it and the script fails to parse.
abbr_resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_clinical_abbreviation_acronym", "en", "clinical/models")\
    .setInputCols(["sentence_embeddings"])\
    .setOutputCol("abbr_meaning")\
    .setDistanceFunction("EUCLIDEAN")

resolver_pipeline = Pipeline(
    stages=[
        document_assembler,
        tokenizer,
        word_embeddings,
        clinical_ner,
        ner_converter,
        sentence_chunk_embeddings,
        abbr_resolver
    ])

# Every stage is pretrained or rule-based, so the pipeline is fitted on a
# single empty row just to obtain a PipelineModel.
model = resolver_pipeline.fit(spark.createDataFrame([['']]).toDF("text"))

sample_text = "Gravid with estimated fetal weight of 6-6/12 pounds. LOWER EXTREMITIES: No edema. LABORATORY DATA: Laboratory tests include a CBC which is normal. Blood Type: AB positive. Rubella: Immune. VDRL: Nonreactive. Hepatitis C surface antigen: Negative. HIV: Negative. One-Hour Glucose: 117. Group B strep has not been done as yet."

abbr_result = model.transform(spark.createDataFrame([[sample_text]]).toDF('text'))
// Spark NLP pipeline that detects clinical abbreviations with an NER model and
// resolves every detected abbreviation chunk to a meaning.

// Needed for `.toDS()` on a local Seq (already in scope inside spark-shell).
import spark.implicits._

// Wrap the raw "text" column into a "document" annotation.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Split each document into tokens.
val tokenizer = new Tokenizer()
  .setInputCols("document")
  .setOutputCol("token")

// Pretrained clinical word embeddings consumed by the NER stage.
val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("document", "token"))
  .setOutputCol("word_embeddings")

// Pretrained NER model that tags abbreviations.
val clinical_ner = MedicalNerModel.pretrained("ner_abbreviation_clinical", "en", "clinical/models")
  .setInputCols(Array("document", "token", "word_embeddings"))
  .setOutputCol("ner")

// Turn NER tags into chunks, whitelisting only the "ABBR" label.
val ner_converter = new NerConverterInternal()
  .setInputCols(Array("document", "token", "ner"))
  .setOutputCol("ner_chunk")
  .setWhiteList(Array("ABBR"))

// Embed each abbreviation chunk together with its document context; the
// resolver below reads these embeddings.
val sentence_chunk_embeddings = BertSentenceChunkEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
  .setInputCols(Array("document", "ner_chunk"))
  .setOutputCol("sentence_embeddings")
  .setChunkWeight(0.5)
  .setCaseSensitive(true) // Scala boolean literal is lowercase; `True` does not compile

// Resolve each embedded chunk to an abbreviation meaning.
val abbr_resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_clinical_abbreviation_acronym", "en", "clinical/models")
  .setInputCols(Array("sentence_embeddings"))
  .setOutputCol("abbr_meaning")
  .setDistanceFunction("EUCLIDEAN")

// `setStages` takes a single Array of stages, not a variable argument list.
val resolver_pipeline = new Pipeline().setStages(Array(
  document_assembler,
  tokenizer,
  word_embeddings,
  clinical_ner,
  ner_converter,
  sentence_chunk_embeddings,
  abbr_resolver
))

val sample_text = Seq("""Gravid with estimated fetal weight of 6-6/12 pounds. LOWER EXTREMITIES: No edema. LABORATORY DATA: Laboratory tests include a CBC which is normal. Blood Type: AB positive. Rubella: Immune. VDRL: Nonreactive. Hepatitis C surface antigen: Negative. HIV: Negative. One-Hour Glucose: 117. Group B strep has not been done as yet.""").toDS().toDF("text")

// Fit and transform on the same single-row DataFrame.
val abbr_result = resolver_pipeline.fit(sample_text).transform(sample_text)
import nlu

# Same example through the NLU one-liner API: load the pipeline by its NLU
# reference, then run prediction on the sample clinical note.
clinical_note = """Gravid with estimated fetal weight of 6-6/12 pounds. LOWER EXTREMITIES: No edema. LABORATORY DATA: Laboratory tests include a CBC which is normal. Blood Type: AB positive. Rubella: Immune. VDRL: Nonreactive. Hepatitis C surface antigen: Negative. HIV: Negative. One-Hour Glucose: 117. Group B strep has not been done as yet."""
abbr_pipeline = nlu.load("en.resolve.clinical_abbreviation_acronym")
abbr_pipeline.predict(clinical_note)
Results
| | chunk | abbr_meaning | all_k_results |
|---:|:--------|:-------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
| 0 | CBC | Complete Blood Count | Complete Blood Count:::Complete blood count:::blood group in ABO system:::(complement) component 4:::abortion:::carbohydrate antigen:::clear to auscultation:::carcinoembryonic antigen:::cervical (level) 4 |
| 1 | AB | blood group in ABO system | blood group in ABO system:::abortion |
| 2 | VDRL | Venereal disease research laboratory | Venereal disease research laboratory:::venous blood gas:::leukocyte esterase:::vertical banded gastroplasty |
| 3 | HIV | human immunodeficiency virus | human immunodeficiency virus:::blood group in ABO system:::abortion:::fluorescent in situ hybridization |
Model Information
Model Name: | sbiobertresolve_clinical_abbreviation_acronym |
Compatibility: | Healthcare NLP 3.3.4+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [sentence_embeddings] |
Output Labels: | [output] |
Language: | en |
Size: | 112.3 MB |
Case sensitive: | true |
References
Trained on in-house curated dataset.