Sentence Entity Resolver for SNOMED (sbiobertresolve_snomed_procedures_measurements)

Description

This model maps procedure and measurement (test) entities to their corresponding SNOMED codes using `sbiobert_base_cased_mli` Sentence BERT embeddings.

Open in Colab Copy S3 URI

How to use



# Convert the raw "text" column into a document annotation.
document_assembler = DocumentAssembler()\
      .setInputCol("text")\
      .setOutputCol("document")

# Split documents into sentences.
# NOTE: the variable name must match the entry used in the pipeline stages below.
sentence_detector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")\
      .setInputCols(["document"])\
      .setOutputCol("sentence")

tokenizer = Tokenizer()\
      .setInputCols(["sentence"])\
      .setOutputCol("token")

# Token-level embeddings consumed by the NER model.
word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
      .setInputCols(["sentence", "token"])\
      .setOutputCol("embeddings")

ner_jsl = MedicalNerModel.pretrained("ner_jsl", "en", "clinical/models")\
      .setInputCols(["sentence", "token", "embeddings"])\
      .setOutputCol("ner_jsl")

# Merge NER tags into chunks, keeping only the Procedure and Test labels.
ner_jsl_converter = NerConverterInternal()\
      .setInputCols(["sentence", "token", "ner_jsl"])\
      .setOutputCol("ner_jsl_chunk")\
      .setWhiteList(['Procedure','Test'])

# Re-wrap each chunk as a document so it can be embedded on its own.
chunk2doc = Chunk2Doc()\
    .setInputCols("ner_jsl_chunk")\
    .setOutputCol("ner_chunk_doc")

sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")\
    .setInputCols(["ner_chunk_doc"])\
    .setOutputCol("sbert_embeddings")\
    .setCaseSensitive(False)

# Resolve each embedded chunk to a SNOMED code.
snomed_resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_procedures_measurements", "en", "clinical/models") \
    .setInputCols(["sbert_embeddings"]) \
    .setOutputCol("snomed_code")

snomed_pipeline = Pipeline(stages = [
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    ner_jsl,
    ner_jsl_converter,
    chunk2doc,
    sbert_embedder,
    snomed_resolver
])


sample_text = """Based on the severity of her abdominal examination and the persistence of her symptoms, it has been determined that she requires a laparoscopic jejunectomy, possible appendectomy, and cholecystectomy.Laboratory values indicate a white blood cell count of 15.3, plasma hemoglobin level of 12.8, and normal platelet count. Alkaline phosphatase is elevated at 184, while liver function tests are otherwise normal. Electrolyte levels are within the normal range. Glucose levels are at 134, BUN is 4, and creatinine is 0.7."""

# `spark` is expected to be an already-started Spark session.
df = spark.createDataFrame([[sample_text]]).toDF("text")

# Fit and apply the pipeline defined above (`snomed_pipeline`).
result = snomed_pipeline.fit(df).transform(df)



# Stage 1: raw "text" column -> document annotation.
documentAssembler = (
    nlp.DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: document -> sentences.
sentenceDetector = (
    nlp.SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Stage 3: sentences -> tokens.
tokenizer = (
    nlp.Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Stage 4: token embeddings used by the NER model.
word_embeddings = (
    nlp.WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Stage 5: clinical NER tagging.
ner_jsl = (
    medical.NerModel.pretrained("ner_jsl", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner_jsl")
)

# Stage 6: NER tags -> chunks, restricted to the Procedure and Test labels.
ner_jsl_converter = (
    medical.NerConverterInternal()
    .setInputCols(["sentence", "token", "ner_jsl"])
    .setOutputCol("ner_jsl_chunk")
    .setWhiteList(['Procedure', 'Test'])
)

# Stage 7: each chunk becomes its own document for sentence-level embedding.
chunk2doc = (
    nlp.Chunk2Doc()
    .setInputCols("ner_jsl_chunk")
    .setOutputCol("ner_chunk_doc")
)

# Stage 8: sentence BERT embeddings of the chunk documents.
sbert_embedder = (
    nlp.BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
    .setInputCols(["ner_chunk_doc"])
    .setOutputCol("sbert_embeddings")
    .setCaseSensitive(False)
)

# Stage 9: embeddings -> SNOMED codes.
snomed_resolver = (
    medical.SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_procedures_measurements", "en", "clinical/models")
    .setInputCols(["sbert_embeddings"])
    .setOutputCol("snomed_code")
    .setDistanceFunction("EUCLIDEAN")
)

# Assemble the stages in execution order.
pipeline_stages = [
    documentAssembler,
    sentenceDetector,
    tokenizer,
    word_embeddings,
    ner_jsl,
    ner_jsl_converter,
    chunk2doc,
    sbert_embedder,
    snomed_resolver,
]
nlpPipeline = nlp.Pipeline(stages=pipeline_stages)

sample_text = """Based on the severity of her abdominal examination and the persistence of her symptoms, it has been determined that she requires a laparoscopic jejunectomy, possible appendectomy, and cholecystectomy.Laboratory values indicate a white blood cell count of 15.3, plasma hemoglobin level of 12.8, and normal platelet count. Alkaline phosphatase is elevated at 184, while liver function tests are otherwise normal. Electrolyte levels are within the normal range. Glucose levels are at 134, BUN is 4, and creatinine is 0.7."""

# `spark` is expected to be an already-started Spark session.
df = spark.createDataFrame([[sample_text]]).toDF("text")

result = nlpPipeline.fit(df).transform(df)





// Convert the raw "text" column into a document annotation.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Split documents into sentences.
val sentenceDetector = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare","en","clinical/models")
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

val tokenizer = new Tokenizer()
  .setInputCols(Array("sentence"))
  .setOutputCol("token")

// Token-level embeddings consumed by the NER model.
val wordEmbeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")

val nerJsl = MedicalNerModel.pretrained("ner_jsl", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("ner_jsl")

// Merge NER tags into chunks, keeping only the Procedure and Test labels.
// Scala has no ['a','b'] list literal; the labels are passed as string arguments.
val nerJslConverter = new NerConverter()
  .setInputCols(Array("sentence", "token", "ner_jsl"))
  .setOutputCol("ner_jsl_chunk")
  .setWhiteList("Procedure", "Test")

// Re-wrap each chunk as a document so it can be embedded on its own.
val chunk2doc = new Chunk2Doc()
  .setInputCols(Array("ner_jsl_chunk"))
  .setOutputCol("ner_chunk_doc")

val sbertEmbedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
  .setInputCols(Array("ner_chunk_doc"))
  .setOutputCol("sbert_embeddings")
  .setCaseSensitive(false)

// Resolve each embedded chunk to a SNOMED code.
val snomedResolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_procedures_measurements", "en", "clinical/models")
  .setInputCols(Array("sbert_embeddings"))
  .setOutputCol("snomed_code")
  .setDistanceFunction("EUCLIDEAN")

val nlpPipeline = new Pipeline().setStages(Array(
  documentAssembler,
  sentenceDetector,
  tokenizer,
  wordEmbeddings,
  nerJsl,
  nerJslConverter,
  chunk2doc,
  sbertEmbedder,
  snomedResolver
))


val sample_text = """Based on the severity of her abdominal examination and the persistence of her symptoms, it has been determined that she requires a laparoscopic jejunectomy, possible appendectomy, and cholecystectomy.Laboratory values indicate a white blood cell count of 15.3, plasma hemoglobin level of 12.8, and normal platelet count. Alkaline phosphatase is elevated at 184, while liver function tests are otherwise normal. Electrolyte levels are within the normal range. Glucose levels are at 134, BUN is 4, and creatinine is 0.7."""

// NOTE(review): `toDF` on a Seq needs `spark.implicits._` in scope (pre-imported in spark-shell) — confirm for your environment.
val df= Seq(sample_text).toDF("text")

val result= nlpPipeline.fit(df).transform(df)

Results


| sent_id | ner_chunk                | entity    | snomed_code | resolutions                      | all_codes                                          | all_resolutions                                    |
|---------|--------------------------|-----------|-------------|----------------------------------|----------------------------------------------------|----------------------------------------------------|
| 0       | laparoscopic jejunectomy | Procedure | 1220546008  | laparoscopic jejunectomy         | ['1220546008', '6025007', '307195003', '1220549... | ['laparoscopic jejunectomy', 'laparoscopic appe... |
| 0       | appendectomy             | Procedure | 80146002    | appendectomy                     | ['80146002', '17041004', '82730006', '174045003... | ['appendectomy', 'appendicotomy', 'secondary ap... |
| 0       | cholecystectomy          | Procedure | 38102005    | cholecystectomy                  | ['38102005', '6402000', '44337006', '45595009',... | ['cholecystectomy', 'choledochectomy', 'cholecy... |
| 1       | white blood cell count   | Test      | 767002      | white blood cell count           | ['767002', '252305002', '165511009', '44190001'... | ['white blood cell count', 'white blood cell te... |
| 1       | plasma hemoglobin level  | Test      | 104142005   | plasma hemoglobin level          | ['104142005', '271510004', '313995005', '271026... | ['plasma hemoglobin level', 'hemoglobin s level... |
| 1       | platelet count           | Test      | 61928009    | platelet count                   | ['61928009', '250314004', '8574009', '75672003'... | ['platelet count', 'plateletcrit', 'platelet es... |
| 2       | Alkaline phosphatase     | Test      | 88810008    | alkaline phosphatase measurement | ['88810008', '45745006', '271234008', '39096200... | ['alkaline phosphatase measurement', 'alkaline ... |
| 2       | liver function tests     | Test      | 26958001    | liver function tests             | ['26958001', '269856004', '736164009', '2878580... | ['liver function tests', 'liver enzyme levels',... |
| 2       | Electrolyte levels       | Test      | 79301008    | electrolytes measurement         | ['79301008', '276025008', '312474003', '4011420... | ['electrolytes measurement', 'electrolyte regul... |
| 3       | Glucose levels           | Test      | 36048009    | glucose measurement              | ['36048009', '72191006', '302789003', '16688800... | ['glucose measurement', 'plasma glucose', 'capi... |
| 3       | BUN                      | Test      | 24509005    | bun measurement                  | ['24509005', '16227009', '85651007', '174651007... | ['bun measurement', 'cinching', 'bost operation... |
| 3       | creatinine               | Test      | 113075003   | serum creatinine                 | ['113075003', '70901006', '313936008', '2507450... | ['serum creatinine', 'creatinine measurement', ... |

Model Information

Model Name: sbiobertresolve_snomed_procedures_measurements
Compatibility: Healthcare NLP 6.3.0+
License: Licensed
Edition: Official
Input Labels: [sentence_embeddings]
Output Labels: [snomed_code]
Language: en
Size: 340.6 MB
Case sensitive: false