Sentence Entity Resolver for ICD-9-CM

Description

This model maps extracted medical entities to ICD-9-CM codes using `sbiobert_base_cased_mli` Sentence BERT embeddings.

Predicted Entities

ICD-9-CM Codes

Copy S3 URI

How to use

# Reads the raw "text" column and writes the "document" column.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Splits each document into sentences using the pretrained healthcare detector.
sentence_detector = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Tokenizes every sentence.
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Token-level embeddings consumed by the NER model below.
word_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("embeddings")
)

# Tags tokens with the pretrained "ner_clinical" model.
clinical_ner = (
    MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token", "embeddings"])
    .setOutputCol("ner")
)

# Groups NER tags into chunks; the whitelist keeps only PROBLEM entities.
ner_converter = (
    NerConverter()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
    .setWhiteList(["PROBLEM"])
)

# Converts each chunk into a document so it can be embedded on its own.
chunk2doc = (
    Chunk2Doc()
    .setInputCols("ner_chunk")
    .setOutputCol("ner_chunk_doc")
)

# Sentence-level BERT embeddings computed per chunk document.
sbert_embedder = (
    BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
    .setInputCols(["ner_chunk_doc"])
    .setOutputCol("sbert_embeddings")
    .setCaseSensitive(False)
)

# Resolves each chunk embedding to ICD-9-CM codes using Euclidean distance.
icd9_resolver = (
    SentenceEntityResolverModel.pretrained("sbiobertresolve_icd9", "en", "clinical/models")
    .setInputCols(["sbert_embeddings"])
    .setOutputCol("resolution")
    .setDistanceFunction("EUCLIDEAN")
)

# Stages run in the order listed; each one consumes columns produced earlier.
nlpPipeline = Pipeline(stages=[
    document_assembler,
    sentence_detector,
    tokenizer,
    word_embeddings,
    clinical_ner,
    ner_converter,
    chunk2doc,
    sbert_embedder,
    icd9_resolver,
])

# Single example note; the outer list below makes it one row with one column.
clinical_note = ["""A 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to presentation and type two diabetes mellitus, associated with an acute alcoholic hepatitis and obesity."""]

data = spark.createDataFrame([clinical_note]).toDF("text")

# Fit the pipeline on the example data and annotate that same data.
result = nlpPipeline.fit(data).transform(data)
// Reads the raw "text" column and writes the "document" column.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Splits each document into sentences using the pretrained healthcare detector.
val sentence_detector = SentenceDetectorDLModel
  .pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
  .setInputCols(Array("document"))
  .setOutputCol("sentence")

// Tokenizes every sentence.
val tokenizer = new Tokenizer()
  .setInputCols(Array("sentence"))
  .setOutputCol("token")

// Token-level embeddings consumed by the NER model below.
val word_embeddings = WordEmbeddingsModel
  .pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("embeddings")

// Tags tokens with the pretrained "ner_clinical" model.
val clinical_ner = MedicalNerModel
  .pretrained("ner_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "embeddings"))
  .setOutputCol("ner")

// Groups NER tags into chunks; the whitelist keeps only PROBLEM entities.
val ner_converter = new NerConverter()
  .setInputCols(Array("sentence", "token", "ner"))
  .setOutputCol("ner_chunk")
  .setWhiteList(Array("PROBLEM"))

// Converts each chunk into a document so it can be embedded on its own.
val chunk2doc = new Chunk2Doc()
  .setInputCols("ner_chunk")
  .setOutputCol("ner_chunk_doc")

// Sentence-level BERT embeddings computed per chunk document.
// Note: Scala's boolean literal is lowercase `false`; the Python-style `False`
// is not a defined identifier in Scala and would fail to compile.
val sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli","en","clinical/models")
  .setInputCols(Array("ner_chunk_doc"))
  .setOutputCol("sbert_embeddings")
  .setCaseSensitive(false)

// Resolves each chunk embedding to ICD-9-CM codes using Euclidean distance.
val icd9_resolver = SentenceEntityResolverModel
  .pretrained("sbiobertresolve_icd9", "en", "clinical/models")
  .setInputCols(Array("sbert_embeddings"))
  .setOutputCol("resolution")
  .setDistanceFunction("EUCLIDEAN")

// Stages run in the order listed; each one consumes columns produced earlier.
val pipeline = new Pipeline().setStages(Array(
  document_assembler,
  sentence_detector,
  tokenizer,
  word_embeddings,
  clinical_ner,
  ner_converter,
  chunk2doc,
  sbert_embedder,
  icd9_resolver
))

// Single-row example input exposed as a "text" column.
val data = Seq(
  "A 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to presentation and type two diabetes mellitus, associated with an acute alcoholic hepatitis and obesity."
).toDS.toDF("text")

// Fit the pipeline on the example data and annotate that same data.
val result = pipeline.fit(data).transform(data)

Results

+-----------------------------+-------+---------+------------------------------------------------------------------+--------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
|                        chunk| entity|icd9_code|                                                        resolution|                                                                   all_k_results|                                                               all_k_resolutions|
+-----------------------------+-------+---------+------------------------------------------------------------------+--------------------------------------------------------------------------------+--------------------------------------------------------------------------------+
|gestational diabetes mellitus|PROBLEM|   V12.21|hx gestational diabetes [Personal history of gestational diabetes]|V12.21:::775.1:::V18.0:::249:::250:::249.7:::249.71:::249.9:::249.61:::648.0:...|hx gestational diabetes [Personal history of gestational diabetes]:::neonat d...|
|   type two diabetes mellitus|PROBLEM|      249|         secondary diabetes mellitus [Secondary diabetes mellitus]|249:::250:::V18.0:::775.1:::249.7:::249.71:::249.9:::249.6:::V77.1:::249.5:::...|secondary diabetes mellitus [Secondary diabetes mellitus]:::diabetes mellitus...|
| an acute alcoholic hepatitis|PROBLEM|    571.1|             acute alcoholic hepatitis [Acute alcoholic hepatitis]|571.1:::303.0:::571.2:::303.01:::303.02:::571.0:::902.22:::070.0:::864.19:::0...|acute alcoholic hepatitis [Acute alcoholic hepatitis]:::acute alcoholic intox...|
|                      obesity|PROBLEM|    278.0|                   overweight and obesity [Overweight and obesity]|278.0:::278.01:::278.02:::649.11:::V77.8:::278.00:::278:::649.12:::729.31:::2...|overweight and obesity [Overweight and obesity]:::morbid obesity [Morbid obes...|
+-----------------------------+-------+---------+------------------------------------------------------------------+--------------------------------------------------------------------------------+--------------------------------------------------------------------------------+

Model Information

Model Name: sbiobertresolve_icd9
Compatibility: Healthcare NLP 5.3.1+
License: Licensed
Edition: Official
Input Labels: [sentence_embeddings]
Output Labels: [icd9cm_code]
Language: en
Size: 86.7 MB
Case sensitive: false