Description
Map clinical NER entities to LOINC codes.
Predicted Entities
LOINC codes - per input NER entity
Live Demo Open in Colab Copy S3 URI
How to use
documentAssembler = DocumentAssembler()\
.setInputCol('text')\
.setOutputCol('document')
sentenceDetector = SentenceDetector() \
.setInputCols(["document"]) \
.setOutputCol("sentence")
tokenizer = Tokenizer() \
.setInputCols(["sentence"]) \
.setOutputCol("token")
stopwords = StopWordsCleaner.pretrained()\
.setInputCols("token")\
.setOutputCol("cleanTokens")\
.setCaseSensitive(False)
word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")\
.setInputCols(["sentence", "cleanTokens"])\
.setOutputCol("embeddings")
clinical_ner = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models") \
.setInputCols(["sentence", "token", "embeddings"]) \
.setOutputCol("ner")
ner_converter = NerConverter() \
.setInputCols(["sentence", "cleanTokens", "ner"]) \
.setOutputCol("ner_chunk")
chunk2doc = Chunk2Doc()\
.setInputCols("ner_chunk")\
.setOutputCol("ner_chunk_doc")
sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli","en","clinical/models")\
.setInputCols(["ner_chunk_doc"])\
.setOutputCol("sbert_embeddings")
resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_loinc","en", "clinical/models") \
.setInputCols(["sbert_embeddings"]) \
.setOutputCol("resolution")\
.setDistanceFunction("EUCLIDEAN")
pipeline_loinc = Pipeline(stages = [documentAssembler, sentenceDetector, tokenizer, stopwords, word_embeddings, clinical_ner, ner_converter, chunk2doc, sbert_embedder, resolver])
data = spark.createDataFrame([["""A 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to presentation and subsequent type two diabetes mellitus (T2DM), one prior episode of HTG-induced pancreatitis three years prior to presentation, associated with an acute hepatitis, and obesity with a body mass index (BMI) of 33.5 kg/m2, presented with a one-week history of polyuria, polydipsia, poor appetite, and vomiting."""]]).toDF("text")
results = pipeline_loinc.fit(data).transform(data)
val documentAssembler = new DocumentAssembler()
.setInputCol("text")
.setOutputCol("document")
val sentenceDetector = new SentenceDetector()
.setInputCols("document")
.setOutputCol("sentence")
val tokenizer = new Tokenizer()
.setInputCols("sentence")
.setOutputCol("token")
val stopwords = StopWordsCleaner.pretrained()
.setInputCols("token")
.setOutputCol("cleanTokens")
.setCaseSensitive(False)
val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
.setInputCols(Array("sentence", "cleanTokens"))
.setOutputCol("embeddings")
val clinical_ner = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")
.setInputCols(Array("sentence", "token", "embeddings"))
.setOutputCol("ner")
val ner_converter = new NerConverter()
.setInputCols(Array("sentence", "cleanTokens", "ner"))
.setOutputCol("ner_chunk")
val chunk2doc = new Chunk2Doc()
.setInputCols("ner_chunk")
.setOutputCol("ner_chunk_doc")
val sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli","en","clinical/models")
.setInputCols("ner_chunk_doc")
.setOutputCol("sbert_embeddings")
val resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_loinc","en", "clinical/models")
.setInputCols(Array("sbert_embeddings"))
.setOutputCol("resolution")
.setDistanceFunction("EUCLIDEAN")
val pipeline_loinc = new Pipeline().setStages(Array(documentAssembler, sentenceDetector, tokenizer, stopwords, word_embeddings, clinical_ner, ner_converter, chunk2doc, sbert_embedder, resolver))
val data = Seq("""A 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to presentation and subsequent type two diabetes mellitus (T2DM), one prior episode of HTG-induced pancreatitis three years prior to presentation, associated with an acute hepatitis, and obesity with a body mass index (BMI) of 33.5 kg/m2, presented with a one-week history of polyuria, polydipsia, poor appetite, and vomiting.""").toDS().toDF("text")
val results = pipeline_loinc.fit(data).transform(data)
import nlu
nlu.load("en.resolve.loinc").predict("""A 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to presentation and subsequent type two diabetes mellitus (T2DM), one prior episode of HTG-induced pancreatitis three years prior to presentation, associated with an acute hepatitis, and obesity with a body mass index (BMI) of 33.5 kg/m2, presented with a one-week history of polyuria, polydipsia, poor appetite, and vomiting.""")
Results
| | chunk | loinc_code |
|---:|:--------------------------------------|:-------------|
| 0 | gestational diabetes mellitus | 45636-8 |
| 1 | subsequent type two diabetes mellitus | 44877-9 |
| 2 | T2DM | 45636-8 |
| 3 | HTG-induced pancreatitis | 66667-7 |
| 4 | an acute hepatitis | 45690-5 |
| 5 | obesity | 73708-0 |
| 6 | a body mass index | 59574-4 |
| 7 | BMI | 59574-4 |
| 8 | polyuria | 28239-2 |
| 9 | polydipsia | 90552-1 |
| 10 | poor appetite | 28387-9 |
| 11 | vomiting | 81224-8 |
Model Information
Model Name: | sbiobertresolve_loinc |
Compatibility: | Healthcare NLP 3.0.0+ |
License: | Licensed |
Edition: | Official |
Input Labels: | [sentence_embeddings] |
Output Labels: | [loinc_code] |
Language: | en |
Data Source
Trained on standard LOINC coding system.