Sentence Entity Resolver for SNOMED Veterinary

Description

This model maps veterinary-related entities and concepts to SNOMED codes using `sbiobert_base_cased_mli` Sentence BERT embeddings.

Predicted Entities

Copy S3 URI

How to use

# Stage definitions for the SNOMED veterinary resolver pipeline.
# Chains are wrapped in parentheses rather than backslash continuations so that
# no stage can end on a dangling "\" that depends on the following blank line.

# Wrap the raw "text" column as a document annotation.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Split documents into sentences with the pretrained healthcare detector.
sentenceDetectorDL = (
    SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
    .setInputCols(["document"])
    .setOutputCol("sentence")
)

# Tokenize each sentence.
tokenizer = (
    Tokenizer()
    .setInputCols(["sentence"])
    .setOutputCol("token")
)

# Clinical word embeddings consumed by the NER model below.
word_embeddings = (
    WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token"])
    .setOutputCol("word_embeddings")
)

# Token-level clinical NER tags.
ner = (
    MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")
    .setInputCols(["sentence", "token", "word_embeddings"])
    .setOutputCol("ner")
)

# Merge NER tags into chunks, keeping only the whitelisted PROBLEM label.
ner_converter = (
    NerConverterInternal()
    .setInputCols(["sentence", "token", "ner"])
    .setOutputCol("ner_chunk")
    .setWhiteList(['PROBLEM'])
)

# Re-wrap each chunk as a document so it can be embedded on its own.
c2doc = (
    Chunk2Doc()
    .setInputCols(["ner_chunk"])
    .setOutputCol("ner_chunk_doc")
)

# Sentence-level BERT embeddings of each chunk, with case sensitivity disabled.
sbert_embedder = (
    BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
    .setInputCols(["ner_chunk_doc"])
    .setOutputCol("sentence_embeddings")
    .setCaseSensitive(False)
)

# Resolve each chunk embedding to a SNOMED veterinary code using Euclidean distance.
snomed_resolver = (
    SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_veterinary_wip", "en", "clinical/models")
    .setInputCols(["sentence_embeddings"])
    .setOutputCol("snomed_code")
    .setDistanceFunction("EUCLIDEAN")
)

# Assemble the stages in dependency order: each stage reads columns produced
# by the stages listed before it.
resolver_pipeline = Pipeline(
    stages=[
        document_assembler,
        sentenceDetectorDL,
        tokenizer,
        word_embeddings,
        ner,
        ner_converter,
        c2doc,
        sbert_embedder,
        snomed_resolver,
    ]
)

# One row per inner list, one column per element of that list.
text = [["The veterinary team is observing the patient for signs of lymphoblastic lymphoma, while also treating the  Arthritis condition, and closely observing for any potential cases of mink distemper in the facility."]]

# Let Spark infer the single string column from the nested list. Supplying
# StringType() as the schema would instead treat each inner list as the value
# itself, so the pipeline would receive a stringified list rather than the text.
data = spark.createDataFrame(text).toDF("text")

# Fit on a one-row placeholder frame with the same "text" column, then
# annotate the real data with the resulting model.
result = resolver_pipeline.fit(spark.createDataFrame([[""]]).toDF("text")).transform(data)

// Stage definitions for the SNOMED veterinary resolver pipeline (Scala API).

// Wrap the raw "text" column as a document annotation.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Split documents into sentences with the pretrained healthcare detector.
val sentenceDetectorDL = SentenceDetectorDLModel.pretrained("sentence_detector_dl_healthcare", "en", "clinical/models")
  .setInputCols("document")
  .setOutputCol("sentence")

// Tokenize each sentence.
val tokenizer = new Tokenizer()
  .setInputCols("sentence")
  .setOutputCol("token")

// Clinical word embeddings consumed by the NER model below.
val word_embeddings = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token"))
  .setOutputCol("word_embeddings")

// Token-level clinical NER tags.
val ner = MedicalNerModel.pretrained("ner_clinical", "en", "clinical/models")
  .setInputCols(Array("sentence", "token", "word_embeddings"))
  .setOutputCol("ner")

// Merge NER tags into chunks, keeping only the whitelisted PROBLEM label.
val ner_converter = new NerConverterInternal()
  .setInputCols(Array("sentence", "token", "ner"))
  .setOutputCol("ner_chunk")
  .setWhiteList("PROBLEM")

// Re-wrap each chunk as a document so it can be embedded on its own.
val c2doc = new Chunk2Doc()
  .setInputCols("ner_chunk")
  .setOutputCol("ner_chunk_doc")

// Sentence-level BERT embeddings of each chunk, with case sensitivity disabled.
// Scala's boolean literal is lowercase `false`; `False` is the Python spelling.
val sbert_embedder = BertSentenceEmbeddings.pretrained("sbiobert_base_cased_mli", "en", "clinical/models")
  .setInputCols("ner_chunk_doc")
  .setOutputCol("sentence_embeddings")
  .setCaseSensitive(false)

// Resolve each chunk embedding to a SNOMED veterinary code using Euclidean distance.
val snomed_resolver = SentenceEntityResolverModel.pretrained("sbiobertresolve_snomed_veterinary_wip", "en", "clinical/models")
  .setInputCols("sentence_embeddings")
  .setOutputCol("snomed_code")
  .setDistanceFunction("EUCLIDEAN")

// `Pipeline` is the unfitted estimator that exposes `setStages` and `fit`;
// `PipelineModel` is what `fit` returns and cannot be built this way.
import org.apache.spark.ml.Pipeline

// Assemble the stages in dependency order: each stage reads columns produced
// by the stages listed before it.
val resolver_pipeline = new Pipeline().setStages(Array(
  document_assembler,
  sentenceDetectorDL,
  tokenizer,
  word_embeddings,
  ner,
  ner_converter,
  c2doc,
  sbert_embedder,
  snomed_resolver
))

// NOTE(review): `toDF` on a Seq relies on `spark.implicits._` being in scope
// (spark-shell imports it automatically) — confirm for standalone applications.
val data = Seq("The veterinary team is observing the patient for signs of lymphoblastic lymphoma, while also treating the  Arthritis condition, and closely observing for any potential cases of mink distemper in the facility.").toDF("text")

// Fit the pipeline to obtain a PipelineModel, then annotate the same data.
val result = resolver_pipeline.fit(data).transform(data)

Results

+------------------------+-------+---------------+-------------------------+------------------------------------------------------------+------------------------------------------------------------+
|               ner_chunk| entity|    snomed_code|              description|                                                   all_codes|                                                 resolutions|
+------------------------+-------+---------------+-------------------------+------------------------------------------------------------+------------------------------------------------------------+
|  lymphoblastic lymphoma|PROBLEM|312281000009102|   lymphoblastic lymphoma|312281000009102:::360351000009103:::91857003:::302841002:...|lymphoblastic lymphoma:::cutaneous epitheliotropic lympho...|
|the  Arthritis condition|PROBLEM|309181000009103|immune-mediated arthritis|309181000009103:::298162008:::35771000009105:::3117810000...|immune-mediated arthritis:::arthritis of shoulder joint::...|
|          mink distemper|PROBLEM|348361000009108|           mink distemper|348361000009108:::86031000009108:::207191000009103:::1901...|mink distemper:::dendropicos obsoletus:::xenops minutus o...|
+------------------------+-------+---------------+-------------------------+------------------------------------------------------------+------------------------------------------------------------+

Model Information

Model Name: sbiobertresolve_snomed_veterinary_wip
Compatibility: Healthcare NLP 5.3.2+
License: Licensed
Edition: Official
Input Labels: [sentence_embeddings]
Output Labels: [snomed_code]
Language: en
Size: 167.8 MB
Case sensitive: false

References

This model is trained with the Veterinary Extension to SNOMED CT(R), April 1, 2024 release, available at https://vtsl.vetmed.vt.edu/extension/