Veterinary Clinical Diagnosis Embeddings (ONNX)

Description

This model is trained on large-scale, real-world veterinary medical records to capture the terminology, structure, and diagnostic patterns found in animal health notes, enabling accurate performance on downstream tasks such as disease syndrome classification, information extraction, and clinical text analysis.

Copy S3 URI

How to use

from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline

# Stage 1: read the raw "text" column and emit a "document" column.
doc_stage = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: consume "document" and emit a "token" column.
token_stage = (
    Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Stage 3: load the pretrained "vetclinical_bert_onnx" model ("en", "clinical/models")
# and write its output to the "embeddings" column.
embedding_stage = (
    BertEmbeddings.pretrained("vetclinical_bert_onnx", "en", "clinical/models")
    .setInputCols(["document", "token"])
    .setOutputCol("embeddings")
)

# Chain the three stages in order.
pipeline = Pipeline().setStages([doc_stage, token_stage, embedding_stage])

# One-row, one-column ("text") DataFrame holding the sample note.
# `spark` is expected to be an already-running session; it is not created here.
sample_note = "Dog presented with vomiting, lethargy, and decreased appetite for three days."
data = spark.createDataFrame([(sample_note,)], ["text"])

# Fit the pipeline, then run it over the same data.
model = pipeline.fit(data)
result = model.transform(data)

# Print the vectors stored in the "embeddings" field of the "embeddings" column.
result.select("embeddings.embeddings").show()
from johnsnowlabs import nlp, medical

# Stage 1: read the raw "text" column and emit a "document" column.
doc_stage = (
    nlp.DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: consume "document" and emit a "token" column.
token_stage = (
    nlp.Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Stage 3: load the pretrained "vetclinical_bert_onnx" model ("en", "clinical/models")
# and write its output to the "embeddings" column.
# NOTE(review): confirm BertEmbeddings is exposed under `medical`; it may need to be
# reached as `nlp.BertEmbeddings` instead.
embedding_stage = (
    medical.BertEmbeddings.pretrained("vetclinical_bert_onnx", "en", "clinical/models")
    .setInputCols(["document", "token"])
    .setOutputCol("embeddings")
)

# Chain the three stages in order.
pipeline = nlp.Pipeline().setStages([doc_stage, token_stage, embedding_stage])

# One-row, one-column ("text") DataFrame holding the sample note.
# `spark` is expected to be an already-running session; it is not created here.
sample_note = "Dog presented with vomiting, lethargy, and decreased appetite for three days."
data = spark.createDataFrame([(sample_note,)], ["text"])

# Fit the pipeline, then run it over the same data.
model = pipeline.fit(data)
result = model.transform(data)

# Print the vectors stored in the "embeddings" field of the "embeddings" column.
result.select("embeddings.embeddings").show()
import com.johnsnowlabs.nlp.base._
import com.johnsnowlabs.nlp.annotator._
import org.apache.spark.ml.Pipeline

// Stage 1: read the raw "text" column and emit a "document" column.
val docStage = new DocumentAssembler().setInputCol("text").setOutputCol("document")

// Stage 2: consume "document" and emit a "token" column.
val tokenStage = new Tokenizer().setInputCols(Array("document")).setOutputCol("token")

// Stage 3: load the pretrained "vetclinical_bert_onnx" model ("en", "clinical/models")
// and write its output to the "embeddings" column.
val embeddingStage = BertEmbeddings
  .pretrained("vetclinical_bert_onnx", "en", "clinical/models")
  .setInputCols(Array("document", "token"))
  .setOutputCol("embeddings")

// Chain the three stages in order.
val pipeline = new Pipeline().setStages(Array(docStage, tokenStage, embeddingStage))

// One-row, one-column ("text") DataFrame holding the sample note.
// `spark` is expected to be an already-running session; it is not created here.
val sampleNote =
  "Dog presented with vomiting, lethargy, and decreased appetite for three days."
val data = spark.createDataFrame(Seq(Tuple1(sampleNote))).toDF("text")

// Fit the pipeline, then run it over the same data.
val model = pipeline.fit(data)
val result = model.transform(data)

// Print the vectors stored in the "embeddings" field of the "embeddings" column, untruncated.
result.select("embeddings.embeddings").show(truncate = false)

Results


+--------------------+
|          embeddings|
+--------------------+
|[[-0.12431372, -0...|
+--------------------+

Model Information

Model Name: vetclinical_bert_onnx
Compatibility: Healthcare NLP 6.2.0+
License: Licensed
Edition: Official
Input Labels: [document, token]
Output Labels: [embeddings]
Language: en
Size: 402.8 MB
Case sensitive: false
Max sentence length: 512