Description
This model is trained on large-scale, real-world veterinary medical records to capture the terminology, structure, and diagnostic patterns found in animal health notes, enabling accurate downstream tasks such as disease syndrome classification, information extraction, and clinical text analysis.
How to use
from sparknlp.base import *
from sparknlp.annotator import *
from pyspark.ml import Pipeline
# Stage 1: reads the "text" column and writes the "document" column.
document_stage = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: reads "document" and writes "token".
token_stage = (
    Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")
)

# Stage 3: loads the pretrained "vetclinical_bert_onnx" model ("en", from
# "clinical/models"); reads "document" + "token" and writes "embeddings".
embedding_stage = (
    BertEmbeddings.pretrained("vetclinical_bert_onnx", "en", "clinical/models")
    .setInputCols(["document", "token"])
    .setOutputCol("embeddings")
)

# Stages run in the order listed.
pipeline = Pipeline(stages=[document_stage, token_stage, embedding_stage])

# Single-row DataFrame with one "text" column holding the sample note.
sample_note = "Dog presented with vomiting, lethargy, and decreased appetite for three days."
data = spark.createDataFrame([[sample_note]]).toDF("text")

model = pipeline.fit(data)
result = model.transform(data)

# Print the nested "embeddings" field of the "embeddings" output column.
result.select("embeddings.embeddings").show()
from johnsnowlabs import nlp, medical
# Stage 1: reads the "text" column and writes the "document" column.
document_stage = (
    nlp.DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: reads "document" and writes "token".
token_stage = (
    nlp.Tokenizer()
    .setInputCols("document")
    .setOutputCol("token")
)

# Stage 3: loads the pretrained "vetclinical_bert_onnx" model ("en", from
# "clinical/models"); reads "document" + "token" and writes "embeddings".
embedding_stage = (
    medical.BertEmbeddings.pretrained("vetclinical_bert_onnx", "en", "clinical/models")
    .setInputCols(["document", "token"])
    .setOutputCol("embeddings")
)

# Stages run in the order listed.
pipeline = nlp.Pipeline(stages=[document_stage, token_stage, embedding_stage])

# Single-row DataFrame with one "text" column holding the sample note.
sample_note = "Dog presented with vomiting, lethargy, and decreased appetite for three days."
data = spark.createDataFrame([[sample_note]]).toDF("text")

model = pipeline.fit(data)
result = model.transform(data)

# Print the nested "embeddings" field of the "embeddings" output column.
result.select("embeddings.embeddings").show()
import com.johnsnowlabs.nlp.base._
import com.johnsnowlabs.nlp.annotator._
import org.apache.spark.ml.Pipeline
// Stage 1: reads the "text" column and writes the "document" column.
val documentStage = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Stage 2: reads "document" and writes "token".
val tokenStage = new Tokenizer()
  .setInputCols(Array("document"))
  .setOutputCol("token")

// Stage 3: loads the pretrained "vetclinical_bert_onnx" model ("en", from
// "clinical/models"); reads "document" + "token" and writes "embeddings".
val embeddingStage = BertEmbeddings
  .pretrained("vetclinical_bert_onnx", "en", "clinical/models")
  .setInputCols(Array("document", "token"))
  .setOutputCol("embeddings")

// Stages run in the order listed.
val pipeline = new Pipeline().setStages(Array(
  documentStage,
  tokenStage,
  embeddingStage
))

// Single-row DataFrame with one "text" column holding the sample note.
val sampleNote =
  "Dog presented with vomiting, lethargy, and decreased appetite for three days."
val data = spark.createDataFrame(Seq(Tuple1(sampleNote))).toDF("text")

val model = pipeline.fit(data)
val result = model.transform(data)

// Print the nested "embeddings" field without truncating cell contents.
result.select("embeddings.embeddings").show(truncate = false)
Results
+--------------------+
| embeddings|
+--------------------+
|[[-0.12431372, -0...|
+--------------------+
Model Information
| Model Name: | vetclinical_bert_onnx |
| Compatibility: | Healthcare NLP 6.2.0+ |
| License: | Licensed |
| Edition: | Official |
| Input Labels: | [document, token] |
| Output Labels: | [embeddings] |
| Language: | en |
| Size: | 402.8 MB |
| Case sensitive: | false |
| Max sentence length: | 512 |