Description
The model classifies documents as either IBD or Not IBD. It is designed to distinguish between documents that are likely related to patients with inflammatory bowel disease (IBD) and those that are not suggestive of IBD.
How to use
from sparknlp.base import DocumentAssembler
from sparknlp_jsl.annotator import Tokenizer, BertForSequenceClassification
from pyspark.ml import Pipeline
# Stage 1: reads the raw "text" column and writes the "document" column.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: reads "document" and writes the "token" column.
tokenizer = (
    Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Stage 3: pretrained IBD classifier; consumes "document" + "token" and
# writes its prediction to the "label" column.
bfsc_loaded = (
    BertForSequenceClassification.pretrained(
        "bert_sequence_classifier_ibd_onnx", "en", "clinical/models"
    )
    .setInputCols(["document", "token"])
    .setOutputCol("label")
)

pipeline = Pipeline(stages=[document_assembler, tokenizer, bfsc_loaded])

# One positive and one negative example sentence, one row each.
# NOTE: `spark` is assumed to be an already-started SparkSession.
sample_texts = [
    "Patient with inflammatory bowel disease and colon inflammation.",
    "Normal colonoscopy findings, no evidence of inflammation.",
]
data = spark.createDataFrame([[text] for text in sample_texts]).toDF("text")

# Fit the pipeline, run it on the same rows, and print text next to the
# predicted label without truncating long cells.
model = pipeline.fit(data)
result = model.transform(data)
result.select("text", "label.result").show(truncate=False)
from johnsnowlabs import nlp, medical
# Stage 1: reads the raw "text" column and writes the "document" column.
document_assembler = (
    nlp.DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: reads "document" and writes the "token" column.
tokenizer = (
    nlp.Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Stage 3: pretrained IBD classifier; consumes "document" + "token" and
# writes its prediction to the "label" column.
bfsc_loaded = (
    nlp.BertForSequenceClassification.pretrained(
        "bert_sequence_classifier_ibd_onnx", "en", "clinical/models"
    )
    .setInputCols(["document", "token"])
    .setOutputCol("label")
)

pipeline = nlp.Pipeline(stages=[document_assembler, tokenizer, bfsc_loaded])

# One positive and one negative example sentence, one row each.
# NOTE: `spark` is assumed to be an already-started SparkSession.
sample_texts = [
    "Patient with inflammatory bowel disease and colon inflammation.",
    "Normal colonoscopy findings, no evidence of inflammation.",
]
data = spark.createDataFrame([[text] for text in sample_texts]).toDF("text")

# Fit the pipeline, run it on the same rows, and print text next to the
# predicted label without truncating long cells.
model = pipeline.fit(data)
result = model.transform(data)
result.select("text", "label.result").show(truncate=False)
import com.johnsnowlabs.nlp.base._
import com.johnsnowlabs.nlp.annotator._
import org.apache.spark.ml.Pipeline
// Stage 1: reads the raw "text" column and writes the "document" column.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Stage 2: reads "document" and writes the "token" column.
val tokenizer = new Tokenizer()
  .setInputCols(Array("document"))
  .setOutputCol("token")

// Stage 3: pretrained IBD classifier; consumes "document" + "token" and
// writes its prediction to the "label" column.
val bfscLoaded = BertForSequenceClassification
  .pretrained("bert_sequence_classifier_ibd_onnx", "en", "clinical/models")
  .setInputCols(Array("document", "token"))
  .setOutputCol("label")

val pipeline = new Pipeline().setStages(Array(
  documentAssembler,
  tokenizer,
  bfscLoaded
))

// `toDF` on a local Seq is provided by the SparkSession implicits. They are
// pre-imported in spark-shell but must be imported explicitly elsewhere.
// NOTE: `spark` is assumed to be an already-started SparkSession.
import spark.implicits._

// One positive and one negative example sentence, one row each.
val data = Seq(
  "Patient with inflammatory bowel disease and colon inflammation.",
  "Normal colonoscopy findings, no evidence of inflammation."
).toDF("text")

// Fit the pipeline, run it on the same rows, and print text next to the
// predicted label without truncating long cells.
val model = pipeline.fit(data)
val result = model.transform(data)
result.select("text", "label.result").show(truncate = false)
Results
+---------------------------------------------------------------+---------+
|text                                                           |result   |
+---------------------------------------------------------------+---------+
|Patient with inflammatory bowel disease and colon inflammation.|[IBD]    |
|Normal colonoscopy findings, no evidence of inflammation.      |[Not IBD]|
+---------------------------------------------------------------+---------+
Model Information
| Model Name: | bert_sequence_classifier_ibd_onnx |
| Compatibility: | Healthcare NLP 6.1.0+ |
| License: | Licensed |
| Edition: | Official |
| Input Labels: | [document, token] |
| Output Labels: | [label] |
| Language: | en |
| Size: | 405.5 MB |