BioClinicalBERT IBD Classifier (ONNX)

Description

The model classifies documents as either inflammatory bowel disease (IBD) or Not IBD. It is designed to distinguish documents that are likely related to patients with IBD from those that are not suggestive of IBD.

Copy S3 URI

How to use

from sparknlp.base import DocumentAssembler
from sparknlp_jsl.annotator import Tokenizer, BertForSequenceClassification
from pyspark.ml import Pipeline

# Spark NLP pipeline: raw text -> document -> tokens -> predicted label.
# Assumes an active SparkSession is already bound to the name `spark`.

# Stage 1: read the "text" column and emit a "document" annotation column.
doc_stage = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: tokenize the "document" column into a "token" column.
token_stage = (
    Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Stage 3: pretrained sequence classifier; its prediction lands in "label".
ibd_classifier = (
    BertForSequenceClassification.pretrained(
        "bert_sequence_classifier_ibd_onnx", "en", "clinical/models"
    )
    .setInputCols(['document', 'token'])
    .setOutputCol("label")
)

nlp_pipeline = Pipeline(stages=[doc_stage, token_stage, ibd_classifier])

# Two sample sentences, one per row, exposed as a single "text" column.
sample_rows = [
    ["Patient with inflammatory bowel disease and colon inflammation."],
    ["Normal colonoscopy findings, no evidence of inflammation."],
]
sample_df = spark.createDataFrame(sample_rows).toDF("text")

fitted_pipeline = nlp_pipeline.fit(sample_df)
predictions = fitted_pipeline.transform(sample_df)

# Print each input text next to its predicted label, without truncating cells.
predictions.select("text", "label.result").show(truncate=False)

from johnsnowlabs import nlp, medical

# Same pipeline expressed through the `johnsnowlabs` package's `nlp` namespace.
# Assumes an active SparkSession is already bound to the name `spark`.

# Stage 1: read the "text" column and emit a "document" annotation column.
to_document = (
    nlp.DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Stage 2: tokenize the "document" column into a "token" column.
to_tokens = (
    nlp.Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Stage 3: pretrained sequence classifier; its prediction lands in "label".
to_label = (
    nlp.BertForSequenceClassification.pretrained(
        "bert_sequence_classifier_ibd_onnx", "en", "clinical/models"
    )
    .setInputCols(['document', 'token'])
    .setOutputCol("label")
)

ibd_pipeline = nlp.Pipeline(stages=[to_document, to_tokens, to_label])

# Two sample sentences, one per row, exposed as a single "text" column.
example_texts = [
    ["Patient with inflammatory bowel disease and colon inflammation."],
    ["Normal colonoscopy findings, no evidence of inflammation."],
]
example_df = spark.createDataFrame(example_texts).toDF("text")

trained = ibd_pipeline.fit(example_df)
scored = trained.transform(example_df)

# Print each input text next to its predicted label, without truncating cells.
scored.select("text", "label.result").show(truncate=False)

import com.johnsnowlabs.nlp.base._
import com.johnsnowlabs.nlp.annotator._
import org.apache.spark.ml.Pipeline

// `.toDF` on a local Seq comes from the SparkSession implicits. Import them
// explicitly so the snippet also compiles outside shells that pre-import them.
// Assumes the active SparkSession is bound to a stable `spark` value.
import spark.implicits._

// Stage 1: read the "text" column and emit a "document" annotation column.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Stage 2: tokenize the "document" column into a "token" column.
val tokenizer = new Tokenizer()
  .setInputCols(Array("document"))
  .setOutputCol("token")

// Stage 3: pretrained sequence classifier; its prediction lands in "label".
val bfscLoaded = BertForSequenceClassification.pretrained("bert_sequence_classifier_ibd_onnx", "en", "clinical/models")
  .setInputCols(Array("document", "token"))
  .setOutputCol("label")

val pipeline = new Pipeline().setStages(Array(
  documentAssembler,
  tokenizer,
  bfscLoaded
))

// Two sample sentences, one per row, exposed as a single "text" column.
// The elements are plain strings; wrapping each in parentheses added nothing.
val data = Seq(
  "Patient with inflammatory bowel disease and colon inflammation.",
  "Normal colonoscopy findings, no evidence of inflammation."
).toDF("text")

val model = pipeline.fit(data)
val result = model.transform(data)

// Print each input text next to its predicted label, without truncating cells.
result.select("text", "label.result").show(truncate = false)

Results


+---------------------------------------------------------------+---------+
|text                                                           |result   |
+---------------------------------------------------------------+---------+
|Patient with inflammatory bowel disease and colon inflammation.|[IBD]    |
|Normal colonoscopy findings, no evidence of inflammation.      |[Not IBD]|
+---------------------------------------------------------------+---------+

Model Information

Model Name: bert_sequence_classifier_ibd_onnx
Compatibility: Healthcare NLP 6.1.0+
License: Licensed
Edition: Official
Input Labels: [document, token]
Output Labels: [label]
Language: en
Size: 405.5 MB