Detect Adverse Drug Events (MedicalBertForTokenClassification - ONNX)

Description

Detect adverse drug reactions in texts exchanged over Twitter. This model was trained with the BertForTokenClassification method from the transformers library and imported into Spark NLP.

Predicted Entities

O, B-ADE, I-ADE, PAD

Copy S3 URI

How to use

# End-to-end example: detect adverse drug events (ADE) in tweets with the
# Spark NLP + Healthcare NLP (sparknlp_jsl) Python APIs.
# Assumes an already-started, licensed Spark session is available as `spark`.
from sparknlp.base import DocumentAssembler
from sparknlp.annotator import Tokenizer
# NerConverterInternal is the converter actually used below; it ships with
# the licensed sparknlp_jsl package, not with open-source sparknlp.
from sparknlp_jsl.annotator import MedicalBertForTokenClassifier, NerConverterInternal
from pyspark.sql.types import StringType
from pyspark.ml import Pipeline

# Wrap the raw "text" column into a "document" annotation column.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Split each document into tokens.
tokenizer = (
    Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Pretrained ONNX token classifier that emits per-token tags into "ner".
token_classifier = (
    MedicalBertForTokenClassifier.pretrained(
        "bert_token_classifier_ade_tweet_binary_onnx",
        "en",
        "clinical/models"
    )
    .setInputCols(["token", "document"])
    .setOutputCol("ner")
    .setCaseSensitive(True)
)

# Turn the token-level tags in "ner" into chunk annotations in "ner_chunk".
ner_converter = (
    NerConverterInternal()
    .setInputCols(["document", "token", "ner"])
    .setOutputCol("ner_chunk")
)

pipeline = Pipeline(stages=[
    document_assembler,
    tokenizer,
    token_classifier,
    ner_converter
])

# With an atomic schema such as StringType(), every record must be a plain
# string (not a 1-tuple); the resulting single column is renamed to "text".
data = spark.createDataFrame(
    [
        "I used to be on paxil but that made me more depressed and prozac made me angry",
        "Maybe cos of the insulin blocking effect of seroquel but i do feel sugar crashes when eat fast carbs."
    ],
    StringType()
).toDF("text")

model = pipeline.fit(data)
result = model.transform(data)

# End-to-end example: detect adverse drug events (ADE) in tweets using the
# unified `johnsnowlabs` Python package.
# Assumes an already-started, licensed Spark session is available as `spark`.
from johnsnowlabs import nlp, medical

# Wrap the raw "text" column into a "document" annotation column.
document_assembler = nlp.DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")


# Split each document into tokens.
tokenizer = nlp.Tokenizer()\
    .setInputCols(["document"])\
    .setOutputCol("token")


# Pretrained ONNX token classifier that emits per-token tags into "ner".
token_classifier = medical.BertForTokenClassifier.pretrained(
        "bert_token_classifier_ade_tweet_binary_onnx",
        "en",
        "clinical/models"
    )\
    .setInputCols(["token", "document"])\
    .setOutputCol("ner")\
    .setCaseSensitive(True)


# Turn the token-level tags in "ner" into chunk annotations in "ner_chunk".
ner_converter = medical.NerConverterInternal()\
    .setInputCols(["document", "token", "ner"])\
    .setOutputCol("ner_chunk")


# Pipeline is reached through the `nlp` namespace so this snippet only
# depends on the single `johnsnowlabs` import above.
pipeline = nlp.Pipeline(stages=[
    document_assembler,
    tokenizer,
    token_classifier,
    ner_converter
])

# One single-element row per input text; no explicit schema is needed, and
# the inferred single column is renamed to "text".
data = spark.createDataFrame(
    [
        ["I used to be on paxil but that made me more depressed and prozac made me angry"],
        ["Maybe cos of the insulin blocking effect of seroquel but i do feel sugar crashes when eat fast carbs."]
    ]
).toDF("text")

model = pipeline.fit(data)
result = model.transform(data)

// End-to-end example: detect adverse drug events (ADE) in tweets with the
// Spark NLP + Healthcare NLP Scala APIs.
// Assumes an already-started, licensed Spark session is available as `spark`.
import com.johnsnowlabs.nlp.base.DocumentAssembler
import com.johnsnowlabs.nlp.annotators.Tokenizer
// The two licensed annotators used below: the converter that is actually
// instantiated is NerConverterInternal, and the medical classifier lives in
// the Healthcare NLP `classification` package.
import com.johnsnowlabs.nlp.annotators.ner.NerConverterInternal
import com.johnsnowlabs.nlp.annotators.classification.MedicalBertForTokenClassifier
import org.apache.spark.ml.Pipeline

// Wrap the raw "text" column into a "document" annotation column.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Split each document into tokens.
val tokenizer = new Tokenizer()
  .setInputCols("document")
  .setOutputCol("token")

// Pretrained ONNX token classifier that emits per-token tags into "ner".
val tokenClassifier = MedicalBertForTokenClassifier
  .pretrained("bert_token_classifier_ade_tweet_binary_onnx", "en", "clinical/models")
  .setInputCols(Array("token", "document"))
  .setOutputCol("ner")
  .setCaseSensitive(true)

// Turn the token-level tags in "ner" into chunk annotations in "ner_chunk".
val nerConverter = new NerConverterInternal()
  .setInputCols(Array("document", "token", "ner"))
  .setOutputCol("ner_chunk")

val pipeline = new Pipeline()
  .setStages(Array(
    documentAssembler,
    tokenizer,
    tokenClassifier,
    nerConverter
  ))

// Each Tuple1 becomes one single-column row; the column is renamed to "text".
val data = spark.createDataFrame(Seq(
  Tuple1("I used to be on paxil but that made me more depressed and prozac made me angry"),
  Tuple1("Maybe cos of the insulin blocking effect of seroquel but i do feel sugar crashes when eat fast carbs.")
)).toDF("text")

val model = pipeline.fit(data)
val result = model.transform(data)

Results


+----------------+------+
|text            |entity|
+----------------+------+
|depressed       |ADE   |
|angry           |ADE   |
|insulin blocking|ADE   |
|sugar crashes   |ADE   |
+----------------+------+

Model Information

Model Name: bert_token_classifier_ade_tweet_binary_onnx
Compatibility: Healthcare NLP 6.1.1+
License: Licensed
Edition: Official
Input Labels: [document, token]
Output Labels: [ner]
Language: en
Size: 403.7 MB
Case sensitive: true
Max sentence length: 128