Description
Detect adverse reactions of drugs in texts exchanged over Twitter. This model is trained with the BertForTokenClassification method from the transformers library and imported into Spark NLP.
Predicted Entities
O, B-ADE, I-ADE, PAD
How to use
from sparknlp.base import DocumentAssembler
from sparknlp_jsl.annotator import MedicalBertForTokenClassifier, NerConverterInternal
from sparknlp.annotator import Tokenizer, NerConverter
from pyspark.sql.types import StringType
from pyspark.ml import Pipeline

# NOTE: this example expects an already-started Spark session bound to `spark`.

# Wrap the raw "text" column into a "document" annotation column.
document_assembler = (
    DocumentAssembler()
    .setInputCol("text")
    .setOutputCol("document")
)

# Split each document into tokens.
tokenizer = (
    Tokenizer()
    .setInputCols(["document"])
    .setOutputCol("token")
)

# Load the pretrained ADE token classifier; it writes its labels to "ner".
token_classifier = (
    MedicalBertForTokenClassifier.pretrained(
        "bert_token_classifier_ade_tweet_binary_onnx",
        "en",
        "clinical/models"
    )
    .setInputCols(["token", "document"])
    .setOutputCol("ner")
    .setCaseSensitive(True)
)

# Turn the token-level "ner" output into the "ner_chunk" column.
ner_converter = (
    NerConverterInternal()
    .setInputCols(["document", "token", "ner"])
    .setOutputCol("ner_chunk")
)

pipeline = Pipeline(stages=[
    document_assembler,
    tokenizer,
    token_classifier,
    ner_converter
])

# With an atomic StringType() schema every row must be a plain string
# (not a tuple); the resulting single column is then renamed to "text".
data = spark.createDataFrame(
    [
        "I used to be on paxil but that made me more depressed and prozac made me angry",
        "Maybe cos of the insulin blocking effect of seroquel but i do feel sugar crashes when eat fast carbs."
    ],
    StringType()
).toDF("text")

model = pipeline.fit(data)
result = model.transform(data)
from johnsnowlabs import nlp, medical

# NOTE: this example expects an already-started Spark session bound to `spark`.

# Wrap the raw "text" column into a "document" annotation column.
document_assembler = nlp.DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Split each document into tokens.
tokenizer = nlp.Tokenizer()\
    .setInputCols(["document"])\
    .setOutputCol("token")

# Load the pretrained ADE token classifier; it writes its labels to "ner".
token_classifier = medical.BertForTokenClassifier.pretrained(
        "bert_token_classifier_ade_tweet_binary_onnx",
        "en",
        "clinical/models"
    )\
    .setInputCols(["token", "document"])\
    .setOutputCol("ner")\
    .setCaseSensitive(True)

# Turn the token-level "ner" output into the "ner_chunk" column.
ner_converter = medical.NerConverterInternal()\
    .setInputCols(["document", "token", "ner"])\
    .setOutputCol("ner_chunk")

# Use the Pipeline exposed through the `nlp` namespace so that the snippet
# only depends on the names it imports itself.
pipeline = nlp.Pipeline(stages=[
    document_assembler,
    tokenizer,
    token_classifier,
    ner_converter
])

# One single-element row per input text; the schema is inferred and the
# resulting column is renamed to "text".
data = spark.createDataFrame(
    [
        ["I used to be on paxil but that made me more depressed and prozac made me angry"],
        ["Maybe cos of the insulin blocking effect of seroquel but i do feel sugar crashes when eat fast carbs."]
    ]
).toDF("text")

model = pipeline.fit(data)
result = model.transform(data)
import com.johnsnowlabs.nlp.base.DocumentAssembler
import com.johnsnowlabs.nlp.annotators.Tokenizer
import com.johnsnowlabs.nlp.annotators.ner.NerConverterInternal
import com.johnsnowlabs.nlp.annotators.classification.MedicalBertForTokenClassifier
import org.apache.spark.ml.Pipeline

// NOTE: this example expects an already-started Spark session bound to `spark`.

// Wrap the raw "text" column into a "document" annotation column.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Split each document into tokens.
val tokenizer = new Tokenizer()
  .setInputCols("document")
  .setOutputCol("token")

// Load the pretrained ADE token classifier; it writes its labels to "ner".
val tokenClassifier = MedicalBertForTokenClassifier
  .pretrained("bert_token_classifier_ade_tweet_binary_onnx", "en", "clinical/models")
  .setInputCols(Array("token", "document"))
  .setOutputCol("ner")
  .setCaseSensitive(true)

// Turn the token-level "ner" output into the "ner_chunk" column.
val nerConverter = new NerConverterInternal()
  .setInputCols(Array("document", "token", "ner"))
  .setOutputCol("ner_chunk")

val pipeline = new Pipeline()
  .setStages(Array(
    documentAssembler,
    tokenizer,
    tokenClassifier,
    nerConverter
  ))

// Two example tweets, one per row, in a single column named "text".
val data = spark.createDataFrame(Seq(
  Tuple1("I used to be on paxil but that made me more depressed and prozac made me angry"),
  Tuple1("Maybe cos of the insulin blocking effect of seroquel but i do feel sugar crashes when eat fast carbs.")
)).toDF("text")

val model = pipeline.fit(data)
val result = model.transform(data)
Results
+----------------+------+
|text            |entity|
+----------------+------+
|depressed       |ADE   |
|angry           |ADE   |
|insulin blocking|ADE   |
|sugar crashes   |ADE   |
+----------------+------+
Model Information
| Model Name: | bert_token_classifier_ade_tweet_binary_onnx | 
| Compatibility: | Healthcare NLP 6.1.1+ | 
| License: | Licensed | 
| Edition: | Official | 
| Input Labels: | [document, token] | 
| Output Labels: | [ner] | 
| Language: | en | 
| Size: | 403.7 MB | 
| Case sensitive: | true | 
| Max sentence length: | 128 |