Description
This ZeroShot model is trained to extract and link entities in a document. Users need to define entity labels or an input schema, as explained in the example section.
How to use
# Stage 1: wrap the raw "text" column into a "document" annotation column.
document_assembler = DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Stage 2: load the pretrained zero-shot multi-task model and declare every task
# it should run on the "document" column. All outputs are written to the single
# "extractions" column.
# NOTE(review): 0.4 is presumably the minimum confidence an entity needs to be
# kept -- confirm against the annotator's API documentation.
zero_shot = PretrainedZeroShotMultiTask.pretrained("zeroshot_multitask_generic", "en", "clinical/models")\
    .setInputCols(["document"])\
    .setOutputCol("extractions")\
    .setEntityThreshold(0.4)\
    .setEntities(["problem", "treatment", "test", "body_part", "drug_dosage", "severity", "date"])\
    .setStructures([
        # Each structure is a (name, field specs) pair.
        # NOTE(review): field specs look like "field::type::description" or
        # "field::[allowed|values]" -- confirm the exact grammar in the API docs.
        ("problem_info", [
            "text::str::medical condition, symptom, or diagnosis",
            "assertion::[present|absent|hypothetical|possible|conditional|associated_with_someone_else]",
        ]),
        ("treatment_info", [
            "text::str::drug, medication, procedure, or therapy",
            "assertion::[present|absent|hypothetical|possible|conditional|associated_with_someone_else]",
        ]),
    ])\
    .setClassifications([
        # Each classification task is a (task name, candidate labels) pair.
        ("cancer_type", ["lung", "breast", "colorectal", "other", "not_cancer"]),
        ("urgency", ["routine", "urgent", "emergent"]),
    ])\
    .setRelations([
        # Relation labels the model should look for.
        "treatment_improves_problem",
        "treatment_causes_problem",
        "treatment_administered_for_problem",
        "test_reveals_problem",
        "test_conducted_for_problem",
    ])

# Chain both stages into a single Spark ML pipeline.
pipeline = Pipeline(
    stages = [
        document_assembler,
        zero_shot
    ])

# Sample clinical note. A plain string is enough: nothing is interpolated.
text = """
Jennifer Smith is 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to presentation and subsequent type two diabetes mellitus (T2DM), one prior episode of HTG-induced pancreatitis three years prior to presentation, and associated with an acute hepatitis, presented with a one-week history of polyuria, poor appetite, and vomiting.
"""

# One-row DataFrame whose only column is named "text", matching the assembler's input column.
data = spark.createDataFrame([[text]]).toDF("text")

results = pipeline.fit(data).transform(data)

# Show the model output. The column name must match setOutputCol("extractions") above.
results.select("extractions").show(truncate=False)
from johnsnowlabs import nlp, medical
# Stage 1: wrap the raw "text" column into a "document" annotation column.
document_assembler = nlp.DocumentAssembler()\
    .setInputCol("text")\
    .setOutputCol("document")

# Stage 2: load the pretrained zero-shot multi-task model and declare every task
# it should run on the "document" column. All outputs are written to the single
# "extractions" column.
# NOTE(review): 0.4 is presumably the minimum confidence an entity needs to be
# kept -- confirm against the annotator's API documentation.
zero_shot = medical.PretrainedZeroShotMultiTask.pretrained("zeroshot_multitask_generic", "en", "clinical/models")\
    .setInputCols(["document"])\
    .setOutputCol("extractions")\
    .setEntityThreshold(0.4)\
    .setEntities(["problem", "treatment", "test", "body_part", "drug_dosage", "severity", "date"])\
    .setStructures([
        # Each structure is a (name, field specs) pair.
        # NOTE(review): field specs look like "field::type::description" or
        # "field::[allowed|values]" -- confirm the exact grammar in the API docs.
        ("problem_info", [
            "text::str::medical condition, symptom, or diagnosis",
            "assertion::[present|absent|hypothetical|possible|conditional|associated_with_someone_else]",
        ]),
        ("treatment_info", [
            "text::str::drug, medication, procedure, or therapy",
            "assertion::[present|absent|hypothetical|possible|conditional|associated_with_someone_else]",
        ]),
    ])\
    .setClassifications([
        # Each classification task is a (task name, candidate labels) pair.
        ("cancer_type", ["lung", "breast", "colorectal", "other", "not_cancer"]),
        ("urgency", ["routine", "urgent", "emergent"]),
    ])\
    .setRelations([
        # Relation labels the model should look for.
        "treatment_improves_problem",
        "treatment_causes_problem",
        "treatment_administered_for_problem",
        "test_reveals_problem",
        "test_conducted_for_problem",
    ])

# Chain both stages into a single pipeline.
pipeline = nlp.Pipeline(
    stages = [
        document_assembler,
        zero_shot
    ])

# Sample clinical note. A plain string is enough: nothing is interpolated.
text = """
Jennifer Smith is 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to presentation and subsequent type two diabetes mellitus (T2DM), one prior episode of HTG-induced pancreatitis three years prior to presentation, and associated with an acute hepatitis, presented with a one-week history of polyuria, poor appetite, and vomiting.
"""

# One-row DataFrame whose only column is named "text", matching the assembler's input column.
data = spark.createDataFrame([[text]]).toDF("text")

results = pipeline.fit(data).transform(data)

# Show the model output. The column name must match setOutputCol("extractions") above.
results.select("extractions").show(truncate=False)
// Stage 1: wrap the raw "text" column into a "document" annotation column.
val document_assembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

// Stage 2: load the pretrained zero-shot multi-task model and declare every task
// it should run on the "document" column. All outputs are written to the single
// "extractions" column.
// NOTE(review): 0.4 is presumably the minimum confidence an entity needs to be
// kept -- confirm against the annotator's API documentation.
val zero_shot = PretrainedZeroShotMultiTask.pretrained("zeroshot_multitask_generic", "en", "clinical/models")
  .setInputCols("document")
  .setOutputCol("extractions")
  .setEntityThreshold(0.4)
  .setEntities(Array("problem", "treatment", "test", "body_part", "drug_dosage", "severity", "date"))
  .setStructures(Array(
    // Each structure is a (name, field specs) pair.
    // NOTE(review): field specs look like "field::type::description" or
    // "field::[allowed|values]" -- confirm the exact grammar in the API docs.
    ("problem_info", Array(
      "text::str::medical condition, symptom, or diagnosis",
      "assertion::[present|absent|hypothetical|possible|conditional|associated_with_someone_else]"
    )),
    ("treatment_info", Array(
      "text::str::drug, medication, procedure, or therapy",
      "assertion::[present|absent|hypothetical|possible|conditional|associated_with_someone_else]"
    ))
  ))
  .setClassifications(Array(
    // Each classification task is a (task name, candidate labels) pair.
    ("cancer_type", Array("lung", "breast", "colorectal", "other", "not_cancer")),
    ("urgency", Array("routine", "urgent", "emergent"))
  ))
  .setRelations(Array(
    // Relation labels the model should look for.
    "treatment_improves_problem",
    "treatment_causes_problem",
    "treatment_administered_for_problem",
    "test_reveals_problem",
    "test_conducted_for_problem"
  ))

// Chain both stages into a single Spark ML pipeline.
val pipeline = new Pipeline().setStages(Array(
  document_assembler,
  zero_shot
))

// Sample clinical note. A plain triple-quoted string is enough: nothing is interpolated.
val text = """
Jennifer Smith is 28-year-old female with a history of gestational diabetes mellitus diagnosed eight years prior to presentation and subsequent type two diabetes mellitus (T2DM), one prior episode of HTG-induced pancreatitis three years prior to presentation, and associated with an acute hepatitis, presented with a one-week history of polyuria, poor appetite, and vomiting.
"""

// One-row DataFrame whose only column is named "text", matching the assembler's input column.
// NOTE(review): `toDF` on a Seq relies on `import spark.implicits._` being in scope; it is not shown in this snippet.
val data = Seq(text).toDF("text")

val results = pipeline.fit(data).transform(data)

// Show the model output, as the Python examples do. The column name matches setOutputCol("extractions").
results.select("extractions").show(truncate = false)
Results
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
| extractions|
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|[{chunk, 62, 64, $99, {sentence -> 0, entity -> drug_dosage, confidence -> 0.54619294}, []}, {category, 0, 91, other, {sentence -> 0, confidence -> 0.4580742, task -> cancer_typ...|
+------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
Model Information
| Model Name: | zeroshot_multitask_generic |
| Compatibility: | Healthcare NLP 6.3.0+ |
| License: | Licensed |
| Edition: | Official |
| Language: | en |
| Size: | 844.4 MB |