class LegalClassifierDLApproach extends ClassifierDLApproach
MedicalBertForSequenceClassification can load Bert Models with sequence classification/regression head on top (a linear layer on top of the pooled output) e.g. for multi-class document classification tasks.
Pretrained models can be loaded with the `pretrained` method of the companion object:
val sequenceClassifier = MedicalBertForSequenceClassification.pretrained()
  .setInputCols("token", "document")
  .setOutputCol("label")
The default model is "bert_sequence_classifier_ade" if no name is provided.
For available pretrained models please see the Models Hub.
Models from the HuggingFace 🤗 Transformers library are also compatible with Spark NLP 🚀. The Spark NLP Workshop example shows how to import them: https://github.com/JohnSnowLabs/spark-nlp/discussions/5669
Example
import spark.implicits._
import com.johnsnowlabs.nlp.base._
import com.johnsnowlabs.nlp.annotator._
import org.apache.spark.ml.Pipeline

val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

val tokenizer = new Tokenizer()
  .setInputCols("document")
  .setOutputCol("token")

val sequenceClassifier = MedicalBertForSequenceClassification.pretrained()
  .setInputCols("token", "document")
  .setOutputCol("label")
  .setCaseSensitive(true)

val pipeline = new Pipeline().setStages(Array(
  documentAssembler,
  tokenizer,
  sequenceClassifier
))

val data = Seq("John Lenon was born in London and lived in Paris. My name is Sarah and I live in London").toDF("text")
val result = pipeline.fit(data).transform(data)

result.select("label.result").show(false)
+--------------------+
|result              |
+--------------------+
|[True, False]       |
+--------------------+
- See also
MedicalBertForSequenceClassification for sequence-level classification
Annotators Main Page for a list of transformer based classifiers
- Alphabetic
- By Inheritance
- LegalClassifierDLApproach
- ClassifierDLApproach
- ClassifierEncoder
- EvaluationDLParams
- ParamsAndFeaturesWritable
- HasFeatures
- AnnotatorApproach
- CanBeLazy
- DefaultParamsWritable
- MLWritable
- HasOutputAnnotatorType
- HasOutputAnnotationCol
- HasInputAnnotationCols
- Estimator
- PipelineStage
- Logging
- Params
- Serializable
- Serializable
- Identifiable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Instance Constructors
Type Members
-
type
AnnotatorType = String
- Definition Classes
- HasOutputAnnotatorType
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
$[T](param: Param[T]): T
- Attributes
- protected
- Definition Classes
- Params
-
def
$$[T](feature: StructFeature[T]): T
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
$$[K, V](feature: MapFeature[K, V]): Map[K, V]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
$$[T](feature: SetFeature[T]): Set[T]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
$$[T](feature: ArrayFeature[T]): Array[T]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
_fit(dataset: Dataset[_], recursiveStages: Option[PipelineModel]): ClassifierDLModel
- Attributes
- protected
- Definition Classes
- AnnotatorApproach
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
val
batchSize: IntParam
- Definition Classes
- ClassifierEncoder
-
def
beforeTraining(spark: SparkSession): Unit
- Definition Classes
- AnnotatorApproach
-
def
buildDatasetWithLabels(dataset: Dataset[_], inputCols: String): (DataFrame, Array[String])
- Attributes
- protected
- Definition Classes
- ClassifierEncoder
-
final
def
checkSchema(schema: StructType, inputAnnotatorType: String): Boolean
- Attributes
- protected
- Definition Classes
- HasInputAnnotationCols
-
final
def
clear(param: Param[_]): LegalClassifierDLApproach.this.type
- Definition Classes
- Params
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
val
configProtoBytes: IntArrayParam
- Definition Classes
- ClassifierEncoder
-
final
def
copy(extra: ParamMap): Estimator[ClassifierDLModel]
- Definition Classes
- AnnotatorApproach → Estimator → PipelineStage → Params
-
def
copyValues[T <: Params](to: T, extra: ParamMap): T
- Attributes
- protected
- Definition Classes
- Params
-
final
def
defaultCopy[T <: Params](extra: ParamMap): T
- Attributes
- protected
- Definition Classes
- Params
-
val
description: String
- Definition Classes
- ClassifierDLApproach → AnnotatorApproach
-
val
dropout: FloatParam
- Definition Classes
- ClassifierDLApproach
-
val
enableOutputLogs: BooleanParam
- Definition Classes
- EvaluationDLParams
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
val
evaluationLogExtended: BooleanParam
- Definition Classes
- EvaluationDLParams
-
def
explainParam(param: Param[_]): String
- Definition Classes
- Params
-
def
explainParams(): String
- Definition Classes
- Params
-
def
extractInputs(encoder: ClassifierDatasetEncoder, dataframe: DataFrame): (Array[Array[Float]], Array[String])
- Attributes
- protected
- Definition Classes
- ClassifierEncoder
-
final
def
extractParamMap(): ParamMap
- Definition Classes
- Params
-
final
def
extractParamMap(extra: ParamMap): ParamMap
- Definition Classes
- Params
-
val
features: ArrayBuffer[Feature[_, _, _]]
- Definition Classes
- HasFeatures
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
final
def
fit(dataset: Dataset[_]): ClassifierDLModel
- Definition Classes
- AnnotatorApproach → Estimator
-
def
fit(dataset: Dataset[_], paramMaps: Seq[ParamMap]): Seq[ClassifierDLModel]
- Definition Classes
- Estimator
- Annotations
- @Since( "2.0.0" )
-
def
fit(dataset: Dataset[_], paramMap: ParamMap): ClassifierDLModel
- Definition Classes
- Estimator
- Annotations
- @Since( "2.0.0" )
-
def
fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): ClassifierDLModel
- Definition Classes
- Estimator
- Annotations
- @Since( "2.0.0" ) @varargs()
-
def
get[T](feature: StructFeature[T]): Option[T]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
get[K, V](feature: MapFeature[K, V]): Option[Map[K, V]]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
get[T](feature: SetFeature[T]): Option[Set[T]]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
get[T](feature: ArrayFeature[T]): Option[Array[T]]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
get[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
def
getBatchSize: Int
- Definition Classes
- ClassifierEncoder
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getConfigProtoBytes: Option[Array[Byte]]
- Definition Classes
- ClassifierEncoder
-
final
def
getDefault[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
def
getDropout: Float
- Definition Classes
- ClassifierDLApproach
-
def
getEnableOutputLogs: Boolean
- Definition Classes
- EvaluationDLParams
-
def
getInputCols: Array[String]
- Definition Classes
- HasInputAnnotationCols
-
def
getLabelColumn: String
- Definition Classes
- ClassifierEncoder
-
def
getLazyAnnotator: Boolean
- Definition Classes
- CanBeLazy
-
def
getLr: Float
- Definition Classes
- ClassifierEncoder
-
def
getMaxEpochs: Int
- Definition Classes
- ClassifierEncoder
-
final
def
getOrDefault[T](param: Param[T]): T
- Definition Classes
- Params
-
final
def
getOutputCol: String
- Definition Classes
- HasOutputAnnotationCol
-
def
getOutputLogsPath: String
- Definition Classes
- EvaluationDLParams
-
def
getParam(paramName: String): Param[Any]
- Definition Classes
- Params
-
def
getRandomSeed: Int
- Definition Classes
- ClassifierEncoder
-
def
getValidationSplit: Float
- Definition Classes
- EvaluationDLParams
-
final
def
hasDefault[T](param: Param[T]): Boolean
- Definition Classes
- Params
-
def
hasParam(paramName: String): Boolean
- Definition Classes
- Params
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
val
inputAnnotatorTypes: Array[AnnotatorType]
- Definition Classes
- ClassifierDLApproach → HasInputAnnotationCols
-
final
val
inputCols: StringArrayParam
- Attributes
- protected
- Definition Classes
- HasInputAnnotationCols
-
final
def
isDefined(param: Param[_]): Boolean
- Definition Classes
- Params
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
isSet(param: Param[_]): Boolean
- Definition Classes
- Params
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
val
labelColumn: Param[String]
- Definition Classes
- ClassifierEncoder
-
val
lazyAnnotator: BooleanParam
- Definition Classes
- CanBeLazy
-
def
loadSavedModel(): TensorflowWrapper
- Definition Classes
- ClassifierDLApproach
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
val
lr: FloatParam
- Definition Classes
- ClassifierEncoder
-
val
maxEpochs: IntParam
- Definition Classes
- ClassifierEncoder
-
def
msgHelper(schema: StructType): String
- Attributes
- protected
- Definition Classes
- HasInputAnnotationCols
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
onTrained(model: ClassifierDLModel, spark: SparkSession): Unit
- Definition Classes
- AnnotatorApproach
-
def
onWrite(path: String, spark: SparkSession): Unit
- Attributes
- protected
- Definition Classes
- ParamsAndFeaturesWritable
-
val
optionalInputAnnotatorTypes: Array[String]
- Definition Classes
- HasInputAnnotationCols
-
val
outputAnnotatorType: String
- Definition Classes
- ClassifierDLApproach → HasOutputAnnotatorType
-
final
val
outputCol: Param[String]
- Attributes
- protected
- Definition Classes
- HasOutputAnnotationCol
-
val
outputLogsPath: Param[String]
- Definition Classes
- EvaluationDLParams
-
lazy val
params: Array[Param[_]]
- Definition Classes
- Params
-
val
randomSeed: IntParam
- Definition Classes
- ClassifierEncoder
-
def
save(path: String): Unit
- Definition Classes
- MLWritable
- Annotations
- @Since( "1.6.0" ) @throws( ... )
-
def
set[T](feature: StructFeature[T], value: T): LegalClassifierDLApproach.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
set[K, V](feature: MapFeature[K, V], value: Map[K, V]): LegalClassifierDLApproach.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
set[T](feature: SetFeature[T], value: Set[T]): LegalClassifierDLApproach.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
set[T](feature: ArrayFeature[T], value: Array[T]): LegalClassifierDLApproach.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
set(paramPair: ParamPair[_]): LegalClassifierDLApproach.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
set(param: String, value: Any): LegalClassifierDLApproach.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
set[T](param: Param[T], value: T): LegalClassifierDLApproach.this.type
- Definition Classes
- Params
-
def
setBatchSize(batch: Int): LegalClassifierDLApproach.this.type
- Definition Classes
- ClassifierEncoder
-
def
setConfigProtoBytes(bytes: Array[Int]): LegalClassifierDLApproach.this.type
- Definition Classes
- ClassifierEncoder
-
def
setDefault[T](feature: StructFeature[T], value: () ⇒ T): LegalClassifierDLApproach.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
setDefault[K, V](feature: MapFeature[K, V], value: () ⇒ Map[K, V]): LegalClassifierDLApproach.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
setDefault[T](feature: SetFeature[T], value: () ⇒ Set[T]): LegalClassifierDLApproach.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
setDefault[T](feature: ArrayFeature[T], value: () ⇒ Array[T]): LegalClassifierDLApproach.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
setDefault(paramPairs: ParamPair[_]*): LegalClassifierDLApproach.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
setDefault[T](param: Param[T], value: T): LegalClassifierDLApproach.this.type
- Attributes
- protected[org.apache.spark.ml]
- Definition Classes
- Params
-
def
setDropout(dropout: Float): LegalClassifierDLApproach.this.type
- Definition Classes
- ClassifierDLApproach
-
def
setEnableOutputLogs(enableOutputLogs: Boolean): LegalClassifierDLApproach.this.type
- Definition Classes
- EvaluationDLParams
-
def
setEvaluationLogExtended(evaluationLogExtended: Boolean): LegalClassifierDLApproach.this.type
- Definition Classes
- EvaluationDLParams
-
final
def
setInputCols(value: String*): LegalClassifierDLApproach.this.type
- Definition Classes
- HasInputAnnotationCols
-
def
setInputCols(value: Array[String]): LegalClassifierDLApproach.this.type
- Definition Classes
- HasInputAnnotationCols
-
def
setLabelColumn(column: String): LegalClassifierDLApproach.this.type
- Definition Classes
- ClassifierEncoder
-
def
setLazyAnnotator(value: Boolean): LegalClassifierDLApproach.this.type
- Definition Classes
- CanBeLazy
-
def
setLr(lr: Float): LegalClassifierDLApproach.this.type
- Definition Classes
- ClassifierEncoder
-
def
setMaxEpochs(epochs: Int): LegalClassifierDLApproach.this.type
- Definition Classes
- ClassifierEncoder
-
final
def
setOutputCol(value: String): LegalClassifierDLApproach.this.type
- Definition Classes
- HasOutputAnnotationCol
-
def
setOutputLogsPath(path: String): LegalClassifierDLApproach.this.type
- Definition Classes
- EvaluationDLParams
-
def
setRandomSeed(seed: Int): LegalClassifierDLApproach.this.type
- Definition Classes
- ClassifierEncoder
-
def
setTestDataset(er: ExternalResource): LegalClassifierDLApproach.this.type
- Definition Classes
- EvaluationDLParams
-
def
setTestDataset(path: String, readAs: Format, options: Map[String, String]): LegalClassifierDLApproach.this.type
- Definition Classes
- EvaluationDLParams
-
def
setValidationSplit(validationSplit: Float): LegalClassifierDLApproach.this.type
- Definition Classes
- EvaluationDLParams
-
def
setVerbose(verbose: Level): LegalClassifierDLApproach.this.type
- Definition Classes
- EvaluationDLParams
-
def
setVerbose(verbose: Int): LegalClassifierDLApproach.this.type
- Definition Classes
- EvaluationDLParams
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
val
testDataset: ExternalResourceParam
- Definition Classes
- EvaluationDLParams
-
def
toString(): String
- Definition Classes
- Identifiable → AnyRef → Any
-
def
train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): LegalClassifierDLModel
- Definition Classes
- LegalClassifierDLApproach → ClassifierDLApproach → AnnotatorApproach
-
final
def
transformSchema(schema: StructType): StructType
- Definition Classes
- AnnotatorApproach → PipelineStage
-
def
transformSchema(schema: StructType, logging: Boolean): StructType
- Attributes
- protected
- Definition Classes
- PipelineStage
- Annotations
- @DeveloperApi()
-
val
uid: String
- Definition Classes
- LegalClassifierDLApproach → ClassifierDLApproach → Identifiable
-
def
validate(schema: StructType): Boolean
- Attributes
- protected
- Definition Classes
- AnnotatorApproach
-
val
validationSplit: FloatParam
- Definition Classes
- EvaluationDLParams
-
val
verbose: IntParam
- Definition Classes
- EvaluationDLParams
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
write: MLWriter
- Definition Classes
- ParamsAndFeaturesWritable → DefaultParamsWritable → MLWritable