com.johnsnowlabs.nlp.annotators.assertion.logreg

AssertionLogRegApproach

Companion object AssertionLogRegApproach

class AssertionLogRegApproach extends AnnotatorApproach[AssertionLogRegModel] with Windowing with CheckLicense

This is a classification method that uses the LogisticRegression algorithm. It contains all the methods for training an AssertionLogRegModel, together with trainWithChunk and trainWithStartEnd.

Example

Training with Glove Embeddings

First define pipeline stages to extract embeddings and text chunks

 val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

val tokenizer = new Tokenizer()
  .setInputCols("document")
  .setOutputCol("token")

val glove = WordEmbeddingsModel.pretrained("embeddings_clinical", "en", "clinical/models")
  .setInputCols("document", "token")
  .setOutputCol("word_embeddings")
  .setCaseSensitive(false)

val chunkAssembler = new Doc2Chunk()
  .setInputCols("document")
  .setChunkCol("target")
  .setOutputCol("chunk")

Then define the AssertionLogRegApproach annotator. A label column is required in the training dataset.

val assertion = new AssertionLogRegApproach()
  .setLabelCol("label")
  .setInputCols("document", "chunk", "word_embeddings")
  .setOutputCol("assertion")
  .setReg(0.01)
  .setBefore(11)
  .setAfter(13)
  .setStartCol("start")
  .setEndCol("end")

val assertionPipeline = new Pipeline().setStages(Array(
  documentAssembler,
  tokenizer,
  glove,
  chunkAssembler,
  assertion
))

val assertionModel = assertionPipeline.fit(dataset)

Linear Supertypes

CheckLicense, Windowing, AnnotatorApproach[AssertionLogRegModel], CanBeLazy, DefaultParamsWritable, MLWritable, HasOutputAnnotatorType, HasOutputAnnotationCol, HasInputAnnotationCols, Estimator[AssertionLogRegModel], PipelineStage, Logging, Params, Serializable, Serializable, Identifiable, AnyRef, Any

Ordering

Grouped
Alphabetic
By Inheritance

Inherited

AssertionLogRegApproach
CheckLicense
Windowing
AnnotatorApproach
CanBeLazy
DefaultParamsWritable
MLWritable
HasOutputAnnotatorType
HasOutputAnnotationCol
HasInputAnnotationCols
Estimator
PipelineStage
Logging
Params
Serializable
Serializable
Identifiable
AnyRef
Any

Hide All
Show All

Visibility

Public
All

Instance Constructors

new AssertionLogRegApproach()
new AssertionLogRegApproach(uid: String)
uid
a unique identifier for the instantiated AnnotatorModel

Type Members

type AnnotatorType = String

Definition Classes
HasOutputAnnotatorType
case class VectorizedChunk(vector: Vector, begin: Int, end: Int, sentenceId: Int, chunkId: Int) extends Product with Serializable

Attributes
protected
Definition Classes
Windowing

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def $[T](param: Param[T]): T

Attributes
protected
Definition Classes
Params
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def _fit(dataset: Dataset[_], recursiveStages: Option[PipelineModel]): AssertionLogRegModel

Attributes
protected
Definition Classes
AnnotatorApproach
lazy val after: Int

Definition Classes
AssertionLogRegApproach → Windowing
val afterParam: IntParam
Amount of tokens from the context after the target (Default: 10)
def applyWindow(tokenizedSentence: WordpieceEmbeddingsSentence, s: Int, e: Int, embeddingsDim: Int): Array[Double]

Definition Classes
Windowing
def applyWindowContext(tokenizedSentence: WordpieceEmbeddingsSentence, s: Int, e: Int, embeddingsDim: Int): (Array[Array[Float]], Array[Array[Float]], Array[Array[Float]])

Definition Classes
Windowing
def applyWindowUdf(embeddingsDim: Int): UserDefinedFunction

Definition Classes
Windowing
def applyWindowUdfChunk(embeddingsDim: Int): UserDefinedFunction

Definition Classes
Windowing
final def asInstanceOf[T0]: T0

Definition Classes
Any
lazy val before: Int

Definition Classes
AssertionLogRegApproach → Windowing
val beforeParam: IntParam
Amount of tokens from the context before the target (Default: 10)
def beforeTraining(spark: SparkSession): Unit

Definition Classes
AnnotatorApproach
final def checkSchema(schema: StructType, inputAnnotatorType: String): Boolean

Attributes
protected
Definition Classes
HasInputAnnotationCols
def checkValidEnvironment(spark: Option[SparkSession], scopes: Seq[String]): Unit

Definition Classes
CheckLicense
def checkValidScope(scope: String): Unit

Definition Classes
CheckLicense
def checkValidScopeAndEnvironment(scope: String, spark: Option[SparkSession], checkLp: Boolean): Unit

Definition Classes
CheckLicense
def checkValidScopesAndEnvironment(scopes: Seq[String], spark: Option[SparkSession], checkLp: Boolean): Unit

Definition Classes
CheckLicense
final def clear(param: Param[_]): AssertionLogRegApproach.this.type

Definition Classes
Params
def clone(): AnyRef

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( ... ) @native()
final def copy(extra: ParamMap): Estimator[AssertionLogRegModel]

Definition Classes
AnnotatorApproach → Estimator → PipelineStage → Params
def copyValues[T <: Params](to: T, extra: ParamMap): T

Attributes
protected
Definition Classes
Params
final def defaultCopy[T <: Params](extra: ParamMap): T

Attributes
protected
Definition Classes
Params
val description: String

Definition Classes
AssertionLogRegApproach → AnnotatorApproach
val eNetParam: DoubleParam
Elastic net parameter (Default: 0.9)
val endCol: Param[String]
Column that contains the token number for the end of the target
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def explainParam(param: Param[_]): String

Definition Classes
Params
def explainParams(): String

Definition Classes
Params
final def extractParamMap(): ParamMap

Definition Classes
Params
final def extractParamMap(extra: ParamMap): ParamMap

Definition Classes
Params
def finalize(): Unit

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def fit(dataset: Dataset[_]): AssertionLogRegModel

Definition Classes
AnnotatorApproach → Estimator
def fit(dataset: Dataset[_], paramMaps: Seq[ParamMap]): Seq[AssertionLogRegModel]

Definition Classes
Estimator
Annotations
@Since( "2.0.0" )
def fit(dataset: Dataset[_], paramMap: ParamMap): AssertionLogRegModel

Definition Classes
Estimator
Annotations
@Since( "2.0.0" )
def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): AssertionLogRegModel

Definition Classes
Estimator
Annotations
@Since( "2.0.0" ) @varargs()
final def get[T](param: Param[T]): Option[T]

Definition Classes
Params
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
Annotations
@native()
final def getDefault[T](param: Param[T]): Option[T]

Definition Classes
Params
def getInputCols: Array[String]

Definition Classes
HasInputAnnotationCols
def getLazyAnnotator: Boolean

Definition Classes
CanBeLazy
final def getOrDefault[T](param: Param[T]): T

Definition Classes
Params
final def getOutputCol: String

Definition Classes
HasOutputAnnotationCol
def getParam(paramName: String): Param[Any]

Definition Classes
Params
final def hasDefault[T](param: Param[T]): Boolean

Definition Classes
Params
def hasParam(paramName: String): Boolean

Definition Classes
Params
def hashCode(): Int

Definition Classes
AnyRef → Any
Annotations
@native()
def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean

Attributes
protected
Definition Classes
Logging
def initializeLogIfNecessary(isInterpreter: Boolean): Unit

Attributes
protected
Definition Classes
Logging
val inputAnnotatorTypes: Array[String]
Input annotator types: DOCUMENT, CHUNK, WORD_EMBEDDINGS
Input annotator types: DOCUMENT, CHUNK, WORD_EMBEDDINGS

Definition Classes
AssertionLogRegApproach → HasInputAnnotationCols
final val inputCols: StringArrayParam

Attributes
protected
Definition Classes
HasInputAnnotationCols
final def isDefined(param: Param[_]): Boolean

Definition Classes
Params
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def isSet(param: Param[_]): Boolean

Definition Classes
Params
def isTraceEnabled(): Boolean

Attributes
protected
Definition Classes
Logging
def l2norm(xs: Array[Double]): Double

Definition Classes
Windowing
val label: Param[String]
Column with one label per document
val lazyAnnotator: BooleanParam

Definition Classes
CanBeLazy
def log: Logger

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logDebug(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logError(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logInfo(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logName: String

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logTrace(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String, throwable: Throwable): Unit

Attributes
protected
Definition Classes
Logging
def logWarning(msg: ⇒ String): Unit

Attributes
protected
Definition Classes
Logging
val maxIter: IntParam
Max number of iterations for algorithm (Default: 26)
def msgHelper(schema: StructType): String

Attributes
protected
Definition Classes
HasInputAnnotationCols
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def normalize(vec: Array[Double]): Array[Double]

Definition Classes
Windowing
final def notify(): Unit

Definition Classes
AnyRef
Annotations
@native()
final def notifyAll(): Unit

Definition Classes
AnyRef
Annotations
@native()
def onTrained(model: AssertionLogRegModel, spark: SparkSession): Unit

Definition Classes
AnnotatorApproach
val optionalInputAnnotatorTypes: Array[String]

Definition Classes
HasInputAnnotationCols
val outputAnnotatorType: AnnotatorType
Output annotator types: ASSERTION
Output annotator types: ASSERTION

Definition Classes
AssertionLogRegApproach → HasOutputAnnotatorType
final val outputCol: Param[String]

Attributes
protected
Definition Classes
HasOutputAnnotationCol
lazy val params: Array[Param[_]]

Definition Classes
Params
val regParam: DoubleParam
Regularization parameter (Default: 0.00192)
def save(path: String): Unit

Definition Classes
MLWritable
Annotations
@Since( "1.6.0" ) @throws( ... )
final def set(paramPair: ParamPair[_]): AssertionLogRegApproach.this.type

Attributes
protected
Definition Classes
Params
final def set(param: String, value: Any): AssertionLogRegApproach.this.type

Attributes
protected
Definition Classes
Params
final def set[T](param: Param[T], value: T): AssertionLogRegApproach.this.type

Definition Classes
Params
def setAfter(a: Int): AssertionLogRegApproach.this.type
Amount of tokens from the context after the target (Default: 10)
def setBefore(b: Int): AssertionLogRegApproach.this.type
Amount of tokens from the context before the target (Default: 10)
final def setDefault(paramPairs: ParamPair[_]*): AssertionLogRegApproach.this.type

Attributes
protected
Definition Classes
Params
final def setDefault[T](param: Param[T], value: T): AssertionLogRegApproach.this.type

Attributes
protected[org.apache.spark.ml]
Definition Classes
Params
def setEndCol(end: String): AssertionLogRegApproach.this.type
Column that contains the token number for the end of the target
def setEnet(enet: Double): AssertionLogRegApproach.this.type
Elastic net parameter (Default: 0.9)
final def setInputCols(value: String*): AssertionLogRegApproach.this.type

Definition Classes
HasInputAnnotationCols
def setInputCols(value: Array[String]): AssertionLogRegApproach.this.type

Definition Classes
HasInputAnnotationCols
def setLabelCol(label: String): AssertionLogRegApproach.this.type
Column with one label per document
def setLazyAnnotator(value: Boolean): AssertionLogRegApproach.this.type

Definition Classes
CanBeLazy
def setMaxIter(max: Int): AssertionLogRegApproach.this.type
Max number of iterations for algorithm (Default: 26)
final def setOutputCol(value: String): AssertionLogRegApproach.this.type

Definition Classes
HasOutputAnnotationCol
def setReg(lambda: Double): AssertionLogRegApproach.this.type
Regularization parameter (Default: 0.00192)
def setStartCol(start: String): AssertionLogRegApproach.this.type
Column that contains the token number for the start of the target
val startCol: Param[String]
Column that contains the token number for the start of the target
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
Identifiable → AnyRef → Any
def tokenIndexToChunkIndex(doc: Array[TokenPieceEmbeddings], start: Int, end: Int): (Int, Int)

Definition Classes
Windowing
def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel] = None): AssertionLogRegModel
This is a main point of interest of this class.
This is a main point of interest of this class. It trains on the dataset with the recursive pipeline, using the methods trainWithChunk() and trainWithStartEnd(). The choice of training method depends on the startCol value of this approach.
dataset
a collection of inputs to train
recursivePipeline
an instance of PipelineModel
returns
an instance of trained AssertionLogRegModel

Definition Classes
AssertionLogRegApproach → AnnotatorApproach
final def transformSchema(schema: StructType): StructType

Definition Classes
AnnotatorApproach → PipelineStage
def transformSchema(schema: StructType, logging: Boolean): StructType

Attributes
protected
Definition Classes
PipelineStage
Annotations
@DeveloperApi()
val uid: String

Definition Classes
AssertionLogRegApproach → Identifiable
def validate(schema: StructType): Boolean

Attributes
protected
Definition Classes
AnnotatorApproach
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... ) @native()
def write: MLWriter

Definition Classes
DefaultParamsWritable → MLWritable

Packages

AssertionLogRegApproach

Companion object AssertionLogRegApproach

class AssertionLogRegApproach extends AnnotatorApproach[AssertionLogRegModel] with Windowing with CheckLicense

Example

Training with Glove Embeddings

Instance Constructors

Type Members

Value Members

Inherited from CheckLicense

Inherited from Windowing

Inherited from AnnotatorApproach[AssertionLogRegModel]

Inherited from CanBeLazy

Inherited from DefaultParamsWritable

Inherited from MLWritable

Inherited from HasOutputAnnotatorType

Inherited from HasOutputAnnotationCol

Inherited from HasInputAnnotationCols

Inherited from Estimator[AssertionLogRegModel]

Inherited from PipelineStage

Inherited from Logging

Inherited from Params

Inherited from Serializable

Inherited from Serializable

Inherited from Identifiable

Inherited from AnyRef

Inherited from Any

Parameters

Annotator types

Members

Parameter setters

Packages

AssertionLogRegApproach 

Companion object AssertionLogRegApproach

class AssertionLogRegApproach extends AnnotatorApproach[AssertionLogRegModel] with Windowing with CheckLicense

Example

Training with Glove Embeddings

Instance Constructors

Type Members

Value Members

Inherited from CheckLicense

Inherited from Windowing

Inherited from AnnotatorApproach[AssertionLogRegModel]

Inherited from CanBeLazy

Inherited from DefaultParamsWritable

Inherited from MLWritable

Inherited from HasOutputAnnotatorType

Inherited from HasOutputAnnotationCol

Inherited from HasInputAnnotationCols

Inherited from Estimator[AssertionLogRegModel]

Inherited from PipelineStage

Inherited from Logging

Inherited from Params

Inherited from Serializable

Inherited from Serializable

Inherited from Identifiable

Inherited from AnyRef

Inherited from Any

Parameters

Annotator types

Members

Parameter setters

AssertionLogRegApproach