com.johnsnowlabs.nlp.annotators.splitter

DocumentSplitterParams

trait DocumentSplitterParams extends Params

A trait that contains all the params that InternalDocumentSplitter has.

See also: InternalDocumentSplitter

Linear Supertypes

Params, Serializable, Serializable, Identifiable, AnyRef, Any

Known Subclasses

InternalDocumentSplitter

Ordering

Grouped
Alphabetic
By Inheritance

Inherited

DocumentSplitterParams
Params
Serializable
Serializable
Identifiable
AnyRef
Any

Hide All
Show All

Visibility

Public
All

Abstract Value Members

abstract def copy(extra: ParamMap): Params

Definition Classes
Params
abstract val uid: String

Definition Classes
Identifiable

Concrete Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def $[T](param: Param[T]): T

Attributes
protected
Definition Classes
Params
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
val caseSensitive: BooleanParam
Whether regex matching is case sensitive (Default: false)
final def clear(param: Param[_]): DocumentSplitterParams.this.type

Definition Classes
Params
def clone(): AnyRef

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( ... ) @native()
def copyValues[T <: Params](to: T, extra: ParamMap): T

Attributes
protected
Definition Classes
Params
val customBoundsStrategy: Param[String]
Sets the custom bounds strategy for text parsing using regular expressions.
final def defaultCopy[T <: Params](extra: ParamMap): T

Attributes
protected
Definition Classes
Params
val enableSentenceIncrement: BooleanParam
Controls whether the sentence index should be incremented in the metadata of the annotator.
Controls whether the sentence index should be incremented in the metadata of the annotator. When set to true, the annotator will increment the sentence index in the metadata for each split document. Default: false
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def equals(arg0: Any): Boolean

Definition Classes
AnyRef → Any
def explainParam(param: Param[_]): String

Definition Classes
Params
def explainParams(): String

Definition Classes
Params
final def extractParamMap(): ParamMap

Definition Classes
Params
final def extractParamMap(extra: ParamMap): ParamMap

Definition Classes
Params
def finalize(): Unit

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def get[T](param: Param[T]): Option[T]

Definition Classes
Params
def getCaseSensitive: Boolean
Gets whether case-sensitive matching is used when matching values (Default: false)
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
Annotations
@native()
def getCustomBoundsStrategy: String
Gets customBoundsStrategy param
final def getDefault[T](param: Param[T]): Option[T]

Definition Classes
Params
def getEnableSentenceIncrement: Boolean
Gets whether the sentence index should be incremented in the metadata of the annotator.
def getMaxLength: Int
Gets maxLength param
def getMetaDataFields: Array[String]
Gets metaDataFields param
final def getOrDefault[T](param: Param[T]): T

Definition Classes
Params
def getParam(paramName: String): Param[Any]

Definition Classes
Params
def getSentenceAwareness: Boolean
Gets sentenceAwareness param
def getSplitMode: String
Gets splitMode param
final def hasDefault[T](param: Param[T]): Boolean

Definition Classes
Params
def hasParam(paramName: String): Boolean

Definition Classes
Params
def hashCode(): Int

Definition Classes
AnyRef → Any
Annotations
@native()
final def isDefined(param: Param[_]): Boolean

Definition Classes
Params
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
final def isSet(param: Param[_]): Boolean

Definition Classes
Params
val maxLength: IntParam
The maximum length for text parsing based on the specified mode.
val metaDataFields: StringArrayParam
Metadata fields to add specified data in columns to the metadata of the split documents.
Metadata fields to add specified data in columns to the metadata of the split documents. Set this to the names of the columns whose values should be read. Default: Array.empty
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
Annotations
@native()
final def notifyAll(): Unit

Definition Classes
AnyRef
Annotations
@native()
lazy val params: Array[Param[_]]

Definition Classes
Params
val sentenceAwareness: BooleanParam
Whether to split document by sentence awareness if possible.
Whether to split the document with sentence awareness if possible. If true, it can stop the split process before maxLength. If true, you should supply sentences from inputCols. Default: false.
final def set(paramPair: ParamPair[_]): DocumentSplitterParams.this.type

Attributes
protected
Definition Classes
Params
final def set(param: String, value: Any): DocumentSplitterParams.this.type

Attributes
protected
Definition Classes
Params
final def set[T](param: Param[T], value: T): DocumentSplitterParams.this.type

Definition Classes
Params
def setCaseSensitive(value: Boolean): DocumentSplitterParams.this.type
Sets whether regex matching is case sensitive (Default: false)
def setCustomBoundsStrategy(value: String): DocumentSplitterParams.this.type
Sets the custom bounds strategy for text parsing using regular expressions.
Sets the custom bounds strategy for text parsing using regular expressions.
value
The custom bounds strategy to be set. It should be one of the following values:
- "none": No custom bounds are applied.
- "prepend": Custom bounds are prepended to the split documents.
- "append": Custom bounds are appended to the split documents.
If not set, the default is "prepend".
final def setDefault(paramPairs: ParamPair[_]*): DocumentSplitterParams.this.type

Attributes
protected
Definition Classes
Params
final def setDefault[T](param: Param[T], value: T): DocumentSplitterParams.this.type

Attributes
protected[org.apache.spark.ml]
Definition Classes
Params
def setEnableSentenceIncrement(value: Boolean): DocumentSplitterParams.this.type
Controls whether the sentence index should be incremented in the metadata of the annotator.
Controls whether the sentence index should be incremented in the metadata of the annotator. When set to true, the annotator will increment the sentence index in the metadata for each split document. Default: false
def setMaxLength(value: Int): DocumentSplitterParams.this.type
Sets the maximum length for text parsing based on the specified mode.
def setMetaDataFields(value: Array[String]): DocumentSplitterParams.this.type
Sets metadata fields to add specified data in columns to the metadata of the split documents.
Sets metadata fields to add specified data in columns to the metadata of the split documents. Set this to the names of the columns whose values should be read. Default: Array.empty
def setSentenceAwareness(value: Boolean): DocumentSplitterParams.this.type
Sets whether to split document by sentence awareness if possible.
Sets whether to split the document with sentence awareness if possible. If true, it can stop the split process before maxLength. If true, you should supply sentences from inputCols. Default: false.
def setSplitMode(value: String): DocumentSplitterParams.this.type
Sets the split mode to determine how text should be segmented.
Sets the split mode to determine how text should be segmented. Default: 'regex'
value
The split mode to be set. It should be one of the following values:
- "char": Split text based on individual characters.
- "token": Split text based on tokens. You should supply tokens from inputCols.
- "sentence": Split text based on sentences. You should supply sentences from inputCols.
- "recursive": Split text recursively using a specific algorithm.
- "regex": Split text based on a regular expression pattern.
val splitMode: Param[String]
The split mode to determine how text should be segmented.
The split mode to determine how text should be segmented. Default: 'regex'
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
def toString(): String

Definition Classes
Identifiable → AnyRef → Any
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... ) @native()

Packages

DocumentSplitterParams

trait DocumentSplitterParams extends Params

Abstract Value Members

Concrete Value Members

Inherited from Params

Inherited from Serializable

Inherited from Serializable

Inherited from Identifiable

Inherited from AnyRef

Inherited from Any

getParam

param

setParam

Ungrouped

Packages

DocumentSplitterParams 

trait DocumentSplitterParams extends Params

Abstract Value Members

Concrete Value Members

Inherited from Params

Inherited from Serializable

Inherited from Serializable

Inherited from Identifiable

Inherited from AnyRef

Inherited from Any

getParam

param

setParam

Ungrouped

DocumentSplitterParams