trait DocumentSplitterParams extends Params
A trait that contains all the params that InternalDocumentSplitter has.
- See also
- Grouped
- Alphabetic
- By Inheritance
- DocumentSplitterParams
- Params
- Serializable
- Serializable
- Identifiable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Abstract Value Members
Concrete Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
$[T](param: Param[T]): T
- Attributes
- protected
- Definition Classes
- Params
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
val
caseSensitive: BooleanParam
Whether regex matching is case sensitive (Default: false)
-
final
def
clear(param: Param[_]): DocumentSplitterParams.this.type
- Definition Classes
- Params
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
copyValues[T <: Params](to: T, extra: ParamMap): T
- Attributes
- protected
- Definition Classes
- Params
-
val
customBoundsStrategy: Param[String]
Sets the custom bounds strategy for text parsing using regular expressions.
-
final
def
defaultCopy[T <: Params](extra: ParamMap): T
- Attributes
- protected
- Definition Classes
- Params
-
val
enableSentenceIncrement: BooleanParam
Controls whether the sentence index should be incremented in the metadata of the annotator.
Controls whether the sentence index should be incremented in the metadata of the annotator. When set to true, the annotator will increment the sentence index in the metadata for each split document. Default: false
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
explainParam(param: Param[_]): String
- Definition Classes
- Params
-
def
explainParams(): String
- Definition Classes
- Params
-
final
def
extractParamMap(): ParamMap
- Definition Classes
- Params
-
final
def
extractParamMap(extra: ParamMap): ParamMap
- Definition Classes
- Params
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
final
def
get[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
def
getCaseSensitive: Boolean
Gets whether regex matching is case sensitive (Default: false)
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getCustomBoundsStrategy: String
Gets customBoundsStrategy param
-
final
def
getDefault[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
def
getEnableSentenceIncrement: Boolean
Gets whether the sentence index should be incremented in the metadata of the annotator.
-
def
getMaxLength: Int
Gets maxLength param
-
def
getMetaDataFields: Array[String]
Gets metaDataFields param
-
final
def
getOrDefault[T](param: Param[T]): T
- Definition Classes
- Params
-
def
getParam(paramName: String): Param[Any]
- Definition Classes
- Params
-
def
getSentenceAwareness: Boolean
Gets sentenceAwareness param
-
def
getSplitMode: String
Gets splitMode param
-
final
def
hasDefault[T](param: Param[T]): Boolean
- Definition Classes
- Params
-
def
hasParam(paramName: String): Boolean
- Definition Classes
- Params
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isDefined(param: Param[_]): Boolean
- Definition Classes
- Params
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
isSet(param: Param[_]): Boolean
- Definition Classes
- Params
-
val
maxLength: IntParam
The maximum length for text parsing based on the specified mode.
-
val
metaDataFields: StringArrayParam
Metadata fields used to add data from the specified columns to the metadata of the split documents.
Metadata fields used to add data from the specified columns to the metadata of the split documents. Set this to the names of the columns to read. Default: Array.empty
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
lazy val
params: Array[Param[_]]
- Definition Classes
- Params
-
val
sentenceAwareness: BooleanParam
Whether to split the document with sentence awareness if possible.
Whether to split the document with sentence awareness if possible. If true, the split process can stop before maxLength is reached, and you should supply sentences from inputCols. Default: false.
-
final
def
set(paramPair: ParamPair[_]): DocumentSplitterParams.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
set(param: String, value: Any): DocumentSplitterParams.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
set[T](param: Param[T], value: T): DocumentSplitterParams.this.type
- Definition Classes
- Params
-
def
setCaseSensitive(value: Boolean): DocumentSplitterParams.this.type
Sets whether regex matching is case sensitive (Default: false)
-
def
setCustomBoundsStrategy(value: String): DocumentSplitterParams.this.type
Sets the custom bounds strategy for text parsing using regular expressions.
Sets the custom bounds strategy for text parsing using regular expressions.
- value
The custom bounds strategy to be set. It should be one of the following values:
- "none": No custom bounds are applied.
- "prepend": Custom bounds are prepended to the split documents.
- "append": Custom bounds are appended to the split documents.
- Default: "prepend".
-
final
def
setDefault(paramPairs: ParamPair[_]*): DocumentSplitterParams.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
setDefault[T](param: Param[T], value: T): DocumentSplitterParams.this.type
- Attributes
- protected[org.apache.spark.ml]
- Definition Classes
- Params
-
def
setEnableSentenceIncrement(value: Boolean): DocumentSplitterParams.this.type
Controls whether the sentence index should be incremented in the metadata of the annotator.
Controls whether the sentence index should be incremented in the metadata of the annotator. When set to true, the annotator will increment the sentence index in the metadata for each split document. Default: false
-
def
setMaxLength(value: Int): DocumentSplitterParams.this.type
Sets the maximum length for text parsing based on the specified mode.
-
def
setMetaDataFields(value: Array[String]): DocumentSplitterParams.this.type
Sets the metadata fields used to add data from the specified columns to the metadata of the split documents.
Sets the metadata fields used to add data from the specified columns to the metadata of the split documents. Set this to the names of the columns to read. Default: Array.empty
-
def
setSentenceAwareness(value: Boolean): DocumentSplitterParams.this.type
Sets whether to split the document with sentence awareness if possible.
Sets whether to split the document with sentence awareness if possible. If true, the split process can stop before maxLength is reached, and you should supply sentences from inputCols. Default: false.
-
def
setSplitMode(value: String): DocumentSplitterParams.this.type
Sets the split mode to determine how text should be segmented.
Sets the split mode to determine how text should be segmented. Default: 'regex'
- value
The split mode to be set. It should be one of the following values:
- "char": Split text based on individual characters.
- "token": Split text based on tokens. You should supply tokens from inputCols.
- "sentence": Split text based on sentences. You should supply sentences from inputCols.
- "recursive": Split text recursively using a specific algorithm.
- "regex": Split text based on a regular expression pattern.
-
val
splitMode: Param[String]
The split mode to determine how text should be segmented.
The split mode to determine how text should be segmented. Default: 'regex'
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- Identifiable → AnyRef → Any
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()