class FeaturesAssembler extends Transformer with DefaultParamsWritable with HasOutputAnnotatorType with HasOutputAnnotationCol with HasStorageRef with CheckLicense
The FeaturesAssembler is used to collect features from different columns. It can collect features from single-value
columns (anything which can be cast to a float; if the cast fails, the value is set to 0), array columns, or
Spark NLP annotations (if the annotation is an embedding, it takes the embedding; otherwise it tries to cast the
`result` field). The output of the transformer is a FEATURE_VECTOR annotation (the numeric vector is in the
`embeddings` field).
- Grouped
- Alphabetic
- By Inheritance
- FeaturesAssembler
- CheckLicense
- HasStorageRef
- ParamsAndFeaturesWritable
- HasFeatures
- HasOutputAnnotationCol
- HasOutputAnnotatorType
- DefaultParamsWritable
- MLWritable
- Transformer
- PipelineStage
- Logging
- Params
- Serializable
- Serializable
- Identifiable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Parameters
-
val
inputCols: StringArrayParam
Input columns containing features
Annotator types
Required input and expected output annotator types
-
val
outputAnnotatorType: AnnotatorType
Output annotator type: FEATURE_VECTOR
Output annotator type: FEATURE_VECTOR
- Definition Classes
- FeaturesAssembler → HasOutputAnnotatorType
Members
-
type
AnnotatorType = String
- Definition Classes
- HasOutputAnnotatorType
-
def
checkValidEnvironment(spark: Option[SparkSession]): Unit
- Definition Classes
- CheckLicense
-
def
checkValidScope(scope: String): Unit
- Definition Classes
- CheckLicense
-
def
checkValidScopeAndEnvironment(scope: String, spark: Option[SparkSession], checkLp: Boolean): Unit
- Definition Classes
- CheckLicense
-
def
checkValidScopesAndEnvironment(scopes: Seq[String], spark: Option[SparkSession], checkLp: Boolean): Unit
- Definition Classes
- CheckLicense
-
final
def
clear(param: Param[_]): FeaturesAssembler.this.type
- Definition Classes
- Params
-
def
copy(extra: ParamMap): Transformer
- Definition Classes
- FeaturesAssembler → Transformer → PipelineStage → Params
-
def
createDatabaseConnection(database: Name): RocksDBConnection
- Definition Classes
- HasStorageRef
-
def
explainParam(param: Param[_]): String
- Definition Classes
- Params
-
def
explainParams(): String
- Definition Classes
- Params
-
final
def
extractParamMap(): ParamMap
- Definition Classes
- Params
-
final
def
extractParamMap(extra: ParamMap): ParamMap
- Definition Classes
- Params
-
val
features: ArrayBuffer[Feature[_, _, _]]
- Definition Classes
- HasFeatures
-
final
def
get[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
final
def
getDefault[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
final
def
getOrDefault[T](param: Param[T]): T
- Definition Classes
- Params
-
final
def
getOutputCol: String
- Definition Classes
- HasOutputAnnotationCol
-
def
getParam(paramName: String): Param[Any]
- Definition Classes
- Params
-
def
getStorageRef: String
- Definition Classes
- HasStorageRef
-
final
def
hasDefault[T](param: Param[T]): Boolean
- Definition Classes
- Params
-
def
hasParam(paramName: String): Boolean
- Definition Classes
- Params
-
final
def
isDefined(param: Param[_]): Boolean
- Definition Classes
- Params
-
final
def
isSet(param: Param[_]): Boolean
- Definition Classes
- Params
-
lazy val
params: Array[Param[_]]
- Definition Classes
- Params
-
def
save(path: String): Unit
- Definition Classes
- MLWritable
- Annotations
- @Since( "1.6.0" ) @throws( ... )
-
final
def
set[T](param: Param[T], value: T): FeaturesAssembler.this.type
- Definition Classes
- Params
-
final
def
setOutputCol(value: String): FeaturesAssembler.this.type
- Definition Classes
- HasOutputAnnotationCol
-
def
setStorageRef(value: String): FeaturesAssembler.this.type
- Definition Classes
- HasStorageRef
-
val
storageRef: Param[String]
- Definition Classes
- HasStorageRef
-
def
toString(): String
- Definition Classes
- Identifiable → AnyRef → Any
-
def
transform(dataset: Dataset[_]): DataFrame
- Definition Classes
- FeaturesAssembler → Transformer
-
def
transform(dataset: Dataset[_], paramMap: ParamMap): DataFrame
- Definition Classes
- Transformer
- Annotations
- @Since( "2.0.0" )
-
def
transform(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): DataFrame
- Definition Classes
- Transformer
- Annotations
- @Since( "2.0.0" ) @varargs()
-
final
def
transformSchema(schema: StructType): StructType
Requirement for pipeline transformation validation. It is called on fit().
- Definition Classes
- FeaturesAssembler → PipelineStage
-
val
uid: String
- Definition Classes
- FeaturesAssembler → Identifiable
-
def
validateStorageRef(dataset: Dataset[_], inputCols: Array[String], annotatorType: String): Unit
- Definition Classes
- HasStorageRef
-
def
write: MLWriter
- Definition Classes
- ParamsAndFeaturesWritable → DefaultParamsWritable → MLWritable
Parameter setters
-
def
setInputCols(value: Array[String]): FeaturesAssembler.this.type
Input columns containing features
Parameter getters
-
def
getInputCols: Array[String]
Input columns containing features