trait ChunkMapperFuzzyMatchingParams extends Params
- Alphabetic
- By Inheritance
- ChunkMapperFuzzyMatchingParams
- Params
- Serializable
- Serializable
- Identifiable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Abstract Value Members
Concrete Value Members
-
final
def
clear(param: Param[_]): ChunkMapperFuzzyMatchingParams.this.type
- Definition Classes
- Params
-
val
enableCharFingerprintMatching: BooleanParam
Whether to apply char Ngram fingerprint matching
-
val
enableFuzzyMatching: BooleanParam
Whether to apply fuzzy matching
-
val
enableTokenFingerprintMatching: BooleanParam
Whether to apply partial token Ngram fingerprint matching
-
def
explainParam(param: Param[_]): String
- Definition Classes
- Params
-
def
explainParams(): String
- Definition Classes
- Params
-
final
def
extractParamMap(): ParamMap
- Definition Classes
- Params
-
final
def
extractParamMap(extra: ParamMap): ParamMap
- Definition Classes
- Params
-
val
fuzzyDistanceScalingMode: Param[String]
When enableFuzzyMatching is true, the scaling mode for Integer Edit Distances; possible values are: left, right, long, short, none
-
val
fuzzyMatchingDistanceThresholds: DoubleArrayParam
When enableFuzzyMatching is true, this array contains the respective thresholds for each calculated distance
-
val
fuzzyMatchingDistances: StringArrayParam
When enableFuzzyMatching is true, this array contains the distances to calculate; possible values are: levenshtein, longest-common-subsequence, cosine, jaccard
-
final
def
get[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
final
def
getDefault[T](param: Param[T]): Option[T]
- Definition Classes
- Params
- def getEnableCharFingerprintMatching: Boolean
- def getEnableFuzzyMatching: Boolean
- def getEnableTokenFingerprintMatching: Boolean
- def getFuzzyDistanceScalingMode: String
- def getFuzzyMatchingDistanceThresholds: Array[Double]
- def getFuzzyMatchingDistances: Array[String]
- def getLowerCase: Boolean
- def getMaxCharNgramFingerprint: Int
- def getMaxTokenNgramDroppingCharsRatio: Double
- def getMaxTokenNgramDroppingOperator: String
- def getMaxTokenNgramDroppingTokens: Int
- def getMaxTokenNgramFingerprint: Int
- def getMinCharNgramFingerprint: Int
- def getMinTokenNgramFingerprint: Int
-
final
def
getOrDefault[T](param: Param[T]): T
- Definition Classes
- Params
-
def
getParam(paramName: String): Param[Any]
- Definition Classes
- Params
-
final
def
hasDefault[T](param: Param[T]): Boolean
- Definition Classes
- Params
-
def
hasParam(paramName: String): Boolean
- Definition Classes
- Params
-
final
def
isDefined(param: Param[_]): Boolean
- Definition Classes
- Params
- lazy val isLowerCase: Boolean
-
final
def
isSet(param: Param[_]): Boolean
- Definition Classes
- Params
- val lowerCase: BooleanParam
-
val
maxCharNgramFingerprint: IntParam
When enableCharFingerprintMatching is true, the max number of chars for Ngrams in Fingerprint
-
val
maxTokenNgramDroppingCharsRatio: DoubleParam
When enableTokenFingerprintMatching is true, this value drives the maximum ratio of chars allowed to be dropped from the full chunk; to use all Ngrams as keys, no matter how short the final chunk is, set this param to 1.0
-
val
maxTokenNgramDroppingOperator: Param[String]
When enableTokenFingerprintMatching is true, this param drives the logic to compose both dropping parameters; it can be either "and" or "or"
-
val
maxTokenNgramDroppingTokens: IntParam
When enableTokenFingerprintMatching is true, this value drives the maximum number of tokens allowed to be dropped from the full chunk; to use all Ngrams as keys, no matter how short the final chunk is, set this param to a very high value, e.g. Int.MaxValue
-
val
maxTokenNgramFingerprint: IntParam
When enableTokenFingerprintMatching is true, the max number of tokens for partial Ngrams in Fingerprint
-
val
minCharNgramFingerprint: IntParam
When enableCharFingerprintMatching is true, the min number of chars for Ngrams in Fingerprint
-
val
minTokenNgramFingerprint: IntParam
When enableTokenFingerprintMatching is true, the min number of tokens for partial Ngrams in Fingerprint
-
lazy val
params: Array[Param[_]]
- Definition Classes
- Params
- lazy val realizedFilledDistancesAndThresholds: Array[(String, Double)]
-
final
def
set[T](param: Param[T], value: T): ChunkMapperFuzzyMatchingParams.this.type
- Definition Classes
- Params
- def setEnableCharFingerprintMatching(value: Boolean): ChunkMapperFuzzyMatchingParams.this.type
- def setEnableFuzzyMatching(value: Boolean): ChunkMapperFuzzyMatchingParams.this.type
- def setEnableTokenFingerprintMatching(value: Boolean): ChunkMapperFuzzyMatchingParams.this.type
- def setFuzzyDistanceScalingMode(value: String): ChunkMapperFuzzyMatchingParams.this.type
- def setFuzzyMatchingDistanceThresholds(value: Double): ChunkMapperFuzzyMatchingParams.this.type
- def setFuzzyMatchingDistanceThresholds(value: Array[Double]): ChunkMapperFuzzyMatchingParams.this.type
- def setFuzzyMatchingDistances(value: Array[String]): ChunkMapperFuzzyMatchingParams.this.type
- def setLowerCase(lc: Boolean): ChunkMapperFuzzyMatchingParams.this.type
- def setMaxCharNgramFingerprint(value: Int): ChunkMapperFuzzyMatchingParams.this.type
- def setMaxTokenNgramDroppingCharsRatio(value: Double): ChunkMapperFuzzyMatchingParams.this.type
- def setMaxTokenNgramDroppingOperator(value: String): ChunkMapperFuzzyMatchingParams.this.type
- def setMaxTokenNgramDroppingTokens(value: Int): ChunkMapperFuzzyMatchingParams.this.type
- def setMaxTokenNgramFingerprint(value: Int): ChunkMapperFuzzyMatchingParams.this.type
- def setMinCharNgramFingerprint(value: Int): ChunkMapperFuzzyMatchingParams.this.type
- def setMinTokenNgramFingerprint(value: Int): ChunkMapperFuzzyMatchingParams.this.type
- lazy val toLowercase: (String) ⇒ String
-
def
toString(): String
- Definition Classes
- Identifiable → AnyRef → Any