c

com.johnsnowlabs.nlp.annotators.chunker

ChunkMapperApproach

class ChunkMapperApproach extends AnnotatorApproach[ChunkMapperModel] with CheckLicense with ChunkMapperFuzzyMatchingParams with HandleExceptionParams

Linear Supertypes
HandleExceptionParams, ChunkMapperFuzzyMatchingParams, CheckLicense, AnnotatorApproach[ChunkMapperModel], CanBeLazy, DefaultParamsWritable, MLWritable, HasOutputAnnotatorType, HasOutputAnnotationCol, HasInputAnnotationCols, Estimator[ChunkMapperModel], PipelineStage, Logging, Params, Serializable, Serializable, Identifiable, AnyRef, Any
Ordering
  1. Grouped
  2. Alphabetic
  3. By Inheritance
Inherited
  1. ChunkMapperApproach
  2. HandleExceptionParams
  3. ChunkMapperFuzzyMatchingParams
  4. CheckLicense
  5. AnnotatorApproach
  6. CanBeLazy
  7. DefaultParamsWritable
  8. MLWritable
  9. HasOutputAnnotatorType
  10. HasOutputAnnotationCol
  11. HasInputAnnotationCols
  12. Estimator
  13. PipelineStage
  14. Logging
  15. Params
  16. Serializable
  17. Serializable
  18. Identifiable
  19. AnyRef
  20. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new ChunkMapperApproach()
  2. new ChunkMapperApproach(uid: String)

Type Members

  1. type AnnotatorType = String
    Definition Classes
    HasOutputAnnotatorType

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def $[T](param: Param[T]): T
    Attributes
    protected
    Definition Classes
    Params
  4. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  5. def _fit(dataset: Dataset[_], recursiveStages: Option[PipelineModel]): ChunkMapperModel
    Attributes
    protected
    Definition Classes
    AnnotatorApproach
  6. val allowMultiTokenChunk: BooleanParam
  7. def appendSpecialRelations(mapping: Mapping): Map[String, Array[String]]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  8. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  9. def beforeTraining(spark: SparkSession): Unit
    Definition Classes
    AnnotatorApproach
  10. final def checkSchema(schema: StructType, inputAnnotatorType: String): Boolean
    Attributes
    protected
    Definition Classes
    HasInputAnnotationCols
  11. def checkValidEnvironment(spark: Option[SparkSession], scopes: Seq[String]): Unit
    Definition Classes
    CheckLicense
  12. def checkValidScope(scope: String): Unit
    Definition Classes
    CheckLicense
  13. def checkValidScopeAndEnvironment(scope: String, spark: Option[SparkSession], checkLp: Boolean): Unit
    Definition Classes
    CheckLicense
  14. def checkValidScopesAndEnvironment(scopes: Seq[String], spark: Option[SparkSession], checkLp: Boolean): Unit
    Definition Classes
    CheckLicense
  15. final def clear(param: Param[_]): ChunkMapperApproach.this.type
    Definition Classes
    Params
  16. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  17. final def copy(extra: ParamMap): Estimator[ChunkMapperModel]
    Definition Classes
    AnnotatorApproach → Estimator → PipelineStage → Params
  18. def copyValues[T <: Params](to: T, extra: ParamMap): T
    Attributes
    protected
    Definition Classes
    Params
  19. final def defaultCopy[T <: Params](extra: ParamMap): T
    Attributes
    protected
    Definition Classes
    Params
  20. val description: String
    Definition Classes
    ChunkMapperApproach → AnnotatorApproach
  21. val dictionary: Param[String]
  22. val doExceptionHandling: BooleanParam

    If true, exceptions are handled.

    If true, exceptions are handled. If exception-causing data is passed to the model, an error annotation containing the exception message is emitted. Processing continues with the next one. This comes with a performance penalty.

    Definition Classes
    HandleExceptionParams
  23. val enableCharFingerprintMatching: BooleanParam

    Whether to apply char Ngram fingerprint matching

    Whether to apply char Ngram fingerprint matching

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  24. val enableFuzzyMatching: BooleanParam

    Whether to apply fuzzy matching

    Whether to apply fuzzy matching

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  25. val enableTokenFingerprintMatching: BooleanParam

    Whether to apply partial token Ngram fingerprint matching

    Whether to apply partial token Ngram fingerprint matching

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  26. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  27. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  28. def explainParam(param: Param[_]): String
    Definition Classes
    Params
  29. def explainParams(): String
    Definition Classes
    Params
  30. final def extractParamMap(): ParamMap
    Definition Classes
    Params
  31. final def extractParamMap(extra: ParamMap): ParamMap
    Definition Classes
    Params
  32. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  33. def findSpecialRelations(relations: Map[String, Array[String]]): Map[String, Array[String]]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  34. final def fit(dataset: Dataset[_]): ChunkMapperModel
    Definition Classes
    AnnotatorApproach → Estimator
  35. def fit(dataset: Dataset[_], paramMaps: Seq[ParamMap]): Seq[ChunkMapperModel]
    Definition Classes
    Estimator
    Annotations
    @Since( "2.0.0" )
  36. def fit(dataset: Dataset[_], paramMap: ParamMap): ChunkMapperModel
    Definition Classes
    Estimator
    Annotations
    @Since( "2.0.0" )
  37. def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): ChunkMapperModel
    Definition Classes
    Estimator
    Annotations
    @Since( "2.0.0" ) @varargs()
  38. val fuzzyDistanceScalingMode: Param[String]

    When enableFuzzyMatching is true, the scaling mode for Integer Edit Distances; possible values are: left, right, long, short, none

    When enableFuzzyMatching is true, the scaling mode for Integer Edit Distances; possible values are: left, right, long, short, none

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  39. val fuzzyMatchingDistanceThresholds: DoubleArrayParam

    When enableFuzzyMatching is true, this array contains the respective thresholds for each calculated distance

    When enableFuzzyMatching is true, this array contains the respective thresholds for each calculated distance

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  40. val fuzzyMatchingDistances: StringArrayParam

    When enableFuzzyMatching is true, this array contains the distances to calculate; possible values are: levenshtein, longest-common-subsequence, cosine, jaccard

    When enableFuzzyMatching is true, this array contains the distances to calculate; possible values are: levenshtein, longest-common-subsequence, cosine, jaccard

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  41. final def get[T](param: Param[T]): Option[T]
    Definition Classes
    Params
  42. def getAllPossibleFingerprintPredictionKeys(annotationResult: String): Stream[(Double, String)]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  43. def getAllowMultiTokenChunk: Boolean
  44. def getCharNgramFingerprints(mappingKey: String): Seq[String]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  45. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  46. final def getDefault[T](param: Param[T]): Option[T]
    Definition Classes
    Params
  47. def getEnableCharFingerprintMatching: Boolean
  48. def getEnableFuzzyMatching: Boolean
  49. def getEnableTokenFingerprintMatching: Boolean
  50. def getFingerprintFuzzyDistances(leftTerm: String, rightTerm: String): Array[(String, Double)]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  51. def getFuzzyDistanceDictionaries(): (Map[String, (Integer) ⇒ EditDistance[Integer]], Map[String, EditDistance[Integer]], Map[String, EditDistance[Double]])
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  52. def getFuzzyDistanceScalingMode: String
  53. def getFuzzyDistanceValue(leftTerm: String, rightTerm: String, distRels: (String, Double), distDicts: (Map[String, (Integer) ⇒ EditDistance[Integer]], Map[String, EditDistance[Integer]], Map[String, EditDistance[Double]])): Option[Double]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  54. def getFuzzyMatches(dictionary: Map[String, Map[String, Array[String]]], annotationResult: String): Map[Double, Map[String, Array[String]]]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  55. def getFuzzyMatchingDistanceThresholds: Array[Double]
  56. def getFuzzyMatchingDistances: Array[String]
  57. def getInputCols: Array[String]
    Definition Classes
    HasInputAnnotationCols
  58. def getLazyAnnotator: Boolean
    Definition Classes
    CanBeLazy
  59. def getLowerCase: Boolean
  60. def getMaxCharNgramFingerprint: Int
  61. def getMaxTokenNgramDroppingCharsRatio: Double
  62. def getMaxTokenNgramDroppingOperator: String
  63. def getMaxTokenNgramDroppingTokens: Int
  64. def getMaxTokenNgramFingerprint: Int
  65. def getMinCharNgramFingerprint: Int
  66. def getMinTokenNgramFingerprint: Int
  67. def getMultivaluesRelations: Boolean
  68. final def getOrDefault[T](param: Param[T]): T
    Definition Classes
    Params
  69. final def getOutputCol: String
    Definition Classes
    HasOutputAnnotationCol
  70. def getParam(paramName: String): Param[Any]
    Definition Classes
    Params
  71. def getPossibleFingerprintPredictionKeys(annotationResult: String): Stream[(Double, String)]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  72. def getPossibleFingerprintTrainingKeys(keys: Mappings): Map[String, Map[String, Array[String]]]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  73. def getRel: String
  74. def getRels: Array[String]
  75. def getTokenNgramFingerprints(mappingKey: String): Seq[String]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  76. final def hasDefault[T](param: Param[T]): Boolean
    Definition Classes
    Params
  77. def hasParam(paramName: String): Boolean
    Definition Classes
    Params
  78. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  79. def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
    Attributes
    protected
    Definition Classes
    Logging
  80. def initializeLogIfNecessary(isInterpreter: Boolean): Unit
    Attributes
    protected
    Definition Classes
    Logging
  81. val inputAnnotatorTypes: Array[String]
    Definition Classes
    ChunkMapperApproach → HasInputAnnotationCols
  82. final val inputCols: StringArrayParam
    Attributes
    protected
    Definition Classes
    HasInputAnnotationCols
  83. final def isDefined(param: Param[_]): Boolean
    Definition Classes
    Params
  84. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  85. lazy val isLowerCase: Boolean
  86. final def isSet(param: Param[_]): Boolean
    Definition Classes
    Params
  87. def isTraceEnabled(): Boolean
    Attributes
    protected
    Definition Classes
    Logging
  88. val lazyAnnotator: BooleanParam
    Definition Classes
    CanBeLazy
  89. def log: Logger
    Attributes
    protected
    Definition Classes
    Logging
  90. def logDebug(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  91. def logDebug(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  92. def logError(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  93. def logError(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  94. def logInfo(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  95. def logInfo(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  96. def logName: String
    Attributes
    protected
    Definition Classes
    Logging
  97. def logTrace(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  98. def logTrace(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  99. def logWarning(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  100. def logWarning(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  101. val lowerCase: BooleanParam

  102. val maxCharNgramFingerprint: IntParam

    When enableCharFingerprintMatching is true, the max number of chars for Ngrams in Fingerprint

    When enableCharFingerprintMatching is true, the max number of chars for Ngrams in Fingerprint

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  103. val maxTokenNgramDroppingCharsRatio: DoubleParam

    When enableTokenFingerprintMatching is true, this value drives the maximum ratio of chars allowed to be dropped from the full chunk; whenever it is desired for all Ngrams to be used as keys, no matter how short the final chunk is, this param should be set to 1.0

    When enableTokenFingerprintMatching is true, this value drives the maximum ratio of chars allowed to be dropped from the full chunk; whenever it is desired for all Ngrams to be used as keys, no matter how short the final chunk is, this param should be set to 1.0

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  104. val maxTokenNgramDroppingOperator: Param[String]

    When enableTokenFingerprintMatching is true, this param drives the logic to compose both dropping parameters; it can be either "and" or "or"

    When enableTokenFingerprintMatching is true, this param drives the logic to compose both dropping parameters; it can be either "and" or "or"

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  105. val maxTokenNgramDroppingTokens: IntParam

    When enableTokenFingerprintMatching is true, this value drives the maximum number of tokens allowed to be dropped from the full chunk; whenever it is desired for all Ngrams to be used as keys, no matter how short the final chunk is, this param should be set to a very high value, e.g. Int.MaxValue

    When enableTokenFingerprintMatching is true, this value drives the maximum number of tokens allowed to be dropped from the full chunk; whenever it is desired for all Ngrams to be used as keys, no matter how short the final chunk is, this param should be set to a very high value, e.g. Int.MaxValue

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  106. val maxTokenNgramFingerprint: IntParam

    When enableTokenFingerprintMatching is true, the max number of tokens for partial Ngrams in Fingerprint

    When enableTokenFingerprintMatching is true, the max number of tokens for partial Ngrams in Fingerprint

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  107. val minCharNgramFingerprint: IntParam

    When enableCharFingerprintMatching is true, the min number of chars for Ngrams in Fingerprint

    When enableCharFingerprintMatching is true, the min number of chars for Ngrams in Fingerprint

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  108. val minTokenNgramFingerprint: IntParam

    When enableTokenFingerprintMatching is true, the min number of tokens for partial Ngrams in Fingerprint

    When enableTokenFingerprintMatching is true, the min number of tokens for partial Ngrams in Fingerprint

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  109. def msgHelper(schema: StructType): String
    Attributes
    protected
    Definition Classes
    HasInputAnnotationCols
  110. val multivaluesRelations: BooleanParam
  111. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  112. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  113. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  114. def onTrained(model: ChunkMapperModel, spark: SparkSession): Unit
    Definition Classes
    AnnotatorApproach
  115. val optionalInputAnnotatorTypes: Array[String]
    Definition Classes
    HasInputAnnotationCols
  116. val outputAnnotatorType: AnnotatorType
    Definition Classes
    ChunkMapperApproach → HasOutputAnnotatorType
  117. final val outputCol: Param[String]
    Attributes
    protected
    Definition Classes
    HasOutputAnnotationCol
  118. lazy val params: Array[Param[_]]
    Definition Classes
    Params
  119. lazy val realizedFilledDistancesAndThresholds: Array[(String, Double)]
  120. val rels: StringArrayParam
  121. def save(path: String): Unit
    Definition Classes
    MLWritable
    Annotations
    @Since( "1.6.0" ) @throws( ... )
  122. def seAllowMultiTokenChunk(lc: Boolean): ChunkMapperApproach.this.type
  123. final def set(paramPair: ParamPair[_]): ChunkMapperApproach.this.type
    Attributes
    protected
    Definition Classes
    Params
  124. final def set(param: String, value: Any): ChunkMapperApproach.this.type
    Attributes
    protected
    Definition Classes
    Params
  125. final def set[T](param: Param[T], value: T): ChunkMapperApproach.this.type
    Definition Classes
    Params
  126. final def setDefault(paramPairs: ParamPair[_]*): ChunkMapperApproach.this.type
    Attributes
    protected
    Definition Classes
    Params
  127. final def setDefault[T](param: Param[T], value: T): ChunkMapperApproach.this.type
    Attributes
    protected
    Definition Classes
    Params
  128. def setDictionary(path: String): ChunkMapperApproach.this.type
  129. def setDoExceptionHandling(value: Boolean): ChunkMapperApproach.this.type

    If true, exceptions are handled.

    If true, exceptions are handled. If exception-causing data is passed to the model, an error annotation containing the exception message is emitted. Processing continues with the next one. This comes with a performance penalty.

    Definition Classes
    HandleExceptionParams
  130. def setEnableCharFingerprintMatching(value: Boolean): ChunkMapperApproach.this.type
  131. def setEnableFuzzyMatching(value: Boolean): ChunkMapperApproach.this.type
  132. def setEnableTokenFingerprintMatching(value: Boolean): ChunkMapperApproach.this.type
  133. def setFuzzyDistanceScalingMode(value: String): ChunkMapperApproach.this.type
  134. def setFuzzyMatchingDistanceThresholds(value: Double): ChunkMapperApproach.this.type
  135. def setFuzzyMatchingDistanceThresholds(value: Array[Double]): ChunkMapperApproach.this.type
  136. def setFuzzyMatchingDistances(value: Array[String]): ChunkMapperApproach.this.type
  137. final def setInputCols(value: String*): ChunkMapperApproach.this.type
    Definition Classes
    HasInputAnnotationCols
  138. def setInputCols(value: Array[String]): ChunkMapperApproach.this.type
    Definition Classes
    HasInputAnnotationCols
  139. def setLazyAnnotator(value: Boolean): ChunkMapperApproach.this.type
    Definition Classes
    CanBeLazy
  140. def setLowerCase(lc: Boolean): ChunkMapperApproach.this.type
  141. def setMaxCharNgramFingerprint(value: Int): ChunkMapperApproach.this.type
  142. def setMaxTokenNgramDroppingCharsRatio(value: Double): ChunkMapperApproach.this.type
  143. def setMaxTokenNgramDroppingOperator(value: String): ChunkMapperApproach.this.type
  144. def setMaxTokenNgramDroppingTokens(value: Int): ChunkMapperApproach.this.type
  145. def setMaxTokenNgramFingerprint(value: Int): ChunkMapperApproach.this.type
  146. def setMinCharNgramFingerprint(value: Int): ChunkMapperApproach.this.type
  147. def setMinTokenNgramFingerprint(value: Int): ChunkMapperApproach.this.type
  148. def setMultivaluesRelations(lc: Boolean): ChunkMapperApproach.this.type
  149. final def setOutputCol(value: String): ChunkMapperApproach.this.type
    Definition Classes
    HasOutputAnnotationCol
  150. def setRels(value: Array[String]): ChunkMapperApproach.this.type
  151. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  152. lazy val toLowercase: (String) ⇒ String
  153. def toString(): String
    Definition Classes
    Identifiable → AnyRef → Any
  154. def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): ChunkMapperModel
    Definition Classes
    ChunkMapperApproach → AnnotatorApproach
  155. final def transformSchema(schema: StructType): StructType
    Definition Classes
    AnnotatorApproach → PipelineStage
  156. def transformSchema(schema: StructType, logging: Boolean): StructType
    Attributes
    protected
    Definition Classes
    PipelineStage
    Annotations
    @DeveloperApi()
  157. val uid: String
    Definition Classes
    ChunkMapperApproach → Identifiable
  158. def validate(schema: StructType): Boolean
    Attributes
    protected
    Definition Classes
    AnnotatorApproach
  159. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  160. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  161. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  162. def write: MLWriter
    Definition Classes
    DefaultParamsWritable → MLWritable

Deprecated Value Members

  1. val rel: Param[String]
    Annotations
    @deprecated
    Deprecated
  2. def setRel(value: String): ChunkMapperApproach.this.type
    Annotations
    @deprecated
    Deprecated

    Use 'setRels' method instead of 'setRel' method

Inherited from HandleExceptionParams

Inherited from CheckLicense

Inherited from AnnotatorApproach[ChunkMapperModel]

Inherited from CanBeLazy

Inherited from DefaultParamsWritable

Inherited from MLWritable

Inherited from HasOutputAnnotatorType

Inherited from HasOutputAnnotationCol

Inherited from HasInputAnnotationCols

Inherited from Estimator[ChunkMapperModel]

Inherited from PipelineStage

Inherited from Logging

Inherited from Params

Inherited from Serializable

Inherited from Serializable

Inherited from Identifiable

Inherited from AnyRef

Inherited from Any

param

setParam

Ungrouped