class DocMapperApproach extends ChunkMapperApproach

Linear Supertypes
ChunkMapperApproach, HandleExceptionParams, ChunkMapperFuzzyMatchingParams, CheckLicense, AnnotatorApproach[ChunkMapperModel], CanBeLazy, DefaultParamsWritable, MLWritable, HasOutputAnnotatorType, HasOutputAnnotationCol, HasInputAnnotationCols, Estimator[ChunkMapperModel], PipelineStage, Logging, Params, Serializable, Serializable, Identifiable, AnyRef, Any
Ordering
  1. Grouped
  2. Alphabetic
  3. By Inheritance
Inherited
  1. DocMapperApproach
  2. ChunkMapperApproach
  3. HandleExceptionParams
  4. ChunkMapperFuzzyMatchingParams
  5. CheckLicense
  6. AnnotatorApproach
  7. CanBeLazy
  8. DefaultParamsWritable
  9. MLWritable
  10. HasOutputAnnotatorType
  11. HasOutputAnnotationCol
  12. HasInputAnnotationCols
  13. Estimator
  14. PipelineStage
  15. Logging
  16. Params
  17. Serializable
  18. Serializable
  19. Identifiable
  20. AnyRef
  21. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new DocMapperApproach()
  2. new DocMapperApproach(uid: String)

Type Members

  1. type AnnotatorType = String
    Definition Classes
    HasOutputAnnotatorType

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def $[T](param: Param[T]): T
    Attributes
    protected
    Definition Classes
    Params
  4. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  5. def _fit(dataset: Dataset[_], recursiveStages: Option[PipelineModel]): ChunkMapperModel
    Attributes
    protected
    Definition Classes
    AnnotatorApproach
  6. val allowMultiTokenChunk: BooleanParam
    Definition Classes
    ChunkMapperApproach
  7. def appendSpecialRelations(mapping: Mapping): Map[String, Array[String]]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  8. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  9. def beforeTraining(spark: SparkSession): Unit
    Definition Classes
    AnnotatorApproach
  10. final def checkSchema(schema: StructType, inputAnnotatorType: String): Boolean
    Attributes
    protected
    Definition Classes
    HasInputAnnotationCols
  11. def checkValidEnvironment(spark: Option[SparkSession], scopes: Seq[String], metadata: Option[Map[String, Value]]): Unit
    Definition Classes
    CheckLicense
  12. def checkValidScope(scope: String): Unit
    Definition Classes
    CheckLicense
  13. def checkValidScopeAndEnvironment(scope: String, spark: Option[SparkSession], checkLp: Boolean, metadata: Option[Map[String, Value]]): Unit
    Definition Classes
    CheckLicense
  14. def checkValidScopesAndEnvironment(scopes: Seq[String], spark: Option[SparkSession], checkLp: Boolean, metadata: Option[Map[String, Value]]): Unit
    Definition Classes
    CheckLicense
  15. final def clear(param: Param[_]): DocMapperApproach.this.type
    Definition Classes
    Params
  16. def clone(): AnyRef
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  17. final def copy(extra: ParamMap): Estimator[ChunkMapperModel]
    Definition Classes
    AnnotatorApproach → Estimator → PipelineStage → Params
  18. def copyValues[T <: Params](to: T, extra: ParamMap): T
    Attributes
    protected
    Definition Classes
    Params
  19. final def defaultCopy[T <: Params](extra: ParamMap): T
    Attributes
    protected
    Definition Classes
    Params
  20. val description: String
    Definition Classes
    DocMapperApproach → ChunkMapperApproach → AnnotatorApproach
  21. val dictionary: Param[String]
    Definition Classes
    ChunkMapperApproach
  22. val doExceptionHandling: BooleanParam

    If true, exceptions are handled.

    If true, exceptions are handled. If exception-causing data is passed to the model, an error annotation is emitted which has the exception message. Processing continues with the next one. This comes with a performance penalty.

    Definition Classes
    HandleExceptionParams
  23. val enableCharFingerprintMatching: BooleanParam

    Whether to apply char Ngram fingerprint matching

    Whether to apply char Ngram fingerprint matching

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  24. val enableFuzzyMatching: BooleanParam

    Whether to apply fuzzy matching

    Whether to apply fuzzy matching

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  25. val enableTokenFingerprintMatching: BooleanParam

    Whether to apply partial token Ngram fingerprint matching

    Whether to apply partial token Ngram fingerprint matching

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  26. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  27. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  28. def explainParam(param: Param[_]): String
    Definition Classes
    Params
  29. def explainParams(): String
    Definition Classes
    Params
  30. final val extraInputCols: StringArrayParam
    Attributes
    protected
    Definition Classes
    HasInputAnnotationCols
  31. final def extractParamMap(): ParamMap
    Definition Classes
    Params
  32. final def extractParamMap(extra: ParamMap): ParamMap
    Definition Classes
    Params
  33. def finalize(): Unit
    Attributes
    protected[lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  34. def findSpecialRelations(relations: Map[String, Array[String]]): Map[String, Array[String]]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  35. final def fit(dataset: Dataset[_]): ChunkMapperModel
    Definition Classes
    AnnotatorApproach → Estimator
  36. def fit(dataset: Dataset[_], paramMaps: Seq[ParamMap]): Seq[ChunkMapperModel]
    Definition Classes
    Estimator
    Annotations
    @Since( "2.0.0" )
  37. def fit(dataset: Dataset[_], paramMap: ParamMap): ChunkMapperModel
    Definition Classes
    Estimator
    Annotations
    @Since( "2.0.0" )
  38. def fit(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): ChunkMapperModel
    Definition Classes
    Estimator
    Annotations
    @Since( "2.0.0" ) @varargs()
  39. val fuzzyDistanceScalingMode: Param[String]

    When enableFuzzyMatching is true, the scaling mode for Integer Edit Distances; possible values are: left, right, long, short, none

    When enableFuzzyMatching is true, the scaling mode for Integer Edit Distances; possible values are: left, right, long, short, none

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  40. val fuzzyMatchingDistanceThresholds: DoubleArrayParam

    When enableFuzzyMatching is true, this array contains the respective thresholds for each calculated distance

    When enableFuzzyMatching is true, this array contains the respective thresholds for each calculated distance

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  41. val fuzzyMatchingDistances: StringArrayParam

    When enableFuzzyMatching is true, this array contains the distances to calculate; possible values are: levenshtein, longest-common-subsequence, cosine, jaccard

    When enableFuzzyMatching is true, this array contains the distances to calculate; possible values are: levenshtein, longest-common-subsequence, cosine, jaccard

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  42. final def get[T](param: Param[T]): Option[T]
    Definition Classes
    Params
  43. def getAllPossibleFingerprintPredictionKeys(annotationResult: String): Stream[(Double, String)]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  44. def getAllowMultiTokenChunk: Boolean
    Definition Classes
    ChunkMapperApproach
  45. def getCharNgramFingerprints(mappingKey: String): Seq[String]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  46. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  47. final def getDefault[T](param: Param[T]): Option[T]
    Definition Classes
    Params
  48. def getEnableCharFingerprintMatching: Boolean
  49. def getEnableFuzzyMatching: Boolean
  50. def getEnableTokenFingerprintMatching: Boolean
  51. def getFingerprintFuzzyDistances(leftTerm: String, rightTerm: String): Array[(String, Double)]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  52. def getFuzzyDistanceDictionaries(): (Map[String, (Integer) ⇒ EditDistance[Integer]], Map[String, EditDistance[Integer]], Map[String, EditDistance[Double]])
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  53. def getFuzzyDistanceScalingMode: String
  54. def getFuzzyDistanceValue(leftTerm: String, rightTerm: String, distRels: (String, Double), distDicts: (Map[String, (Integer) ⇒ EditDistance[Integer]], Map[String, EditDistance[Integer]], Map[String, EditDistance[Double]])): Option[Double]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  55. def getFuzzyMatches(dictionary: Map[String, Map[String, Array[String]]], annotationResult: String): Map[Double, Map[String, Array[String]]]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  56. def getFuzzyMatchingDistanceThresholds: Array[Double]
  57. def getFuzzyMatchingDistances: Array[String]
  58. def getInputCols: Array[String]
    Definition Classes
    HasInputAnnotationCols
  59. def getLazyAnnotator: Boolean
    Definition Classes
    CanBeLazy
  60. def getLowerCase: Boolean
  61. def getMaxCharNgramFingerprint: Int
  62. def getMaxTokenNgramDroppingCharsRatio: Double
  63. def getMaxTokenNgramDroppingOperator: String
  64. def getMaxTokenNgramDroppingTokens: Int
  65. def getMaxTokenNgramFingerprint: Int
  66. def getMinCharNgramFingerprint: Int
  67. def getMinTokenNgramFingerprint: Int
  68. def getMultivaluesRelations: Boolean
    Definition Classes
    ChunkMapperApproach
  69. final def getOrDefault[T](param: Param[T]): T
    Definition Classes
    Params
  70. final def getOutputCol: String
    Definition Classes
    HasOutputAnnotationCol
  71. def getParam(paramName: String): Param[Any]
    Definition Classes
    Params
  72. def getPossibleFingerprintPredictionKeys(annotationResult: String): Stream[(Double, String)]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  73. def getPossibleFingerprintTrainingKeys(keys: Mappings): Map[String, Map[String, Array[String]]]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  74. def getRel: String
    Definition Classes
    ChunkMapperApproach
  75. def getRels: Array[String]
    Definition Classes
    ChunkMapperApproach
  76. def getTokenNgramFingerprints(mappingKey: String): Seq[String]
    Attributes
    protected
    Definition Classes
    ChunkMapperFuzzyMatchingParams
  77. final def hasDefault[T](param: Param[T]): Boolean
    Definition Classes
    Params
  78. def hasParam(paramName: String): Boolean
    Definition Classes
    Params
  79. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  80. def initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
    Attributes
    protected
    Definition Classes
    Logging
  81. def initializeLogIfNecessary(isInterpreter: Boolean): Unit
    Attributes
    protected
    Definition Classes
    Logging
  82. val inputAnnotatorTypes: Array[String]
    Definition Classes
    DocMapperApproach → ChunkMapperApproach → HasInputAnnotationCols
  83. final val inputCols: StringArrayParam
    Attributes
    protected
    Definition Classes
    HasInputAnnotationCols
  84. final def isDefined(param: Param[_]): Boolean
    Definition Classes
    Params
  85. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  86. lazy val isLowerCase: Boolean
  87. final def isSet(param: Param[_]): Boolean
    Definition Classes
    Params
  88. def isTraceEnabled(): Boolean
    Attributes
    protected
    Definition Classes
    Logging
  89. val lazyAnnotator: BooleanParam
    Definition Classes
    CanBeLazy
  90. def log: Logger
    Attributes
    protected
    Definition Classes
    Logging
  91. def logDebug(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  92. def logDebug(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  93. def logError(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  94. def logError(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  95. def logInfo(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  96. def logInfo(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  97. def logName: String
    Attributes
    protected
    Definition Classes
    Logging
  98. def logTrace(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  99. def logTrace(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  100. def logWarning(msg: ⇒ String, throwable: Throwable): Unit
    Attributes
    protected
    Definition Classes
    Logging
  101. def logWarning(msg: ⇒ String): Unit
    Attributes
    protected
    Definition Classes
    Logging
  102. val lowerCase: BooleanParam

  103. val maxCharNgramFingerprint: IntParam

    When enableCharFingerprintMatching is true, the max number of chars for Ngrams in Fingerprint

    When enableCharFingerprintMatching is true, the max number of chars for Ngrams in Fingerprint

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  104. val maxTokenNgramDroppingCharsRatio: DoubleParam

    When enableTokenFingerprintMatching is true, this value drives the maximum ratio of chars allowed to be dropped from the full chunk; whenever it is desired for all Ngrams to be used as keys, no matter how short the final chunk is, this param should be set to 1.0

    When enableTokenFingerprintMatching is true, this value drives the maximum ratio of chars allowed to be dropped from the full chunk; whenever it is desired for all Ngrams to be used as keys, no matter how short the final chunk is, this param should be set to 1.0

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  105. val maxTokenNgramDroppingOperator: Param[String]

    When enableTokenFingerprintMatching is true, this param drives the logic to compose both dropping parameters; it can be either "and" or "or"

    When enableTokenFingerprintMatching is true, this param drives the logic to compose both dropping parameters; it can be either "and" or "or"

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  106. val maxTokenNgramDroppingTokens: IntParam

    When enableTokenFingerprintMatching is true, this value drives the maximum number of tokens allowed to be dropped from the full chunk; whenever it is desired for all Ngrams to be used as keys, no matter how short the final chunk is, this param should be set to a very high value, e.g. Int.MaxValue

    When enableTokenFingerprintMatching is true, this value drives the maximum number of tokens allowed to be dropped from the full chunk; whenever it is desired for all Ngrams to be used as keys, no matter how short the final chunk is, this param should be set to a very high value, e.g. Int.MaxValue

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  107. val maxTokenNgramFingerprint: IntParam

    When enableTokenFingerprintMatching is true, the max number of tokens for partial Ngrams in Fingerprint

    When enableTokenFingerprintMatching is true, the max number of tokens for partial Ngrams in Fingerprint

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  108. val minCharNgramFingerprint: IntParam

    When enableCharFingerprintMatching is true, the min number of chars for Ngrams in Fingerprint

    When enableCharFingerprintMatching is true, the min number of chars for Ngrams in Fingerprint

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  109. val minTokenNgramFingerprint: IntParam

    When enableTokenFingerprintMatching is true, the min number of tokens for partial Ngrams in Fingerprint

    When enableTokenFingerprintMatching is true, the min number of tokens for partial Ngrams in Fingerprint

    Definition Classes
    ChunkMapperFuzzyMatchingParams
  110. def msgHelper(schema: StructType): String
    Attributes
    protected
    Definition Classes
    HasInputAnnotationCols
  111. val multivaluesRelations: BooleanParam
    Definition Classes
    ChunkMapperApproach
  112. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  113. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  114. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  115. def onTrained(model: ChunkMapperModel, spark: SparkSession): Unit
    Definition Classes
    AnnotatorApproach
  116. val optionalInputAnnotatorTypes: Array[String]
    Definition Classes
    HasInputAnnotationCols
  117. val outputAnnotatorType: AnnotatorType
    Definition Classes
    DocMapperApproach → ChunkMapperApproach → HasOutputAnnotatorType
  118. final val outputCol: Param[String]
    Attributes
    protected
    Definition Classes
    HasOutputAnnotationCol
  119. lazy val params: Array[Param[_]]
    Definition Classes
    Params
  120. lazy val realizedFilledDistancesAndThresholds: Array[(String, Double)]
  121. val rels: StringArrayParam
    Definition Classes
    ChunkMapperApproach
  122. def save(path: String): Unit
    Definition Classes
    MLWritable
    Annotations
    @Since( "1.6.0" ) @throws( ... )
  123. def seAllowMultiTokenChunk(lc: Boolean): DocMapperApproach.this.type
    Definition Classes
    ChunkMapperApproach
  124. final def set(paramPair: ParamPair[_]): DocMapperApproach.this.type
    Attributes
    protected
    Definition Classes
    Params
  125. final def set(param: String, value: Any): DocMapperApproach.this.type
    Attributes
    protected
    Definition Classes
    Params
  126. final def set[T](param: Param[T], value: T): DocMapperApproach.this.type
    Definition Classes
    Params
  127. final def setDefault(paramPairs: ParamPair[_]*): DocMapperApproach.this.type
    Attributes
    protected
    Definition Classes
    Params
  128. final def setDefault[T](param: Param[T], value: T): DocMapperApproach.this.type
    Attributes
    protected[org.apache.spark.ml]
    Definition Classes
    Params
  129. def setDictionary(path: String): DocMapperApproach.this.type
    Definition Classes
    ChunkMapperApproach
  130. def setDoExceptionHandling(value: Boolean): DocMapperApproach.this.type

    If true, exceptions are handled.

    If true, exceptions are handled. If exception-causing data is passed to the model, an error annotation is emitted which has the exception message. Processing continues with the next one. This comes with a performance penalty.

    Definition Classes
    HandleExceptionParams
  131. def setEnableCharFingerprintMatching(value: Boolean): DocMapperApproach.this.type
  132. def setEnableFuzzyMatching(value: Boolean): DocMapperApproach.this.type
  133. def setEnableTokenFingerprintMatching(value: Boolean): DocMapperApproach.this.type
  134. def setExtraInputCols(value: Array[String]): DocMapperApproach.this.type
    Definition Classes
    HasInputAnnotationCols
  135. def setFuzzyDistanceScalingMode(value: String): DocMapperApproach.this.type
  136. def setFuzzyMatchingDistanceThresholds(value: Double): DocMapperApproach.this.type
  137. def setFuzzyMatchingDistanceThresholds(value: Array[Double]): DocMapperApproach.this.type
  138. def setFuzzyMatchingDistances(value: Array[String]): DocMapperApproach.this.type
  139. final def setInputCols(value: String*): DocMapperApproach.this.type
    Definition Classes
    HasInputAnnotationCols
  140. def setInputCols(value: Array[String]): DocMapperApproach.this.type
    Definition Classes
    HasInputAnnotationCols
  141. def setLazyAnnotator(value: Boolean): DocMapperApproach.this.type
    Definition Classes
    CanBeLazy
  142. def setLowerCase(lc: Boolean): DocMapperApproach.this.type
  143. def setMaxCharNgramFingerprint(value: Int): DocMapperApproach.this.type
  144. def setMaxTokenNgramDroppingCharsRatio(value: Double): DocMapperApproach.this.type
  145. def setMaxTokenNgramDroppingOperator(value: String): DocMapperApproach.this.type
  146. def setMaxTokenNgramDroppingTokens(value: Int): DocMapperApproach.this.type
  147. def setMaxTokenNgramFingerprint(value: Int): DocMapperApproach.this.type
  148. def setMinCharNgramFingerprint(value: Int): DocMapperApproach.this.type
  149. def setMinTokenNgramFingerprint(value: Int): DocMapperApproach.this.type
  150. def setMultivaluesRelations(lc: Boolean): DocMapperApproach.this.type
    Definition Classes
    ChunkMapperApproach
  151. final def setOutputCol(value: String): DocMapperApproach.this.type
    Definition Classes
    HasOutputAnnotationCol
  152. def setRels(value: Array[String]): DocMapperApproach.this.type
    Definition Classes
    ChunkMapperApproach
  153. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  154. lazy val toLowercase: (String) ⇒ String
  155. def toString(): String
    Definition Classes
    Identifiable → AnyRef → Any
  156. def train(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DocMapperModel
    Definition Classes
    DocMapperApproach → ChunkMapperApproach → AnnotatorApproach
  157. final def transformSchema(schema: StructType): StructType
    Definition Classes
    AnnotatorApproach → PipelineStage
  158. def transformSchema(schema: StructType, logging: Boolean): StructType
    Attributes
    protected
    Definition Classes
    PipelineStage
    Annotations
    @DeveloperApi()
  159. val uid: String
    Definition Classes
    DocMapperApproach → ChunkMapperApproach → Identifiable
  160. def validate(schema: StructType): Boolean
    Attributes
    protected
    Definition Classes
    AnnotatorApproach
  161. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  162. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  163. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  164. def write: MLWriter
    Definition Classes
    DefaultParamsWritable → MLWritable

Deprecated Value Members

  1. val rel: Param[String]
    Definition Classes
    ChunkMapperApproach
    Annotations
    @deprecated
    Deprecated
  2. def setRel(value: String): DocMapperApproach.this.type
    Definition Classes
    ChunkMapperApproach
    Annotations
    @deprecated
    Deprecated

    Use 'setRels' method instead of 'setRel' method

Inherited from ChunkMapperApproach

Inherited from HandleExceptionParams

Inherited from CheckLicense

Inherited from AnnotatorApproach[ChunkMapperModel]

Inherited from CanBeLazy

Inherited from DefaultParamsWritable

Inherited from MLWritable

Inherited from HasOutputAnnotatorType

Inherited from HasOutputAnnotationCol

Inherited from HasInputAnnotationCols

Inherited from Estimator[ChunkMapperModel]

Inherited from PipelineStage

Inherited from Logging

Inherited from Params

Inherited from Serializable

Inherited from Serializable

Inherited from Identifiable

Inherited from AnyRef

Inherited from Any

param

setParam

Ungrouped