com.johnsnowlabs.nlp.annotators.seq2seq
MedicalTextGenerator
Companion object MedicalTextGenerator
class MedicalTextGenerator extends AnnotatorModel[MedicalQuestionAnswering] with GPTGenerationParams with HasCaseSensitiveProperties with HasBatchedAnnotate[MedicalQuestionAnswering] with ParamsAndFeaturesWritable with WriteTensorflowModel with WriteOnnxModel with WriteSentencePieceModel with HasEngine with CheckLicense
- Grouped
- Alphabetic
- By Inheritance
- MedicalTextGenerator
- CheckLicense
- HasEngine
- WriteSentencePieceModel
- WriteOnnxModel
- WriteTensorflowModel
- HasBatchedAnnotate
- HasCaseSensitiveProperties
- GPTGenerationParams
- AnnotatorModel
- CanBeLazy
- RawAnnotator
- HasOutputAnnotationCol
- HasInputAnnotationCols
- HasOutputAnnotatorType
- ParamsAndFeaturesWritable
- HasFeatures
- DefaultParamsWritable
- MLWritable
- Model
- Transformer
- PipelineStage
- Logging
- Params
- Serializable
- Serializable
- Identifiable
- AnyRef
- Any
- Hide All
- Show All
- Public
- All
Type Members
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
$[T](param: Param[T]): T
- Attributes
- protected
- Definition Classes
- Params
-
def
$$[T](feature: StructFeature[T]): T
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
$$[K, V](feature: MapFeature[K, V]): Map[K, V]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
$$[T](feature: SetFeature[T]): Set[T]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
$$[T](feature: ArrayFeature[T]): Array[T]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
val
DOCUMENT_VARIABLE_NAME: String
- Attributes
- protected
-
def
_transform(dataset: Dataset[_], recursivePipeline: Option[PipelineModel]): DataFrame
- Attributes
- protected
- Definition Classes
- AnnotatorModel
-
val
additionalTokens: MapFeature[Int, String]
Additional tokens
-
def
afterAnnotate(dataset: DataFrame): DataFrame
- Attributes
- protected
- Definition Classes
- AnnotatorModel
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
batchAnnotate(batchedAnnotations: Seq[Array[Annotation]]): Seq[Seq[Annotation]]
takes a document and annotations and produces new annotations of this annotator's annotation type
takes a document and annotations and produces new annotations of this annotator's annotation type
- batchedAnnotations
Annotations that correspond to inputAnnotationCols generated by previous annotators if any
- returns
Any number of annotations processed for every input annotation. Not necessarily a one-to-one relationship.
- Definition Classes
- MedicalTextGenerator → HasBatchedAnnotate
-
def
batchProcess(rows: Iterator[_]): Iterator[Row]
- Definition Classes
- HasBatchedAnnotate
-
val
batchSize: IntParam
- Definition Classes
- HasBatchedAnnotate
- def batchedAnnotateWithoutPromptTemplate(batchedAnnotations: Seq[Array[Annotation]]): Seq[Seq[Annotation]]
-
def
beforeAnnotate(dataset: Dataset[_]): Dataset[_]
- Attributes
- protected
- Definition Classes
- AnnotatorModel
-
val
caseSensitive: BooleanParam
- Definition Classes
- HasCaseSensitiveProperties
-
final
def
checkSchema(schema: StructType, inputAnnotatorType: String): Boolean
- Attributes
- protected
- Definition Classes
- HasInputAnnotationCols
-
def
checkValidEnvironment(spark: Option[SparkSession], scopes: Seq[String]): Unit
- Definition Classes
- CheckLicense
-
def
checkValidScope(scope: String): Unit
- Definition Classes
- CheckLicense
-
def
checkValidScopeAndEnvironment(scope: String, spark: Option[SparkSession], checkLp: Boolean): Unit
- Definition Classes
- CheckLicense
-
def
checkValidScopesAndEnvironment(scopes: Seq[String], spark: Option[SparkSession], checkLp: Boolean): Unit
- Definition Classes
- CheckLicense
-
final
def
clear(param: Param[_]): MedicalTextGenerator.this.type
- Definition Classes
- Params
-
def
clone(): AnyRef
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
val
configProtoBytes: IntArrayParam
ConfigProto from tensorflow, serialized into byte array.
ConfigProto from tensorflow, serialized into byte array. Get with config_proto.SerializeToString()
-
def
copy(extra: ParamMap): MedicalQuestionAnswering
- Definition Classes
- RawAnnotator → Model → Transformer → PipelineStage → Params
-
def
copyValues[T <: Params](to: T, extra: ParamMap): T
- Attributes
- protected
- Definition Classes
- Params
-
val
customPrompt: Param[String]
Custom model prompt
-
final
def
defaultCopy[T <: Params](extra: ParamMap): T
- Attributes
- protected
- Definition Classes
- Params
-
val
doSample: BooleanParam
Whether or not to use sampling; greedy decoding is used otherwise (Default: false)
Whether or not to use sampling; greedy decoding is used otherwise (Default: false)
- Definition Classes
- GPTGenerationParams
-
val
engine: Param[String]
- Definition Classes
- HasEngine
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
explainParam(param: Param[_]): String
- Definition Classes
- Params
-
def
explainParams(): String
- Definition Classes
- Params
-
def
extraValidate(structType: StructType): Boolean
- Attributes
- protected
- Definition Classes
- RawAnnotator
-
def
extraValidateMsg: String
- Attributes
- protected
- Definition Classes
- RawAnnotator
-
final
def
extractParamMap(): ParamMap
- Definition Classes
- Params
-
final
def
extractParamMap(extra: ParamMap): ParamMap
- Definition Classes
- Params
-
val
features: ArrayBuffer[Feature[_, _, _]]
- Definition Classes
- HasFeatures
-
def
finalize(): Unit
- Attributes
- protected[lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
get[T](feature: StructFeature[T]): Option[T]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
get[K, V](feature: MapFeature[K, V]): Option[Map[K, V]]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
get[T](feature: SetFeature[T]): Option[Set[T]]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
get[T](feature: ArrayFeature[T]): Option[Array[T]]
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
get[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
def
getAdditionalTokens: Map[Int, String]
Get additional tokens
-
def
getAdditionalTokensStr: String
Get additional tokens in string format
-
def
getBatchSize: Int
- Definition Classes
- HasBatchedAnnotate
-
def
getCaseSensitive: Boolean
- Definition Classes
- HasCaseSensitiveProperties
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
- def getConfigProtoBytes: Option[Array[Byte]]
-
def
getCustomPrompt: String
Custom model prompt
Custom model prompt
- Attributes
- protected
-
final
def
getDefault[T](param: Param[T]): Option[T]
- Definition Classes
- Params
-
def
getDoSample: Boolean
- Definition Classes
- GPTGenerationParams
-
def
getEngine: String
- Definition Classes
- HasEngine
-
def
getIgnoreTokenIds: Array[Int]
- Definition Classes
- GPTGenerationParams
-
def
getInputCols: Array[String]
- Definition Classes
- HasInputAnnotationCols
-
def
getLazyAnnotator: Boolean
- Definition Classes
- CanBeLazy
-
def
getMaxContextLength: Int
- Definition Classes
- GPTGenerationParams
-
def
getMaxNewTokens: Int
- Definition Classes
- GPTGenerationParams
- def getMaxTextLength: Int
-
def
getMlFrameworkType: String
Get ML framework type
- def getModelIfNotSet: MedicalEncoderDecoderModel
-
def
getModelType: String
Get model type
-
def
getNoRepeatNgramSize: Int
- Definition Classes
- GPTGenerationParams
-
final
def
getOrDefault[T](param: Param[T]): T
- Definition Classes
- Params
-
final
def
getOutputCol: String
- Definition Classes
- HasOutputAnnotationCol
-
def
getParam(paramName: String): Param[Any]
- Definition Classes
- Params
-
def
getRandomSeed: Option[Int]
- Definition Classes
- GPTGenerationParams
- def getSignatures: Option[Map[String, String]]
-
def
getStopAtEos: Boolean
Checks whether text generation stops when the end-of-sentence token is encountered.
-
def
getTopK: Int
- Definition Classes
- GPTGenerationParams
- def getUseCache: Boolean
-
final
def
hasDefault[T](param: Param[T]): Boolean
- Definition Classes
- Params
-
def
hasParam(paramName: String): Boolean
- Definition Classes
- Params
-
def
hasParent: Boolean
- Definition Classes
- Model
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
val
ignoreTokenIds: IntArrayParam
A list of token ids which are ignored in the decoder's output (Default: Array())
A list of token ids which are ignored in the decoder's output (Default: Array())
- Definition Classes
- GPTGenerationParams
-
def
initializeLogIfNecessary(isInterpreter: Boolean, silent: Boolean): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
def
initializeLogIfNecessary(isInterpreter: Boolean): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
val
inputAnnotatorTypes: Array[String]
Input annotator types: DOCUMENT, DOCUMENT
Input annotator types: DOCUMENT, DOCUMENT
- Definition Classes
- MedicalTextGenerator → HasInputAnnotationCols
-
final
val
inputCols: StringArrayParam
- Attributes
- protected
- Definition Classes
- HasInputAnnotationCols
-
final
def
isDefined(param: Param[_]): Boolean
- Definition Classes
- Params
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
final
def
isSet(param: Param[_]): Boolean
- Definition Classes
- Params
-
def
isTraceEnabled(): Boolean
- Attributes
- protected
- Definition Classes
- Logging
-
val
lazyAnnotator: BooleanParam
- Definition Classes
- CanBeLazy
-
def
log: Logger
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logDebug(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logError(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logInfo(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logName: String
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logTrace(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String, throwable: Throwable): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
def
logWarning(msg: ⇒ String): Unit
- Attributes
- protected
- Definition Classes
- Logging
-
val
maxContextLength: IntParam
Maximum length of context text.
Maximum length of context text. (Default: 1024)
- Definition Classes
- GPTGenerationParams
-
val
maxNewTokens: IntParam
Maximum number of new tokens to be generated (Default: 30)
Maximum number of new tokens to be generated (Default: 30)
- Definition Classes
- GPTGenerationParams
-
val
maxTextLength: IntParam
Maximum length of context text.
Maximum length of context text. (Default: 1024)
-
val
merges: MapFeature[(String, String), Int]
Holding merges.txt coming from RoBERTa model
-
val
mlFrameworkType: Param[String]
ML framework type
-
val
modelType: Param[String]
Model type
-
def
msgHelper(schema: StructType): String
- Attributes
- protected
- Definition Classes
- HasInputAnnotationCols
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
val
noRepeatNgramSize: IntParam
If set to int > 0, all ngrams of that size can only occur once (Default: 0)
If set to int > 0, all ngrams of that size can only occur once (Default: 0)
- Definition Classes
- GPTGenerationParams
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
def
onWrite(path: String, spark: SparkSession): Unit
- Definition Classes
- MedicalTextGenerator → ParamsAndFeaturesWritable
-
val
optionalInputAnnotatorTypes: Array[String]
- Definition Classes
- HasInputAnnotationCols
-
val
outputAnnotatorType: String
Output annotator type: DOCUMENT
Output annotator type: DOCUMENT
- Definition Classes
- MedicalTextGenerator → HasOutputAnnotatorType
-
final
val
outputCol: Param[String]
- Attributes
- protected
- Definition Classes
- HasOutputAnnotationCol
-
lazy val
params: Array[Param[_]]
- Definition Classes
- Params
-
var
parent: Estimator[MedicalQuestionAnswering]
- Definition Classes
- Model
-
val
randomSeed: Option[Int]
Optional random seed for the model.
Optional random seed for the model. Needs to be of type Long.
- Definition Classes
- GPTGenerationParams
-
def
save(path: String): Unit
- Definition Classes
- MLWritable
- Annotations
- @Since( "1.6.0" ) @throws( ... )
-
def
set[T](feature: StructFeature[T], value: T): MedicalTextGenerator.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
set[K, V](feature: MapFeature[K, V], value: Map[K, V]): MedicalTextGenerator.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
set[T](feature: SetFeature[T], value: Set[T]): MedicalTextGenerator.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
set[T](feature: ArrayFeature[T], value: Array[T]): MedicalTextGenerator.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
set(paramPair: ParamPair[_]): MedicalTextGenerator.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
set(param: String, value: Any): MedicalTextGenerator.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
set[T](param: Param[T], value: T): MedicalTextGenerator.this.type
- Definition Classes
- Params
-
def
setAdditionalTokens(values: HashMap[Int, String]): MedicalTextGenerator.this.type
Set additional tokens
-
def
setAdditionalTokens(value: Map[Int, String]): MedicalTextGenerator.this.type
Set additional tokens
-
def
setBatchSize(size: Int): MedicalTextGenerator.this.type
- Definition Classes
- HasBatchedAnnotate
-
def
setCaseSensitive(value: Boolean): MedicalTextGenerator.this.type
- Definition Classes
- HasCaseSensitiveProperties
- def setConfigProtoBytes(bytes: Array[Int]): MedicalTextGenerator.this.type
-
def
setCustomPrompt(value: String): MedicalTextGenerator.this.type
Set custom model prompt
-
def
setDefault[T](feature: StructFeature[T], value: () ⇒ T): MedicalTextGenerator.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
setDefault[K, V](feature: MapFeature[K, V], value: () ⇒ Map[K, V]): MedicalTextGenerator.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
setDefault[T](feature: SetFeature[T], value: () ⇒ Set[T]): MedicalTextGenerator.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
def
setDefault[T](feature: ArrayFeature[T], value: () ⇒ Array[T]): MedicalTextGenerator.this.type
- Attributes
- protected
- Definition Classes
- HasFeatures
-
final
def
setDefault(paramPairs: ParamPair[_]*): MedicalTextGenerator.this.type
- Attributes
- protected
- Definition Classes
- Params
-
final
def
setDefault[T](param: Param[T], value: T): MedicalTextGenerator.this.type
- Attributes
- protected[org.apache.spark.ml]
- Definition Classes
- Params
-
def
setDoSample(value: Boolean): MedicalTextGenerator.this.type
- Definition Classes
- GPTGenerationParams
-
def
setIgnoreTokenIds(tokenIds: Array[Int]): MedicalTextGenerator.this.type
- Definition Classes
- GPTGenerationParams
-
final
def
setInputCols(value: String*): MedicalTextGenerator.this.type
- Definition Classes
- HasInputAnnotationCols
-
def
setInputCols(value: Array[String]): MedicalTextGenerator.this.type
- Definition Classes
- HasInputAnnotationCols
-
def
setLazyAnnotator(value: Boolean): MedicalTextGenerator.this.type
- Definition Classes
- CanBeLazy
-
def
setMaxContextLength(value: Int): MedicalTextGenerator.this.type
- Definition Classes
- GPTGenerationParams
-
def
setMaxNewTokens(value: Int): MedicalTextGenerator.this.type
- Definition Classes
- GPTGenerationParams
- def setMaxTextLength(value: Int): MedicalTextGenerator.this.type
- def setMerges(value: Map[(String, String), Int]): MedicalTextGenerator.this.type
-
def
setMlFrameworkType(value: String): MedicalTextGenerator.this.type
Set ML framework type
- def setModelIfNotSet(spark: SparkSession, model: MedicalEncoderDecoderModel): MedicalTextGenerator.this.type
- def setModelIfNotSet(spark: SparkSession, encoder: OnnxWrapper, decoder: OnnxWrapper, spp: SentencePieceWrapper): MedicalTextGenerator.this.type
- def setModelIfNotSet(spark: SparkSession, tfWrapper: TensorflowWrapper, spp: SentencePieceWrapper, useCache: Boolean): MedicalTextGenerator.this.type
- def setModelIfNotSet(spark: SparkSession, tfWrapper: TensorflowWrapper): MedicalTextGenerator.this.type
-
def
setModelType(value: String): MedicalTextGenerator.this.type
Set model type
-
def
setNoRepeatNgramSize(value: Int): MedicalTextGenerator.this.type
- Definition Classes
- GPTGenerationParams
-
final
def
setOutputCol(value: String): MedicalTextGenerator.this.type
- Definition Classes
- HasOutputAnnotationCol
-
def
setParent(parent: Estimator[MedicalQuestionAnswering]): MedicalQuestionAnswering
- Definition Classes
- Model
-
def
setRandomSeed(value: Int): MedicalTextGenerator.this.type
- Definition Classes
- GPTGenerationParams
- def setSignatures(value: Map[String, String]): MedicalTextGenerator.this.type
-
def
setStopAtEos(value: Boolean): MedicalTextGenerator.this.type
Determines whether text generation stops when the end-of-sentence token is encountered.
-
def
setTopK(value: Int): MedicalTextGenerator.this.type
- Definition Classes
- GPTGenerationParams
- def setUseCache(value: Boolean): MedicalTextGenerator.this.type
- def setVocabulary(value: Map[String, Int]): MedicalTextGenerator.this.type
-
val
signatures: MapFeature[String, String]
It contains TF model signatures for the loaded saved model
-
val
stopAtEos: BooleanParam
Stop text generation when the end-of-sentence token is encountered.
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- Identifiable → AnyRef → Any
-
val
topK: IntParam
The number of highest probability vocabulary tokens to consider
The number of highest probability vocabulary tokens to consider
- Definition Classes
- GPTGenerationParams
-
final
def
transform(dataset: Dataset[_]): DataFrame
- Definition Classes
- AnnotatorModel → Transformer
-
def
transform(dataset: Dataset[_], paramMap: ParamMap): DataFrame
- Definition Classes
- Transformer
- Annotations
- @Since( "2.0.0" )
-
def
transform(dataset: Dataset[_], firstParamPair: ParamPair[_], otherParamPairs: ParamPair[_]*): DataFrame
- Definition Classes
- Transformer
- Annotations
- @Since( "2.0.0" ) @varargs()
-
final
def
transformSchema(schema: StructType): StructType
- Definition Classes
- RawAnnotator → PipelineStage
-
def
transformSchema(schema: StructType, logging: Boolean): StructType
- Attributes
- protected
- Definition Classes
- PipelineStage
- Annotations
- @DeveloperApi()
-
val
uid: String
- Definition Classes
- MedicalTextGenerator → Identifiable
-
val
useCache: BooleanParam
Cache internal state of the model to improve performance
-
def
validate(schema: StructType): Boolean
- Attributes
- protected
- Definition Classes
- RawAnnotator
-
val
vocabulary: MapFeature[String, Int]
Vocabulary used to encode the words to ids with bpeTokenizer.encode
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
wrapColumnMetadata(col: Column): Column
- Attributes
- protected
- Definition Classes
- RawAnnotator
-
def
write: MLWriter
- Definition Classes
- ParamsAndFeaturesWritable → DefaultParamsWritable → MLWritable
-
def
writeOnnxModel(path: String, spark: SparkSession, onnxWrapper: OnnxWrapper, suffix: String, fileName: String): Unit
- Definition Classes
- WriteOnnxModel
-
def
writeOnnxModels(path: String, spark: SparkSession, onnxWrappersWithNames: Seq[(OnnxWrapper, String)], suffix: String): Unit
- Definition Classes
- WriteOnnxModel
-
def
writeSentencePieceModel(path: String, spark: SparkSession, spp: SentencePieceWrapper, suffix: String, filename: String): Unit
- Definition Classes
- WriteSentencePieceModel
-
def
writeTensorflowHub(path: String, tfPath: String, spark: SparkSession, suffix: String): Unit
- Definition Classes
- WriteTensorflowModel
-
def
writeTensorflowModel(path: String, spark: SparkSession, tensorflow: TensorflowWrapper, suffix: String, filename: String, configProtoBytes: Option[Array[Byte]]): Unit
- Definition Classes
- WriteTensorflowModel
-
def
writeTensorflowModelV2(path: String, spark: SparkSession, tensorflow: TensorflowWrapper, suffix: String, filename: String, configProtoBytes: Option[Array[Byte]], savedSignatures: Option[Map[String, String]]): Unit
- Definition Classes
- WriteTensorflowModel