class Annotation2Training extends CheckLicense
Converts annotation results from JSON or CSV files into a DataFrame suitable for NER training. Input files must follow a structure similar to the output of John Snow Labs' Generative AI annotation tool.
Linear Supertypes
- CheckLicense
- AnyRef
- Any
Instance Constructors
- new Annotation2Training(spark: SparkSession)
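A minimal construction sketch follows. The package path for Annotation2Training is an assumption (adjust the import to match your Spark NLP for Healthcare distribution); everything else follows the constructor signature above.

```scala
import org.apache.spark.sql.SparkSession
// Assumed package path; adjust to the artifact you have installed.
import com.johnsnowlabs.nlp.training.Annotation2Training

val spark: SparkSession = SparkSession.builder()
  .appName("annotation2training-example")
  .getOrCreate()

// The converter is bound to the SparkSession it will use to read and transform files.
val converter = new Annotation2Training(spark)
```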
Value Members
- final def !=(arg0: Any): Boolean
  - Definition Classes: AnyRef → Any
- final def ##(): Int
  - Definition Classes: AnyRef → Any
- final def ==(arg0: Any): Boolean
  - Definition Classes: AnyRef → Any
- final def asInstanceOf[T0]: T0
  - Definition Classes: Any
- def checkValidEnvironment(spark: Option[SparkSession], scopes: Seq[String]): Unit
  - Definition Classes: CheckLicense
- def checkValidScope(scope: String): Unit
  - Definition Classes: CheckLicense
- def checkValidScopeAndEnvironment(scope: String, spark: Option[SparkSession], checkLp: Boolean): Unit
  - Definition Classes: CheckLicense
- def checkValidScopesAndEnvironment(scopes: Seq[String], spark: Option[SparkSession], checkLp: Boolean): Unit
  - Definition Classes: CheckLicense
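These four members are inherited from CheckLicense and validate the runtime environment and license scopes before a conversion runs. A hedged sketch, reusing spark and converter from the construction example above; the scope string "training" and the checkLp value are purely illustrative assumptions, not documented values:

```scala
// Illustrative pre-flight check: the scope name and checkLp flag are assumptions.
// The check is expected to fail if the environment or license scope is not valid.
converter.checkValidScopesAndEnvironment(
  scopes = Seq("training"),
  spark = Some(spark),
  checkLp = false
)
```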
- def clone(): AnyRef
  - Attributes: protected[lang]
  - Definition Classes: AnyRef
  - Annotations: @throws( ... ) @native()
- def convertCsv2NerDF(csvPath: String, pipelineModel: PipelineModel, repartition: Int = 32, tokenOutputCol: String = "token", nerLabelCol: String = "label"): DataFrame
  Converts a CSV file with annotation results to a DataFrame suitable for NER training (a usage sketch follows the parameter list).
  - csvPath: Path to the input CSV file. The file is read with spark.read.csv with the header, multiLine, quote, and escape options set.
  - pipelineModel: A pre-trained Spark NLP PipelineModel that includes at least a DocumentAssembler and a Tokenizer; it may also include a SentenceDetector, DocumentSplitter, WordEmbeddings, etc.
  - repartition: Number of partitions to use when reading the CSV file (default: 32).
  - tokenOutputCol: Name of the column containing token annotations (default: "token").
  - nerLabelCol: Name of the output column for NER labels (default: "label").
  - returns: A DataFrame ready for training NER models.
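A hedged end-to-end sketch for the CSV path, reusing spark and converter from the construction example above: build the minimal required pipeline (DocumentAssembler plus Tokenizer), fit it, and pass it to convertCsv2NerDF. The file path is a placeholder, and the DocumentAssembler/Tokenizer imports assume the standard Spark NLP packages.

```scala
import org.apache.spark.ml.{Pipeline, PipelineModel}
import com.johnsnowlabs.nlp.DocumentAssembler
import com.johnsnowlabs.nlp.annotators.Tokenizer
import spark.implicits._

// Minimal pipeline satisfying the documented requirement:
// at least a DocumentAssembler and a Tokenizer.
val documentAssembler = new DocumentAssembler()
  .setInputCol("text")
  .setOutputCol("document")

val tokenizer = new Tokenizer()
  .setInputCols(Array("document"))
  .setOutputCol("token")

val pipelineModel: PipelineModel = new Pipeline()
  .setStages(Array(documentAssembler, tokenizer))
  .fit(Seq.empty[String].toDF("text"))

// "annotations.csv" is a placeholder path to an export from the annotation tool.
val nerTrainDf = converter.convertCsv2NerDF(
  csvPath = "annotations.csv",
  pipelineModel = pipelineModel,
  tokenOutputCol = "token",
  nerLabelCol = "label"
)
nerTrainDf.show(5, truncate = false)
```

The resulting DataFrame can then be fed to an NER training annotator such as NerDLApproach (not shown).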
- def convertJson2NerDF(inputPath: String, pipelineModel: PipelineModel, repartition: Int = 32, tokenOutputCol: String = "token", nerLabelCol: String = "label"): DataFrame
  Converts a JSON file with annotation results to a DataFrame suitable for NER training (a usage sketch follows the parameter list).
  - inputPath: Path to the input JSON file. The file is read with spark.read.json with the multiLine option set to true.
  - pipelineModel: A pre-trained Spark NLP PipelineModel that includes at least a DocumentAssembler and a Tokenizer; it may also include a SentenceDetector, DocumentSplitter, WordEmbeddings, etc.
  - repartition: Number of partitions to use when reading the input file (default: 32).
  - tokenOutputCol: Name of the column containing token annotations (default: "token").
  - nerLabelCol: Name of the output column for NER labels (default: "label").
  - returns: A DataFrame ready for training NER models.
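The JSON variant differs only in the reader and the input path. A short sketch, reusing pipelineModel and converter from the CSV example above; the path and the repartition override are illustrative:

```scala
// "annotations.json" is a placeholder path; the file is read with multiLine = true.
val nerTrainFromJson = converter.convertJson2NerDF(
  inputPath = "annotations.json",
  pipelineModel = pipelineModel,
  repartition = 8 // illustrative override of the default of 32
)
nerTrainFromJson.show(5, truncate = false)
```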
- final def eq(arg0: AnyRef): Boolean
  - Definition Classes: AnyRef
- def equals(arg0: Any): Boolean
  - Definition Classes: AnyRef → Any
- def finalize(): Unit
  - Attributes: protected[lang]
  - Definition Classes: AnyRef
  - Annotations: @throws( classOf[java.lang.Throwable] )
- final def getClass(): Class[_]
  - Definition Classes: AnyRef → Any
  - Annotations: @native()
- def hashCode(): Int
  - Definition Classes: AnyRef → Any
  - Annotations: @native()
- final def isInstanceOf[T0]: Boolean
  - Definition Classes: Any
- final def ne(arg0: AnyRef): Boolean
  - Definition Classes: AnyRef
- final def notify(): Unit
  - Definition Classes: AnyRef
  - Annotations: @native()
- final def notifyAll(): Unit
  - Definition Classes: AnyRef
  - Annotations: @native()
- final def synchronized[T0](arg0: ⇒ T0): T0
  - Definition Classes: AnyRef
- def toString(): String
  - Definition Classes: AnyRef → Any
- final def wait(): Unit
  - Definition Classes: AnyRef
  - Annotations: @throws( ... )
- final def wait(arg0: Long, arg1: Int): Unit
  - Definition Classes: AnyRef
  - Annotations: @throws( ... )
- final def wait(arg0: Long): Unit
  - Definition Classes: AnyRef
  - Annotations: @throws( ... ) @native()