Gliner2PreprocessedBatch

case class Gliner2PreprocessedBatch(inputIds: Array[Array[Long]], attentionMask: Array[Array[Long]], mappedIndices: Array[Array[(String, Int, Int)]], schemaCounts: Array[Int], originalLengths: Array[Int], taskTypes: Array[Array[String]], wordTokens: Array[Array[String]], schemaTokensList: Array[Array[Array[String]]], startMappings: Array[Array[Int]], endMappings: Array[Array[Int]], originalTexts: Array[String], originalSchemas: Array[Gliner2Schema], structureLabels: Array[Any] = Array.empty) extends Product with Serializable

Batch of preprocessed inputs ready for the ONNX encoder. Maps 1:1 to Python's PreprocessedBatch.

This is the output of Gliner2DataProcessor.prepareInputs() and the input to the ONNX encoder model.

inputIds

Token IDs for the encoder input, with shape (batch, max_seq_len)

attentionMask

Attention mask for the encoder, with shape (batch, max_seq_len)

mappedIndices

Token mappings, given as (seg_type, orig_idx, schema_idx) tuples:

- seg_type: "schema" or "text"
- orig_idx: Original token index in the text or schema
- schema_idx: Which schema this token belongs to (for schema tokens)

schemaCounts

Number of schemas per sample

originalLengths

Original sequence lengths per sample

taskTypes

Task types per schema per sample

wordTokens

Original text tokens per sample

schemaTokensList

Schema tokens per sample

startMappings

Token character start positions per sample

endMappings

Token character end positions per sample

originalTexts

Original text strings

originalSchemas

Original schema dictionaries, represented here as Gliner2Schema instances

structureLabels

Ground-truth labels (used for training only; may be empty for inference)

Linear Supertypes

Serializable, Serializable, Product, Equals, AnyRef, Any

Ordering

Alphabetic
By Inheritance

Inherited

Gliner2PreprocessedBatch
Serializable
Serializable
Product
Equals
AnyRef
Any

Hide All
Show All

Visibility

Public
All

Instance Constructors

new Gliner2PreprocessedBatch(inputIds: Array[Array[Long]], attentionMask: Array[Array[Long]], mappedIndices: Array[Array[(String, Int, Int)]], schemaCounts: Array[Int], originalLengths: Array[Int], taskTypes: Array[Array[String]], wordTokens: Array[Array[String]], schemaTokensList: Array[Array[Array[String]]], startMappings: Array[Array[Int]], endMappings: Array[Array[Int]], originalTexts: Array[String], originalSchemas: Array[Gliner2Schema], structureLabels: Array[Any] = Array.empty)
inputIds
Token IDs for the encoder input, with shape (batch, max_seq_len)
attentionMask
Attention mask for the encoder, with shape (batch, max_seq_len)
mappedIndices
Token mappings: (seg_type, orig_idx, schema_idx)
- seg_type: "schema" or "text"
- orig_idx: Original token index in text or schema
- schema_idx: Which schema this token belongs to (for schema tokens)
schemaCounts
Number of schemas per sample
originalLengths
Original sequence lengths per sample
taskTypes
Task types per schema per sample
wordTokens
Original text tokens per sample
schemaTokensList
Schema tokens per sample
startMappings
Token char start positions per sample
endMappings
Token char end positions per sample
originalTexts
Original text strings
originalSchemas
Original schema dictionaries, represented here as Gliner2Schema instances
structureLabels
Ground-truth labels (used for training only; may be empty for inference)

Value Members

final def !=(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def ##(): Int

Definition Classes
AnyRef → Any
final def ==(arg0: Any): Boolean

Definition Classes
AnyRef → Any
final def asInstanceOf[T0]: T0

Definition Classes
Any
val attentionMask: Array[Array[Long]]
def batchSize: Int
Number of samples in this batch.
def clone(): AnyRef

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( ... ) @native()
val endMappings: Array[Array[Int]]
final def eq(arg0: AnyRef): Boolean

Definition Classes
AnyRef
def finalize(): Unit

Attributes
protected[lang]
Definition Classes
AnyRef
Annotations
@throws( classOf[java.lang.Throwable] )
final def getClass(): Class[_]

Definition Classes
AnyRef → Any
Annotations
@native()
val inputIds: Array[Array[Long]]
def isEmpty: Boolean
Check if batch is empty.
final def isInstanceOf[T0]: Boolean

Definition Classes
Any
val mappedIndices: Array[Array[(String, Int, Int)]]
final def ne(arg0: AnyRef): Boolean

Definition Classes
AnyRef
final def notify(): Unit

Definition Classes
AnyRef
Annotations
@native()
final def notifyAll(): Unit

Definition Classes
AnyRef
Annotations
@native()
val originalLengths: Array[Int]
val originalSchemas: Array[Gliner2Schema]
val originalTexts: Array[String]
val schemaCounts: Array[Int]
val schemaTokensList: Array[Array[Array[String]]]
val startMappings: Array[Array[Int]]
val structureLabels: Array[Any]
final def synchronized[T0](arg0: ⇒ T0): T0

Definition Classes
AnyRef
val taskTypes: Array[Array[String]]
final def wait(): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long, arg1: Int): Unit

Definition Classes
AnyRef
Annotations
@throws( ... )
final def wait(arg0: Long): Unit

Definition Classes
AnyRef
Annotations
@throws( ... ) @native()
val wordTokens: Array[Array[String]]

Packages

Gliner2PreprocessedBatch

Instance Constructors

Value Members

Inherited from Serializable

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped

Packages

Gliner2PreprocessedBatch 

Instance Constructors

Value Members

Inherited from Serializable

Inherited from Serializable

Inherited from Product

Inherited from Equals

Inherited from AnyRef

Inherited from Any

Ungrouped

Gliner2PreprocessedBatch