class BioGPTTokenizer extends Gpt2Tokenizer
Linear Supertypes
Ordering
- Alphabetic
- By Inheritance
Inherited
- BioGPTTokenizer
- Gpt2Tokenizer
- BpeTokenizer
- AnyRef
- Any
- Hide All
- Show All
Visibility
- Public
- All
Instance Constructors
Value Members
-
final
def
!=(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
final
def
##(): Int
- Definition Classes
- AnyRef → Any
-
final
def
==(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
val
addPrefixSpaceToSentence: Boolean
- Definition Classes
- BpeTokenizer
-
val
alwaysAddPrefix: Boolean
- Definition Classes
- BpeTokenizer
-
final
def
asInstanceOf[T0]: T0
- Definition Classes
- Any
-
def
bpe(indToken: IndexedToken): Array[TokenPiece]
- Definition Classes
- BioGPTTokenizer → BpeTokenizer
-
val
bpeRanks: Map[(String, String), Int]
- Attributes
- protected
- Definition Classes
- BpeTokenizer
-
val
bytesToUnicodeMapping: Map[Int, String]
- Attributes
- protected
- Definition Classes
- Gpt2Tokenizer
-
val
cache: Map[String, Array[String]]
- Attributes
- protected
- Definition Classes
- BpeTokenizer
-
def
clone(): AnyRef
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()
-
def
decodeTokens(tokens: Array[Int]): String
Mapping for bytes to a different set of unicode characters (especially white spaces). This improved model performance for gpt-2
- Definition Classes
- BioGPTTokenizer → Gpt2Tokenizer
-
val
decoderVocab: Map[Int, String]
- Attributes
- protected
- Definition Classes
- Gpt2Tokenizer
-
def
encode(indTokens: Array[IndexedToken]): Array[TokenPiece]
- Definition Classes
- BpeTokenizer
-
def
encode(indToken: IndexedToken): Array[TokenPiece]
- Definition Classes
- BpeTokenizer
-
final
def
eq(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
def
equals(arg0: Any): Boolean
- Definition Classes
- AnyRef → Any
-
def
finalize(): Unit
- Attributes
- protected[java.lang]
- Definition Classes
- AnyRef
- Annotations
- @throws( classOf[java.lang.Throwable] )
-
def
getBpeRanking: ((String, String)) ⇒ Int
- Attributes
- protected
- Definition Classes
- BpeTokenizer
-
def
getBytePairs(word: Array[String]): Array[(String, String)]
- Attributes
- protected
- Definition Classes
- BpeTokenizer
-
final
def
getClass(): Class[_]
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
def
getTokenPieces(indToken: IndexedToken, word: Array[String]): Array[TokenPiece]
- Attributes
- protected
- Definition Classes
- BpeTokenizer
-
def
hashCode(): Int
- Definition Classes
- AnyRef → Any
- Annotations
- @native()
-
final
def
isInstanceOf[T0]: Boolean
- Definition Classes
- Any
-
val
merges: Map[(String, String), Int]
- Definition Classes
- BpeTokenizer
- val mosesNormalizer: MosesPunctNormalizer
- def mosesPipeline(text: String): Array[String]
- val mosesTokenizer: MosesTokenizer
-
final
def
ne(arg0: AnyRef): Boolean
- Definition Classes
- AnyRef
-
final
def
notify(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
final
def
notifyAll(): Unit
- Definition Classes
- AnyRef
- Annotations
- @native()
-
val
padWithSequenceTokens: Boolean
- Definition Classes
- BpeTokenizer
-
def
performMerges(wordChars: Array[String], charPairs: Array[(String, String)]): Array[String]
- Attributes
- protected
- Definition Classes
- BpeTokenizer
-
def
preProcessTokenForBpe(token: String): String
- Definition Classes
- Gpt2Tokenizer → BpeTokenizer
-
val
prefixForPieceId: Option[String]
- Definition Classes
- Gpt2Tokenizer → BpeTokenizer
-
val
sentencePadding: (String, String)
- Definition Classes
- BpeTokenizer
-
val
specialTokens: SpecialTokens
- Definition Classes
- BpeTokenizer
-
def
splitOnSpecialToken(specialToken: SpecialToken, text: String): ListBuffer[String]
- Attributes
- protected
- Definition Classes
- BpeTokenizer
-
val
splitPattern: Regex
- Definition Classes
- Gpt2Tokenizer
-
val
suffixForPieceId: Option[String]
- Definition Classes
- BioGPTTokenizer → BpeTokenizer
-
final
def
synchronized[T0](arg0: ⇒ T0): T0
- Definition Classes
- AnyRef
-
def
toString(): String
- Definition Classes
- AnyRef → Any
-
def
tokenize(sentence: Sentence): Array[IndexedToken]
- Definition Classes
- BpeTokenizer
-
def
tokenizeSubText(text: String, indexOffset: Int): Array[IndexedToken]
- Definition Classes
- BioGPTTokenizer → Gpt2Tokenizer → BpeTokenizer
-
val
unicodeToByteMapping: Map[String, Int]
- Attributes
- protected
- Definition Classes
- Gpt2Tokenizer
-
val
vocab: Map[String, Int]
- Definition Classes
- BpeTokenizer
-
final
def
wait(): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long, arg1: Int): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... )
-
final
def
wait(arg0: Long): Unit
- Definition Classes
- AnyRef
- Annotations
- @throws( ... ) @native()