Packages

class BioGPTTokenizer extends Gpt2Tokenizer

Linear Supertypes
Gpt2Tokenizer, BpeTokenizer, AnyRef, Any
Ordering
  1. Alphabetic
  2. By Inheritance
Inherited
  1. BioGPTTokenizer
  2. Gpt2Tokenizer
  3. BpeTokenizer
  4. AnyRef
  5. Any
  1. Hide All
  2. Show All
Visibility
  1. Public
  2. All

Instance Constructors

  1. new BioGPTTokenizer(merges: Map[(String, String), Int], vocab: Map[String, Int], lang: String = "en", additionalStrings: Array[String] = Array())

Value Members

  1. final def !=(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  2. final def ##(): Int
    Definition Classes
    AnyRef → Any
  3. final def ==(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  4. val addPrefixSpaceToSentence: Boolean
    Definition Classes
    BpeTokenizer
  5. val alwaysAddPrefix: Boolean
    Definition Classes
    BpeTokenizer
  6. final def asInstanceOf[T0]: T0
    Definition Classes
    Any
  7. def bpe(indToken: IndexedToken): Array[TokenPiece]
    Definition Classes
    BioGPTTokenizer → BpeTokenizer
  8. val bpeRanks: Map[(String, String), Int]
    Attributes
    protected
    Definition Classes
    BpeTokenizer
  9. val bytesToUnicodeMapping: Map[Int, String]
    Attributes
    protected
    Definition Classes
    Gpt2Tokenizer
  10. val cache: Map[String, Array[String]]
    Attributes
    protected
    Definition Classes
    BpeTokenizer
  11. def clone(): AnyRef
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()
  12. def decodeTokens(tokens: Array[Int]): String

    Decodes a sequence of token ids back into the corresponding string.

    Decodes a sequence of token ids back into the corresponding string, using the decoder vocabulary. (The text previously shown here — about mapping bytes to a different set of unicode characters — described `bytesToUnicodeMapping` and appears to have been copied in error.)

    Definition Classes
    BioGPTTokenizer → Gpt2Tokenizer
  13. val decoderVocab: Map[Int, String]
    Attributes
    protected
    Definition Classes
    Gpt2Tokenizer
  14. def encode(indTokens: Array[IndexedToken]): Array[TokenPiece]
    Definition Classes
    BpeTokenizer
  15. def encode(indToken: IndexedToken): Array[TokenPiece]
    Definition Classes
    BpeTokenizer
  16. final def eq(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  17. def equals(arg0: Any): Boolean
    Definition Classes
    AnyRef → Any
  18. def finalize(): Unit
    Attributes
    protected[java.lang]
    Definition Classes
    AnyRef
    Annotations
    @throws( classOf[java.lang.Throwable] )
  19. def getBpeRanking: ((String, String)) ⇒ Int
    Attributes
    protected
    Definition Classes
    BpeTokenizer
  20. def getBytePairs(word: Array[String]): Array[(String, String)]
    Attributes
    protected
    Definition Classes
    BpeTokenizer
  21. final def getClass(): Class[_]
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  22. def getTokenPieces(indToken: IndexedToken, word: Array[String]): Array[TokenPiece]
    Attributes
    protected
    Definition Classes
    BpeTokenizer
  23. def hashCode(): Int
    Definition Classes
    AnyRef → Any
    Annotations
    @native()
  24. final def isInstanceOf[T0]: Boolean
    Definition Classes
    Any
  25. val merges: Map[(String, String), Int]
    Definition Classes
    BpeTokenizer
  26. val mosesNormalizer: MosesPunctNormalizer
  27. def mosesPipeline(text: String): Array[String]
  28. val mosesTokenizer: MosesTokenizer
  29. final def ne(arg0: AnyRef): Boolean
    Definition Classes
    AnyRef
  30. final def notify(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  31. final def notifyAll(): Unit
    Definition Classes
    AnyRef
    Annotations
    @native()
  32. val padWithSequenceTokens: Boolean
    Definition Classes
    BpeTokenizer
  33. def performMerges(wordChars: Array[String], charPairs: Array[(String, String)]): Array[String]
    Attributes
    protected
    Definition Classes
    BpeTokenizer
  34. def preProcessTokenForBpe(token: String): String
    Definition Classes
    Gpt2Tokenizer → BpeTokenizer
  35. val prefixForPieceId: Option[String]
    Definition Classes
    Gpt2Tokenizer → BpeTokenizer
  36. val sentencePadding: (String, String)
    Definition Classes
    BpeTokenizer
  37. val specialTokens: SpecialTokens
    Definition Classes
    BpeTokenizer
  38. def splitOnSpecialToken(specialToken: SpecialToken, text: String): ListBuffer[String]
    Attributes
    protected
    Definition Classes
    BpeTokenizer
  39. val splitPattern: Regex
    Definition Classes
    Gpt2Tokenizer
  40. val suffixForPieceId: Option[String]
    Definition Classes
    BioGPTTokenizer → BpeTokenizer
  41. final def synchronized[T0](arg0: ⇒ T0): T0
    Definition Classes
    AnyRef
  42. def toString(): String
    Definition Classes
    AnyRef → Any
  43. def tokenize(sentence: Sentence): Array[IndexedToken]
    Definition Classes
    BpeTokenizer
  44. def tokenizeSubText(text: String, indexOffset: Int): Array[IndexedToken]
    Definition Classes
    BioGPTTokenizer → Gpt2Tokenizer → BpeTokenizer
  45. val unicodeToByteMapping: Map[String, Int]
    Attributes
    protected
    Definition Classes
    Gpt2Tokenizer
  46. val vocab: Map[String, Int]
    Definition Classes
    BpeTokenizer
  47. final def wait(): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  48. final def wait(arg0: Long, arg1: Int): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... )
  49. final def wait(arg0: Long): Unit
    Definition Classes
    AnyRef
    Annotations
    @throws( ... ) @native()

Inherited from Gpt2Tokenizer

Inherited from BpeTokenizer

Inherited from AnyRef

Inherited from Any

Ungrouped