pub struct TokenOutputStream { /* private fields */ }
Expand description

TokenOutputStream is a wrapper around a tokenizer that allows for streaming tokens to the user rather than waiting for the full decoding to complete.

Implementations§

source§

impl TokenOutputStream

source

pub fn new(tokenizer: Tokenizer) -> Self

Creates a new TokenOutputStream instance.

§Arguments
  • tokenizer - A Tokenizer instance to be used for tokenizing.
§Returns

A new TokenOutputStream instance.

source

pub fn into_inner(self) -> Tokenizer

Consumes the TokenOutputStream, returning the inner Tokenizer.

This method is used when the TokenOutputStream is no longer needed, and you want to access the underlying Tokenizer.

§Returns

The inner Tokenizer instance.

source

pub fn tokenizer(&self) -> &Tokenizer

Provides a reference to the inner Tokenizer.

This method is used when you want to access the underlying Tokenizer but still keep the TokenOutputStream for further use.

§Returns

A reference to the inner Tokenizer instance.

source

pub fn get_token(&self, token_s: &str) -> Option<u32>

Retrieves the token ID associated with a given string.

§Arguments
  • token_s - A string representation of the token.
§Returns

An Option which contains the token ID if it exists, or None if it does not.

source

pub fn clear(&mut self)

Clears the TokenOutputStream.

This method is used to reset the state of the TokenOutputStream. It clears the tokens and resets the prev_index and current_index to 0.

source

pub fn next_token(&mut self, token: u32) -> Result<Option<String>>

Processes the next token and returns the decoded string if the token leads to a new word.

§Arguments
  • token - The next token to process.
§Returns

A Result which contains an Option with the decoded string if the token leads to a new word, or None if it does not. Returns an error if the decoding fails.

§Example
// Assuming that the `tokenizer.json` file contains the following vocab:
// { "hello": 1, "world": 2, "everybody": 3 }
let tokenizer = Tokenizer::from_file("path/to/tokenizer.json").unwrap();
let mut stream = TokenOutputStream::new(tokenizer);

let tokens: [u32; 4] = [1, 2, 1, 3];

let sent: String = tokens
    .iter()
    .filter_map(|token| stream.next_token(*token).ok())
    .flatten()
    .collect();

assert_eq!(sent, "hello world hello everybody");
source

pub fn decode_rest(&self) -> Result<Option<String>>

Decodes the remaining tokens and returns the decoded string if there are any new words.

§Returns

A Result which contains an Option with the decoded string if there are any new words, or None if there are not. Returns an error if the decoding fails.

source

pub fn decode_all(&self) -> Result<String>

Decodes all tokens in the TokenOutputStream and returns the decoded string.

§Returns

A Result which contains the decoded string if the decoding is successful, or an error if the decoding fails.

Trait Implementations§

source§

impl Debug for TokenOutputStream

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T> Instrument for T

source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> IntoRequest<T> for T

source§

fn into_request(self) -> Request<T>

Wrap the input message T in a tonic::Request.
source§

impl<T> Pointable for T

source§

const ALIGN: usize = _

The alignment of the pointer.
§

type Init = T

The type for initializers.
source§

unsafe fn init(init: <T as Pointable>::Init) -> usize

Initializes an object with the given initializer. Read more
source§

unsafe fn deref<'a>(ptr: usize) -> &'a T

Dereferences the given pointer. Read more
source§

unsafe fn deref_mut<'a>(ptr: usize) -> &'a mut T

Mutably dereferences the given pointer. Read more
source§

unsafe fn drop(ptr: usize)

Drops the object pointed to by the given pointer. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

source§

fn vzip(self) -> V

source§

impl<T> WithSubscriber for T

source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

impl<T> ErasedDestructor for T
where T: 'static,