Struct VocabNetwork

Source

/// A complete autoregressive language model over a token vocabulary:
/// embedding (vocab → d_model) → layer stack → `norm_f` → LM head (d_model → vocab).
pub struct VocabNetwork<M: Module> {
    /// Token embedding table, weight shape `[padded_vocab, d_model]`.
    pub embedding: Embedding,
    /// The shared Mamba-x layer stack.
    pub layers: Layers<M>,
    /// Final RMSNorm applied before the LM head.
    pub norm_f: RmsNorm,
    /// Optional dedicated LM head. `None` means the head is weight-tied and
    /// reuses the transposed embedding weight.
    pub lm_head: Option<Linear>,
}

Expand description

A complete autoregressive language model over a token vocabulary: Embedding (vocab → d_model) → Layers<M> → norm_f → LM head (d_model → vocab).

This is the token-LM counterpart of LatentNetwork; both are built on the shared Layers core. The only differences are the I/O boundary (a token Embedding and a vocab logit head, instead of two latent Linears) and a final pre-head RmsNorm.

The LM head is either tied (lm_head = None, in which case the transposed embedding weight is reused) or untied (a dedicated Linear). The vocabulary size is rounded up to a padding multiple for GPU alignment (see VocabNetworkBuilder).

Fields§

§embedding: Embedding

Token embedding table, weight shape [padded_vocab, d_model].

§layers: Layers<M>

The shared Mamba-x layer stack.

§norm_f: RmsNorm

Final RMSNorm applied before the LM head (norm_f).

§lm_head: Option<Linear>

Optional dedicated LM head. None ⇒ weight-tied (reuse embeddingᵀ).

Struct VocabNetwork Copy item path

Fields§

Implementations§

impl<M: MambaBlock> VocabNetwork<M> where M::SsdPath: Clone,

pub fn forward( &self, x: Tensor<2, Int>, caches: Option<M::Caches>, ssd_path: M::SsdPath, ) -> (Tensor<3>, M::Caches)

pub fn step( &self, x: Tensor<1, Int>, caches: Option<M::Caches>, layers_own_index: Option<&mut usize>, layer_indices: Option<&mut Vec<usize>>, ) -> (Tensor<2>, M::Caches)

pub fn step_infinite(&self, x: Tensor<1, Int>) -> Tensor<2>

pub fn step_n_approx( &self, x: Tensor<1, Int>, n: usize, caches: Option<M::Caches>, ) -> (Tensor<2>, M::Caches)

fn apply_lm_head(&self, x: Tensor<3>) -> Tensor<3>

Trait Implementations§

impl<M> AutodiffModule for VocabNetwork<M> where M: AutodiffModule + ModuleDisplay + Module,

fn valid(&self) -> Self

fn from_inner(module: Self) -> Self

impl<M> Clone for VocabNetwork<M> where M: Module + ModuleDisplay + Module,

fn clone(&self) -> Self

fn clone_from(&mut self, source: &Self)

impl<M: Debug + Module> Debug for VocabNetwork<M>

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl<M> Display for VocabNetwork<M> where M: Module + ModuleDisplay + Module,

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl<M> Module for VocabNetwork<M> where M: Module + ModuleDisplay + Module,

type Record = VocabNetworkRecord<M>

fn load_record(self, record: Self::Record) -> Self

fn into_record(self) -> Self::Record

fn num_params(&self) -> usize

fn visit<Visitor: ModuleVisitor>(&self, visitor: &mut Visitor)

fn map<Mapper: ModuleMapper>(self, mapper: &mut Mapper) -> Self

fn collect_devices(&self, devices: Devices) -> Devices

fn to_device(self, device: &Device) -> Self

fn fork(self, device: &Device) -> Self

fn devices(&self) -> Vec<Device>

fn no_grad(self) -> Self

fn train(self) -> Self where Self: AutodiffModule,

fn quantize_weights(self, quantizer: &mut Quantizer) -> Self

impl<M> ModuleDisplay for VocabNetwork<M> where M: Module + ModuleDisplay + Module,

fn format(&self, passed_settings: DisplaySettings) -> String

fn custom_settings(&self) -> Option<DisplaySettings>

fn custom_content(&self, _content: Content) -> Option<Content>

impl<M> ModuleDisplayDefault for VocabNetwork<M> where M: Module + ModuleDisplay + Module,

fn content(&self, content: Content) -> Option<Content>

fn num_params(&self) -> usize

Auto Trait Implementations§

impl<M> !Freeze for VocabNetwork<M>

impl<M> !RefUnwindSafe for VocabNetwork<M>

impl<M> !UnwindSafe for VocabNetwork<M>

impl<M> Send for VocabNetwork<M>

impl<M> Sync for VocabNetwork<M> where M: Sync,

impl<M> Unpin for VocabNetwork<M> where M: Unpin,

impl<M> UnsafeUnpin for VocabNetwork<M>

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T> ToString for T where T: Display + ?Sized,

fn to_string(&self) -> String

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Struct VocabNetwork

impl<M: MambaBlock> VocabNetwork<M>
where M::SsdPath: Clone,

impl<M> AutodiffModule for VocabNetwork<M>
where M: AutodiffModule + ModuleDisplay + Module,

impl<M> Clone for VocabNetwork<M>
where M: Module + ModuleDisplay + Module,

impl<M> Display for VocabNetwork<M>
where M: Module + ModuleDisplay + Module,

impl<M> Module for VocabNetwork<M>
where M: Module + ModuleDisplay + Module,

fn train(self) -> Self
where Self: AutodiffModule,

impl<M> ModuleDisplay for VocabNetwork<M>
where M: Module + ModuleDisplay + Module,

impl<M> ModuleDisplayDefault for VocabNetwork<M>
where M: Module + ModuleDisplay + Module,

impl<M> Sync for VocabNetwork<M>
where M: Sync,

impl<M> Unpin for VocabNetwork<M>
where M: Unpin,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T> ToString for T
where T: Display + ?Sized,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,