Struct Mamba2Layers

Source

/// A stack of Mamba-2 layers with optional virtual-layer scheduling.
///
/// The stack keeps `n_real_layers` distinct weight sets but can execute
/// `n_virtual_layers` logical forward passes, cycling through the weights
/// according to the provided `Schedule`.
pub struct Mamba2Layers<B: Backend> {
    /// Number of real (weight-bearing) layers.
    pub n_real_layers: usize,
    /// Optional `(n_virtual_layers, schedule)` pair used for weight sharing.
    ///
    /// When `None`, the virtual layer count falls back to `n_real_layers`
    /// (no sharing).
    pub n_virtual_layers: Option<(usize, Schedule)>,
    /// The actual weight-bearing layer instances; length is `n_real_layers`.
    pub real_layers: Vec<Mamba2Layer<B>>,
    /// When `true`, the residual connection of the first virtual layer is
    /// scaled to zero, so the first block acts as a pure projection rather
    /// than a residual update.
    pub ignore_first_residual: bool,
    /// When `true`, the residual connection of the last virtual layer is
    /// scaled to zero.
    pub ignore_last_residual: bool,
}

Expand description

A stack of Mamba-2 layers with optional virtual-layer scheduling.

The stack maintains `n_real_layers` distinct weight sets but can execute `n_virtual_layers` logical forward passes, cycling through the weights according to the provided `Schedule`.

Fields§

§n_real_layers: usize

Number of real (weight-bearing) layers.

§n_virtual_layers: Option<(usize, Schedule)>

Optional (n_virtual_layers, schedule) for weight-sharing.

When `None`, the virtual layer count falls back to `n_real_layers` (no sharing). The field is marked `#[module(skip)]` so that Burn does not treat it as a trainable parameter.

§real_layers: Vec<Mamba2Layer<B>>

The actual weight-bearing layer instances.

Length: `n_real_layers`.

§ignore_first_residual: bool

When true, the residual connection of the first virtual layer is scaled to zero (i.e. the first block acts as a pure projection, not a residual update).

§ignore_last_residual: bool

When true, the residual connection of the last virtual layer is scaled to zero.

Struct Mamba2Layers Copy item path

Fields§

Implementations§

impl<B: Backend + Mamba2BackendExt> Mamba2Layers<B>

pub fn forward( &self, x: Tensor<B, 3>, caches: Option<Mamba2Caches<B>>, ssd_path: Mamba2SsdPath, ) -> (Tensor<B, 3>, Mamba2Caches<B>)

§Arguments

§Returns

pub fn step( &self, x: Tensor<B, 2>, caches: Option<Mamba2Caches<B>>, ) -> (Tensor<B, 2>, Mamba2Caches<B>)

§Arguments

§Returns

Trait Implementations§

impl<B> AutodiffModule<B> for Mamba2Layers<B> where B: AutodiffBackend + Backend, <B as AutodiffBackend>::InnerBackend: Backend,

type InnerModule = Mamba2Layers<<B as AutodiffBackend>::InnerBackend>

fn valid(&self) -> Self::InnerModule

fn from_inner(module: Self::InnerModule) -> Self

impl<B: Backend> Clone for Mamba2Layers<B>

fn clone(&self) -> Self

fn clone_from(&mut self, source: &Self)

impl<B: Debug + Backend> Debug for Mamba2Layers<B>

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl<B: Backend> Display for Mamba2Layers<B>

fn fmt(&self, f: &mut Formatter<'_>) -> Result

impl<B> HasAutodiffModule<B> for Mamba2Layers<B::InnerBackend> where B: AutodiffBackend + Backend, <B as AutodiffBackend>::InnerBackend: Backend,

type TrainModule = Mamba2Layers<B>

impl<B: Backend> Module<B> for Mamba2Layers<B>

type Record = Mamba2LayersRecord<B>

fn load_record(self, record: Self::Record) -> Self

fn into_record(self) -> Self::Record

fn num_params(&self) -> usize

fn visit<Visitor: ModuleVisitor<B>>(&self, visitor: &mut Visitor)

fn map<Mapper: ModuleMapper<B>>(self, mapper: &mut Mapper) -> Self

fn collect_devices(&self, devices: Devices<B>) -> Devices<B>

fn to_device(self, device: &B::Device) -> Self

fn fork(self, device: &B::Device) -> Self

fn devices(&self) -> Vec<<B as BackendTypes>::Device>

fn no_grad(self) -> Self

fn train<AB>(self) -> Self::TrainModule where AB: AutodiffBackend<InnerBackend = B>, Self: HasAutodiffModule<AB>,

fn quantize_weights(self, quantizer: &mut Quantizer) -> Self

impl<B: Backend> ModuleDisplay for Mamba2Layers<B>

fn format(&self, passed_settings: DisplaySettings) -> String

fn custom_settings(&self) -> Option<DisplaySettings>

fn custom_content(&self, _content: Content) -> Option<Content>

impl<B: Backend> ModuleDisplayDefault for Mamba2Layers<B>

fn content(&self, content: Content) -> Option<Content>

fn num_params(&self) -> usize

Auto Trait Implementations§

impl<B> Freeze for Mamba2Layers<B>

impl<B> !RefUnwindSafe for Mamba2Layers<B>

impl<B> Send for Mamba2Layers<B>

impl<B> Sync for Mamba2Layers<B>

impl<B> Unpin for Mamba2Layers<B> where <B as BackendTypes>::Device: Unpin, <B as BackendTypes>::FloatTensorPrimitive: Unpin, <B as BackendTypes>::QuantizedTensorPrimitive: Unpin,

impl<B> UnsafeUnpin for Mamba2Layers<B>

impl<B> !UnwindSafe for Mamba2Layers<B>

Blanket Implementations§

impl<T> Any for T where T: 'static + ?Sized,

fn type_id(&self) -> TypeId

impl<T> Borrow<T> for T where T: ?Sized,

fn borrow(&self) -> &T

impl<T> BorrowMut<T> for T where T: ?Sized,

fn borrow_mut(&mut self) -> &mut T

impl<T> CloneToUninit for T where T: Clone,

unsafe fn clone_to_uninit(&self, dest: *mut u8)

impl<T> From<T> for T

fn from(t: T) -> T

impl<T, U> Into<U> for T where U: From<T>,

fn into(self) -> U

impl<T> ToOwned for T where T: Clone,

type Owned = T

fn to_owned(&self) -> T

fn clone_into(&self, target: &mut T)

impl<T> ToString for T where T: Display + ?Sized,

fn to_string(&self) -> String

impl<T, U> TryFrom<U> for T where U: Into<T>,

type Error = Infallible

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

impl<T, U> TryInto<U> for T where U: TryFrom<T>,

type Error = <U as TryFrom<T>>::Error

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Struct Mamba2Layers

impl<B> AutodiffModule<B> for Mamba2Layers<B>
where B: AutodiffBackend + Backend, <B as AutodiffBackend>::InnerBackend: Backend,

impl<B> HasAutodiffModule<B> for Mamba2Layers<B::InnerBackend>
where B: AutodiffBackend + Backend, <B as AutodiffBackend>::InnerBackend: Backend,

fn train<AB>(self) -> Self::TrainModule
where AB: AutodiffBackend<InnerBackend = B>, Self: HasAutodiffModule<AB>,

impl<B> Unpin for Mamba2Layers<B>
where <B as BackendTypes>::Device: Unpin, <B as BackendTypes>::FloatTensorPrimitive: Unpin, <B as BackendTypes>::QuantizedTensorPrimitive: Unpin,

impl<T> Any for T
where T: 'static + ?Sized,

impl<T> Borrow<T> for T
where T: ?Sized,

impl<T> BorrowMut<T> for T
where T: ?Sized,

impl<T> CloneToUninit for T
where T: Clone,

impl<T, U> Into<U> for T
where U: From<T>,

impl<T> ToOwned for T
where T: Clone,

impl<T> ToString for T
where T: Display + ?Sized,

impl<T, U> TryFrom<U> for T
where U: Into<T>,

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,