Transformers

SC-native transformer blocks built on stochastic attention.

  • StochasticTransformerBlock — S-Former: spiking transformer with per-head stochastic attention over disjoint feature subspaces. Architecture: Input -> SC Multi-Head Attention -> Add & Norm -> SC Dense FF -> Add & Norm -> Output. d_model must be divisible by n_heads; each head owns d_model / n_heads contiguous channels. Inputs must be finite one- or two-dimensional arrays with trailing dimension d_model.
Python
import numpy as np
from sc_neurocore.transformers import StochasticTransformerBlock

block = StochasticTransformerBlock(d_model=64, n_heads=4, length=256)
input_sequence = np.random.default_rng(0).random((16, 64))  # (sequence_length, d_model)
output = block.forward(input_sequence)                       # same shape: (16, 64)

See Tutorial 54: Spiking Transformers.
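
A single token can also be passed as a one-dimensional vector of length d_model; the block promotes it to a length-1 sequence internally and returns a vector of the same shape. Inputs with the wrong trailing dimension or non-finite values are rejected. A minimal sketch, assuming the same constructor as above (the random toy input is illustrative):
Python
import numpy as np
from sc_neurocore.transformers import StochasticTransformerBlock

block = StochasticTransformerBlock(d_model=64, n_heads=4, length=256)

token = np.random.default_rng(0).random(64)   # one token of shape (d_model,)
out = block.forward(token)
assert out.shape == (64,)                     # same shape as the input

try:
    block.forward(np.zeros(32))               # trailing dimension != d_model
except ValueError as err:
    print(err)  # "x must have trailing dimension d_model=64"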

sc_neurocore.transformers.block

StochasticTransformerBlock dataclass

Spiking Transformer Block (S-Former). Structure: Input -> Multi-Head Attention -> Add & Norm -> Feed Forward -> Add & Norm -> Output
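
Construction validates the configuration eagerly: d_model, n_heads, and length must be positive, and d_model must divide evenly among the heads. The block then creates one StochasticAttention head per d_model // n_heads slice and a two-layer position-wise FFN that expands to 4 * d_model channels before projecting back. A quick sketch against the attributes defined in the source below:
Python
from sc_neurocore.transformers import StochasticTransformerBlock

block = StochasticTransformerBlock(d_model=64, n_heads=4)   # length defaults to 1024
assert block.head_dim == 16                  # d_model // n_heads
assert len(block.attention_heads) == 4       # one StochasticAttention head per slice

try:
    StochasticTransformerBlock(d_model=64, n_heads=5)       # 64 is not divisible by 5
except ValueError as err:
    print(err)  # "d_model must be divisible by n_heads"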

Source code in src/sc_neurocore/transformers/block.py
Python
@dataclass
class StochasticTransformerBlock:
    """
    Spiking Transformer Block (S-Former).
    Structure:
    Input -> Multi-Head Attention -> Add & Norm -> Feed Forward -> Add & Norm -> Output
    """

    d_model: int
    n_heads: int
    length: int = 1024

    def __post_init__(self) -> None:
        if self.d_model <= 0:
            raise ValueError("d_model must be positive")
        if self.n_heads <= 0:
            raise ValueError("n_heads must be positive")
        if self.d_model % self.n_heads != 0:
            raise ValueError("d_model must be divisible by n_heads")
        if self.length <= 0:
            raise ValueError("length must be positive")

        self.head_dim = self.d_model // self.n_heads
        self.attention_heads: list[_AttentionHead] = [
            StochasticAttention(dim_k=self.head_dim) for _ in range(self.n_heads)
        ]

        # Feed Forward Network (FFN)
        # 2-layer MLP: d_model -> 4*d_model -> d_model
        # We use our Vectorized Layer for efficiency
        self.ffn_1 = VectorizedSCLayer(
            n_inputs=self.d_model, n_neurons=4 * self.d_model, length=self.length
        )
        self.ffn_2 = VectorizedSCLayer(
            n_inputs=4 * self.d_model, n_neurons=self.d_model, length=self.length
        )

    def forward(self, x: np.ndarray[Any, Any]) -> np.ndarray[Any, Any]:
        """
        x: (d_model,) or (Sequence_Length, d_model). Returns same shape.
        """
        x = np.asarray(x, dtype=np.float64)
        if x.ndim not in (1, 2):
            raise ValueError("x must be a one- or two-dimensional array")
        if x.shape[-1] != self.d_model:
            raise ValueError(f"x must have trailing dimension d_model={self.d_model}")
        if not np.all(np.isfinite(x)):
            raise ValueError("x must contain only finite values")

        input_1d = x.ndim == 1
        x_2d = x[None, :] if input_1d else x
        attn_out = self._multi_head_attention(x_2d)

        # Match shapes for the residual: a 1-D token was promoted to (1, d_model) above
        if input_1d:
            attn_out = attn_out.reshape(-1)[: x.shape[0]]

        res1 = np.clip(0.5 * x + 0.5 * attn_out, 0.0, 1.0)

        # Position-wise FFN: apply same weights to each token
        def _ffn(token: np.ndarray) -> np.ndarray:
            vals = token.tolist() if hasattr(token, "tolist") else token
            h = np.clip(self.ffn_1.forward(vals), 0.0, 1.0)  # type: ignore[arg-type]
            return self.ffn_2.forward(h.tolist() if hasattr(h, "tolist") else h)  # type: ignore[arg-type]

        if res1.ndim > 1:
            ff_out = np.zeros_like(res1)
            for t in range(res1.shape[0]):
                ff_out[t] = _ffn(res1[t])
        else:
            ff_out = _ffn(res1)

        return 0.5 * res1 + 0.5 * ff_out

    def _multi_head_attention(self, x: np.ndarray[Any, Any]) -> np.ndarray[Any, Any]:
        x_2d = np.asarray(x, dtype=np.float64)
        if x_2d.ndim != 2 or x_2d.shape[1] != self.d_model:
            raise ValueError(f"x must have shape (sequence_length, {self.d_model})")

        head_outputs = []
        for head_idx, head in enumerate(self.attention_heads):
            start = head_idx * self.head_dim
            stop = start + self.head_dim
            head_x = x_2d[:, start:stop]
            head_outputs.append(head.forward(Q=head_x, K=head_x, V=head_x))

        return np.concatenate(head_outputs, axis=1)
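
Each head attends over its own contiguous slice of the feature dimension, so the heads cover disjoint subspaces and their outputs concatenate back to d_model channels. A sketch of that slicing on plain NumPy arrays; it mirrors _multi_head_attention above, with an identity pass-through standing in for StochasticAttention:
Python
import numpy as np

d_model, n_heads = 8, 2
head_dim = d_model // n_heads

x = np.arange(3 * d_model, dtype=float).reshape(3, d_model)  # (sequence_length, d_model)

head_outputs = []
for head_idx in range(n_heads):
    start = head_idx * head_dim
    stop = start + head_dim
    head_x = x[:, start:stop]      # disjoint (sequence_length, head_dim) slice
    head_outputs.append(head_x)    # stand-in for head.forward(Q=head_x, K=head_x, V=head_x)

out = np.concatenate(head_outputs, axis=1)
assert out.shape == (3, d_model)   # heads reassemble to the full feature width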

forward(x)

x: (d_model,) or (Sequence_Length, d_model). Returns same shape.

Source code in src/sc_neurocore/transformers/block.py
Python
def forward(self, x: np.ndarray[Any, Any]) -> np.ndarray[Any, Any]:
    """
    x: (d_model,) or (Sequence_Length, d_model). Returns same shape.
    """
    x = np.asarray(x, dtype=np.float64)
    if x.ndim not in (1, 2):
        raise ValueError("x must be a one- or two-dimensional array")
    if x.shape[-1] != self.d_model:
        raise ValueError(f"x must have trailing dimension d_model={self.d_model}")
    if not np.all(np.isfinite(x)):
        raise ValueError("x must contain only finite values")

    input_1d = x.ndim == 1
    x_2d = x[None, :] if input_1d else x
    attn_out = self._multi_head_attention(x_2d)

    # Match shapes for the residual: a 1-D token was promoted to (1, d_model) above
    if input_1d:
        attn_out = attn_out.reshape(-1)[: x.shape[0]]

    res1 = np.clip(0.5 * x + 0.5 * attn_out, 0.0, 1.0)

    # Position-wise FFN: apply same weights to each token
    def _ffn(token: np.ndarray) -> np.ndarray:
        vals = token.tolist() if hasattr(token, "tolist") else token
        h = np.clip(self.ffn_1.forward(vals), 0.0, 1.0)  # type: ignore[arg-type]
        return self.ffn_2.forward(h.tolist() if hasattr(h, "tolist") else h)  # type: ignore[arg-type]

    if res1.ndim > 1:
        ff_out = np.zeros_like(res1)
        for t in range(res1.shape[0]):
            ff_out[t] = _ffn(res1[t])
    else:
        ff_out = _ffn(res1)

    return 0.5 * res1 + 0.5 * ff_out
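
Both "Add & Norm" stages are implemented as a plain average of the residual branch and the sublayer output, with the first stage clipped to the unit interval so values stay in the stochastic-computing range. The arithmetic on stand-in values (not real attention or FFN outputs):
Python
import numpy as np

x = np.array([0.2, 0.9, 0.4])          # stand-in token
attn_out = np.array([0.7, 0.8, 0.1])   # stand-in attention output
ff_out = np.array([0.3, 0.5, 0.9])     # stand-in feed-forward output

# First "Add & Norm": average with the attention output, clip to [0, 1].
res1 = np.clip(0.5 * x + 0.5 * attn_out, 0.0, 1.0)   # [0.45, 0.85, 0.25]

# Second "Add & Norm": average with the feed-forward output.
out = 0.5 * res1 + 0.5 * ff_out                       # [0.375, 0.675, 0.575]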