Skip to content

Network Export and Deployment

Export trained SC-NeuroCore networks for deployment on different backends: Rust engine (CPU), FPGA (Verilog), and standard ML formats. This tutorial covers the complete path from trained Python model to production deployment.

Prerequisites: pip install sc-neurocore

1. Export targets

SC-NeuroCore supports multiple deployment targets:

Target Format Use case
Rust engine JSON config 10-100x CPU speedup
FPGA Verilog + hex weights Ultra-low-power edge
NumPy checkpoint .npz Python-to-Python transfer
SCPN compiler IR graph SCPN ecosystem integration

2. Save and load NumPy checkpoints

The simplest export — save all layer weights and metadata:

import numpy as np
from sc_neurocore import VectorizedSCLayer

# Train a network (simplified)
# Two dense SC layers: 50 -> 128 -> 10, each using 512-bit stochastic bitstreams.
layer1 = VectorizedSCLayer(n_inputs=50, n_neurons=128, length=512)
layer2 = VectorizedSCLayer(n_inputs=128, n_neurons=10, length=512)

# Save checkpoint
# np.savez bundles all arrays/scalars into a single .npz archive, keyed by name.
np.savez("model_checkpoint.npz",
    layer1_weights=layer1.weights,
    layer2_weights=layer2.weights,
    layer1_length=layer1.length,
    layer2_length=layer2.length,
    architecture="50-128-10",
)

# Load checkpoint
# np.load on an .npz returns a lazy archive; values are read per-key.
ckpt = np.load("model_checkpoint.npz")
layer1_loaded = VectorizedSCLayer(n_inputs=50, n_neurons=128, length=int(ckpt["layer1_length"]))
layer1_loaded.weights = ckpt["layer1_weights"]
# NOTE(review): calls a private method — presumably re-syncs the packed
# bitstream representation after the direct weights assignment; confirm
# against the VectorizedSCLayer API before relying on it.
layer1_loaded._refresh_packed_weights()

print(f"Loaded architecture: {ckpt['architecture']}")
print(f"Layer 1 weights: {layer1_loaded.weights.shape}")

3. Export to Rust engine

The Rust engine reads a JSON configuration file describing the network topology and weights:

import json

def export_to_rust(layers, filename):
    """Export network for the SC-NeuroCore Rust engine.

    Writes a JSON config describing topology and weights that the Rust
    engine loads at startup.

    Parameters
    ----------
    layers : sequence of layer objects exposing a 2-D ``weights`` array
        of shape (n_neurons, n_inputs) and a ``length`` attribute.
    filename : str — destination path for the JSON config.
    """
    config = {
        "version": "3.10",
        # All layers are assumed to share one bitstream length; the
        # engine reads it once from the first layer.
        "bitstream_length": layers[0].length,
        "layers": [],
    }
    for i, layer in enumerate(layers):
        config["layers"].append({
            "id": i,
            "type": "dense",
            "n_inputs": layer.weights.shape[1],
            "n_neurons": layer.weights.shape[0],
            # tolist() converts the NumPy array into nested Python lists
            # so json.dump can serialise it.
            "weights": layer.weights.tolist(),
        })

    with open(filename, "w") as f:
        json.dump(config, f, indent=2)
    # BUG FIX: previously printed a "(unknown)" placeholder instead of
    # the destination filename.
    print(f"Exported {len(layers)} layers to {filename}")

# Demo: write the two layers trained above to a Rust-engine config file.
export_to_rust([layer1, layer2], "model_rust.json")

Run with the Rust engine:

sc-neurocore-engine run model_rust.json --input data.npy --output predictions.npy

4. Export weights for FPGA

Convert weights to Q8.8 fixed-point hex files (see Tutorial 13):

def export_fpga_weights(layer, prefix):
    """Export layer weights as hex files for Verilog $readmemh.

    Weights are quantised to Q8.8 fixed point (value * 256, rounded to
    nearest) and written row-major, one 16-bit value per line as 4-digit
    upper-case hex.

    Parameters
    ----------
    layer : object exposing a 2-D ``weights`` array (n_neurons, n_inputs).
    prefix : str — prepended to the output filename.

    Returns
    -------
    str — the written filename, ``"<prefix>_weights.hex"``.
    """
    # Q8.8: 8 integer bits, 8 fractional bits -> scale by 2**8 = 256.
    q_weights = np.round(layer.weights * 256).astype(np.int16)
    filename = f"{prefix}_weights.hex"
    with open(filename, "w") as f:
        for row in q_weights:
            for val in row:
                # Mask to 16 bits so negative values emit as two's complement.
                f.write(f"{val & 0xFFFF:04X}\n")
    # BUG FIX: previously printed a "(unknown)" placeholder instead of
    # the output filename.
    print(f"  {filename}: {q_weights.shape[0]}×{q_weights.shape[1]} Q8.8 values")
    return filename

# Demo: dump each trained layer to <prefix>_weights.hex for $readmemh.
print("FPGA weight export:")
export_fpga_weights(layer1, "layer1")
export_fpga_weights(layer2, "layer2")

5. Generate Verilog network wrapper

Create a top-level Verilog module that instantiates layers:

def generate_verilog_top(layers, module_name="sc_network_top"):
    """Generate Verilog wrapper for a multi-layer SC network.

    Emits a top-level module that chains one ``sc_dense_layer_top``
    instance per layer, wiring each layer's outputs to the next layer's
    inputs, and writes it to ``<module_name>.v``.

    Parameters
    ----------
    layers : sequence of layer objects exposing a 2-D ``weights`` array
        (n_neurons, n_inputs) and a ``length`` attribute.
    module_name : str — name of the generated top-level module.

    Returns
    -------
    str — the written filename, ``"<module_name>.v"``.
    """
    lines = [
        "// Auto-generated by SC-NeuroCore export",
        f"// {len(layers)} layers, bitstream length {layers[0].length}",
        f"module {module_name} (",
        "    input  wire        clk,",
        "    input  wire        rst_n,",
        # Port widths come from the first layer's input count and the
        # last layer's neuron count (unpacked array ranges).
        f"    input  wire [15:0] input_data [{layers[0].weights.shape[1]-1}:0],",
        f"    output wire [15:0] output_data [{layers[-1].weights.shape[0]-1}:0]",
        ");",
        "",
    ]

    for i, layer in enumerate(layers):
        n_in = layer.weights.shape[1]
        n_out = layer.weights.shape[0]
        # BUG FIX: the layer-size comment lost its arrow during
        # extraction, printing "{n_in}{n_out}" as one fused number.
        lines.append(f"    // Layer {i}: {n_in} -> {n_out}")
        lines.append(f"    wire [15:0] layer{i}_out [{n_out-1}:0];")
        lines.append("    sc_dense_layer_top #(")
        lines.append(f"        .N_INPUTS({n_in}),")
        lines.append(f"        .N_NEURONS({n_out})")
        lines.append(f"    ) layer{i} (")
        lines.append("        .clk(clk),")
        lines.append("        .rst_n(rst_n),")
        # First layer reads the module inputs; later layers chain from
        # the previous layer's output bus.
        if i == 0:
            lines.append("        .inputs(input_data),")
        else:
            lines.append(f"        .inputs(layer{i-1}_out),")
        lines.append(f"        .outputs(layer{i}_out)")
        lines.append("    );")
        lines.append("")

    last = len(layers) - 1
    lines.append(f"    assign output_data = layer{last}_out;")
    lines.append("endmodule")

    verilog = "\n".join(lines)
    filename = f"{module_name}.v"
    with open(filename, "w") as f:
        f.write(verilog)
    # BUG FIX: previously printed a "(unknown)" placeholder instead of
    # the output filename.
    print(f"Generated {filename} ({len(lines)} lines)")
    return filename

# Demo: emit sc_network_top.v wrapping the two layers defined above.
generate_verilog_top([layer1, layer2])

6. SCPN compiler integration

The SC-NeuroCore compiler translates a network into an intermediate representation for the SCPN ecosystem:

from sc_neurocore.compiler.equation_compiler import equation_to_fpga

# Compile equation-defined neuron to Python model + Verilog RTL
# The equation string is a leaky-integrate membrane ODE; `threshold` and
# `reset` give the spike condition and the post-spike state assignment.
# NOTE(review): equation_to_fpga presumably returns (python_model,
# verilog_source_string) — the second element supports len() below;
# confirm the first element's type against the compiler docs.
neuron, verilog_src = equation_to_fpga(
    "dv/dt = -(v - v_rest) / tau + I",
    threshold="v >= v_threshold",
    reset="v = v_reset",
    params={"tau": 20.0, "v_rest": 0.0, "v_threshold": 1.0, "v_reset": 0.0},
)
print(f"Generated {len(verilog_src)} characters of SystemVerilog")

7. Deployment checklist

Before deploying a trained network:

def deployment_preflight(layers):
    """Verify network is ready for deployment.

    Runs four sanity checks per layer — weight range, finiteness,
    weight diversity, and Q8.8 quantisation error — and prints one
    report line per check.
    """
    report = []

    for idx, layer in enumerate(layers):
        weights = layer.weights

        # Stochastic-computing weights must lie in the unit interval.
        report.append(
            f"Layer {idx} weights in [0,1]: {np.all((weights >= 0) & (weights <= 1))}"
        )

        # Reject NaN/Inf before they reach any backend.
        report.append(f"Layer {idx} weights finite: {np.all(np.isfinite(weights))}")

        # A collapsed (degenerate) layer has ~one unique weight value;
        # rounding to 4 decimals ignores float noise.
        distinct = len(np.unique(np.round(weights, 4)))
        report.append(f"Layer {idx} unique weights: {distinct}")

        # Worst-case rounding error introduced by Q8.8 quantisation.
        quantised = np.round(weights * 256) / 256
        worst = np.max(np.abs(weights - quantised))
        report.append(f"Layer {idx} max Q8.8 error: {worst:.6f}")

    for line in report:
        print(f"  {line}")

# Run the preflight checklist on the demo network before exporting.
deployment_preflight([layer1, layer2])

8. Performance comparison across backends

Backend Throughput Latency Power
Python (NumPy) ~1K samples/s ~1 ms ~50 W (CPU)
Rust engine ~100K samples/s ~10 μs ~15 W (CPU)
FPGA (iCE40) ~10M samples/s ~25 ns ~50 mW
FPGA (ECP5) ~50M samples/s ~10 ns ~200 mW

The Rust engine gives immediate speedup with no hardware changes. FPGA deployment requires synthesis but achieves 1000x lower power.

What you learned

  • NumPy checkpoints for Python-to-Python model transfer
  • JSON export for the Rust engine (10-100x speedup)
  • Q8.8 hex files for FPGA weight loading via $readmemh
  • Auto-generated Verilog network wrapper modules
  • SCPN compiler integration for ecosystem interop
  • Deployment preflight: weight range, finiteness, quantisation error

Next steps

  • Benchmark Rust engine vs Python on your specific network
  • Synthesise the generated Verilog with Yosys for iCE40/ECP5
  • Add batch inference mode to the Rust engine export
  • Profile FPGA resource usage (LUT, FF, BRAM) for different network sizes