Skip to content

Hardware-Aware SNN NAS

NSGA-II evolutionary search over SNN architectures under FPGA resource budgets.

Searches {neuron model, layer width, bitstream length, delay range} jointly — the first NAS that optimizes hardware parameters alongside topology.

Search Space

sc_neurocore.nas.search_space

Define the architecture search space for hardware-aware SNN NAS.

Search dimensions
  • n_layers: number of hidden layers
  • widths: neurons per layer
  • neuron_type: per-layer neuron model
  • bitstream_length: per-layer SC precision (L)
  • delay_range: maximum synaptic delay per layer

Each architecture encodes one point in this joint space. FPGA constraints (LUT, BRAM budgets) prune infeasible points.

Architecture dataclass

One point in the NAS search space.

Source code in src/sc_neurocore/nas/search_space.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@dataclass
class Architecture:
    """One point in the NAS search space.

    Genes are stored per layer: width, neuron model, SC bitstream length,
    and maximum synaptic delay.  The ``fitness_*`` fields default to zero
    and are populated during evaluation.
    """

    n_inputs: int
    layer_widths: list[int]
    neuron_types: list[str]
    bitstream_lengths: list[int]
    delay_ranges: list[int]
    fitness_accuracy: float = 0.0
    fitness_luts: int = 0
    fitness_energy_nj: float = 0.0
    dominates_count: int = 0

    @property
    def n_layers(self) -> int:
        """Total layer count (hidden layers plus output layer)."""
        return len(self.layer_widths)

    @property
    def layer_sizes(self) -> list[tuple[int, int]]:
        """(fan_in, fan_out) pairs, one per layer, starting from the input."""
        fan_ins = [self.n_inputs, *self.layer_widths[:-1]]
        return list(zip(fan_ins, self.layer_widths))

    @property
    def total_params(self) -> int:
        """Total synaptic weight count across all layers."""
        return sum(fan_in * fan_out for fan_in, fan_out in self.layer_sizes)

SearchSpace dataclass

Configurable NAS search space.

Parameters

- n_inputs : int — Input dimension.
- n_outputs : int — Output dimension (width of final layer).
- min_layers, max_layers : int — Range of hidden layer count.
- width_choices : list of int — Candidate widths per layer.
- neuron_choices : list of str — Candidate neuron models.
- L_choices : list of int — Candidate bitstream lengths.
- delay_choices : list of int — Candidate max-delay values.

Source code in src/sc_neurocore/nas/search_space.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
@dataclass
class SearchSpace:
    """Configurable NAS search space.

    Parameters
    ----------
    n_inputs : int
        Input dimension.
    n_outputs : int
        Output dimension (width of final layer).
    min_layers, max_layers : int
        Range of hidden layer count.
    width_choices : list of int
        Candidate widths per layer.
    neuron_choices : list of str
        Candidate neuron models.
    L_choices : list of int
        Candidate bitstream lengths.
    delay_choices : list of int
        Candidate max-delay values.
    """

    n_inputs: int
    n_outputs: int
    min_layers: int = 1
    max_layers: int = 4
    # Defaults are copied from module-level constant choice lists so that
    # mutating one instance's lists never affects the shared constants.
    width_choices: list[int] = field(default_factory=lambda: list(WIDTH_CHOICES))
    neuron_choices: list[str] = field(default_factory=lambda: list(NEURON_CHOICES))
    L_choices: list[int] = field(default_factory=lambda: list(L_CHOICES))
    delay_choices: list[int] = field(default_factory=lambda: list(DELAY_CHOICES))

    def random_architecture(self, rng: np.random.RandomState) -> Architecture:
        """Sample a random architecture from the space."""
        # randint's upper bound is exclusive, hence +1 to include max_layers.
        n_layers = rng.randint(self.min_layers, self.max_layers + 1)
        # Hidden-layer widths are sampled; the final layer is pinned to n_outputs.
        widths = [int(rng.choice(self.width_choices)) for _ in range(n_layers - 1)]
        widths.append(self.n_outputs)
        # Every layer gets an independently drawn neuron model, bitstream
        # length and delay range.
        neurons = [str(rng.choice(self.neuron_choices)) for _ in range(n_layers)]
        lengths = [int(rng.choice(self.L_choices)) for _ in range(n_layers)]
        delays = [int(rng.choice(self.delay_choices)) for _ in range(n_layers)]
        return Architecture(
            n_inputs=self.n_inputs,
            layer_widths=widths,
            neuron_types=neurons,
            bitstream_lengths=lengths,
            delay_ranges=delays,
        )

    def mutate(self, arch: Architecture, rng: np.random.RandomState) -> Architecture:
        """Mutate one random gene in the architecture."""
        # Copy all gene lists so the parent architecture is left untouched.
        widths = list(arch.layer_widths)
        neurons = list(arch.neuron_types)
        lengths = list(arch.bitstream_lengths)
        delays = list(arch.delay_ranges)

        # gene: 0 = width, 1 = neuron type, 2 = bitstream length, 3 = delay.
        gene = rng.randint(0, 4)
        layer_idx = rng.randint(0, arch.n_layers)

        # NOTE(review): when gene == 0 targets the last layer (whose width is
        # pinned to n_outputs) the first condition fails and control falls
        # through to the `else` branch, mutating the delay instead — confirm
        # this bias toward delay mutation is intended.
        if gene == 0 and layer_idx < arch.n_layers - 1:
            widths[layer_idx] = int(rng.choice(self.width_choices))
        elif gene == 1:
            neurons[layer_idx] = str(rng.choice(self.neuron_choices))
        elif gene == 2:
            lengths[layer_idx] = int(rng.choice(self.L_choices))
        else:
            delays[layer_idx] = int(rng.choice(self.delay_choices))

        return Architecture(
            n_inputs=arch.n_inputs,
            layer_widths=widths,
            neuron_types=neurons,
            bitstream_lengths=lengths,
            delay_ranges=delays,
        )

    def crossover(
        self, a: Architecture, b: Architecture, rng: np.random.RandomState
    ) -> Architecture:
        """Uniform crossover between two architectures of equal layer count."""
        # Child depth is truncated to the shorter parent; each layer's genes
        # are inherited wholesale from one randomly chosen parent.
        n = min(a.n_layers, b.n_layers)
        widths, neurons, lengths, delays = [], [], [], []
        for i in range(n):
            src = a if rng.random() < 0.5 else b
            widths.append(src.layer_widths[i])
            neurons.append(src.neuron_types[i])
            lengths.append(src.bitstream_lengths[i])
            delays.append(src.delay_ranges[i])
        return Architecture(
            n_inputs=a.n_inputs,
            layer_widths=widths,
            neuron_types=neurons,
            bitstream_lengths=lengths,
            delay_ranges=delays,
        )

    @property
    def space_size(self) -> int:
        """Approximate total architectures in the search space."""
        # Counts all four choice lists for every layer, including the output
        # layer whose width is actually fixed — hence "approximate" (slight
        # overcount).
        per_layer = (
            len(self.width_choices)
            * len(self.neuron_choices)
            * len(self.L_choices)
            * len(self.delay_choices)
        )
        total = 0
        for n in range(self.min_layers, self.max_layers + 1):
            total += per_layer**n
        return total

space_size property

Approximate total architectures in the search space.

random_architecture(rng)

Sample a random architecture from the space.

Source code in src/sc_neurocore/nas/search_space.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def random_architecture(self, rng: np.random.RandomState) -> Architecture:
    """Sample a uniformly random architecture from the space.

    Hidden-layer widths are drawn from ``width_choices``; the final layer's
    width is pinned to ``n_outputs``.  Neuron model, bitstream length and
    delay range are drawn independently for every layer.
    """
    depth = rng.randint(self.min_layers, self.max_layers + 1)
    hidden = [int(rng.choice(self.width_choices)) for _ in range(depth - 1)]
    neuron_models = [str(rng.choice(self.neuron_choices)) for _ in range(depth)]
    sc_lengths = [int(rng.choice(self.L_choices)) for _ in range(depth)]
    max_delays = [int(rng.choice(self.delay_choices)) for _ in range(depth)]
    return Architecture(
        n_inputs=self.n_inputs,
        layer_widths=hidden + [self.n_outputs],
        neuron_types=neuron_models,
        bitstream_lengths=sc_lengths,
        delay_ranges=max_delays,
    )

mutate(arch, rng)

Mutate one random gene in the architecture.

Source code in src/sc_neurocore/nas/search_space.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def mutate(self, arch: Architecture, rng: np.random.RandomState) -> Architecture:
    """Mutate one random gene in the architecture.

    One of four gene categories (width, neuron type, bitstream length,
    delay range) and one layer are drawn, and that gene is resampled from
    the corresponding choice list.

    The output-layer width is pinned to ``n_outputs``, so the width gene is
    only drawn for hidden layers; previously a width mutation aimed at the
    last layer silently fell through to the delay branch, skewing the
    mutation distribution toward delays.  When the architecture has no
    hidden layers at all, another gene category is drawn instead.
    """
    # Copy all gene lists so the parent architecture is left untouched.
    widths = list(arch.layer_widths)
    neurons = list(arch.neuron_types)
    lengths = list(arch.bitstream_lengths)
    delays = list(arch.delay_ranges)

    # gene: 0 = width, 1 = neuron type, 2 = bitstream length, 3 = delay.
    gene = rng.randint(0, 4)
    if gene == 0 and arch.n_layers == 1:
        # Single (output) layer: there is no mutable width, redraw the gene.
        gene = rng.randint(1, 4)

    if gene == 0:
        # Width mutations target hidden layers only (upper bound exclusive).
        layer_idx = rng.randint(0, arch.n_layers - 1)
        widths[layer_idx] = int(rng.choice(self.width_choices))
    else:
        layer_idx = rng.randint(0, arch.n_layers)
        if gene == 1:
            neurons[layer_idx] = str(rng.choice(self.neuron_choices))
        elif gene == 2:
            lengths[layer_idx] = int(rng.choice(self.L_choices))
        else:
            delays[layer_idx] = int(rng.choice(self.delay_choices))

    return Architecture(
        n_inputs=arch.n_inputs,
        layer_widths=widths,
        neuron_types=neurons,
        bitstream_lengths=lengths,
        delay_ranges=delays,
    )

crossover(a, b, rng)

Uniform crossover between two architectures of equal layer count.

Source code in src/sc_neurocore/nas/search_space.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def crossover(
    self, a: Architecture, b: Architecture, rng: np.random.RandomState
) -> Architecture:
    """Uniform per-layer crossover of two parents.

    Each layer's genes (width, neuron, bitstream length, delay) are copied
    wholesale from one randomly chosen parent.  The child's depth is the
    smaller of the two parents' layer counts and it inherits ``a``'s input
    width.
    """
    depth = min(a.n_layers, b.n_layers)
    child_widths: list[int] = []
    child_neurons: list[str] = []
    child_lengths: list[int] = []
    child_delays: list[int] = []
    for layer in range(depth):
        parent = a if rng.random() < 0.5 else b
        child_widths.append(parent.layer_widths[layer])
        child_neurons.append(parent.neuron_types[layer])
        child_lengths.append(parent.bitstream_lengths[layer])
        child_delays.append(parent.delay_ranges[layer])
    return Architecture(
        n_inputs=a.n_inputs,
        layer_widths=child_widths,
        neuron_types=child_neurons,
        bitstream_lengths=child_lengths,
        delay_ranges=child_delays,
    )

Search Engine

sc_neurocore.nas.search

NSGA-II evolutionary search over SNN architectures under FPGA constraints.

Searches {neuron model, layer width, bitstream length, delay range} jointly, evaluating each candidate for accuracy (simulated) and hardware cost (via the energy estimator). Returns a Pareto front of non-dominated architectures.

No equivalent exists: SpikeNAS searches only software architectures. This is the first NAS that searches hardware parameters (L, delays, LUTs) alongside network topology.

NASResult dataclass

Result of a NAS run.

Source code in src/sc_neurocore/nas/search.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
@dataclass
class NASResult:
    """Result of a NAS run.

    Holds the final Pareto front plus every architecture evaluated during
    the search, with accessors for the extreme points of the front.
    """

    pareto_front: list[Architecture]
    all_evaluated: list[Architecture]
    generations: int
    total_evaluations: int

    def best_accuracy(self) -> Architecture | None:
        """Pareto-front architecture with the highest accuracy (None if empty)."""
        if self.pareto_front:
            return max(self.pareto_front, key=lambda arch: arch.fitness_accuracy)
        return None

    def best_efficiency(self) -> Architecture | None:
        """Pareto-front architecture with the lowest energy (None if empty)."""
        if self.pareto_front:
            return min(self.pareto_front, key=lambda arch: arch.fitness_energy_nj)
        return None

    def summary(self) -> str:
        """Multi-line, human-readable summary of the run and its front."""
        header = [
            f"NAS Result: {self.generations} generations, {self.total_evaluations} evaluations",
            f"Pareto front: {len(self.pareto_front)} architectures",
        ]
        rows = [
            f"  [{i}] {a.layer_widths} L={a.bitstream_lengths} "
            f"acc={a.fitness_accuracy:.3f} luts={a.fitness_luts} "
            f"E={a.fitness_energy_nj:.1f}nJ"
            for i, a in enumerate(self.pareto_front)
        ]
        return "\n".join(header + rows)

best_accuracy()

Architecture with highest accuracy on the Pareto front.

Source code in src/sc_neurocore/nas/search.py
39
40
41
42
43
def best_accuracy(self) -> Architecture | None:
    """Pareto-front architecture with the highest accuracy (None if the front is empty)."""
    candidates = self.pareto_front
    return max(candidates, key=lambda arch: arch.fitness_accuracy) if candidates else None

best_efficiency()

Architecture with lowest energy on the Pareto front.

Source code in src/sc_neurocore/nas/search.py
45
46
47
48
49
def best_efficiency(self) -> Architecture | None:
    """Pareto-front architecture with the lowest energy (None if the front is empty)."""
    candidates = self.pareto_front
    return min(candidates, key=lambda arch: arch.fitness_energy_nj) if candidates else None

nas(space, target='ice40', population_size=50, generations=20, max_luts=None, accuracy_fn=None, seed=42)

Run hardware-aware NAS using NSGA-II.

Parameters

- space : SearchSpace — Architecture search space definition.
- target : str — FPGA target for hardware cost evaluation.
- population_size : int — Number of architectures per generation.
- generations : int — Number of evolutionary generations.
- max_luts : int, optional — Hard LUT budget. Architectures exceeding this are penalized. If None, uses the target's total LUT count.
- accuracy_fn : callable, optional — Function(Architecture) -> float accuracy in [0, 1]. If None, uses a proxy based on network capacity.
- seed : int — Random seed.

Returns

NASResult Pareto front + all evaluated architectures.

Source code in src/sc_neurocore/nas/search.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
def nas(
    space: SearchSpace,
    target: str = "ice40",
    population_size: int = 50,
    generations: int = 20,
    max_luts: int | None = None,
    accuracy_fn=None,
    seed: int = 42,
) -> NASResult:
    """Run hardware-aware NAS using NSGA-II.

    Parameters
    ----------
    space : SearchSpace
        Architecture search space definition.
    target : str
        FPGA target for hardware cost evaluation.
    population_size : int
        Number of architectures per generation.
    generations : int
        Number of evolutionary generations.
    max_luts : int, optional
        Hard LUT budget. Architectures exceeding this are penalized.
        If None, uses the target's total LUT count.
    accuracy_fn : callable, optional
        Function(Architecture) -> float accuracy in [0, 1].
        If None, uses a proxy based on network capacity.
    seed : int
        Random seed.

    Returns
    -------
    NASResult
        Pareto front + all evaluated architectures.
    """
    from sc_neurocore.energy.fpga_models import TARGETS

    rng = np.random.RandomState(seed)

    if max_luts is None:
        target_info = TARGETS.get(target)
        max_luts = target_info.total_luts if target_info else 100000

    def _evaluate_and_penalize(archs: list[Architecture]) -> None:
        """Evaluate fitness in place and scale accuracy down on LUT overuse.

        Shared by the parent and offspring evaluation passes so the
        penalty rule cannot drift between them.
        """
        for arch in archs:
            _evaluate(arch, target, accuracy_fn)
            if arch.fitness_luts > max_luts:
                # Soft penalty: accuracy shrinks with overuse, floored at 10%.
                overuse = arch.fitness_luts / max_luts
                arch.fitness_accuracy *= max(0.1, 1.0 / overuse)

    # Initialize population
    population = [space.random_architecture(rng) for _ in range(population_size)]
    all_evaluated: list[Architecture] = []

    for _ in range(generations):
        _evaluate_and_penalize(population)
        all_evaluated.extend(population)

        # Non-dominated sort of the current population for parent selection.
        fronts = _non_dominated_sort(population)

        # Generate offspring: crossover when parents have matching depth
        # (70% of the time), otherwise mutate one parent.
        offspring = []
        while len(offspring) < population_size:
            parent_a = _tournament_select(population, fronts, rng)
            parent_b = _tournament_select(population, fronts, rng)

            if parent_a.n_layers == parent_b.n_layers and rng.random() < 0.7:
                child = space.crossover(parent_a, parent_b, rng)
            else:
                child = space.mutate(parent_a, rng)

            offspring.append(child)

        _evaluate_and_penalize(offspring)
        all_evaluated.extend(offspring)

        # NSGA-II environmental selection over parents + offspring: fill the
        # next generation front by front, breaking ties inside the last
        # partially fitting front by crowding distance.
        combined = population + offspring
        combined_fronts = _non_dominated_sort(combined)

        next_pop = []
        for front in combined_fronts:
            if len(next_pop) + len(front) <= population_size:
                next_pop.extend(front)
            else:
                distances = _crowding_distance(front)
                ranked = sorted(zip(front, distances), key=lambda x: x[1], reverse=True)
                remaining = population_size - len(next_pop)
                next_pop.extend(arch for arch, _ in ranked[:remaining])
                break

        population = next_pop

    # Final non-dominated sort; the first front is the Pareto set.
    final_fronts = _non_dominated_sort(population)
    pareto_front = final_fronts[0] if final_fronts else []

    # Present the front ordered by accuracy, best first.
    pareto_front.sort(key=lambda a: a.fitness_accuracy, reverse=True)

    return NASResult(
        pareto_front=pareto_front,
        all_evaluated=all_evaluated,
        generations=generations,
        total_evaluations=len(all_evaluated),
    )

Formal Equivalence

sc_neurocore.nas.equiv

Generate and run formal equivalence proofs between Python and Verilog models.

Uses SymbiYosys (sby) for bounded model checking. The miter circuit drives both the DUT and a reference Verilog model with symbolic inputs. If outputs match for ALL input sequences up to depth N, equivalence is proved.

Pre-built proofs live in hdl/equiv/. This module generates new proofs for arbitrary neuron configurations and optionally runs them.

EquivResult dataclass

Result of a formal equivalence check.

Source code in src/sc_neurocore/nas/equiv.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
@dataclass
class EquivResult:
    """Result of a formal equivalence check.

    Records which module was checked, whether the bounded proof passed,
    the BMC depth and engine used, and the (possibly truncated) solver log.
    """

    module: str
    passed: bool
    depth: int
    engine: str
    log: str

    def summary(self) -> str:
        """One-line status string for this check."""
        if self.passed:
            status = "PROVED"
        else:
            status = "FAILED"
        return (
            f"Equivalence [{self.module}]: {status} (BMC depth={self.depth}, engine={self.engine})"
        )

check_equivalence(dut_verilog='sc_lif_neuron', ref_verilog='sc_lif_reference', depth=30, run=False)

Check formal equivalence between DUT and reference.

Parameters

- dut_verilog : str — DUT module name (must exist in hdl/).
- ref_verilog : str — Reference module name (must exist in hdl/equiv/).
- depth : int — BMC depth (number of clock cycles to check).
- run : bool — If True, actually run SymbiYosys (requires sby + z3 installed). If False, generate proof files and return without running.

Returns

EquivResult

Source code in src/sc_neurocore/nas/equiv.py
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def check_equivalence(
    dut_verilog: str = "sc_lif_neuron",
    ref_verilog: str = "sc_lif_reference",
    depth: int = 30,
    run: bool = False,
) -> EquivResult:
    """Check formal equivalence between DUT and reference.

    Parameters
    ----------
    dut_verilog : str
        DUT module name (must exist in hdl/).
    ref_verilog : str
        Reference module name (must exist in hdl/equiv/).
    depth : int
        BMC depth (number of clock cycles to check).
    run : bool
        If True, actually run SymbiYosys. Requires sby + z3 installed.
        If False, generate proof files and return without running.

    Returns
    -------
    EquivResult
    """
    top = f"equiv_{dut_verilog}"
    # Single source of truth for the engine label and the subprocess timeout;
    # previously both were duplicated across every EquivResult construction,
    # letting the timeout message drift from the actual timeout value.
    engine = "smtbmc z3"
    timeout_s = 300

    def _result(passed: bool, log: str) -> EquivResult:
        """Build an EquivResult with the shared module/depth/engine fields."""
        return EquivResult(
            module=dut_verilog,
            passed=passed,
            depth=depth,
            engine=engine,
            log=log,
        )

    if not run:
        return _result(
            True,
            "Proof files generated (not run). Use run=True with SymbiYosys installed.",
        )

    sby_file = EQUIV_DIR / f"{top}.sby"  # pragma: no cover
    if not sby_file.exists():  # pragma: no cover
        return _result(False, f"SBY file not found: {sby_file}")

    try:  # pragma: no cover
        result = subprocess.run(
            ["sby", "-f", str(sby_file)],
            capture_output=True,
            text=True,
            timeout=timeout_s,
            cwd=str(EQUIV_DIR),
        )
        # Keep only the tail of a long solver log.
        log = result.stdout[-2000:] if len(result.stdout) > 2000 else result.stdout
        return _result(result.returncode == 0, log)
    except FileNotFoundError:  # pragma: no cover
        return _result(False, "SymbiYosys (sby) not found. Install: pip install symbiyosys")
    except subprocess.TimeoutExpired:  # pragma: no cover
        return _result(False, f"Proof timed out after {timeout_s}s at depth {depth}")

generate_miter(dut_module, ref_module, top_name, data_width=16, fraction=8)

Generate a Verilog miter circuit for two modules.

Both modules must have identical port signatures: clk, rst_n, leak_k, gain_k, I_t, noise_in -> spike_out, v_out

Source code in src/sc_neurocore/nas/equiv.py
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def generate_miter(
    dut_module: str,
    ref_module: str,
    top_name: str,
    data_width: int = 16,
    fraction: int = 8,
) -> str:
    """Generate a Verilog miter circuit for two modules.

    Both modules must have identical port signatures:
    clk, rst_n, leak_k, gain_k, I_t, noise_in -> spike_out, v_out

    The miter instantiates both modules with the same symbolic (``anyseq``)
    inputs, releases reset after two cycles, and asserts every cycle that
    the spike and membrane-voltage outputs agree — the property a BMC
    engine then checks up to a bounded depth.

    Parameters
    ----------
    dut_module, ref_module : str
        Verilog module names to instantiate and compare.
    top_name : str
        Name of the generated miter top module.
    data_width, fraction : int
        Fixed-point format parameters forwarded to both instances.

    Returns
    -------
    str
        Complete Verilog source text for the miter top module.
    """
    # The template below is emitted verbatim with only the module names and
    # fixed-point parameters interpolated; both instances share the same
    # anyseq-driven inputs so any output divergence is a counterexample.
    return f"""\
`timescale 1ns / 1ps
module {top_name};
    parameter integer DATA_WIDTH = {data_width};
    parameter integer FRACTION = {fraction};

    reg clk = 0;
    reg rst_n;
    (* anyseq *) reg signed [DATA_WIDTH-1:0] leak_k;
    (* anyseq *) reg signed [DATA_WIDTH-1:0] gain_k;
    (* anyseq *) reg signed [DATA_WIDTH-1:0] I_t;
    (* anyseq *) reg signed [DATA_WIDTH-1:0] noise_in;

    wire spike_dut, spike_ref;
    wire signed [DATA_WIDTH-1:0] v_dut, v_ref;

    {dut_module} #(
        .DATA_WIDTH(DATA_WIDTH), .FRACTION(FRACTION),
        .V_REST(0), .V_RESET(0), .V_THRESHOLD(1 << FRACTION),
        .REFRACTORY_PERIOD(0)
    ) dut (
        .clk(clk), .rst_n(rst_n), .leak_k(leak_k), .gain_k(gain_k),
        .I_t(I_t), .noise_in(noise_in), .spike_out(spike_dut), .v_out(v_dut)
    );

    {ref_module} #(
        .DATA_WIDTH(DATA_WIDTH), .FRACTION(FRACTION),
        .V_REST(0), .V_RESET(0), .V_THRESHOLD(1 << FRACTION)
    ) ref_inst (
        .clk(clk), .rst_n(rst_n), .leak_k(leak_k), .gain_k(gain_k),
        .I_t(I_t), .noise_in(noise_in), .spike_out(spike_ref), .v_out(v_ref)
    );

    always #5 clk = ~clk;

    reg [3:0] cyc = 0;
    initial rst_n = 0;
    always @(posedge clk) begin
        cyc <= cyc + 1;
        if (cyc == 2) rst_n <= 1;
    end

    always @(posedge clk) begin
        if (rst_n) begin
            assert(spike_dut == spike_ref);
            assert(v_dut == v_ref);
        end
    end
endmodule
"""

generate_sby(top_name, verilog_files, depth=30, engine='smtbmc z3')

Generate a SymbiYosys .sby proof script.

Source code in src/sc_neurocore/nas/equiv.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
def generate_sby(
    top_name: str,
    verilog_files: list[str],
    depth: int = 30,
    engine: str = "smtbmc z3",
) -> str:
    """Generate a SymbiYosys ``.sby`` proof script.

    The script declares a single ``bmc`` task that formally reads every
    file in ``verilog_files`` and elaborates ``top_name`` as the proof top.
    """
    read_block = "\n".join(f"read -formal {name}" for name in verilog_files)
    file_block = "\n".join(verilog_files)
    sections = [
        "[tasks]",
        "bmc",
        "",
        "[options]",
        "bmc: mode bmc",
        f"bmc: depth {depth}",
        "",
        "[engines]",
        engine,
        "",
        "[script]",
        read_block,
        f"prep -top {top_name}",
        "",
        "[files]",
        file_block,
    ]
    return "\n".join(sections) + "\n"