Skip to content

Hardware-Aware SNN NAS

NSGA-II evolutionary search over SNN architectures under FPGA resource budgets.

Searches {neuron model, layer width, bitstream length, delay range} jointly — the first NAS that optimizes hardware parameters alongside topology.

Search Space

sc_neurocore.nas.search_space

Define the architecture search space for hardware-aware SNN NAS.

Search dimensions
  • n_layers: number of hidden layers
  • widths: neurons per layer
  • neuron_type: per-layer neuron model
  • bitstream_length: per-layer SC precision (L)
  • delay_range: maximum synaptic delay per layer

Each architecture encodes one point in this joint space. FPGA constraints (LUT, BRAM budgets) prune infeasible points.

Architecture dataclass

One point in the NAS search space.

Source code in src/sc_neurocore/nas/search_space.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
@dataclass
class Architecture:
    """One point in the NAS search space.

    Genes are stored per layer: width, neuron model, SC bitstream length,
    and maximum synaptic delay.  The ``fitness_*`` fields default to zero
    and are populated during evaluation.
    """

    n_inputs: int
    layer_widths: list[int]
    neuron_types: list[str]
    bitstream_lengths: list[int]
    delay_ranges: list[int]
    fitness_accuracy: float = 0.0
    fitness_luts: int = 0
    fitness_energy_nj: float = 0.0
    dominates_count: int = 0

    @property
    def n_layers(self) -> int:
        """Total layer count (hidden layers plus output layer)."""
        return len(self.layer_widths)

    @property
    def layer_sizes(self) -> list[tuple[int, int]]:
        """(fan_in, fan_out) pairs, one per layer, starting from the input."""
        fan_ins = [self.n_inputs, *self.layer_widths[:-1]]
        return list(zip(fan_ins, self.layer_widths))

    @property
    def total_params(self) -> int:
        """Total synaptic weight count across all layers."""
        return sum(fan_in * fan_out for fan_in, fan_out in self.layer_sizes)

SearchSpace dataclass

Configurable NAS search space.

Parameters

- n_inputs : int — Input dimension.
- n_outputs : int — Output dimension (width of final layer).
- min_layers, max_layers : int — Range of hidden layer count.
- width_choices : list of int — Candidate widths per layer.
- neuron_choices : list of str — Candidate neuron models.
- L_choices : list of int — Candidate bitstream lengths.
- delay_choices : list of int — Candidate max-delay values.

Source code in src/sc_neurocore/nas/search_space.py
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
@dataclass
class SearchSpace:
    """Configurable NAS search space.

    Parameters
    ----------
    n_inputs : int
        Input dimension.
    n_outputs : int
        Output dimension (width of final layer).
    min_layers, max_layers : int
        Range of hidden layer count.
    width_choices : list of int
        Candidate widths per layer.
    neuron_choices : list of str
        Candidate neuron models.
    L_choices : list of int
        Candidate bitstream lengths.
    delay_choices : list of int
        Candidate max-delay values.
    """

    n_inputs: int
    n_outputs: int
    min_layers: int = 1
    max_layers: int = 4
    # Defaults are copied from module-level constant choice lists so that
    # mutating one instance's lists never affects the shared constants.
    width_choices: list[int] = field(default_factory=lambda: list(WIDTH_CHOICES))
    neuron_choices: list[str] = field(default_factory=lambda: list(NEURON_CHOICES))
    L_choices: list[int] = field(default_factory=lambda: list(L_CHOICES))
    delay_choices: list[int] = field(default_factory=lambda: list(DELAY_CHOICES))

    def random_architecture(self, rng: np.random.RandomState) -> Architecture:
        """Sample a random architecture from the space."""
        # randint's upper bound is exclusive, hence +1 to include max_layers.
        n_layers = rng.randint(self.min_layers, self.max_layers + 1)
        # Hidden-layer widths are sampled; the final layer is pinned to n_outputs.
        widths = [int(rng.choice(self.width_choices)) for _ in range(n_layers - 1)]
        widths.append(self.n_outputs)
        # Every layer gets an independently drawn neuron model, bitstream
        # length and delay range.
        neurons = [str(rng.choice(self.neuron_choices)) for _ in range(n_layers)]
        lengths = [int(rng.choice(self.L_choices)) for _ in range(n_layers)]
        delays = [int(rng.choice(self.delay_choices)) for _ in range(n_layers)]
        return Architecture(
            n_inputs=self.n_inputs,
            layer_widths=widths,
            neuron_types=neurons,
            bitstream_lengths=lengths,
            delay_ranges=delays,
        )

    def mutate(self, arch: Architecture, rng: np.random.RandomState) -> Architecture:
        """Mutate one random gene in the architecture."""
        # Copy all gene lists so the parent architecture is left untouched.
        widths = list(arch.layer_widths)
        neurons = list(arch.neuron_types)
        lengths = list(arch.bitstream_lengths)
        delays = list(arch.delay_ranges)

        # gene: 0 = width, 1 = neuron type, 2 = bitstream length, 3 = delay.
        gene = rng.randint(0, 4)
        layer_idx = rng.randint(0, arch.n_layers)

        # NOTE(review): when gene == 0 targets the last layer (whose width is
        # pinned to n_outputs) the first condition fails and control falls
        # through to the `else` branch, mutating the delay instead — confirm
        # this bias toward delay mutation is intended.
        if gene == 0 and layer_idx < arch.n_layers - 1:
            widths[layer_idx] = int(rng.choice(self.width_choices))
        elif gene == 1:
            neurons[layer_idx] = str(rng.choice(self.neuron_choices))
        elif gene == 2:
            lengths[layer_idx] = int(rng.choice(self.L_choices))
        else:
            delays[layer_idx] = int(rng.choice(self.delay_choices))

        return Architecture(
            n_inputs=arch.n_inputs,
            layer_widths=widths,
            neuron_types=neurons,
            bitstream_lengths=lengths,
            delay_ranges=delays,
        )

    def crossover(
        self, a: Architecture, b: Architecture, rng: np.random.RandomState
    ) -> Architecture:
        """Uniform crossover between two architectures of equal layer count."""
        # Child depth is truncated to the shorter parent; each layer's genes
        # are inherited wholesale from one randomly chosen parent.
        n = min(a.n_layers, b.n_layers)
        widths, neurons, lengths, delays = [], [], [], []
        for i in range(n):
            src = a if rng.random() < 0.5 else b
            widths.append(src.layer_widths[i])
            neurons.append(src.neuron_types[i])
            lengths.append(src.bitstream_lengths[i])
            delays.append(src.delay_ranges[i])
        return Architecture(
            n_inputs=a.n_inputs,
            layer_widths=widths,
            neuron_types=neurons,
            bitstream_lengths=lengths,
            delay_ranges=delays,
        )

    @property
    def space_size(self) -> int:
        """Approximate total architectures in the search space."""
        # Counts all four choice lists for every layer, including the output
        # layer whose width is actually fixed — hence "approximate" (slight
        # overcount).
        per_layer = (
            len(self.width_choices)
            * len(self.neuron_choices)
            * len(self.L_choices)
            * len(self.delay_choices)
        )
        total = 0
        for n in range(self.min_layers, self.max_layers + 1):
            total += per_layer**n
        return total

space_size property

Approximate total architectures in the search space.

random_architecture(rng)

Sample a random architecture from the space.

Source code in src/sc_neurocore/nas/search_space.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
def random_architecture(self, rng: np.random.RandomState) -> Architecture:
    """Sample a uniformly random architecture from the space.

    Hidden-layer widths are drawn from ``width_choices``; the final layer's
    width is pinned to ``n_outputs``.  Neuron model, bitstream length and
    delay range are drawn independently for every layer.
    """
    depth = rng.randint(self.min_layers, self.max_layers + 1)
    hidden = [int(rng.choice(self.width_choices)) for _ in range(depth - 1)]
    neuron_models = [str(rng.choice(self.neuron_choices)) for _ in range(depth)]
    sc_lengths = [int(rng.choice(self.L_choices)) for _ in range(depth)]
    max_delays = [int(rng.choice(self.delay_choices)) for _ in range(depth)]
    return Architecture(
        n_inputs=self.n_inputs,
        layer_widths=hidden + [self.n_outputs],
        neuron_types=neuron_models,
        bitstream_lengths=sc_lengths,
        delay_ranges=max_delays,
    )

mutate(arch, rng)

Mutate one random gene in the architecture.

Source code in src/sc_neurocore/nas/search_space.py
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
def mutate(self, arch: Architecture, rng: np.random.RandomState) -> Architecture:
    """Mutate one random gene in the architecture.

    One of four gene categories (width, neuron type, bitstream length,
    delay range) and one layer are drawn, and that gene is resampled from
    the corresponding choice list.

    The output-layer width is pinned to ``n_outputs``, so the width gene is
    only drawn for hidden layers; previously a width mutation aimed at the
    last layer silently fell through to the delay branch, skewing the
    mutation distribution toward delays.  When the architecture has no
    hidden layers at all, another gene category is drawn instead.
    """
    # Copy all gene lists so the parent architecture is left untouched.
    widths = list(arch.layer_widths)
    neurons = list(arch.neuron_types)
    lengths = list(arch.bitstream_lengths)
    delays = list(arch.delay_ranges)

    # gene: 0 = width, 1 = neuron type, 2 = bitstream length, 3 = delay.
    gene = rng.randint(0, 4)
    if gene == 0 and arch.n_layers == 1:
        # Single (output) layer: there is no mutable width, redraw the gene.
        gene = rng.randint(1, 4)

    if gene == 0:
        # Width mutations target hidden layers only (upper bound exclusive).
        layer_idx = rng.randint(0, arch.n_layers - 1)
        widths[layer_idx] = int(rng.choice(self.width_choices))
    else:
        layer_idx = rng.randint(0, arch.n_layers)
        if gene == 1:
            neurons[layer_idx] = str(rng.choice(self.neuron_choices))
        elif gene == 2:
            lengths[layer_idx] = int(rng.choice(self.L_choices))
        else:
            delays[layer_idx] = int(rng.choice(self.delay_choices))

    return Architecture(
        n_inputs=arch.n_inputs,
        layer_widths=widths,
        neuron_types=neurons,
        bitstream_lengths=lengths,
        delay_ranges=delays,
    )

crossover(a, b, rng)

Uniform crossover between two architectures of equal layer count.

Source code in src/sc_neurocore/nas/search_space.py
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
def crossover(
    self, a: Architecture, b: Architecture, rng: np.random.RandomState
) -> Architecture:
    """Uniform per-layer crossover of two parents.

    Each layer's genes (width, neuron, bitstream length, delay) are copied
    wholesale from one randomly chosen parent.  The child's depth is the
    smaller of the two parents' layer counts and it inherits ``a``'s input
    width.
    """
    depth = min(a.n_layers, b.n_layers)
    child_widths: list[int] = []
    child_neurons: list[str] = []
    child_lengths: list[int] = []
    child_delays: list[int] = []
    for layer in range(depth):
        parent = a if rng.random() < 0.5 else b
        child_widths.append(parent.layer_widths[layer])
        child_neurons.append(parent.neuron_types[layer])
        child_lengths.append(parent.bitstream_lengths[layer])
        child_delays.append(parent.delay_ranges[layer])
    return Architecture(
        n_inputs=a.n_inputs,
        layer_widths=child_widths,
        neuron_types=child_neurons,
        bitstream_lengths=child_lengths,
        delay_ranges=child_delays,
    )

Search Engine

sc_neurocore.nas.search

NSGA-II evolutionary search over SNN architectures under FPGA constraints.

Searches {neuron model, layer width, bitstream length, delay range} jointly, evaluating each candidate for accuracy (simulated) and hardware cost (via the energy estimator). Returns a Pareto front of non-dominated architectures.

No equivalent exists: SpikeNAS searches only software architectures. This is the first NAS that searches hardware parameters (L, delays, LUTs) alongside network topology.

NASResult dataclass

Result of a NAS run.

Source code in src/sc_neurocore/nas/search.py
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
@dataclass
class NASResult:
    """Result of a NAS run.

    Holds the final Pareto front plus every architecture evaluated during
    the search, with accessors for the extreme points of the front.
    """

    pareto_front: list[Architecture]
    all_evaluated: list[Architecture]
    generations: int
    total_evaluations: int

    def best_accuracy(self) -> Architecture | None:
        """Pareto-front architecture with the highest accuracy (None if empty)."""
        if self.pareto_front:
            return max(self.pareto_front, key=lambda arch: arch.fitness_accuracy)
        return None

    def best_efficiency(self) -> Architecture | None:
        """Pareto-front architecture with the lowest energy (None if empty)."""
        if self.pareto_front:
            return min(self.pareto_front, key=lambda arch: arch.fitness_energy_nj)
        return None

    def summary(self) -> str:
        """Multi-line, human-readable summary of the run and its front."""
        header = [
            f"NAS Result: {self.generations} generations, {self.total_evaluations} evaluations",
            f"Pareto front: {len(self.pareto_front)} architectures",
        ]
        rows = [
            f"  [{i}] {a.layer_widths} L={a.bitstream_lengths} "
            f"acc={a.fitness_accuracy:.3f} luts={a.fitness_luts} "
            f"E={a.fitness_energy_nj:.1f}nJ"
            for i, a in enumerate(self.pareto_front)
        ]
        return "\n".join(header + rows)

best_accuracy()

Architecture with highest accuracy on the Pareto front.

Source code in src/sc_neurocore/nas/search.py
39
40
41
42
43
def best_accuracy(self) -> Architecture | None:
    """Pareto-front architecture with the highest accuracy (None if the front is empty)."""
    candidates = self.pareto_front
    return max(candidates, key=lambda arch: arch.fitness_accuracy) if candidates else None

best_efficiency()

Architecture with lowest energy on the Pareto front.

Source code in src/sc_neurocore/nas/search.py
45
46
47
48
49
def best_efficiency(self) -> Architecture | None:
    """Pareto-front architecture with the lowest energy (None if the front is empty)."""
    candidates = self.pareto_front
    return min(candidates, key=lambda arch: arch.fitness_energy_nj) if candidates else None

nas(space, target='ice40', population_size=50, generations=20, max_luts=None, accuracy_fn=None, seed=42)

Run hardware-aware NAS using NSGA-II.

Parameters

- space : SearchSpace — Architecture search space definition.
- target : str — FPGA target for hardware cost evaluation.
- population_size : int — Number of architectures per generation.
- generations : int — Number of evolutionary generations.
- max_luts : int, optional — Hard LUT budget. Architectures exceeding this are penalized. If None, uses the target's total LUT count.
- accuracy_fn : callable, optional — Function(Architecture) -> float accuracy in [0, 1]. If None, uses a proxy based on network capacity.
- seed : int — Random seed.

Returns

NASResult Pareto front + all evaluated architectures.

Source code in src/sc_neurocore/nas/search.py
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
def nas(
    space: SearchSpace,
    target: str = "ice40",
    population_size: int = 50,
    generations: int = 20,
    max_luts: int | None = None,
    accuracy_fn=None,
    seed: int = 42,
) -> NASResult:
    """Run hardware-aware NAS using NSGA-II.

    Parameters
    ----------
    space : SearchSpace
        Architecture search space definition.
    target : str
        FPGA target for hardware cost evaluation.
    population_size : int
        Number of architectures per generation.
    generations : int
        Number of evolutionary generations.
    max_luts : int, optional
        Hard LUT budget. Architectures exceeding this are penalized.
        If None, uses the target's total LUT count.
    accuracy_fn : callable, optional
        Function(Architecture) -> float accuracy in [0, 1].
        If None, uses a proxy based on network capacity.
    seed : int
        Random seed.

    Returns
    -------
    NASResult
        Pareto front + all evaluated architectures.
    """
    from sc_neurocore.energy.fpga_models import TARGETS

    rng = np.random.RandomState(seed)

    if max_luts is None:
        target_info = TARGETS.get(target)
        max_luts = target_info.total_luts if target_info else 100000

    def _evaluate_and_penalize(archs: list[Architecture]) -> None:
        """Evaluate fitness in place and scale accuracy down on LUT overuse.

        Shared by the parent and offspring evaluation passes so the
        penalty rule cannot drift between them.
        """
        for arch in archs:
            _evaluate(arch, target, accuracy_fn)
            if arch.fitness_luts > max_luts:
                # Soft penalty: accuracy shrinks with overuse, floored at 10%.
                overuse = arch.fitness_luts / max_luts
                arch.fitness_accuracy *= max(0.1, 1.0 / overuse)

    # Initialize population
    population = [space.random_architecture(rng) for _ in range(population_size)]
    all_evaluated: list[Architecture] = []

    for _ in range(generations):
        _evaluate_and_penalize(population)
        all_evaluated.extend(population)

        # Non-dominated sort of the current population for parent selection.
        fronts = _non_dominated_sort(population)

        # Generate offspring: crossover when parents have matching depth
        # (70% of the time), otherwise mutate one parent.
        offspring = []
        while len(offspring) < population_size:
            parent_a = _tournament_select(population, fronts, rng)
            parent_b = _tournament_select(population, fronts, rng)

            if parent_a.n_layers == parent_b.n_layers and rng.random() < 0.7:
                child = space.crossover(parent_a, parent_b, rng)
            else:
                child = space.mutate(parent_a, rng)

            offspring.append(child)

        _evaluate_and_penalize(offspring)
        all_evaluated.extend(offspring)

        # NSGA-II environmental selection over parents + offspring: fill the
        # next generation front by front, breaking ties inside the last
        # partially fitting front by crowding distance.
        combined = population + offspring
        combined_fronts = _non_dominated_sort(combined)

        next_pop = []
        for front in combined_fronts:
            if len(next_pop) + len(front) <= population_size:
                next_pop.extend(front)
            else:
                distances = _crowding_distance(front)
                ranked = sorted(zip(front, distances), key=lambda x: x[1], reverse=True)
                remaining = population_size - len(next_pop)
                next_pop.extend(arch for arch, _ in ranked[:remaining])
                break

        population = next_pop

    # Final non-dominated sort; the first front is the Pareto set.
    final_fronts = _non_dominated_sort(population)
    pareto_front = final_fronts[0] if final_fronts else []

    # Present the front ordered by accuracy, best first.
    pareto_front.sort(key=lambda a: a.fitness_accuracy, reverse=True)

    return NASResult(
        pareto_front=pareto_front,
        all_evaluated=all_evaluated,
        generations=generations,
        total_evaluations=len(all_evaluated),
    )

Formal Equivalence

sc_neurocore.nas.equiv

Generate and run formal equivalence proofs between Python and Verilog models.

Uses SymbiYosys (sby) for bounded model checking. The miter circuit drives both the DUT and a reference Verilog model with symbolic inputs. If outputs match for ALL input sequences up to depth N, equivalence is proved.

Pre-built proofs live in hdl/equiv/. This module generates new proofs for arbitrary neuron configurations and optionally runs them.

EquivResult dataclass

Result of a formal equivalence check.

Source code in src/sc_neurocore/nas/equiv.py
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
@dataclass
class EquivResult:
    """Result of a formal equivalence check.

    Records which module was checked, whether the bounded proof passed,
    the BMC depth and engine used, and the (possibly truncated) solver log.
    """

    module: str
    passed: bool
    depth: int
    engine: str
    log: str

    def summary(self) -> str:
        """One-line status string for this check."""
        if self.passed:
            status = "PROVED"
        else:
            status = "FAILED"
        return (
            f"Equivalence [{self.module}]: {status} (BMC depth={self.depth}, engine={self.engine})"
        )

check_equivalence(dut_verilog='sc_lif_neuron', ref_verilog='sc_lif_reference', depth=30, run=False)

Check formal equivalence between DUT and reference.

Parameters

- dut_verilog : str — DUT module name (must exist in hdl/).
- ref_verilog : str — Reference module name (must exist in hdl/equiv/).
- depth : int — BMC depth (number of clock cycles to check).
- run : bool — If True, actually run SymbiYosys (requires sby + z3 installed). If False, generate proof files and return without running.

Returns

EquivResult

Source code in src/sc_neurocore/nas/equiv.py
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
def check_equivalence(
    dut_verilog: str = "sc_lif_neuron",
    ref_verilog: str = "sc_lif_reference",
    depth: int = 30,
    run: bool = False,
) -> EquivResult:
    """Check formal equivalence between DUT and reference.

    Parameters
    ----------
    dut_verilog : str
        DUT module name (must exist in hdl/).
    ref_verilog : str
        Reference module name (must exist in hdl/equiv/).
    depth : int
        BMC depth (number of clock cycles to check).
    run : bool
        If True, actually run SymbiYosys. Requires sby + z3 installed.
        If False, generate proof files and return without running.

    Returns
    -------
    EquivResult
    """
    top = f"equiv_{dut_verilog}"
    # Single source of truth for the engine label and the subprocess timeout;
    # previously both were duplicated across every EquivResult construction,
    # letting the timeout message drift from the actual timeout value.
    engine = "smtbmc z3"
    timeout_s = 300

    def _result(passed: bool, log: str) -> EquivResult:
        """Build an EquivResult with the shared module/depth/engine fields."""
        return EquivResult(
            module=dut_verilog,
            passed=passed,
            depth=depth,
            engine=engine,
            log=log,
        )

    if not run:
        return _result(
            True,
            "Proof files generated (not run). Use run=True with SymbiYosys installed.",
        )

    sby_file = EQUIV_DIR / f"{top}.sby"  # pragma: no cover
    if not sby_file.exists():  # pragma: no cover
        return _result(False, f"SBY file not found: {sby_file}")

    try:  # pragma: no cover
        result = subprocess.run(
            ["sby", "-f", str(sby_file)],
            capture_output=True,
            text=True,
            timeout=timeout_s,
            cwd=str(EQUIV_DIR),
        )
        # Keep only the tail of a long solver log.
        log = result.stdout[-2000:] if len(result.stdout) > 2000 else result.stdout
        return _result(result.returncode == 0, log)
    except FileNotFoundError:  # pragma: no cover
        return _result(False, "SymbiYosys (sby) not found. Install: pip install symbiyosys")
    except subprocess.TimeoutExpired:  # pragma: no cover
        return _result(False, f"Proof timed out after {timeout_s}s at depth {depth}")

generate_miter(dut_module, ref_module, top_name, data_width=16, fraction=8)

Generate a Verilog miter circuit for two modules.

Both modules must have identical port signatures: clk, rst_n, leak_k, gain_k, I_t, noise_in -> spike_out, v_out

Source code in src/sc_neurocore/nas/equiv.py
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
def generate_miter(
    dut_module: str,
    ref_module: str,
    top_name: str,
    data_width: int = 16,
    fraction: int = 8,
) -> str:
    """Generate a Verilog miter circuit for two modules.

    Both modules must have identical port signatures:
    clk, rst_n, leak_k, gain_k, I_t, noise_in -> spike_out, v_out

    The miter instantiates both modules with the same symbolic (``anyseq``)
    inputs, releases reset after two cycles, and asserts every cycle that
    the spike and membrane-voltage outputs agree — the property a BMC
    engine then checks up to a bounded depth.

    Parameters
    ----------
    dut_module, ref_module : str
        Verilog module names to instantiate and compare.
    top_name : str
        Name of the generated miter top module.
    data_width, fraction : int
        Fixed-point format parameters forwarded to both instances.

    Returns
    -------
    str
        Complete Verilog source text for the miter top module.
    """
    # The template below is emitted verbatim with only the module names and
    # fixed-point parameters interpolated; both instances share the same
    # anyseq-driven inputs so any output divergence is a counterexample.
    return f"""\
`timescale 1ns / 1ps
module {top_name};
    parameter integer DATA_WIDTH = {data_width};
    parameter integer FRACTION = {fraction};

    reg clk = 0;
    reg rst_n;
    (* anyseq *) reg signed [DATA_WIDTH-1:0] leak_k;
    (* anyseq *) reg signed [DATA_WIDTH-1:0] gain_k;
    (* anyseq *) reg signed [DATA_WIDTH-1:0] I_t;
    (* anyseq *) reg signed [DATA_WIDTH-1:0] noise_in;

    wire spike_dut, spike_ref;
    wire signed [DATA_WIDTH-1:0] v_dut, v_ref;

    {dut_module} #(
        .DATA_WIDTH(DATA_WIDTH), .FRACTION(FRACTION),
        .V_REST(0), .V_RESET(0), .V_THRESHOLD(1 << FRACTION),
        .REFRACTORY_PERIOD(0)
    ) dut (
        .clk(clk), .rst_n(rst_n), .leak_k(leak_k), .gain_k(gain_k),
        .I_t(I_t), .noise_in(noise_in), .spike_out(spike_dut), .v_out(v_dut)
    );

    {ref_module} #(
        .DATA_WIDTH(DATA_WIDTH), .FRACTION(FRACTION),
        .V_REST(0), .V_RESET(0), .V_THRESHOLD(1 << FRACTION)
    ) ref_inst (
        .clk(clk), .rst_n(rst_n), .leak_k(leak_k), .gain_k(gain_k),
        .I_t(I_t), .noise_in(noise_in), .spike_out(spike_ref), .v_out(v_ref)
    );

    always #5 clk = ~clk;

    reg [3:0] cyc = 0;
    initial rst_n = 0;
    always @(posedge clk) begin
        cyc <= cyc + 1;
        if (cyc == 2) rst_n <= 1;
    end

    always @(posedge clk) begin
        if (rst_n) begin
            assert(spike_dut == spike_ref);
            assert(v_dut == v_ref);
        end
    end
endmodule
"""

generate_sby(top_name, verilog_files, depth=30, engine='smtbmc z3')

Generate a SymbiYosys .sby proof script.

Source code in src/sc_neurocore/nas/equiv.py
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
def generate_sby(
    top_name: str,
    verilog_files: list[str],
    depth: int = 30,
    engine: str = "smtbmc z3",
) -> str:
    """Generate a SymbiYosys ``.sby`` proof script.

    The script declares a single ``bmc`` task that formally reads every
    file in ``verilog_files`` and elaborates ``top_name`` as the proof top.
    """
    read_block = "\n".join(f"read -formal {name}" for name in verilog_files)
    file_block = "\n".join(verilog_files)
    sections = [
        "[tasks]",
        "bmc",
        "",
        "[options]",
        "bmc: mode bmc",
        f"bmc: depth {depth}",
        "",
        "[engines]",
        engine,
        "",
        "[script]",
        read_block,
        f"prep -top {top_name}",
        "",
        "[files]",
        file_block,
    ]
    return "\n".join(sections) + "\n"