Estimate FPGA resource usage, power, and energy before synthesis.
Takes an SNN architecture description (layer sizes, neuron types,
connectivity) and an FPGA target, returns estimated LUTs, BRAM,
dynamic power (mW), and energy per inference (nJ) in <1 second.
Calibrated against Yosys synth_ice40 reports for SC-NeuroCore HDL.
Accuracy target: within 20% of actual synthesis for our modules.
EnergyReport
dataclass
Complete pre-silicon energy estimate for an SNN on an FPGA target.
Source code in src/sc_neurocore/energy/estimator.py
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
@dataclass
class EnergyReport:
    """Complete pre-silicon energy estimate for an SNN on an FPGA target.

    Callers supply the target name, the per-layer estimates, the
    infrastructure LUT count, and the clock; every aggregate figure
    (LUTs, FFs, BRAM, power, latency, energy, utilization) is derived
    in ``__post_init__``.
    """

    target: str
    layers: list[LayerEstimate]
    total_luts: int = field(init=False)
    total_ffs: int = field(init=False)
    total_bram_kb: float = field(init=False)
    infra_luts: int = 0
    total_dynamic_power_mw: float = field(init=False)
    total_latency_cycles: int = field(init=False)
    energy_per_inference_nj: float = field(init=False)
    clock_freq_mhz: float = 100.0
    fits_on_target: bool = field(init=False)
    utilization_pct: float = field(init=False)

    def __post_init__(self):
        # Aggregate per-layer resource figures into device-level totals.
        self.total_luts = sum(est.luts for est in self.layers) + self.infra_luts
        self.total_ffs = sum(est.ffs for est in self.layers)
        # 8192 bits == 1 KB (1024 bytes × 8 bits).
        self.total_bram_kb = sum(est.bram_bits for est in self.layers) / 8192.0
        self.total_dynamic_power_mw = sum(
            est.dynamic_power_mw for est in self.layers
        )
        self.total_latency_cycles = sum(est.latency_cycles for est in self.layers)
        latency_s = self.total_latency_cycles / (self.clock_freq_mhz * 1e6)
        # mW × s = mJ; the 1e6 factor converts mJ to nJ.
        self.energy_per_inference_nj = self.total_dynamic_power_mw * latency_s * 1e6
        info = TARGETS.get(self.target)
        if info is None:  # pragma: no cover — unknown target fallback
            self.fits_on_target = True
            self.utilization_pct = 0.0
        else:
            self.fits_on_target = self.total_luts <= info.total_luts
            self.utilization_pct = (self.total_luts / info.total_luts) * 100

    def summary(self) -> str:
        """Human-readable summary."""
        rows = [
            f"SC-NeuroCore Energy Estimate — {self.target}",
            "=" * 55,
            "",
        ]
        # One line per layer: dimensions, synapse count, bitstream length,
        # then the estimated LUTs and dynamic power.
        rows += [
            f" {est.name}: {est.n_inputs}->{est.n_neurons} "
            f"({est.n_synapses} syn, L={est.bitstream_length}) "
            f"-> {est.luts} LUTs, {est.dynamic_power_mw:.2f} mW"
            for est in self.layers
        ]
        rows += [
            "",
            f" Infrastructure: {self.infra_luts} LUTs",
            "",
            f" Total LUTs: {self.total_luts:,}",
            f" Total FFs: {self.total_ffs:,}",
            f" Total BRAM: {self.total_bram_kb:.1f} KB",
            f" Dynamic power: {self.total_dynamic_power_mw:.2f} mW",
            f" Latency: {self.total_latency_cycles:,} cycles",
            f" Energy/inf: {self.energy_per_inference_nj:.2f} nJ",
            f" Clock: {self.clock_freq_mhz:.0f} MHz",
            f" Utilization: {self.utilization_pct:.1f}%",
            f" Fits on target: {'YES' if self.fits_on_target else 'NO — exceeds LUT budget'}",
        ]
        return "\n".join(rows)
|
summary()
Human-readable summary.
Source code in src/sc_neurocore/energy/estimator.py
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
def summary(self) -> str:
    """Human-readable summary."""
    # Header: title line, rule, blank separator.
    rows = [
        f"SC-NeuroCore Energy Estimate — {self.target}",
        "=" * 55,
        "",
    ]
    # One line per layer with its dimensions and resource/power estimate.
    rows += [
        f" {est.name}: {est.n_inputs}->{est.n_neurons} "
        f"({est.n_synapses} syn, L={est.bitstream_length}) "
        f"-> {est.luts} LUTs, {est.dynamic_power_mw:.2f} mW"
        for est in self.layers
    ]
    # Footer: infrastructure cost and device-level totals.
    rows += [
        "",
        f" Infrastructure: {self.infra_luts} LUTs",
        "",
        f" Total LUTs: {self.total_luts:,}",
        f" Total FFs: {self.total_ffs:,}",
        f" Total BRAM: {self.total_bram_kb:.1f} KB",
        f" Dynamic power: {self.total_dynamic_power_mw:.2f} mW",
        f" Latency: {self.total_latency_cycles:,} cycles",
        f" Energy/inf: {self.energy_per_inference_nj:.2f} nJ",
        f" Clock: {self.clock_freq_mhz:.0f} MHz",
        f" Utilization: {self.utilization_pct:.1f}%",
        f" Fits on target: {'YES' if self.fits_on_target else 'NO — exceeds LUT budget'}",
    ]
    return "\n".join(rows)
|
LayerEstimate
dataclass
Resource estimate for one layer.
Source code in src/sc_neurocore/energy/estimator.py
37
38
39
40
41
42
43
44
45
46
47
48
49
@dataclass
class LayerEstimate:
    """Resource estimate for one layer.

    Produced by ``estimate()``; ``EnergyReport`` sums these per-layer
    figures into device-level totals.
    """

    name: str               # layer identifier, generated as "layer_<i>"
    n_inputs: int           # inputs to this layer
    n_neurons: int          # neurons (outputs) in this layer
    n_synapses: int         # n_inputs * n_neurons (fully connected)
    bitstream_length: int   # SC bitstream length L used for this estimate
    luts: int               # estimated LUTs (neurons + synapses + encoders + popcount MUX)
    ffs: int                # estimated flip-flops (neuron + encoder state)
    bram_bits: int          # 0 unless weights spill to block RAM (>1024 synapses)
    dynamic_power_mw: float # estimated dynamic power for this layer, mW
    latency_cycles: int     # cycles per inference step: bitstream_length + 2
|
estimate(layer_sizes, target='ice40', bitstream_length=256, neuron_type='lif', event_driven=False, clock_mhz=100.0, include_infra=True)
Estimate FPGA resources and power for an SNN.
Parameters
layer_sizes : list of (n_inputs, n_neurons)
Architecture as list of layer dimensions.
target : str
FPGA target: 'ice40', 'ecp5', 'artix7', 'zynq'.
bitstream_length : int
SC bitstream length L (affects latency and precision).
neuron_type : str
'lif' (clock-driven) or 'event' (event-driven).
event_driven : bool
Use event-driven architecture (AER).
clock_mhz : float
Target clock frequency.
include_infra : bool
Include AXI/DMA infrastructure cost.
Returns
EnergyReport
Complete resource and power estimate.
Source code in src/sc_neurocore/energy/estimator.py
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
def estimate(
    layer_sizes: list[tuple[int, int]],
    target: str = "ice40",
    bitstream_length: int = 256,
    neuron_type: str = "lif",
    event_driven: bool = False,
    clock_mhz: float = 100.0,
    include_infra: bool = True,
) -> EnergyReport:
    """Estimate FPGA resources and power for an SNN.

    Parameters
    ----------
    layer_sizes : list of (n_inputs, n_neurons)
        Architecture as list of layer dimensions.
    target : str
        FPGA target: 'ice40', 'ecp5', 'artix7', 'zynq'.
    bitstream_length : int
        SC bitstream length L (affects latency and precision).
    neuron_type : str
        'lif' (clock-driven) or 'event' (event-driven).
    event_driven : bool
        Use event-driven architecture (AER).
    clock_mhz : float
        Target clock frequency.
    include_infra : bool
        Include AXI/DMA infrastructure cost.

    Returns
    -------
    EnergyReport
        Complete resource and power estimate.

    Raises
    ------
    ValueError
        If ``target`` is not a known FPGA target.
    """
    target_info = TARGETS.get(target)
    if target_info is None:
        raise ValueError(f"Unknown target '{target}'. Options: {list(TARGETS)}")
    # BUGFIX: `neuron_type` was documented but previously ignored — the
    # event-driven path was selected only by `event_driven`. Honor either
    # spelling; defaults behave exactly as before (backward compatible).
    use_event = event_driven or neuron_type == "event"
    neuron_cost = EVENT_NEURON if use_event else LIF_NEURON
    layers = []
    for i, (n_in, n_out) in enumerate(layer_sizes):
        n_synapses = n_in * n_out
        n_encoders = n_in
        # LUT cost: neurons + synapses + input encoders + popcount MUX trees.
        luts_neurons = n_out * neuron_cost.luts
        luts_synapses = n_synapses * SC_SYNAPSE.luts
        luts_encoders = n_encoders * BITSTREAM_ENCODER.luts
        # MUX trees for popcount: ~log2(n_in) LUTs per neuron
        # (clamped so even tiny layers cost at least 1 LUT per neuron).
        luts_mux = n_out * max(1, int(np.log2(max(n_in, 2))))
        total_luts = luts_neurons + luts_synapses + luts_encoders + luts_mux
        # FF cost: neuron state plus encoder state.
        ffs = n_out * neuron_cost.ffs + n_encoders * BITSTREAM_ENCODER.ffs
        # BRAM for weights (only when too many for LUT registers).
        bram_bits = 0
        if n_synapses > 1024:
            bram_bits = n_synapses * BRAM_BITS_PER_WEIGHT
        # Latency: L cycles for SC computation + 2 cycles for neuron update.
        latency = bitstream_length + 2
        # Dynamic power: C_eff × V² × f × N_gates × activity.
        # SC random bitstreams toggle ~50% of cycles; event-driven logic is
        # mostly idle, modeled here as 10% activity.
        activity = 0.1 if use_event else 0.5
        c_eff_f = target_info.c_eff_per_lut_ff * 1e-15  # per-LUT C in fF -> F
        v_sq = target_info.voltage**2
        freq = clock_mhz * 1e6  # MHz -> Hz
        power_w = c_eff_f * v_sq * freq * total_luts * activity
        power_mw = power_w * 1e3
        layers.append(
            LayerEstimate(
                name=f"layer_{i}",
                n_inputs=n_in,
                n_neurons=n_out,
                n_synapses=n_synapses,
                bitstream_length=bitstream_length,
                luts=total_luts,
                ffs=ffs,
                bram_bits=bram_bits,
                dynamic_power_mw=power_mw,
                latency_cycles=latency,
            )
        )
    # Infrastructure cost: AXI-Lite control, plus AER fabric when event-driven.
    infra_luts = 0
    if include_infra:
        infra_luts = AXI_LITE.luts
        if use_event:
            infra_luts += AER_ENCODER.luts + AER_ROUTER.luts
    return EnergyReport(
        target=target,
        layers=layers,
        infra_luts=infra_luts,
        clock_freq_mhz=clock_mhz,
    )
|