Skip to main content

sc_neurocore_engine/ir/
emit_sv.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Commercial license available
3// © Concepts 1996–2026 Miroslav Šotek. All rights reserved.
4// © Code 2020–2026 Miroslav Šotek. All rights reserved.
5// ORCID: 0009-0009-3560-0851
6// Contact: www.anulum.li | protoscience@anulum.li
7// SC-NeuroCore — SystemVerilog emitter for SC IR graphs
8
9//! SystemVerilog emitter for SC IR graphs.
10//!
11//! Produces synthesizable RTL that instantiates modules from `hdl/`.
12//!
13//! Generated module interface:
14//! - Clock: `clk`
15//! - Reset: `rst_n` (active-low)
16//! - One port per `sc.input` / `sc.output` operation
17//! - Internal wiring for all intermediate values
18
19use crate::ir::graph::*;
20use crate::ir::sv_target::{ResourceReport, SvTarget};
21
22/// Emit a synthesizable SystemVerilog module from an SC graph.
23///
24/// The graph should pass `verify::verify()` before emission.
25pub fn emit(graph: &ScGraph) -> Result<String, String> {
26    emit_systemverilog_with_target(graph, SvTarget::Generic).map(|(systemverilog, _)| systemverilog)
27}
28
/// Emit a synthesizable SystemVerilog module and a resource estimate for a target.
///
/// The generated text is built in four passes over `graph.ops`:
///
/// 1. a header comment, the target's header comment and a `timescale` directive;
/// 2. the module declaration with `clk`, `rst_n` and one port per
///    `ScOp::Input` / `ScOp::Output`;
/// 3. declarations (wires, `logic`, or constants) for every other op;
/// 4. module instantiations / continuous assignments, followed by `endmodule`.
///
/// On success the SystemVerilog source is returned together with
/// `target.estimate_graph(graph)`.
///
/// # Errors
///
/// Returns an error string when:
/// - a `DclsLayer` has `tap_offsets.len() != n_taps`, or its tap offsets are
///   rejected by `emit_concat_u32`;
/// - the graph contains a `GraphForward`, `SoftmaxAttention` or `KuramotoStep`
///   op (no RTL is emitted for these).
pub fn emit_systemverilog_with_target(
    graph: &ScGraph,
    target: SvTarget,
) -> Result<(String, ResourceReport), String> {
    let mut sv = String::new();

    // Header
    sv.push_str(&format!(
        "// Auto-generated by SC-NeuroCore IR Compiler v3.0\n\
         // Source graph: {}\n\
         // Do not edit — regenerate from IR source.\n\n",
        graph.name
    ));
    sv.push_str(&target.header_comment());
    sv.push_str("`timescale 1ns / 1ps\n\n");

    // Module declaration
    // `graph.name` is used verbatim as the module identifier.
    sv.push_str(&format!("module {} (\n", graph.name));
    sv.push_str("    input wire clk,\n");
    sv.push_str("    input wire rst_n");

    // Collect inputs and outputs for port list
    // Each port is written with a leading ",\n" so the list never ends in a
    // trailing comma; 1-bit ports are emitted without a range.
    for op in &graph.ops {
        match op {
            ScOp::Input { name, ty, .. } => {
                let port_width = type_to_width(ty);
                if port_width == 1 {
                    sv.push_str(&format!(",\n    input wire {}", name));
                } else {
                    sv.push_str(&format!(
                        ",\n    input wire [{}:0] {}",
                        port_width - 1,
                        name
                    ));
                }
            }
            ScOp::Output { name, source, .. } => {
                // Output width is derived from the op that produces `source`.
                let width = find_value_width(graph, *source);
                if width == 1 {
                    sv.push_str(&format!(",\n    output wire {}", name));
                } else {
                    sv.push_str(&format!(",\n    output wire [{}:0] {}", width - 1, name));
                }
            }
            _ => {}
        }
    }
    sv.push_str("\n);\n\n");

    // Wire declarations for intermediate values
    // This match has no wildcard arm, so every `ScOp` variant is listed here.
    for op in &graph.ops {
        match op {
            // Ports were already declared in the module header.
            ScOp::Input { .. } | ScOp::Output { .. } => {}
            ScOp::Constant { id, value, .. } => emit_constant(&mut sv, *id, value, &target),
            ScOp::Encode { id, .. } => {
                sv.push_str(&format!("    wire v{};\n", id.0));
            }
            ScOp::BitwiseAnd { id, .. } => {
                sv.push_str(&format!("    wire v{};\n", id.0));
            }
            ScOp::Popcount { id, .. } => {
                // Declared as `logic` (not `wire`): it is assigned inside the
                // `always_comb` block emitted in the instantiation pass.
                sv.push_str(&format!("    logic [63:0] v{};\n", id.0));
            }
            ScOp::LifStep { id, params, .. } => {
                sv.push_str(&format!(
                    "    wire v{}_spike;\n    wire signed [{}:0] v{}_v_out;\n",
                    id.0,
                    params.data_width - 1,
                    id.0
                ));
            }
            ScOp::DenseForward { id, params, .. } => {
                sv.push_str(&format!(
                    "    wire [{}:0] v{}_spikes;\n    wire v{}_running;\n    wire v{}_done;\n",
                    params.n_neurons - 1,
                    id.0,
                    id.0,
                    id.0
                ));
            }
            ScOp::DclsLayer { id, params, .. } => {
                sv.push_str(&format!(
                    "    wire signed [{}:0] v{};\n\
                     \x20   wire signed [31:0] v{}_accumulator_q16_16;\n\
                     \x20   wire v{}_valid;\n\
                     \x20   wire v{}_overflow;\n\
                     \x20   wire v{}_invalid_sigma;\n",
                    params.data_width - 1,
                    id.0,
                    id.0,
                    id.0,
                    id.0,
                    id.0
                ));
            }
            ScOp::BitwiseXor { id, .. } => {
                sv.push_str(&format!("    wire v{};\n", id.0));
            }
            ScOp::Reduce { id, .. } => {
                sv.push_str(&format!("    wire [63:0] v{};\n", id.0));
            }
            // The three ops below get a declaration here, but the
            // instantiation pass returns an error when it reaches them.
            ScOp::GraphForward { id, n_features, .. } => {
                sv.push_str(&format!(
                    "    wire [{}:0] v{};\n",
                    n_features.saturating_sub(1),
                    id.0
                ));
            }
            ScOp::SoftmaxAttention { id, .. } => {
                sv.push_str(&format!("    wire [63:0] v{};\n", id.0));
            }
            ScOp::KuramotoStep { id, .. } => {
                sv.push_str(&format!("    wire [63:0] v{};\n", id.0));
            }
            ScOp::Scale { id, .. } | ScOp::Offset { id, .. } | ScOp::DivConst { id, .. } => {
                sv.push_str(&format!("    wire [63:0] v{};\n", id.0));
            }
        }
    }
    sv.push('\n');

    // Single counter shared by all instantiated modules, so instance-name
    // suffixes (u_enc_N, u_syn_N, u_lif_N, ...) are unique across kinds.
    let mut inst_idx = 0_u32;

    // Module instantiations
    for op in &graph.ops {
        match op {
            ScOp::Encode { id, prob, seed, .. } => {
                let prob_wire = value_to_wire(graph, *prob);
                // DATA_WIDTH is fixed at 16 and `t_index` is tied to zero;
                // the seed is rendered as a 16-bit hex literal.
                sv.push_str(&format!(
                    "    sc_bitstream_encoder #(\n\
                     \x20       .DATA_WIDTH(16),\n\
                     \x20       .SEED_INIT(16'h{:04X})\n\
                     \x20   ) u_enc_{} (\n\
                     \x20       .clk(clk),\n\
                     \x20       .rst_n(rst_n),\n\
                     \x20       .x_value({}),\n\
                     \x20       .t_index(32'd0),\n\
                     \x20       .bit_out(v{})\n\
                     \x20   );\n\n",
                    seed, inst_idx, prob_wire, id.0
                ));
                inst_idx += 1;
            }
            ScOp::BitwiseAnd { id, lhs, rhs } => {
                let lhs_wire = value_to_wire(graph, *lhs);
                let rhs_wire = value_to_wire(graph, *rhs);
                // The AND is realised by instantiating `sc_bitstream_synapse`.
                sv.push_str(&format!(
                    "    sc_bitstream_synapse u_syn_{} (\n\
                     \x20       .pre_bit({}),\n\
                     \x20       .w_bit({}),\n\
                     \x20       .post_bit(v{})\n\
                     \x20   );\n\n",
                    inst_idx, lhs_wire, rhs_wire, id.0
                ));
                inst_idx += 1;
            }
            ScOp::LifStep {
                id,
                current,
                leak,
                gain,
                noise,
                params,
            } => {
                let current_wire = value_to_wire(graph, *current);
                let leak_wire = value_to_wire(graph, *leak);
                let gain_wire = value_to_wire(graph, *gain);
                let noise_wire = value_to_wire(graph, *noise);
                // Optional target-specific attribute line precedes the instance.
                emit_target_dsp_attribute(&mut sv, &target);
                sv.push_str(&format!(
                    "    sc_lif_neuron #(\n\
                     \x20       .DATA_WIDTH({}),\n\
                     \x20       .FRACTION({}),\n\
                     \x20       .V_REST({}),\n\
                     \x20       .V_RESET({}),\n\
                     \x20       .V_THRESHOLD({}),\n\
                     \x20       .REFRACTORY_PERIOD({})\n\
                     \x20   ) u_lif_{} (\n\
                     \x20       .clk(clk),\n\
                     \x20       .rst_n(rst_n),\n\
                     \x20       .leak_k({}),\n\
                     \x20       .gain_k({}),\n\
                     \x20       .I_t({}),\n\
                     \x20       .noise_in({}),\n\
                     \x20       .spike_out(v{}_spike),\n\
                     \x20       .v_out(v{}_v_out)\n\
                     \x20   );\n\n",
                    params.data_width,
                    params.fraction,
                    params.v_rest,
                    params.v_reset,
                    params.v_threshold,
                    params.refractory_period,
                    inst_idx,
                    leak_wire,
                    gain_wire,
                    current_wire,
                    noise_wire,
                    id.0,
                    id.0
                ));
                inst_idx += 1;
            }
            ScOp::DenseForward {
                id,
                inputs,
                weights,
                leak,
                gain,
                params,
            } => {
                let inputs_wire = value_to_wire(graph, *inputs);
                let weights_wire = value_to_wire(graph, *weights);
                let leak_wire = value_to_wire(graph, *leak);
                let gain_wire = value_to_wire(graph, *gain);
                emit_dense_fold_plan_comment(&mut sv, &target, params);
                emit_target_dsp_attribute(&mut sv, &target);
                // `start_pulse` is tied high, `y_min_fp` / `y_max_fp` are fixed
                // literals, and the `I_t` / `step_valid` outputs are left
                // unconnected.
                sv.push_str(&format!(
                    "    sc_dense_layer_core #(\n\
                     \x20       .N_INPUTS({}),\n\
                     \x20       .N_NEURONS({}),\n\
                     \x20       .DATA_WIDTH({})\n\
                     \x20   ) u_dense_{} (\n\
                     \x20       .clk(clk),\n\
                     \x20       .rst_n(rst_n),\n\
                     \x20       .start_pulse(1'b1),\n\
                     \x20       .stream_len(32'd{}),\n\
                     \x20       .x_input_fp({}),\n\
                     \x20       .weight_fp({}),\n\
                     \x20       .y_min_fp(16'd0),\n\
                     \x20       .y_max_fp(16'd256),\n\
                     \x20       .cfg_leak({}),\n\
                     \x20       .cfg_gain({}),\n\
                     \x20       .I_t(),\n\
                     \x20       .spikes(v{}_spikes),\n\
                     \x20       .step_valid(),\n\
                     \x20       .run_done(v{}_done),\n\
                     \x20       .running(v{}_running)\n\
                     \x20   );\n\n",
                    params.n_inputs,
                    params.n_neurons,
                    params.data_width,
                    inst_idx,
                    params.stream_length,
                    inputs_wire,
                    weights_wire,
                    leak_wire,
                    gain_wire,
                    id.0,
                    id.0,
                    id.0
                ));
                inst_idx += 1;
            }
            ScOp::DclsLayer {
                id,
                spike,
                weights,
                centre,
                sigma,
                params,
            } => {
                // The packed `tap_offsets` port needs exactly one field per tap.
                if params.tap_offsets.len() != params.n_taps {
                    return Err(format!(
                        "DclsLayer (v{}) expected {} tap offsets, got {}",
                        id.0,
                        params.n_taps,
                        params.tap_offsets.len()
                    ));
                }
                let spike_wire = value_to_wire(graph, *spike);
                let weights_wire = value_to_wire(graph, *weights);
                let centre_wire = value_to_wire(graph, *centre);
                let sigma_wire = value_to_wire(graph, *sigma);
                // Offsets are packed as `ptr_width`-bit fields; a value that
                // does not fit aborts emission.
                let tap_offsets = emit_concat_u32(&params.tap_offsets, params.ptr_width)?;
                emit_target_dsp_attribute(&mut sv, &target);
                sv.push_str(&format!(
                    "    sc_dcls_layer_core #(\n\
                     \x20       .N_TAPS({}),\n\
                     \x20       .DATA_WIDTH({}),\n\
                     \x20       .FRACTION({}),\n\
                     \x20       .DELAY_DEPTH({}),\n\
                     \x20       .PTR_WIDTH({})\n\
                     \x20   ) u_dcls_{} (\n\
                     \x20       .clk(clk),\n\
                     \x20       .rst_n(rst_n),\n\
                     \x20       .in_valid(1'b1),\n\
                     \x20       .spike_in({}),\n\
                     \x20       .tap_offsets({}),\n\
                     \x20       .tap_weights_q88({}),\n\
                     \x20       .centre_q88({}),\n\
                     \x20       .sigma_q88({}),\n\
                     \x20       .out_valid(v{}_valid),\n\
                     \x20       .weighted_sum_q88(v{}),\n\
                     \x20       .accumulator_q16_16(v{}_accumulator_q16_16),\n\
                     \x20       .overflow(v{}_overflow),\n\
                     \x20       .invalid_sigma(v{}_invalid_sigma)\n\
                     \x20   );\n\n",
                    params.n_taps,
                    params.data_width,
                    params.fraction,
                    params.delay_depth,
                    params.ptr_width,
                    inst_idx,
                    spike_wire,
                    tap_offsets,
                    weights_wire,
                    centre_wire,
                    sigma_wire,
                    id.0,
                    id.0,
                    id.0,
                    id.0,
                    id.0
                ));
                inst_idx += 1;
            }
            ScOp::BitwiseXor { id, lhs, rhs } => {
                let lhs_wire = value_to_wire(graph, *lhs);
                let rhs_wire = value_to_wire(graph, *rhs);
                sv.push_str(&format!(
                    "    assign v{} = {} ^ {};\n",
                    id.0, lhs_wire, rhs_wire
                ));
            }
            ScOp::Reduce { id, input, mode } => {
                let in_wire = value_to_wire(graph, *input);
                // `mode` only selects the label in the emitted comment; the
                // generated logic is the same passthrough for both modes.
                let label = match mode {
                    ReduceMode::Sum => "reduce_sum",
                    ReduceMode::Max => "reduce_max",
                };
                sv.push_str(&format!(
                    "    // {label}: passthrough for single-element; multi-element requires adder/comparator tree\n\
                     \x20   assign v{id} = {wire};\n",
                    label = label,
                    id = id.0,
                    wire = in_wire,
                ));
            }
            // Unsupported ops: abort emission with a descriptive error.
            ScOp::GraphForward {
                id,
                features: _,
                adjacency: _,
                n_nodes,
                n_features,
            } => {
                return Err(format!(
                    "GraphForward (v{}, {} nodes × {} features) has no synthesizable RTL implementation yet",
                    id.0, n_nodes, n_features
                ));
            }
            ScOp::SoftmaxAttention { id, dim_k, .. } => {
                return Err(format!(
                    "SoftmaxAttention (v{}, dim_k={}) has no synthesizable RTL implementation yet",
                    id.0, dim_k
                ));
            }
            ScOp::KuramotoStep { id, .. } => {
                return Err(format!(
                    "KuramotoStep (v{}) has no synthesizable RTL implementation yet",
                    id.0
                ));
            }
            ScOp::Output { name, source, .. } => {
                // Drive the output port from the wire of its source value.
                let src_wire = value_to_wire(graph, *source);
                sv.push_str(&format!("    assign {} = {};\n", name, src_wire));
            }
            ScOp::Scale { id, input, factor } => {
                let in_wire = value_to_wire(graph, *input);
                let scale_int = (*factor * 256.0) as i64; // Q8.8
                // Multiply by the scaled factor, then shift right by 8 to
                // undo the x256 scaling.
                sv.push_str(&format!(
                    "    assign v{} = ({} * {}) >>> 8;\n",
                    id.0, in_wire, scale_int
                ));
            }
            ScOp::Offset { id, input, offset } => {
                let in_wire = value_to_wire(graph, *input);
                // Same x256 scaling as `Scale`; fractional remainder is truncated.
                let offset_int = (*offset * 256.0) as i64;
                sv.push_str(&format!(
                    "    assign v{} = {} + {};\n",
                    id.0, in_wire, offset_int
                ));
            }
            ScOp::DivConst { id, input, divisor } => {
                let in_wire = value_to_wire(graph, *input);
                // The divisor is emitted verbatim; it is not checked for zero here.
                sv.push_str(&format!(
                    "    assign v{} = {} / {};\n",
                    id.0, in_wire, divisor
                ));
            }
            ScOp::Popcount { id, input } => {
                let in_wire = value_to_wire(graph, *input);
                // The emitted loop indexes bits 0..63 of the input wire.
                sv.push_str(&format!(
                    "    // Combinatorial popcount for v{id}\n\
                     \x20   always_comb begin\n\
                     \x20       v{id} = 64'd0;\n\
                     \x20       for (integer _pc_i = 0; _pc_i < 64; _pc_i = _pc_i + 1)\n\
                     \x20           v{id} = v{id} + {{63'd0, {wire}[_pc_i]}};\n\
                     \x20   end\n\n",
                    id = id.0,
                    wire = in_wire,
                ));
            }
            // Remaining variants (Input and Constant, going by the exhaustive
            // declaration match above) need no instantiation.
            _ => {}
        }
    }

    sv.push_str("\nendmodule\n");
    let report = target.estimate_graph(graph);
    Ok((sv, report))
}
441
442fn type_to_width(ty: &ScType) -> usize {
443    ty.bit_width()
444}
445
446fn find_value_width(graph: &ScGraph, id: ValueId) -> usize {
447    for op in &graph.ops {
448        if op.result_id() == id {
449            return match op {
450                ScOp::Input { ty, .. } => type_to_width(ty),
451                ScOp::Constant { ty, .. } => type_to_width(ty),
452                ScOp::Encode { .. } | ScOp::BitwiseAnd { .. } | ScOp::BitwiseXor { .. } => 1,
453                ScOp::Popcount { .. } | ScOp::Reduce { .. } => 64,
454                ScOp::LifStep { params, .. } => params.data_width as usize,
455                ScOp::DenseForward { params, .. } => params.n_neurons,
456                ScOp::DclsLayer { params, .. } => params.data_width as usize,
457                ScOp::GraphForward { n_features, .. } => *n_features,
458                ScOp::SoftmaxAttention { .. }
459                | ScOp::KuramotoStep { .. }
460                | ScOp::Scale { .. }
461                | ScOp::Offset { .. }
462                | ScOp::DivConst { .. } => 64,
463                ScOp::Output { source, .. } => find_value_width(graph, *source),
464            };
465        }
466    }
467    16
468}
469
470fn value_to_wire(graph: &ScGraph, id: ValueId) -> String {
471    for op in &graph.ops {
472        if op.result_id() == id {
473            return match op {
474                ScOp::Input { name, .. } => name.clone(),
475                ScOp::Constant { id, .. } => format!("c{}", id.0),
476                ScOp::LifStep { id, .. } => format!("v{}_spike", id.0),
477                ScOp::DenseForward { id, .. } => format!("v{}_spikes", id.0),
478                _ => format!("v{}", id.0),
479            };
480        }
481    }
482    format!("v{}", id.0)
483}
484
/// Build a SystemVerilog concatenation of `width`-bit unsigned decimal literals.
///
/// The slice is emitted in reverse order, so `values[0]` ends up as the
/// right-most (least-significant) field: `[0, 1, 2]` with `width = 5` yields
/// `{5'd2, 5'd1, 5'd0}`.
///
/// # Errors
///
/// Returns an error string when:
/// - `width` is zero;
/// - `values` is empty (an empty `{}` is not a valid packed concatenation, so
///   emitting it would produce RTL that does not compile);
/// - any value does not fit in `width` bits. Values are checked in emission
///   order, so the error names the last offending element of the slice.
fn emit_concat_u32(values: &[u32], width: u32) -> Result<String, String> {
    if width == 0 {
        return Err("packed unsigned concatenation width must be positive".to_string());
    }
    if values.is_empty() {
        return Err("packed unsigned concatenation requires at least one value".to_string());
    }

    // `checked_shl` yields `None` for shifts of 32 or more, in which case
    // every `u32` fits in the field.
    let max_value = 1_u32
        .checked_shl(width)
        .map_or(u32::MAX, |limit| limit - 1);

    let fields = values
        .iter()
        .rev()
        .map(|&value| {
            if value > max_value {
                Err(format!(
                    "packed unsigned value {} exceeds {}-bit field",
                    value, width
                ))
            } else {
                Ok(format!("{}'d{}", width, value))
            }
        })
        .collect::<Result<Vec<_>, String>>()?;

    Ok(format!("{{{}}}", fields.join(", ")))
}
506
507fn emit_target_dsp_attribute(sv: &mut String, target: &SvTarget) {
508    if let Some(attribute) = target.dsp_attribute() {
509        sv.push_str("    ");
510        sv.push_str(attribute);
511        sv.push('\n');
512    }
513}
514
515fn emit_dense_fold_plan_comment(sv: &mut String, target: &SvTarget, params: &DenseParams) {
516    let Some(plan) = target.dense_fold_plan(params.n_inputs, params.n_neurons) else {
517        return;
518    };
519    if !plan.fold_required {
520        return;
521    }
522    sv.push_str(&format!(
523        "    // Dense fold plan: unfurled_macs={}, dsp_budget={}, dsp_per_cycle={}, output_parallelism={}, input_parallelism={}, compute_cycles={}\n",
524        plan.mac_count,
525        plan.dsp_budget,
526        plan.dsp_per_cycle,
527        plan.output_parallelism,
528        plan.input_parallelism,
529        plan.compute_cycles
530    ));
531}
532
533fn emit_ram_style_attribute(sv: &mut String, target: &SvTarget, bits: u64) {
534    if let Some(style) = target.ram_style_for_bits(bits) {
535        sv.push_str(&format!("    (* ram_style = \"{}\" *)\n", style));
536    }
537}
538
539fn emit_constant(sv: &mut String, id: ValueId, value: &ScConst, target: &SvTarget) {
540    match value {
541        ScConst::F64(v) => {
542            let fp = (*v * 256.0) as i64; // Q8.8
543            sv.push_str(&format!(
544                "    localparam signed [15:0] c{} = 16'sd{};\n",
545                id.0, fp
546            ));
547        }
548        ScConst::I64(v) => {
549            sv.push_str(&format!(
550                "    localparam signed [15:0] c{} = 16'sd{};\n",
551                id.0, v
552            ));
553        }
554        ScConst::U64(v) => {
555            sv.push_str(&format!("    localparam [31:0] c{} = 32'd{};\n", id.0, v));
556        }
557        ScConst::F64Vec(vec) => {
558            let width = vec.len().saturating_mul(16);
559            if width == 0 {
560                sv.push_str(&format!("    wire [0:0] c{};\n", id.0));
561                return;
562            }
563            emit_ram_style_attribute(sv, target, width as u64);
564            sv.push_str(&format!("    wire [{}:0] c{};\n", width - 1, id.0));
565            for (i, v) in vec.iter().enumerate() {
566                let fp = (*v * 256.0) as i64;
567                sv.push_str(&format!(
568                    "    assign c{}[{} +: 16] = 16'sd{};\n",
569                    id.0,
570                    i * 16,
571                    fp
572                ));
573            }
574        }
575        ScConst::I64Vec(vec) => {
576            let width = vec.len().saturating_mul(16);
577            if width == 0 {
578                sv.push_str(&format!("    wire [0:0] c{};\n", id.0));
579                return;
580            }
581            emit_ram_style_attribute(sv, target, width as u64);
582            sv.push_str(&format!("    wire [{}:0] c{};\n", width - 1, id.0));
583            for (i, v) in vec.iter().enumerate() {
584                sv.push_str(&format!(
585                    "    assign c{}[{} +: 16] = 16'sd{};\n",
586                    id.0,
587                    i * 16,
588                    v
589                ));
590            }
591        }
592    }
593}
594
#[cfg(test)]
mod tests {
    use super::*;
    use crate::ir::builder::ScGraphBuilder;
    use crate::ir::sv_target::{SkuKind, SvTarget};

    /// 16-bit fixed-point type with 8 fractional bits, shared by all tests.
    fn q88() -> ScType {
        ScType::FixedPoint { width: 16, frac: 8 }
    }

    /// A DCLS layer must instantiate `sc_dcls_layer_core`, pack its tap
    /// offsets with the first offset right-most, and wire every status port.
    #[test]
    fn dcls_layer_emits_core_with_q88_contract_ports() {
        let mut b = ScGraphBuilder::new("dcls_contract");
        let spike = b.input("spike_in", ScType::Bool);
        let tap_weights = b.constant(
            ScConst::I64Vec(vec![256, 128, -64]),
            ScType::Vec {
                element: Box::new(q88()),
                count: 3,
            },
        );
        let centre = b.constant(ScConst::I64(256), q88());
        let sigma = b.constant(ScConst::I64(512), q88());
        let params = DclsParams {
            n_taps: 3,
            data_width: 16,
            fraction: 8,
            delay_depth: 31,
            ptr_width: 5,
            tap_offsets: vec![0, 1, 2],
        };
        let weighted = b.dcls_layer(spike, tap_weights, centre, sigma, params);
        b.output("weighted_sum", weighted);

        let graph = b.build();
        let rtl = emit(&graph).expect("DCLS layer should emit synthesizable RTL");

        let expected_snippets = [
            "sc_dcls_layer_core",
            ".tap_offsets({5'd2, 5'd1, 5'd0})",
            ".accumulator_q16_16(v4_accumulator_q16_16)",
            ".overflow(v4_overflow)",
            ".invalid_sigma(v4_invalid_sigma)",
            "assign weighted_sum = v4;",
        ];
        for snippet in expected_snippets.iter() {
            assert!(rtl.contains(*snippet));
        }
    }

    /// A small dense layer on the ZU3EG target must carry the target header,
    /// DSP and RAM-style metadata, and fit the DSP budget in the report.
    #[test]
    fn ultrascale_plus_target_emits_dsp48e2_metadata_and_resource_report() {
        let mut b = ScGraphBuilder::new("ultrascale_dense");
        let layer_inputs = b.input(
            "inputs",
            ScType::Vec {
                element: Box::new(q88()),
                count: 4,
            },
        );
        let layer_weights = b.constant(
            ScConst::I64Vec(vec![128; 12]),
            ScType::Vec {
                element: Box::new(q88()),
                count: 12,
            },
        );
        let leak = b.constant(ScConst::I64(16), q88());
        let gain = b.constant(ScConst::I64(1), q88());
        let params = DenseParams {
            n_inputs: 4,
            n_neurons: 3,
            ..DenseParams::default()
        };
        let spikes = b.dense_forward(layer_inputs, layer_weights, leak, gain, params);
        b.output("spikes", spikes);

        let graph = b.build();
        let target = SvTarget::zynq_ultrascale_plus(SkuKind::Zu3eg, 250);
        let (rtl, report) = emit_systemverilog_with_target(&graph, target)
            .expect("UltraScale+ target emission should succeed");

        let expected_snippets = [
            "Target: Zynq UltraScale+ MPSoC ZU3EG",
            "sc_target_dsp = \"DSP48E2\"",
            "(* ram_style = \"distributed\" *)",
        ];
        for snippet in expected_snippets.iter() {
            assert!(rtl.contains(*snippet));
        }
        assert_eq!(report.device_part, "xczu3eg-sbva484-1-e");
        assert!(report.dsp_estimated >= 12);
        assert!(report.fits_dsp_budget);
    }

    /// A dense layer that exceeds the DSP budget must get a fold-plan comment
    /// in the RTL and a fold plan in the resource report.
    #[test]
    fn ultrascale_plus_over_budget_dense_emits_fold_plan_comment() {
        let mut b = ScGraphBuilder::new("ultrascale_fold_dense");
        let layer_inputs = b.input(
            "inputs",
            ScType::Vec {
                element: Box::new(q88()),
                count: 64,
            },
        );
        let layer_weights = b.constant(
            ScConst::I64Vec(vec![128; 64 * 32]),
            ScType::Vec {
                element: Box::new(q88()),
                count: 64 * 32,
            },
        );
        let leak = b.constant(ScConst::I64(16), q88());
        let gain = b.constant(ScConst::I64(1), q88());
        let params = DenseParams {
            n_inputs: 64,
            n_neurons: 32,
            ..DenseParams::default()
        };
        let spikes = b.dense_forward(layer_inputs, layer_weights, leak, gain, params);
        b.output("spikes", spikes);

        let graph = b.build();
        let target = SvTarget::zynq_ultrascale_plus(SkuKind::Zu3eg, 250);
        let (rtl, report) = emit_systemverilog_with_target(&graph, target)
            .expect("UltraScale+ target emission should produce fold-plan metadata");

        let expected_snippets = [
            "Dense fold plan: unfurled_macs=2048",
            "dsp_per_cycle=320",
            "compute_cycles=7",
        ];
        for snippet in expected_snippets.iter() {
            assert!(rtl.contains(*snippet));
        }
        assert!(report.dense_fold_plan.is_some());
    }
}