1use rayon::prelude::*;
15
16#[derive(Clone, Debug)]
20pub struct Candidate {
21 pub bitstream_length: u32,
22 pub decorrelator: u8, pub mode: u8, pub luts: i64,
25 pub power: f64,
26 pub accuracy: f64,
27 pub latency: i64,
28}
29
30pub fn estimate_resources(mac_count: i64, length: u32, decorr: u8, mode: u8) -> Candidate {
32 let len = length as f64;
33 let log2_len = (len.log2()).floor() as i64;
34
35 if mode == 1 {
36 return Candidate {
38 bitstream_length: length,
39 decorrelator: decorr,
40 mode,
41 luts: mac_count * 120,
42 power: mac_count as f64 * 0.5,
43 accuracy: 1.0,
44 latency: 1,
45 };
46 }
47
48 if mode == 2 {
49 let sc_frac = 0.7_f64;
51 let det_frac = 0.3_f64;
52 let sc_macs = (mac_count as f64 * sc_frac) as i64;
53 let det_macs = (mac_count as f64 * det_frac) as i64;
54 let mut luts = sc_macs * 2 + log2_len * 5 + det_macs * 120;
55 let power = sc_macs as f64 * 0.01 * (len / 256.0) + det_macs as f64 * 0.5;
56 let mut accuracy = 0.95_f64;
57
58 match decorr {
59 2 => {
60 luts += (sc_macs as f64 * 15.0) as i64;
61 accuracy = 0.97;
62 }
63 1 => {
64 luts += 16;
65 accuracy = 0.96;
66 }
67 _ => {}
68 }
69
70 return Candidate {
71 bitstream_length: length,
72 decorrelator: decorr,
73 mode,
74 luts,
75 power,
76 accuracy: accuracy.min(1.0),
77 latency: length as i64,
78 };
79 }
80
81 let mut luts = mac_count * 2 + log2_len * 5;
83 let power = mac_count as f64 * 0.01 * (len / 256.0);
84
85 let accuracy = match decorr {
86 2 => {
87 luts += mac_count * 15;
88 1.0 - 1.0 / len
89 } 3 => {
91 luts += mac_count * 12;
92 1.0 - 1.2 / len
93 } 4 => {
95 luts += mac_count * 8;
96 1.0 - 1.5 / len
97 } 1 => {
99 luts += 16;
100 1.0 - 1.0 / len.sqrt()
101 } _ => 1.0 - 2.0 / len.sqrt(), };
104
105 Candidate {
106 bitstream_length: length,
107 decorrelator: decorr,
108 mode,
109 luts,
110 power,
111 accuracy: accuracy.clamp(0.1, 1.0),
112 latency: length as i64,
113 }
114}
115
116pub fn generate_candidates(mac_count: i64) -> Vec<Candidate> {
118 let lengths: &[u32] = &[64, 128, 256, 512, 1024, 2048];
119 let decorrs: &[u8] = &[0, 1, 2, 3, 4];
120 let mut result = Vec::with_capacity(1 + lengths.len() * decorrs.len() * 2);
121
122 result.push(estimate_resources(mac_count, 1, 0, 1));
124
125 for &mode in &[0u8, 2] {
127 for &length in lengths {
128 for &decorr in decorrs {
129 result.push(estimate_resources(mac_count, length, decorr, mode));
130 }
131 }
132 }
133 result
134}
135
136struct Xoshiro256pp([u64; 4]);
140
141impl Xoshiro256pp {
142 fn new(seed: u64) -> Self {
143 let mut s = [0u64; 4];
144 let mut z = seed.wrapping_add(0x9E3779B97F4A7C15);
145 for slot in s.iter_mut() {
146 z = (z ^ (z >> 30)).wrapping_mul(0xBF58476D1CE4E5B9);
147 z = (z ^ (z >> 27)).wrapping_mul(0x94D049BB133111EB);
148 *slot = z ^ (z >> 31);
149 }
150 Self(s)
151 }
152
153 fn next_u64(&mut self) -> u64 {
154 let result = (self.0[0].wrapping_add(self.0[3]))
155 .rotate_left(23)
156 .wrapping_add(self.0[0]);
157 let t = self.0[1] << 17;
158 self.0[2] ^= self.0[0];
159 self.0[3] ^= self.0[1];
160 self.0[1] ^= self.0[2];
161 self.0[0] ^= self.0[3];
162 self.0[2] ^= t;
163 self.0[3] = self.0[3].rotate_left(45);
164 result
165 }
166
167 fn next_usize(&mut self, bound: usize) -> usize {
168 (self.next_u64() % bound as u64) as usize
169 }
170
171 fn next_f64(&mut self) -> f64 {
172 (self.next_u64() >> 11) as f64 * (1.0 / (1u64 << 53) as f64)
173 }
174}
175
176#[derive(Clone, Debug)]
178pub struct SAResult {
179 pub best_config: Vec<usize>, pub best_score: f64,
181 pub pareto_luts: Vec<i64>,
182 pub pareto_power: Vec<f64>,
183 pub pareto_score: Vec<f64>,
184}
185
186pub fn simulated_annealing(
192 candidates: &[Vec<Candidate>],
193 weights: &[f64],
194 max_luts: i64,
195 max_power: f64,
196 max_latency: i64,
197 t_init: f64,
198 t_min: f64,
199 alpha: f64,
200 max_iter: usize,
201 seed: u64,
202) -> Option<SAResult> {
203 let n = candidates.len();
204 let mut rng = Xoshiro256pp::new(seed);
205
206 let mut current: Vec<usize> = (0..n)
208 .map(|i| {
209 candidates[i]
210 .iter()
211 .enumerate()
212 .min_by_key(|(_, c)| c.luts)
213 .map(|(idx, _)| idx)
214 .unwrap_or(0)
215 })
216 .collect();
217
218 if !is_feasible(candidates, ¤t, max_luts, max_power, max_latency) {
219 return None;
220 }
221
222 let weight_sum: f64 = weights.iter().sum();
223 let score_fn = |cfg: &[usize]| -> f64 {
224 let mut total = 0.0;
225 for i in 0..n {
226 total += candidates[i][cfg[i]].accuracy * weights[i];
227 }
228 total / weight_sum
229 };
230
231 let mut best = current.clone();
232 let mut best_score = score_fn(&best);
233 let mut current_score = best_score;
234 let mut t = t_init;
235
236 let mut pareto_luts = Vec::new();
237 let mut pareto_power = Vec::new();
238 let mut pareto_score = Vec::new();
239
240 for _ in 0..max_iter {
241 if t <= t_min {
242 break;
243 }
244
245 let layer = rng.next_usize(n);
246 let cand_idx = rng.next_usize(candidates[layer].len());
247
248 let old = current[layer];
249 current[layer] = cand_idx;
250
251 if !is_feasible(candidates, ¤t, max_luts, max_power, max_latency) {
252 current[layer] = old;
253 t *= alpha;
254 continue;
255 }
256
257 let trial_score = score_fn(¤t);
258 let delta = trial_score - current_score;
259
260 if delta > 0.0 || rng.next_f64() < (delta / t).exp() {
261 current_score = trial_score;
262
263 if current_score > best_score {
264 best = current.clone();
265 best_score = current_score;
266 }
267
268 let luts: i64 = (0..n).map(|i| candidates[i][current[i]].luts).sum();
269 let power: f64 = (0..n).map(|i| candidates[i][current[i]].power).sum();
270 pareto_luts.push(luts);
271 pareto_power.push(power);
272 pareto_score.push(current_score);
273 } else {
274 current[layer] = old;
275 }
276
277 t *= alpha;
278 }
279
280 Some(SAResult {
281 best_config: best,
282 best_score,
283 pareto_luts,
284 pareto_power,
285 pareto_score,
286 })
287}
288
289fn is_feasible(
290 candidates: &[Vec<Candidate>],
291 config: &[usize],
292 max_luts: i64,
293 max_power: f64,
294 max_latency: i64,
295) -> bool {
296 let n = candidates.len();
297 let mut total_luts: i64 = 0;
298 let mut total_power: f64 = 0.0;
299 let mut max_lat: i64 = 0;
300 for i in 0..n {
301 let c = &candidates[i][config[i]];
302 total_luts += c.luts;
303 total_power += c.power;
304 if c.latency > max_lat {
305 max_lat = c.latency;
306 }
307 }
308 total_luts <= max_luts
309 && total_power <= max_power
310 && (max_latency <= 0 || max_lat <= max_latency)
311}
312
313pub fn extract_pareto(luts: &[i64], power: &[f64], score: &[f64]) -> Vec<usize> {
317 let n = luts.len();
318 if n == 0 {
319 return vec![];
320 }
321
322 let indices: Vec<usize> = (0..n)
323 .into_par_iter()
324 .filter(|&i| {
325 for j in 0..n {
326 if j == i {
327 continue;
328 }
329 if luts[j] <= luts[i]
330 && power[j] <= power[i]
331 && score[j] >= score[i]
332 && (luts[j] < luts[i] || power[j] < power[i] || score[j] > score[i])
333 {
334 return false;
335 }
336 }
337 true
338 })
339 .collect();
340
341 indices
342}
343
344#[cfg(test)]
347mod tests {
348 use super::*;
349
350 #[test]
351 fn test_estimate_deterministic() {
352 let c = estimate_resources(100, 1, 0, 1);
353 assert_eq!(c.luts, 12000);
354 assert!((c.accuracy - 1.0).abs() < 1e-10);
355 assert_eq!(c.latency, 1);
356 }
357
358 #[test]
359 fn test_estimate_sc_sobol() {
360 let c = estimate_resources(10, 256, 2, 0);
361 assert!(c.luts > 0);
362 assert!(c.accuracy > 0.99);
363 }
364
365 #[test]
366 fn test_estimate_hybrid() {
367 let c = estimate_resources(10, 256, 2, 2);
368 assert!((c.accuracy - 0.97).abs() < 1e-10);
369 }
370
371 #[test]
372 fn test_generate_candidates_count() {
373 let cands = generate_candidates(10);
374 assert_eq!(cands.len(), 61);
376 }
377
378 #[test]
379 fn test_sa_finds_solution() {
380 let cands: Vec<Vec<Candidate>> = (0..3).map(|_| generate_candidates(10)).collect();
381 let weights = vec![1.0, 1.0, 2.0];
382 let result = simulated_annealing(
383 &cands, &weights, 100_000, 1000.0, 0, 1.0, 0.001, 0.95, 500, 42,
384 );
385 assert!(result.is_some());
386 let r = result.unwrap();
387 assert!(r.best_score > 0.5);
388 }
389
390 #[test]
391 fn test_sa_infeasible() {
392 let cands: Vec<Vec<Candidate>> = (0..3).map(|_| generate_candidates(10)).collect();
393 let weights = vec![1.0, 1.0, 1.0];
394 let result = simulated_annealing(&cands, &weights, 1, 0.001, 0, 1.0, 0.001, 0.95, 100, 42);
396 assert!(result.is_none());
397 }
398
399 #[test]
400 fn test_pareto_single_point() {
401 let indices = extract_pareto(&[100], &[1.0], &[0.9]);
402 assert_eq!(indices, vec![0]);
403 }
404
405 #[test]
406 fn test_pareto_dominated() {
407 let indices = extract_pareto(&[200, 100], &[2.0, 1.0], &[0.8, 0.9]);
409 assert_eq!(indices, vec![1]);
410 }
411
412 #[test]
413 fn test_pareto_both_nondominated() {
414 let indices = extract_pareto(&[100, 200], &[1.0, 2.0], &[0.8, 0.95]);
416 assert_eq!(indices.len(), 2);
417 }
418
419 #[test]
420 fn test_xoshiro_reproducible() {
421 let mut rng1 = Xoshiro256pp::new(42);
422 let mut rng2 = Xoshiro256pp::new(42);
423 for _ in 0..100 {
424 assert_eq!(rng1.next_u64(), rng2.next_u64());
425 }
426 }
427}