Skip to main content

sc_neurocore_engine/simd/
rvv.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later | Commercial license available
2// © Concepts 1996–2026 Miroslav Šotek. All rights reserved.
3// © Code 2020–2026 Miroslav Šotek. All rights reserved.
4// ORCID: 0009-0009-3560-0851
5// Contact: www.anulum.li | protoscience@anulum.li
6// SC-NeuroCore — Scalar fallback for RISC-V Vector (RVV) targets
7
8//! Scalar fallback for RISC-V Vector (RVV) targets.
//! Hardware RVV intrinsics are not yet implemented; every operation below runs
//! portable scalar code (helpers from `crate::bitstream` or plain iterator loops).
11//!
12//! RVV 1.0 provides variable-length SIMD (VLEN = 128–16384 bits).
13//! When Rust stabilises `core::arch::riscv64` vector intrinsics,
14//! replace the bodies below with vl-strided vector loops.
15//!
16//! Build with:
17//!   RUSTFLAGS="-C target-feature=+v" cargo build --target riscv64gc-unknown-linux-gnu
18//!
19//! Cross-compile without hardware:
20//!   cargo install cross
21//!   cross build --target riscv64gc-unknown-linux-gnu --release
22
23/// Pack u8 bit array into u64 words using RVV vector loads.
24///
25/// # Safety
26/// Caller must ensure the target CPU supports RVV 1.0.
27#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
28pub unsafe fn pack_rvv(bits: &[u8]) -> Vec<u64> {
29    // RVV: vle8_v_u8m1 + bit gathering via vslide/vmask operations.
30    // Pending stabilisation of core::arch::riscv64 vector intrinsics.
31    crate::bitstream::pack_fast(bits).data
32}
33
34/// Pack u8 bit array into u64 words (portable fallback).
35///
36/// # Safety
37/// No hardware requirements in fallback mode.
38#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
39pub unsafe fn pack_rvv(bits: &[u8]) -> Vec<u64> {
40    crate::bitstream::pack_fast(bits).data
41}
42
43/// Count set bits using RVV VCPOP instruction.
44///
45/// # Safety
46/// Caller must ensure the target CPU supports RVV 1.0.
47#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
48pub unsafe fn popcount_rvv(data: &[u64]) -> u64 {
49    // RVV: vle64_v_u64m1 + vcpop.m for per-element popcount.
50    // Pending stabilisation of core::arch::riscv64 vector intrinsics.
51    crate::bitstream::popcount_words_portable(data)
52}
53
54/// Count set bits (portable fallback).
55///
56/// # Safety
57/// No hardware requirements in fallback mode.
58#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
59pub unsafe fn popcount_rvv(data: &[u64]) -> u64 {
60    crate::bitstream::popcount_words_portable(data)
61}
62
/// Fused AND + popcount: count the bits set in both `a` and `b`.
///
/// Words are paired by index; when the slices differ in length only the
/// common prefix is examined and the trailing words of the longer slice are
/// ignored. Two empty (or one empty) inputs yield `0`.
///
/// No RVV intrinsics are used yet, so a single scalar definition serves every
/// target instead of two identical `cfg`-gated copies.
///
/// # Safety
/// The body consists of safe code only and currently imposes no hardware
/// requirements. The `unsafe` qualifier is retained so existing call sites
/// stay valid; a future RVV implementation will require the CPU to
/// support RVV 1.0.
pub unsafe fn fused_and_popcount_rvv(a: &[u64], b: &[u64]) -> u64 {
    // Planned RVV path (not implemented): `vand.vv` plus a population count in
    // one vl-strided loop, gated on
    // `cfg(all(target_arch = "riscv64", target_feature = "v"))`.
    //
    // `zip` stops at the shorter slice, so no explicit length clamp is needed.
    a.iter()
        .zip(b)
        .map(|(&wa, &wb)| u64::from((wa & wb).count_ones()))
        .sum()
}
92
/// Fused XOR + popcount: count the bit positions where `a` and `b` differ.
///
/// Words are paired by index and only the common prefix of the two slices is
/// examined. This is scalar code on every target; no RVV intrinsics are used.
///
/// # Safety
/// No hardware requirements (portable implementation).
pub unsafe fn fused_xor_popcount_rvv(a: &[u64], b: &[u64]) -> u64 {
    let mut differing = 0u64;
    // `zip` ends with the shorter slice, which matches clamping both inputs
    // to the smaller length.
    for (wa, wb) in a.iter().zip(b) {
        differing += u64::from((wa ^ wb).count_ones());
    }
    differing
}
105
106// --- f64 operations (portable fallback, RVV f64 intrinsics pending stabilisation) ---
107
/// Dot product of two `f64` slices.
///
/// Elements are paired by index; if the lengths differ, only the common
/// prefix contributes. Products are accumulated in index order.
///
/// # Safety
/// No hardware requirements (portable implementation).
pub unsafe fn dot_f64_rvv(a: &[f64], b: &[f64]) -> f64 {
    // `zip` stops at the shorter input, so no explicit length clamp is needed.
    a.iter().zip(b.iter()).map(|(x, y)| x * y).sum::<f64>()
}
114
/// Largest value in an `f64` slice.
///
/// Returns `f64::NEG_INFINITY` for an empty slice. The reduction uses
/// `f64::max`, which returns the non-NaN operand when exactly one operand is
/// NaN, so NaN elements do not displace a numeric maximum.
///
/// # Safety
/// No hardware requirements (portable implementation).
pub unsafe fn max_f64_rvv(a: &[f64]) -> f64 {
    let mut largest = f64::NEG_INFINITY;
    for &value in a {
        largest = largest.max(value);
    }
    largest
}
120
/// Sum of all elements of an `f64` slice, accumulated in index order.
///
/// # Safety
/// No hardware requirements (portable implementation).
pub unsafe fn sum_f64_rvv(a: &[f64]) -> f64 {
    a.iter().copied().sum::<f64>()
}
126
/// Multiply every element of `y` by `alpha`, in place.
///
/// # Safety
/// No hardware requirements (portable implementation).
pub unsafe fn scale_f64_rvv(alpha: f64, y: &mut [f64]) {
    y.iter_mut().for_each(|element| *element *= alpha);
}
134
135/// Hamming distance between two packed bitstream slices.
136///
137/// # Safety
138/// No hardware requirements (portable implementation).
139pub unsafe fn hamming_distance_rvv(a: &[u64], b: &[u64]) -> u64 {
140    fused_xor_popcount_rvv(a, b)
141}
142
143/// In-place softmax (portable fallback for RVV).
144///
145/// # Safety
146/// No hardware requirements (portable implementation).
147pub unsafe fn softmax_inplace_f64_rvv(scores: &mut [f64]) {
148    if scores.is_empty() {
149        return;
150    }
151    let max_val = max_f64_rvv(scores);
152    for s in scores.iter_mut() {
153        *s = (*s - max_val).exp();
154    }
155    let exp_sum = sum_f64_rvv(scores);
156    if exp_sum > 0.0 {
157        scale_f64_rvv(1.0 / exp_sum, scores);
158    }
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// An all-ones word, a zero word and an alternating-bit word hold
    /// 64 + 0 + 32 set bits in total.
    #[test]
    fn rvv_popcount_matches_portable() {
        let words = [u64::MAX, 0, 0xAAAA_AAAA_AAAA_AAAA];
        // SAFETY: `popcount_rvv` has no hardware requirements in this build.
        let counted = unsafe { popcount_rvv(&words) };
        assert_eq!(counted, 96);
    }

    /// Softmax output must be a probability vector: non-negative, summing to 1.
    #[test]
    fn rvv_softmax_sums_to_one() {
        // Twenty evenly spaced scores from -5.0 to 4.5.
        let mut scores: Vec<f64> = (0..20_i32).map(|i| f64::from(i) * 0.5 - 5.0).collect();
        // SAFETY: portable implementation, no preconditions.
        unsafe { softmax_inplace_f64_rvv(&mut scores) };

        let total: f64 = scores.iter().sum();
        assert!((total - 1.0).abs() < 1e-10);
        assert!(scores.iter().all(|&p| p >= 0.0));
    }

    /// Hamming distance equals the popcount of the XOR of the inputs.
    #[test]
    fn rvv_hamming_distance() {
        let lhs = [0xFFu64, 0x00];
        let rhs = [0x0Fu64, 0x00];
        let want = u64::from((0xFFu64 ^ 0x0F).count_ones());
        // SAFETY: portable implementation, no preconditions.
        let got = unsafe { hamming_distance_rvv(&lhs, &rhs) };
        assert_eq!(got, want);
    }

    /// Fused AND + popcount equals the sum of per-word AND popcounts.
    #[test]
    fn rvv_fused_and_popcount() {
        let lhs = [0xFFu64, 0xF0];
        let rhs = [0x0Fu64, 0xFF];
        let want: u64 = lhs
            .iter()
            .zip(rhs.iter())
            .map(|(x, y)| u64::from((x & y).count_ones()))
            .sum();
        // SAFETY: `fused_and_popcount_rvv` has no hardware requirements in this build.
        let got = unsafe { fused_and_popcount_rvv(&lhs, &rhs) };
        assert_eq!(got, want);
    }
}