Skip to main content

sc_neurocore_engine/simd/
rvv.rs

1// SPDX-License-Identifier: AGPL-3.0-or-later
2// Commercial license available
3// © Concepts 1996–2026 Miroslav Šotek. All rights reserved.
4// © Code 2020–2026 Miroslav Šotek. All rights reserved.
5// ORCID: 0009-0009-3560-0851
6// Contact: www.anulum.li | protoscience@anulum.li
7// SC-NeuroCore — Scalar fallback for RISC-V Vector (RVV) targets
8
9//! Scalar fallback for RISC-V Vector (RVV) targets.
10//! Hardware RVV intrinsics are not yet implemented; all operations
//! use portable scalar code (`crate::bitstream` helpers or plain iterator loops).
12//!
//! RVV 1.0 provides variable-length SIMD (VLEN = 128–65536 bits for the `V` extension).
14//! When Rust stabilises `core::arch::riscv64` vector intrinsics,
15//! replace the bodies below with vl-strided vector loops.
16//!
17//! Build with:
18//!   RUSTFLAGS="-C target-feature=+v" cargo build --target riscv64gc-unknown-linux-gnu
19//!
20//! Cross-compile without hardware:
21//!   cargo install cross
22//!   cross build --target riscv64gc-unknown-linux-gnu --release
23
24/// Pack u8 bit array into u64 words using RVV vector loads.
25///
26/// # Safety
27/// Caller must ensure the target CPU supports RVV 1.0.
28#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
29pub unsafe fn pack_rvv(bits: &[u8]) -> Vec<u64> {
30    // RVV: vle8_v_u8m1 + bit gathering via vslide/vmask operations.
31    // Pending stabilisation of core::arch::riscv64 vector intrinsics.
32    crate::bitstream::pack_fast(bits).data
33}
34
35/// Pack u8 bit array into u64 words (portable fallback).
36///
37/// # Safety
38/// No hardware requirements in fallback mode.
39#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
40pub unsafe fn pack_rvv(bits: &[u8]) -> Vec<u64> {
41    crate::bitstream::pack_fast(bits).data
42}
43
44/// Count set bits using RVV VCPOP instruction.
45///
46/// # Safety
47/// Caller must ensure the target CPU supports RVV 1.0.
48#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
49pub unsafe fn popcount_rvv(data: &[u64]) -> u64 {
50    // RVV: vle64_v_u64m1 + vcpop.m for per-element popcount.
51    // Pending stabilisation of core::arch::riscv64 vector intrinsics.
52    crate::bitstream::popcount_words_portable(data)
53}
54
55/// Count set bits (portable fallback).
56///
57/// # Safety
58/// No hardware requirements in fallback mode.
59#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
60pub unsafe fn popcount_rvv(data: &[u64]) -> u64 {
61    crate::bitstream::popcount_words_portable(data)
62}
63
/// Fused AND + popcount (build with the RVV `v` feature).
///
/// Returns the number of bit positions set in both `a` and `b`, summed over
/// word pairs. If the slices differ in length, only the common prefix is
/// examined. The body is currently scalar; no vector instructions are issued.
///
/// # Safety
/// Caller must ensure the target CPU supports RVV 1.0.
#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
pub unsafe fn fused_and_popcount_rvv(a: &[u64], b: &[u64]) -> u64 {
    // Planned vector path: vand.vv + vcpop.m in a single vl-strided loop,
    // pending intrinsic stabilisation.
    // `zip` stops at the shorter slice, which limits the scan to the common prefix.
    a.iter()
        .zip(b)
        .map(|(lhs, rhs)| u64::from((lhs & rhs).count_ones()))
        .sum()
}

/// Fused AND + popcount (any build without the RVV `v` feature).
///
/// Returns the number of bit positions set in both `a` and `b`, summed over
/// word pairs. If the slices differ in length, only the common prefix is
/// examined.
///
/// # Safety
/// No hardware requirements in fallback mode.
#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
pub unsafe fn fused_and_popcount_rvv(a: &[u64], b: &[u64]) -> u64 {
    // `zip` stops at the shorter slice, which limits the scan to the common prefix.
    a.iter()
        .zip(b)
        .map(|(lhs, rhs)| u64::from((lhs & rhs).count_ones()))
        .sum()
}
93
/// Fused XOR + popcount (portable scalar implementation).
///
/// Returns the number of bit positions in which `a` and `b` differ, summed
/// over word pairs. If the slices differ in length, only the common prefix is
/// examined.
///
/// # Safety
/// No hardware requirements (portable implementation).
pub unsafe fn fused_xor_popcount_rvv(a: &[u64], b: &[u64]) -> u64 {
    // `zip` stops at the shorter slice, which limits the scan to the common prefix.
    a.iter()
        .zip(b)
        .map(|(lhs, rhs)| u64::from((lhs ^ rhs).count_ones()))
        .sum()
}
106
107// --- f64 operations (portable fallback, RVV f64 intrinsics pending stabilisation) ---
108
/// Dot product of `a` and `b`, accumulated left to right.
///
/// If the slices differ in length, only the common prefix contributes.
///
/// # Safety
/// No hardware requirements (portable implementation).
pub unsafe fn dot_f64_rvv(a: &[f64], b: &[f64]) -> f64 {
    // `zip` stops at the shorter slice, so no explicit length clamp is needed.
    a.iter().zip(b).map(|(x, y)| x * y).sum()
}
115
/// Largest value in `a`, folded with `f64::max` starting from
/// `f64::NEG_INFINITY`.
///
/// An empty slice therefore yields `f64::NEG_INFINITY`.
///
/// # Safety
/// No hardware requirements (portable implementation).
pub unsafe fn max_f64_rvv(a: &[f64]) -> f64 {
    a.iter()
        .fold(f64::NEG_INFINITY, |best, &candidate| best.max(candidate))
}
121
/// Sum of all values in `a`, accumulated left to right.
///
/// # Safety
/// No hardware requirements (portable implementation).
pub unsafe fn sum_f64_rvv(a: &[f64]) -> f64 {
    a.iter().copied().sum()
}
127
/// Multiplies every element of `y` by `alpha`, in place.
///
/// # Safety
/// No hardware requirements (portable implementation).
pub unsafe fn scale_f64_rvv(alpha: f64, y: &mut [f64]) {
    y.iter_mut().for_each(|elem| *elem *= alpha);
}
135
136/// Hamming distance between two packed bitstream slices.
137///
138/// # Safety
139/// No hardware requirements (portable implementation).
140pub unsafe fn hamming_distance_rvv(a: &[u64], b: &[u64]) -> u64 {
141    fused_xor_popcount_rvv(a, b)
142}
143
144/// In-place softmax (portable fallback for RVV).
145///
146/// # Safety
147/// No hardware requirements (portable implementation).
148pub unsafe fn softmax_inplace_f64_rvv(scores: &mut [f64]) {
149    if scores.is_empty() {
150        return;
151    }
152    let max_val = max_f64_rvv(scores);
153    for s in scores.iter_mut() {
154        *s = (*s - max_val).exp();
155    }
156    let exp_sum = sum_f64_rvv(scores);
157    if exp_sum > 0.0 {
158        scale_f64_rvv(1.0 / exp_sum, scores);
159    }
160}
161
#[cfg(test)]
mod tests {
    use super::*;

    /// All-ones, all-zeros and alternating-bit words: 64 + 0 + 32 set bits.
    #[test]
    fn rvv_popcount_matches_portable() {
        let words = [u64::MAX, 0, 0xAAAA_AAAA_AAAA_AAAA];
        // SAFETY: the functions under test state "no hardware requirements"
        // outside RVV builds; RVV builds run on RVV-capable targets.
        let counted = unsafe { popcount_rvv(&words) };
        assert_eq!(counted, 96);
    }

    /// Softmax output must be non-negative and sum to one.
    #[test]
    fn rvv_softmax_sums_to_one() {
        // Twenty scores from -5.0 to 4.5 in steps of 0.5.
        let mut scores: Vec<f64> = (0..20).map(|i| f64::from(i) * 0.5 - 5.0).collect();
        // SAFETY: portable implementation, no hardware requirements.
        unsafe { softmax_inplace_f64_rvv(&mut scores) };

        let total: f64 = scores.iter().sum();
        assert!((total - 1.0).abs() < 1e-10);
        assert!(scores.iter().all(|&p| p >= 0.0));
    }

    /// Hamming distance equals the popcount of the XOR of the inputs.
    #[test]
    fn rvv_hamming_distance() {
        let lhs = [0xFFu64, 0x00];
        let rhs = [0x0Fu64, 0x00];
        let want = u64::from((0xFFu64 ^ 0x0F).count_ones());
        // SAFETY: portable implementation, no hardware requirements.
        let distance = unsafe { hamming_distance_rvv(&lhs, &rhs) };
        assert_eq!(distance, want);
    }

    /// Fused AND + popcount equals the sum of per-word popcounts of `a & b`.
    #[test]
    fn rvv_fused_and_popcount() {
        let lhs = [0xFFu64, 0xF0];
        let rhs = [0x0Fu64, 0xFF];
        let want: u64 = [0xFFu64 & 0x0F, 0xF0u64 & 0xFF]
            .iter()
            .map(|w| u64::from(w.count_ones()))
            .sum();
        // SAFETY: the functions under test state "no hardware requirements"
        // outside RVV builds; RVV builds run on RVV-capable targets.
        let counted = unsafe { fused_and_popcount_rvv(&lhs, &rhs) };
        assert_eq!(counted, want);
    }
}