// sc_neurocore_engine/simd/rvv.rs

/// Converts `bits` into a vector of 64-bit words (build with the RISC-V `v` feature).
///
/// The work is delegated to `crate::bitstream::pack_fast`; this wrapper returns the
/// `data` field of that helper's result, so the packing layout is whatever the helper
/// defines. The body contains no vector-specific code and is the same expression used
/// by the variant compiled when the `v` feature is absent.
///
/// # Safety
///
/// No precondition is visible in this body beyond `bits` being a valid slice. Any
/// further requirement would come from `crate::bitstream::pack_fast`, which is not
/// visible here.
/// NOTE(review): the `unsafe` qualifier presumably mirrors other SIMD backends — confirm.
#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
pub unsafe fn pack_rvv(bits: &[u8]) -> Vec<u64> {
    crate::bitstream::pack_fast(bits).data
}
34
35#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
40pub unsafe fn pack_rvv(bits: &[u8]) -> Vec<u64> {
41 crate::bitstream::pack_fast(bits).data
42}
43
/// Returns the number of set bits across all words of `data` (build with the RISC-V
/// `v` feature).
///
/// The count is produced by `crate::bitstream::popcount_words_portable`; the body
/// contains no vector-specific code and is the same expression used by the variant
/// compiled when the `v` feature is absent.
///
/// # Safety
///
/// No precondition is visible in this body beyond `data` being a valid slice. Any
/// further requirement would come from `crate::bitstream::popcount_words_portable`,
/// which is not visible here.
/// NOTE(review): the `unsafe` qualifier presumably mirrors other SIMD backends — confirm.
#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
pub unsafe fn popcount_rvv(data: &[u64]) -> u64 {
    crate::bitstream::popcount_words_portable(data)
}
54
55#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
60pub unsafe fn popcount_rvv(data: &[u64]) -> u64 {
61 crate::bitstream::popcount_words_portable(data)
62}
63
/// Counts the bits that are set in both `a` and `b`, word by word (build with the
/// RISC-V `v` feature).
///
/// Each pair of words is AND-ed and its population count added to the total. When the
/// slices differ in length only the common prefix is examined; two empty inputs give 0.
/// The body contains no vector-specific code and computes the same result as the
/// variant compiled when the `v` feature is absent.
///
/// # Safety
///
/// The body executes only safe code, so no precondition is visible beyond `a` and `b`
/// being valid slices.
/// NOTE(review): the `unsafe` qualifier presumably mirrors other SIMD backends — confirm.
#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
pub unsafe fn fused_and_popcount_rvv(a: &[u64], b: &[u64]) -> u64 {
    // `zip` stops at the shorter slice, so surplus words of the longer input are ignored.
    a.iter()
        .zip(b)
        // `count_ones` yields a `u32`; widen losslessly before summing.
        .map(|(&wa, &wb)| u64::from((wa & wb).count_ones()))
        .sum()
}
79
/// Counts the bits that are set in both `a` and `b`, word by word (build without the
/// RISC-V `v` feature).
///
/// Each pair of words is AND-ed and its population count added to the total. When the
/// slices differ in length only the common prefix is examined; two empty inputs give 0.
///
/// # Safety
///
/// The body executes only safe code, so no precondition is visible beyond `a` and `b`
/// being valid slices.
/// NOTE(review): the `unsafe` qualifier presumably mirrors other SIMD backends — confirm.
#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
pub unsafe fn fused_and_popcount_rvv(a: &[u64], b: &[u64]) -> u64 {
    // `zip` stops at the shorter slice, so surplus words of the longer input are ignored.
    a.iter()
        .zip(b)
        // `count_ones` yields a `u32`; widen losslessly before summing.
        .map(|(&wa, &wb)| u64::from((wa & wb).count_ones()))
        .sum()
}
93
/// Counts the bit positions in which `a` and `b` differ, word by word.
///
/// Each pair of words is XOR-ed and its population count added to the total. When the
/// slices differ in length only the common prefix is examined; two empty inputs give 0.
///
/// # Safety
///
/// The body executes only safe code, so no precondition is visible beyond `a` and `b`
/// being valid slices.
/// NOTE(review): the `unsafe` qualifier presumably mirrors other SIMD backends — confirm.
pub unsafe fn fused_xor_popcount_rvv(a: &[u64], b: &[u64]) -> u64 {
    // `zip` stops at the shorter slice, so surplus words of the longer input are ignored.
    a.iter()
        .zip(b)
        // `count_ones` yields a `u32`; widen losslessly before summing.
        .map(|(&wa, &wb)| u64::from((wa ^ wb).count_ones()))
        .sum()
}
106
/// Computes the dot product of `a` and `b`.
///
/// Products are accumulated sequentially from the first element onward. When the
/// slices differ in length only the common prefix contributes; empty input yields the
/// value of an empty floating-point sum (zero).
///
/// # Safety
///
/// The body executes only safe code, so no precondition is visible beyond `a` and `b`
/// being valid slices.
/// NOTE(review): the `unsafe` qualifier presumably mirrors other SIMD backends — confirm.
pub unsafe fn dot_f64_rvv(a: &[f64], b: &[f64]) -> f64 {
    // `zip` stops at the shorter slice, so surplus elements of the longer input are ignored.
    a.iter().zip(b).map(|(&x, &y)| x * y).sum()
}
115
/// Returns the largest value in `a`.
///
/// The reduction starts from `f64::NEG_INFINITY`, so an empty slice returns negative
/// infinity. Elements are combined with `f64::max`, which returns the non-NaN operand
/// when exactly one operand is NaN; NaN elements therefore never replace the running
/// maximum.
///
/// # Safety
///
/// The body executes only safe code, so no precondition is visible beyond `a` being a
/// valid slice.
/// NOTE(review): the `unsafe` qualifier presumably mirrors other SIMD backends — confirm.
pub unsafe fn max_f64_rvv(a: &[f64]) -> f64 {
    a.iter().copied().fold(f64::NEG_INFINITY, f64::max)
}
121
/// Returns the sum of all elements of `a`, accumulated sequentially from the first
/// element onward.
///
/// An empty slice yields the value of an empty floating-point sum (zero).
///
/// # Safety
///
/// The body executes only safe code, so no precondition is visible beyond `a` being a
/// valid slice.
/// NOTE(review): the `unsafe` qualifier presumably mirrors other SIMD backends — confirm.
pub unsafe fn sum_f64_rvv(a: &[f64]) -> f64 {
    a.iter().sum()
}
127
/// Multiplies every element of `y` by `alpha`, in place.
///
/// An empty slice is left untouched.
///
/// # Safety
///
/// The body executes only safe code, so no precondition is visible beyond `y` being a
/// valid, exclusively borrowed slice.
/// NOTE(review): the `unsafe` qualifier presumably mirrors other SIMD backends — confirm.
pub unsafe fn scale_f64_rvv(alpha: f64, y: &mut [f64]) {
    for v in y.iter_mut() {
        *v *= alpha;
    }
}
135
136pub unsafe fn hamming_distance_rvv(a: &[u64], b: &[u64]) -> u64 {
141 fused_xor_popcount_rvv(a, b)
142}
143
144pub unsafe fn softmax_inplace_f64_rvv(scores: &mut [f64]) {
149 if scores.is_empty() {
150 return;
151 }
152 let max_val = max_f64_rvv(scores);
153 for s in scores.iter_mut() {
154 *s = (*s - max_val).exp();
155 }
156 let exp_sum = sum_f64_rvv(scores);
157 if exp_sum > 0.0 {
158 scale_f64_rvv(1.0 / exp_sum, scores);
159 }
160}
161
// Unit tests for the wrappers in this module. Each test compares a wrapper's result
// with a value computed directly in the test.
#[cfg(test)]
mod tests {
    use super::*;

    /// All-ones word contributes 64 bits, the zero word none, the alternating word 32.
    #[test]
    fn rvv_popcount_matches_portable() {
        let data: Vec<u64> = vec![0xFFFF_FFFF_FFFF_FFFF, 0x0, 0xAAAA_AAAA_AAAA_AAAA];
        let expected = 64 + 32;
        // SAFETY: only a valid borrowed slice is passed; no further precondition is
        // documented in this file.
        let got = unsafe { popcount_rvv(&data) };
        assert_eq!(got, expected);
    }

    /// Softmax output must be non-negative and sum to one (within rounding).
    #[test]
    fn rvv_softmax_sums_to_one() {
        let mut scores: Vec<f64> = (0..20).map(|i| (i as f64 * 0.5) - 5.0).collect();
        // SAFETY: only a valid, exclusively borrowed slice is passed.
        unsafe { softmax_inplace_f64_rvv(&mut scores) };
        let sum: f64 = scores.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);
        assert!(scores.iter().all(|&s| s >= 0.0));
    }

    /// Softmax on an empty slice returns early without touching anything.
    #[test]
    fn rvv_softmax_empty_is_noop() {
        let mut scores: Vec<f64> = Vec::new();
        // SAFETY: only a valid, exclusively borrowed slice is passed.
        unsafe { softmax_inplace_f64_rvv(&mut scores) };
        assert!(scores.is_empty());
    }

    /// Hamming distance equals the popcount of the XOR of the inputs.
    #[test]
    fn rvv_hamming_distance() {
        let a = vec![0xFFu64, 0x00];
        let b = vec![0x0Fu64, 0x00];
        let expected = u64::from((0xFFu64 ^ 0x0F).count_ones());
        // SAFETY: only valid borrowed slices are passed.
        let got = unsafe { hamming_distance_rvv(&a, &b) };
        assert_eq!(got, expected);
    }

    /// Fused AND + popcount equals the sum of per-word popcounts of `a & b`.
    #[test]
    fn rvv_fused_and_popcount() {
        let a = vec![0xFFu64, 0xF0];
        let b = vec![0x0Fu64, 0xFF];
        let expected =
            u64::from((0xFFu64 & 0x0F).count_ones()) + u64::from((0xF0u64 & 0xFF).count_ones());
        // SAFETY: only valid borrowed slices are passed.
        let got = unsafe { fused_and_popcount_rvv(&a, &b) };
        assert_eq!(got, expected);
    }

    /// Words beyond the shorter input must not contribute to either fused count.
    #[test]
    fn rvv_fused_ops_ignore_surplus_words() {
        let long = [0xFFu64, u64::MAX];
        let short = [0x0Fu64];
        // SAFETY: only valid borrowed slices are passed.
        let (and_bits, xor_bits) = unsafe {
            (
                fused_and_popcount_rvv(&long, &short),
                fused_xor_popcount_rvv(&long, &short),
            )
        };
        assert_eq!(and_bits, 4);
        assert_eq!(xor_bits, 4);
    }
}