sc_neurocore_engine/simd/
rvv.rs1#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
28pub unsafe fn pack_rvv(bits: &[u8]) -> Vec<u64> {
29 crate::bitstream::pack_fast(bits).data
32}
33
34#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
39pub unsafe fn pack_rvv(bits: &[u8]) -> Vec<u64> {
40 crate::bitstream::pack_fast(bits).data
41}
42
43#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
48pub unsafe fn popcount_rvv(data: &[u64]) -> u64 {
49 crate::bitstream::popcount_words_portable(data)
52}
53
54#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
59pub unsafe fn popcount_rvv(data: &[u64]) -> u64 {
60 crate::bitstream::popcount_words_portable(data)
61}
62
/// Counts the bits set in both `a` and `b`, word by word (build for `riscv64` with `v`).
///
/// Only the overlapping prefix is examined: if the slices differ in length, the
/// extra words of the longer one are ignored. Two empty slices yield `0`.
///
/// # Safety
///
/// The body performs no unsafe operation.
/// NOTE(review): the `unsafe` qualifier presumably mirrors a planned intrinsic-based
/// implementation — confirm before relaxing it.
#[cfg(all(target_arch = "riscv64", target_feature = "v"))]
pub unsafe fn fused_and_popcount_rvv(a: &[u64], b: &[u64]) -> u64 {
    and_popcount_portable(a, b)
}

/// Counts the bits set in both `a` and `b`, word by word (build for every other target).
///
/// Only the overlapping prefix is examined: if the slices differ in length, the
/// extra words of the longer one are ignored. Two empty slices yield `0`.
///
/// # Safety
///
/// The body performs no unsafe operation.
/// NOTE(review): kept `unsafe` so both builds expose the same signature — confirm.
#[cfg(not(all(target_arch = "riscv64", target_feature = "v")))]
pub unsafe fn fused_and_popcount_rvv(a: &[u64], b: &[u64]) -> u64 {
    and_popcount_portable(a, b)
}

/// Scalar AND + popcount kernel shared by both builds of `fused_and_popcount_rvv`,
/// so the two cfg variants cannot drift apart.
fn and_popcount_portable(a: &[u64], b: &[u64]) -> u64 {
    // `zip` stops at the shorter input, which is what limits the work to the
    // overlapping prefix.
    a.iter()
        .zip(b)
        .map(|(wa, wb)| u64::from((wa & wb).count_ones()))
        .sum()
}
92
/// Counts the bit positions at which `a` and `b` differ, word by word.
///
/// Only the overlapping prefix is examined: if the slices differ in length, the
/// extra words of the longer one are ignored. Two empty slices yield `0`.
///
/// # Safety
///
/// The body performs no unsafe operation.
/// NOTE(review): the `unsafe` qualifier presumably mirrors a planned intrinsic-based
/// implementation — confirm before relaxing it.
pub unsafe fn fused_xor_popcount_rvv(a: &[u64], b: &[u64]) -> u64 {
    let mut differing = 0u64;
    // `zip` ends with the shorter slice, so unmatched trailing words are skipped.
    for (wa, wb) in a.iter().zip(b) {
        differing += u64::from((wa ^ wb).count_ones());
    }
    differing
}
105
/// Returns the dot product of `a` and `b`, accumulated left to right.
///
/// Only the overlapping prefix contributes: if the slices differ in length, the
/// extra elements of the longer one are ignored.
///
/// # Safety
///
/// The body performs no unsafe operation.
/// NOTE(review): the `unsafe` qualifier presumably mirrors a planned intrinsic-based
/// implementation — confirm before relaxing it.
pub unsafe fn dot_f64_rvv(a: &[f64], b: &[f64]) -> f64 {
    // `zip` ends with the shorter slice, so no explicit length clamp is needed.
    let products = a.iter().zip(b).map(|(lhs, rhs)| lhs * rhs);
    products.sum()
}
114
/// Returns the largest element of `a`, or `f64::NEG_INFINITY` when `a` is empty.
///
/// Elements are combined with `f64::max`, starting from `f64::NEG_INFINITY`.
///
/// # Safety
///
/// The body performs no unsafe operation.
/// NOTE(review): the `unsafe` qualifier presumably mirrors a planned intrinsic-based
/// implementation — confirm before relaxing it.
pub unsafe fn max_f64_rvv(a: &[f64]) -> f64 {
    let mut largest = f64::NEG_INFINITY;
    for &value in a {
        largest = largest.max(value);
    }
    largest
}
120
/// Returns the sum of all elements of `a`, accumulated left to right.
///
/// # Safety
///
/// The body performs no unsafe operation.
/// NOTE(review): the `unsafe` qualifier presumably mirrors a planned intrinsic-based
/// implementation — confirm before relaxing it.
pub unsafe fn sum_f64_rvv(a: &[f64]) -> f64 {
    a.iter().copied().sum()
}
126
/// Multiplies every element of `y` by `alpha`, in place.
///
/// # Safety
///
/// The body performs no unsafe operation.
/// NOTE(review): the `unsafe` qualifier presumably mirrors a planned intrinsic-based
/// implementation — confirm before relaxing it.
pub unsafe fn scale_f64_rvv(alpha: f64, y: &mut [f64]) {
    y.iter_mut().for_each(|value| *value *= alpha);
}
134
135pub unsafe fn hamming_distance_rvv(a: &[u64], b: &[u64]) -> u64 {
140 fused_xor_popcount_rvv(a, b)
141}
142
143pub unsafe fn softmax_inplace_f64_rvv(scores: &mut [f64]) {
148 if scores.is_empty() {
149 return;
150 }
151 let max_val = max_f64_rvv(scores);
152 for s in scores.iter_mut() {
153 *s = (*s - max_val).exp();
154 }
155 let exp_sum = sum_f64_rvv(scores);
156 if exp_sum > 0.0 {
157 scale_f64_rvv(1.0 / exp_sum, scores);
158 }
159}
160
#[cfg(test)]
mod tests {
    use super::*;

    /// Words with 64, 0 and 32 set bits must add up to 96.
    #[test]
    fn rvv_popcount_matches_portable() {
        let words = [u64::MAX, 0, 0xAAAA_AAAA_AAAA_AAAA];
        let counted = unsafe { popcount_rvv(&words) };
        assert_eq!(counted, 64 + 32);
    }

    /// Softmax over 20 evenly spaced scores yields non-negative values summing to one.
    #[test]
    fn rvv_softmax_sums_to_one() {
        let mut scores: Vec<f64> = (0..20).map(|step| f64::from(step) * 0.5 - 5.0).collect();
        unsafe { softmax_inplace_f64_rvv(&mut scores) };
        let sum: f64 = scores.iter().sum();
        assert!((sum - 1.0).abs() < 1e-10);
        assert!(scores.iter().all(|&s| s >= 0.0));
    }

    /// The distance equals the popcount of the XOR of the first words; the second
    /// words are both zero and contribute nothing.
    #[test]
    fn rvv_hamming_distance() {
        let left = [0xFFu64, 0x00];
        let right = [0x0Fu64, 0x00];
        let want = u64::from((0xFFu64 ^ 0x0F).count_ones());
        let distance = unsafe { hamming_distance_rvv(&left, &right) };
        assert_eq!(distance, want);
    }

    /// The result equals the sum of per-word popcounts of `a & b`.
    #[test]
    fn rvv_fused_and_popcount() {
        let left = [0xFFu64, 0xF0];
        let right = [0x0Fu64, 0xFF];
        let want: u64 = [0xFFu64 & 0x0F, 0xF0u64 & 0xFF]
            .iter()
            .map(|word| u64::from(word.count_ones()))
            .sum();
        let counted = unsafe { fused_and_popcount_rvv(&left, &right) };
        assert_eq!(counted, want);
    }
}
199}