// Deluge Firmware 1.3.0 — intrinsics.h
// Build date: 2025.06.05
#include <algorithm>
#include <bit>
#include <cstdint>
#include <type_traits>

23#ifdef __GNUC__
24#ifdef __clang__
25#define COMPILER_CLANG 1
26#define COMPILER_GCC 0
27#else
28#define COMPILER_CLANG 0
29#define COMPILER_GCC 1
30#endif
31#else
32#ifdef __clang__
33#define COMPILER_CLANG 1
34#define COMPILER_GCC 0
35#else
36#define COMPILER_CLANG 0
37#define COMPILER_GCC 0
38#endif
39#endif
40
41#ifndef __ARM_ARCH
42#define __ARM_ARCH 0
43#endif
44
45#ifndef __ARM_ARCH_7A__
46#define __ARM_ARCH_7A__ 0
47#endif
48
49constexpr bool ARMv7a = (__ARM_ARCH == 7 && __ARM_ARCH_7A__);
50constexpr bool kCompilerClang = COMPILER_CLANG;
51
52// This multiplies two numbers in signed Q31 fixed point as if they were q32, so the return value is half what it should
53// be. Use this when several corrective shifts can be accumulated and then combined
54[[gnu::always_inline]] static constexpr int32_t multiply_32x32_rshift32(int32_t a, int32_t b) {
55 if !consteval {
56 if constexpr (ARMv7a && !kCompilerClang) {
57 int32_t out;
58 asm("smmul %0, %1, %2" : "=r"(out) : "r"(a), "r"(b));
59 return out;
60 }
61 }
62
63 return (int32_t)(((int64_t)a * b) >> 32);
64}
65
66// This multiplies two numbers in signed Q31 fixed point and rounds the result
67[[gnu::always_inline]] static constexpr int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) {
68 if !consteval {
69 if constexpr (ARMv7a && !kCompilerClang) {
70 int32_t out;
71 asm("smmulr %0, %1, %2" : "=r"(out) : "r"(a), "r"(b));
72 return out;
73 }
74 }
75
76 return (int32_t)(((int64_t)a * b + 0x80000000) >> 32);
77}
78
81[[gnu::always_inline]] static constexpr int32_t q31_mult(int32_t a, int32_t b) {
82 return multiply_32x32_rshift32(a, b) << 1;
83}
84
87[[gnu::always_inline]] static constexpr int32_t q31_mult_rounded(int32_t a, int32_t b) {
88 return multiply_32x32_rshift32_rounded(a, b) << 1;
89}
90
91// Multiplies A and B, adds to sum, and returns output
92[[gnu::always_inline]] static constexpr int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a,
93 int32_t b) {
94 if !consteval {
95 if constexpr (ARMv7a && !kCompilerClang) {
96 int32_t out;
97 asm("smmlar %0, %1, %2, %3" : "=r"(out) : "r"(a), "r"(b), "r"(sum));
98 return out;
99 }
100 }
101
102 return (int32_t)(((((int64_t)sum) << 32) + ((int64_t)a * b) + 0x80000000) >> 32);
103}
104
105// Multiplies A and B, adds to sum, and returns output
106[[gnu::always_inline]] static constexpr int32_t multiply_accumulate_32x32_rshift32(int32_t sum, int32_t a, int32_t b) {
107 if !consteval {
108 if constexpr (ARMv7a && !kCompilerClang) {
109 int32_t out;
110 asm("smmla %0, %1, %2, %3" : "=r"(out) : "r"(a), "r"(b), "r"(sum));
111 return out;
112 }
113 }
114
115 return (int32_t)(((((int64_t)sum) << 32) + ((int64_t)a * b)) >> 32);
116}
117
118// Multiplies A and B, subtracts from sum, and returns output
119[[gnu::always_inline]] static constexpr int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a,
120 int32_t b) {
121 if !consteval {
122 if constexpr (ARMv7a && !kCompilerClang) {
123 int32_t out;
124 asm("smmlsr %0, %1, %2, %3" : "=r"(out) : "r"(a), "r"(b), "r"(sum));
125 return out;
126 }
127 }
128
129 return (int32_t)((((((int64_t)sum) << 32) - ((int64_t)a * b)) + 0x80000000) >> 32);
130}
131
132// computes limit((val >> rshift), 2**bits)
133template <size_t bits>
134requires(bits < 32)
135[[gnu::always_inline]] static constexpr int32_t signed_saturate(int32_t val) {
136 if !consteval {
137 if constexpr (ARMv7a) {
138 int32_t out;
139 asm("ssat %0, %1, %2" : "=r"(out) : "I"(bits), "r"(val));
140 return out;
141 }
142 }
144 return std::clamp<int32_t>(val, -(1LL << (bits - 1)), (1LL << (bits - 1)) - 1);
145
148}
149
150template <size_t bits>
151requires(bits <= 32)
152[[gnu::always_inline]] static constexpr uint32_t unsigned_saturate(uint32_t val) {
154 if !consteval {
155 if constexpr (ARMv7a) {
156 int32_t out;
157 asm("usat %0, %1, %2" : "=r"(out) : "I"(bits), "r"(val));
158 return out;
159 }
160 }
161
162 return std::clamp<uint32_t>(val, 0u, (1uL << bits) - 1);
163}
164
165template <size_t shift, size_t bits = 32>
166requires(shift < 32 && bits <= 32)
167[[gnu::always_inline]] static constexpr int32_t shift_left_saturate(int32_t val) {
168 if !consteval {
169 if constexpr (ARMv7a) {
170 int32_t out;
171 asm("ssat %0, %1, %2, LSL %3" : "=r"(out) : "I"(bits), "r"(val), "I"(shift));
172 return out;
173 }
174 }
175
176 return std::clamp<int64_t>((int64_t)val << shift, -(1LL << (bits - 1)), (1LL << (bits - 1)) - 1);
177}
178
179template <size_t shift, size_t bits = 32>
180requires(shift < 32 && bits <= 32)
181[[gnu::always_inline]] static constexpr uint32_t shift_left_saturate(uint32_t val) {
182 if !consteval {
183 if constexpr (ARMv7a) {
184 int32_t out;
185 asm("usat %0, %1, %2, LSL %3" : "=r"(out) : "I"(bits), "r"(val), "I"(shift));
186 return out;
187 }
188 }
189 return std::clamp<uint64_t>((uint64_t)val << shift, 0u, (1uLL << bits) - 1);
190}
191
192[[gnu::always_inline]] static constexpr int32_t add_saturate(int32_t a, int32_t b) {
193 if !consteval {
194 if constexpr (ARMv7a) {
195 int32_t out;
196 asm("qadd %0, %1, %2" : "=r"(out) : "r"(a), "r"(b));
197 return out;
198 }
199 }
200 return std::clamp<int64_t>(a + b, -(1LL << 31), (1LL << 31) - 1);
201}
202
203[[gnu::always_inline]] static constexpr int32_t subtract_saturate(int32_t a, int32_t b) {
204 if !consteval {
205 if constexpr (ARMv7a) {
206 int32_t out;
207 asm("qsub %0, %1, %2" : "=r"(out) : "r"(a), "r"(b));
208 return out;
209 }
210 }
211 return std::clamp<int64_t>(a - b, -(1LL << 31), (1LL << 31) - 1);
212}
213
220static inline int32_t q31_from_float(float value) {
221 asm("vcvt.s32.f32 %0, %0, #31" : "=t"(value) : "t"(value));
222 return std::bit_cast<int32_t>(value);
223}
224
228static inline float int32_to_float(int32_t value) {
229 asm("vcvt.f32.s32 %0, %0, #31" : "=t"(value) : "t"(value));
230 return std::bit_cast<float>(value);
231}