// Deluge Firmware 1.3.0 — build date 2025-04-16
// intrinsics.h — Q31 fixed-point multiply and saturation helpers, with
// ARMv7-A inline-assembly fast paths and portable constexpr fallbacks.
#include <algorithm>
#include <bit>
#include <cstdint>
#include <type_traits>

// Compiler detection. Note clang also defines __GNUC__, so __clang__ must be
// tested first; exactly one of COMPILER_CLANG / COMPILER_GCC is 1 (both are 0
// for any other compiler).
#if defined(__clang__)
#define COMPILER_CLANG 1
#define COMPILER_GCC 0
#elif defined(__GNUC__)
#define COMPILER_CLANG 0
#define COMPILER_GCC 1
#else
#define COMPILER_CLANG 0
#define COMPILER_GCC 0
#endif

// Make sure the ARM feature-test macros are always defined (0 on non-ARM
// builds) so they can be used inside constant expressions below.
#ifndef __ARM_ARCH
#define __ARM_ARCH 0
#endif

#ifndef __ARM_ARCH_7A__
#define __ARM_ARCH_7A__ 0
#endif

// True when compiling for an ARMv7-A target; gates the inline-assembly fast
// paths in the helpers below.
constexpr bool ARMv7a = (__ARM_ARCH == 7 && __ARM_ARCH_7A__);
constexpr bool kCompilerClang = COMPILER_CLANG;
51
52// This multiplies two numbers in signed Q31 fixed point as if they were q32, so the return value is half what it should
53// be. Use this when several corrective shifts can be accumulated and then combined
54[[gnu::always_inline]] static constexpr int32_t multiply_32x32_rshift32(int32_t a, int32_t b) {
55 if !consteval {
56 if constexpr (ARMv7a && !kCompilerClang) {
57 int32_t out;
58 asm("smmul %0, %1, %2" : "=r"(out) : "r"(a), "r"(b));
59 return out;
60 }
61 }
62
63 return (int32_t)(((int64_t)a * b) >> 32);
64}
65
66// This multiplies two numbers in signed Q31 fixed point and rounds the result
67[[gnu::always_inline]] static constexpr int32_t multiply_32x32_rshift32_rounded(int32_t a, int32_t b) {
68 if !consteval {
69 if constexpr (ARMv7a && !kCompilerClang) {
70 int32_t out;
71 asm("smmulr %0, %1, %2" : "=r"(out) : "r"(a), "r"(b));
72 return out;
73 }
74 }
75
76 return (int32_t)(((int64_t)a * b + 0x80000000) >> 32);
77}
78
81[[gnu::always_inline]] static constexpr int32_t q31_mult(int32_t a, int32_t b) {
82 if !consteval {
83 if constexpr (ARMv7a) {
84 return multiply_32x32_rshift32(a, b) << 1;
85 }
86 }
87 return (int32_t)(((int64_t)a * b) >> 32) << 1;
88}
89
90// Multiplies A and B, adds to sum, and returns output
91[[gnu::always_inline]] static constexpr int32_t multiply_accumulate_32x32_rshift32_rounded(int32_t sum, int32_t a,
92 int32_t b) {
93 if !consteval {
94 if constexpr (ARMv7a && !kCompilerClang) {
95 int32_t out;
96 asm("smmlar %0, %1, %2, %3" : "=r"(out) : "r"(a), "r"(b), "r"(sum));
97 return out;
98 }
99 }
100
101 return (int32_t)(((((int64_t)sum) << 32) + ((int64_t)a * b) + 0x80000000) >> 32);
102}
103
104// Multiplies A and B, adds to sum, and returns output
105[[gnu::always_inline]] static constexpr int32_t multiply_accumulate_32x32_rshift32(int32_t sum, int32_t a, int32_t b) {
106 if !consteval {
107 if constexpr (ARMv7a && !kCompilerClang) {
108 int32_t out;
109 asm("smmla %0, %1, %2, %3" : "=r"(out) : "r"(a), "r"(b), "r"(sum));
110 return out;
111 }
112 }
113
114 return (int32_t)(((((int64_t)sum) << 32) + ((int64_t)a * b)) >> 32);
115}
116
117// Multiplies A and B, subtracts from sum, and returns output
118[[gnu::always_inline]] static constexpr int32_t multiply_subtract_32x32_rshift32_rounded(int32_t sum, int32_t a,
119 int32_t b) {
120 if !consteval {
121 if constexpr (ARMv7a && !kCompilerClang) {
122 int32_t out;
123 asm("smmlsr %0, %1, %2, %3" : "=r"(out) : "r"(a), "r"(b), "r"(sum));
124 return out;
125 }
126 }
127
128 return (int32_t)((((((int64_t)sum) << 32) - ((int64_t)a * b)) + 0x80000000) >> 32);
129}
130
131// computes limit((val >> rshift), 2**bits)
132template <size_t bits>
133requires(bits < 32)
134[[gnu::always_inline]] static constexpr int32_t signed_saturate(int32_t val) {
135 if !consteval {
136 if constexpr (ARMv7a) {
137 int32_t out;
138 asm("ssat %0, %1, %2" : "=r"(out) : "I"(bits), "r"(val));
139 return out;
140 }
141 }
143 return std::clamp<int32_t>(val, -(1LL << (bits - 1)), (1LL << (bits - 1)) - 1);
144
147}
148
149template <size_t bits>
150requires(bits <= 32)
151[[gnu::always_inline]] static constexpr uint32_t unsigned_saturate(uint32_t val) {
153 if !consteval {
154 if constexpr (ARMv7a) {
155 int32_t out;
156 asm("usat %0, %1, %2" : "=r"(out) : "I"(bits), "r"(val));
157 return out;
158 }
159 }
160
161 return std::clamp<uint32_t>(val, 0u, (1uL << bits) - 1);
162}
163
164template <size_t shift, size_t bits = 32>
165requires(shift < 32 && bits <= 32)
166[[gnu::always_inline]] static constexpr int32_t shift_left_saturate(int32_t val) {
167 if !consteval {
168 if constexpr (ARMv7a) {
169 int32_t out;
170 asm("ssat %0, %1, %2, LSL %3" : "=r"(out) : "I"(bits), "r"(val), "I"(shift));
171 return out;
172 }
173 }
174
175 return std::clamp<int64_t>((int64_t)val << shift, -(1LL << (bits - 1)), (1LL << (bits - 1)) - 1);
176}
177
178template <size_t shift, size_t bits = 32>
179requires(shift < 32 && bits <= 32)
180[[gnu::always_inline]] static constexpr uint32_t shift_left_saturate(uint32_t val) {
181 if !consteval {
182 if constexpr (ARMv7a) {
183 int32_t out;
184 asm("usat %0, %1, %2, LSL %3" : "=r"(out) : "I"(bits), "r"(val), "I"(shift));
185 return out;
186 }
187 }
188 return std::clamp<uint64_t>((uint64_t)val << shift, 0u, (1uLL << bits) - 1);
189}
190
191[[gnu::always_inline]] static constexpr int32_t add_saturate(int32_t a, int32_t b) {
192 if !consteval {
193 if constexpr (ARMv7a) {
194 int32_t out;
195 asm("qadd %0, %1, %2" : "=r"(out) : "r"(a), "r"(b));
196 return out;
197 }
198 }
199 return std::clamp<int64_t>(a + b, -(1LL << 31), (1LL << 31) - 1);
200}
201
202[[gnu::always_inline]] static constexpr int32_t subtract_saturate(int32_t a, int32_t b) {
203 if !consteval {
204 if constexpr (ARMv7a) {
205 int32_t out;
206 asm("qsub %0, %1, %2" : "=r"(out) : "r"(a), "r"(b));
207 return out;
208 }
209 }
210 return std::clamp<int64_t>(a - b, -(1LL << 31), (1LL << 31) - 1);
211}
212
215
// Converts a float to signed Q31 fixed point using the VFP fixed-point
// convert instruction (vcvt.s32.f32 with 31 fraction bits).
// NOTE(review): ARM/VFP-only — unlike the integer helpers above there is no
// ARMv7a guard and no portable fallback; confirm this is never compiled for
// non-ARM targets.
static inline int32_t q31_from_float(float value) {
    // "t" pins the operand to a VFP single-precision register; vcvt writes
    // the integer result into that same register, so `value` now holds the
    // raw Q31 bit pattern and is reinterpreted via bit_cast.
    asm("vcvt.s32.f32 %0, %0, #31" : "=t"(value) : "t"(value));
    return std::bit_cast<int32_t>(value);
}
223
// Converts a signed Q31 fixed-point value to float using the VFP fixed-point
// convert instruction (vcvt.f32.s32 with 31 fraction bits).
// NOTE(review): ARM/VFP-only — no ARMv7a guard and no portable fallback;
// confirm this is never compiled for non-ARM targets.
static inline float int32_to_float(int32_t value) {
    // "t" pins the operand to a VFP single-precision register; vcvt leaves
    // the float result in that register, so the int32_t object now holds the
    // raw float bit pattern and is reinterpreted via bit_cast.
    asm("vcvt.f32.s32 %0, %0, #31" : "=t"(value) : "t"(value));
    return std::bit_cast<float>(value);
}