Bela
Real-time, ultra-low-latency audio and sensor processing system for BeagleBone Black
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Macros Groups
QuadBiquad.h
1 #pragma once
2 #include <array>
3 #include <arm_neon.h>
4 #include <new>
5 #include <stdlib.h>
6 #include "Biquad.h"
7 
15 {
16 public:
24  std::array<BiquadCoeffT<float>, 4> filters;
25 
35  {
36  if(size_t(this) & size_t(alignof(QuadBiquad) - 1))
37  {
38  fprintf(stderr, "QuadBiquad object is improperly aligned. Avoid heap allocation, use operator new or use -std=c++17");
39  std::bad_alloc e;
40  throw(e);
41  }
42  }
43 
47  void* operator new(size_t sz) {
48  auto ptr = aligned_alloc(alignof(QuadBiquad), sz);
49  if(!ptr)
50  {
51  std::bad_alloc e;
52  throw(e);
53  }
54  return ptr;
55  }
56 
/**
 * Configure the filters from @p settings.
 *
 * NOTE(review): the implementation is not visible here. Presumably it
 * applies the same settings to all four entries of `filters` and then
 * refreshes the internal coefficients, returning 0 on success — confirm
 * against the definition.
 *
 * @param settings filter settings to apply.
 * @return status code (presumably 0 on success — TODO confirm).
 */
63  int setup(const BiquadCoeff::Settings& settings);
64 
/**
 * Refresh the internal coefficient vectors used by process().
 *
 * As noted in process(), the b1 and b2 coefficients are stored with
 * inverted sign by this method so that process() can use multiply-add
 * instead of multiply-subtract.
 *
 * NOTE(review): presumably the coefficients are read from `filters`, and
 * this must be called after modifying any of them — confirm against the
 * definition.
 */
69  void update();
70 
/**
 * Process one frame of four samples in place.
 *
 * The four floats at @p data are loaded into a NEON vector, each lane is
 * run through one step of the biquad recurrence (using the z1/z2 state and
 * the a0/a1/a2/b1/b2 coefficient vectors of this object), and the four
 * results are written back to @p data. The z1 and z2 state vectors are
 * updated as a side effect.
 *
 * @param data array of 4 input samples; overwritten with the 4 output
 * samples.
 */
77  void process(float data[4])
78  {
79  // See https://developer.arm.com/architectures/instruction-sets/simd-isas/neon/intrinsics
80  // for more on the ARM NEON intrinsics used below.
81  float32x4_t in = vld1q_f32(data);
82  // We need to compute the following (see Biquad::process):
83  // out = in * a0 + z1; (A)
84  // z1 = in * a1 + z2 - b1 * out; split into two steps (B, C)
85  // z2 = in * a2 - b2 * out; split into two steps (D, E)
86  // store the output (F)
87  // The steps are interleaved below so that NEON does not stall while
88  // waiting for the result of a previous instruction.
89 
90  // A: out = in * a0 + z1;
91  float32x4_t out = vmlaq_f32(z1, in, a0);
92  // B: z1 = in * a1 + z2;
93  z1 = vmlaq_f32(z2, in, a1);
94  // D: z2 = in * a2;
95  z2 = vmulq_f32(in, a2);
96  // F: store the output back into the caller's buffer
97  vst1q_f32(data, out);
98  // C: z1 = z1 - b1 * out; ***
99  z1 = vmlaq_f32(z1, b1, out);
100  // E: z2 = z2 - b2 * out; ***
101  z2 = vmlaq_f32(z2, b2, out);
102  // ***: note that the sign of the b1 and b2 coefficients is inverted
103  // in update(), so we use vmlaq (multiply-add) here instead of vmlsq
104  // (multiply-subtract), because vmlaq seems to be slightly faster.
105  }
106 private:
107  float32x4_t z1;
108  float32x4_t z2;
109  float32x4_t a0;
110  float32x4_t a1;
111  float32x4_t a2;
112  float32x4_t b1;
113  float32x4_t b2;
114 };
115 extern template class BiquadCoeffT<float>;
int setup(const BiquadCoeff::Settings &settings)
Definition: Biquad.cpp:121
Definition: Biquad.h:34
Definition: Biquad.h:47
void process(float data[4])
Definition: QuadBiquad.h:77
void update()
Definition: Biquad.cpp:129
QuadBiquad()
Definition: QuadBiquad.h:34
Definition: QuadBiquad.h:14
std::array< BiquadCoeffT< float >, 4 > filters
Definition: QuadBiquad.h:24