fft.cpp 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694
  1. /*
  2. ESP32 FFT
  3. =========
  4. This provides a vanilla radix-2 FFT implementation and a test example.
  5. Author
  6. ------
  7. This code was written by [Robin Scheibler](http://www.robinscheibler.org) during rainy days in October 2017.
  8. License
  9. -------
  10. Copyright (c) 2017 Robin Scheibler
  11. Permission is hereby granted, free of charge, to any person obtaining a copy
  12. of this software and associated documentation files (the "Software"), to deal
  13. in the Software without restriction, including without limitation the rights
  14. to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  15. copies of the Software, and to permit persons to whom the Software is
  16. furnished to do so, subject to the following conditions:
  17. The above copyright notice and this permission notice shall be included in all
  18. copies or substantial portions of the Software.
  19. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  20. IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  21. FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  22. AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  23. LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
  24. OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  25. SOFTWARE.
  26. */
  27. #include <stdlib.h>
  28. #include <stdio.h>
  29. #include <math.h>
  30. #include <complex.h>
  31. #include "fft.h"
  32. #define TWO_PI 6.28318530
  33. #define USE_SPLIT_RADIX 1
  34. #define LARGE_BASE_CASE 1
  35. fft_config_t *fft_init(int size, fft_type_t type, fft_direction_t direction, float *input, float *output)
  36. {
  37. /*
  38. * Prepare an FFT of correct size and types.
  39. *
  40. * If no input or output buffers are provided, they will be allocated.
  41. */
  42. int k,m;
  43. fft_config_t *config = (fft_config_t *)malloc(sizeof(fft_config_t));
  44. // Check if the size is a power of two
  45. if ((size & (size-1)) != 0) // tests if size is a power of two
  46. return NULL;
  47. // start configuration
  48. config->flags = 0;
  49. config->type = type;
  50. config->direction = direction;
  51. config->size = size;
  52. // Allocate and precompute twiddle factors
  53. config->twiddle_factors = (float *)malloc(2 * config->size * sizeof(float));
  54. float two_pi_by_n = TWO_PI / config->size;
  55. for (k = 0, m = 0 ; k < config->size ; k++, m+=2)
  56. {
  57. config->twiddle_factors[m] = cosf(two_pi_by_n * k); // real
  58. config->twiddle_factors[m+1] = sinf(two_pi_by_n * k); // imag
  59. }
  60. // Allocate input buffer
  61. if (input != NULL)
  62. config->input = input;
  63. else
  64. {
  65. if (config->type == FFT_REAL)
  66. config->input = (float *)malloc(config->size * sizeof(float));
  67. else if (config->type == FFT_COMPLEX)
  68. config->input = (float *)malloc(2 * config->size * sizeof(float));
  69. config->flags |= FFT_OWN_INPUT_MEM;
  70. }
  71. if (config->input == NULL)
  72. return NULL;
  73. // Allocate output buffer
  74. if (output != NULL)
  75. config->output = output;
  76. else
  77. {
  78. if (config->type == FFT_REAL)
  79. config->output = (float *)malloc(config->size * sizeof(float));
  80. else if (config->type == FFT_COMPLEX)
  81. config->output = (float *)malloc(2 * config->size * sizeof(float));
  82. config->flags |= FFT_OWN_OUTPUT_MEM;
  83. }
  84. if (config->output == NULL)
  85. return NULL;
  86. return config;
  87. }
  88. void fft_destroy(fft_config_t *config)
  89. {
  90. if (config->flags & FFT_OWN_INPUT_MEM)
  91. free(config->input);
  92. if (config->flags & FFT_OWN_OUTPUT_MEM)
  93. free(config->output);
  94. free(config->twiddle_factors);
  95. free(config);
  96. }
  97. void fft_execute(fft_config_t *config)
  98. {
  99. if (config->type == FFT_REAL && config->direction == FFT_FORWARD)
  100. rfft(config->input, config->output, config->twiddle_factors, config->size);
  101. else if (config->type == FFT_REAL && config->direction == FFT_BACKWARD)
  102. irfft(config->input, config->output, config->twiddle_factors, config->size);
  103. else if (config->type == FFT_COMPLEX && config->direction == FFT_FORWARD)
  104. fft(config->input, config->output, config->twiddle_factors, config->size);
  105. else if (config->type == FFT_COMPLEX && config->direction == FFT_BACKWARD)
  106. ifft(config->input, config->output, config->twiddle_factors, config->size);
  107. }
  108. void fft(float *input, float *output, float *twiddle_factors, int n)
  109. {
  110. /*
  111. * Forward fast Fourier transform
  112. * DIT, radix-2, out-of-place implementation
  113. *
  114. * Parameters
  115. * ----------
  116. * input (float *)
  117. * The input array containing the complex samples with
  118. * real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
  119. * output (float *)
  120. * The output array containing the complex samples with
  121. * real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
  122. * n (int)
  123. * The FFT size, should be a power of 2
  124. */
  125. #if USE_SPLIT_RADIX
  126. split_radix_fft(input, output, n, 2, twiddle_factors, 2);
  127. #else
  128. fft_primitive(input, output, n, 2, twiddle_factors, 2);
  129. #endif
  130. }
  131. void ifft(float *input, float *output, float *twiddle_factors, int n)
  132. {
  133. /*
  134. * Inverse fast Fourier transform
  135. * DIT, radix-2, out-of-place implementation
  136. *
  137. * Parameters
  138. * ----------
  139. * input (float *)
  140. * The input array containing the complex samples with
  141. * real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
  142. * output (float *)
  143. * The output array containing the complex samples with
  144. * real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
  145. * n (int)
  146. * The FFT size, should be a power of 2
  147. */
  148. ifft_primitive(input, output, n, 2, twiddle_factors, 2);
  149. }
  150. void rfft(float *x, float *y, float *twiddle_factors, int n)
  151. {
  152. // This code uses the two-for-the-price-of-one strategy
  153. #if USE_SPLIT_RADIX
  154. split_radix_fft(x, y, n / 2, 2, twiddle_factors, 4);
  155. #else
  156. fft_primitive(x, y, n / 2, 2, twiddle_factors, 4);
  157. #endif
  158. // Now apply post processing to recover positive
  159. // frequencies of the real FFT
  160. float t = y[0];
  161. y[0] = t + y[1]; // DC coefficient
  162. y[1] = t - y[1]; // Center coefficient
  163. // Apply post processing to quarter element
  164. // this boils down to taking complex conjugate
  165. y[n/2+1] = -y[n/2+1];
  166. // Now process all the other frequencies
  167. int k;
  168. for (k = 2 ; k < n / 2 ; k += 2)
  169. {
  170. float xer, xei, xor_t, xoi, c, s, tr, ti;
  171. c = twiddle_factors[k];
  172. s = twiddle_factors[k+1];
  173. // even half coefficient
  174. xer = 0.5 * (y[k] + y[n-k]);
  175. xei = 0.5 * (y[k+1] - y[n-k+1]);
  176. // odd half coefficient
  177. xor_t = 0.5 * (y[k+1] + y[n-k+1]);
  178. xoi = - 0.5 * (y[k] - y[n-k]);
  179. tr = c * xor_t + s * xoi;
  180. ti = -s * xor_t + c * xoi;
  181. y[k] = xer + tr;
  182. y[k+1] = xei + ti;
  183. y[n-k] = xer - tr;
  184. y[n-k+1] = -(xei - ti);
  185. }
  186. }
  187. void irfft(float *x, float *y, float *twiddle_factors, int n)
  188. {
  189. /*
  190. * Destroys content of input vector
  191. */
  192. int k;
  193. // Here we need to apply a pre-processing first
  194. float t = x[0];
  195. x[0] = 0.5 * (t + x[1]);
  196. x[1] = 0.5 * (t - x[1]);
  197. x[n/2+1] = -x[n/2+1];
  198. for (k = 2 ; k < n / 2 ; k += 2)
  199. {
  200. float xer, xei, xor_t, xoi, c, s, tr, ti;
  201. c = twiddle_factors[k];
  202. s = twiddle_factors[k+1];
  203. xer = 0.5 * (x[k] + x[n-k]);
  204. tr = 0.5 * (x[k] - x[n-k]);
  205. xei = 0.5 * (x[k+1] - x[n-k+1]);
  206. ti = 0.5 * (x[k+1] + x[n-k+1]);
  207. xor_t = c * tr - s * ti;
  208. xoi = s * tr + c * ti;
  209. x[k] = xer - xoi;
  210. x[k+1] = xor_t + xei;
  211. x[n-k] = xer + xoi;
  212. x[n-k+1] = xor_t - xei;
  213. }
  214. ifft_primitive(x, y, n / 2, 2, twiddle_factors, 4);
  215. }
  216. void fft_primitive(float *x, float *y, int n, int stride, float *twiddle_factors, int tw_stride)
  217. {
  218. /*
  219. * This code will compute the FFT of the input vector x
  220. *
  221. * The input data is assumed to be real/imag interleaved
  222. *
  223. * The size n should be a power of two
  224. *
  225. * y is an output buffer of size 2n to accomodate for complex numbers
  226. *
  227. * Forward fast Fourier transform
  228. * DIT, radix-2, out-of-place implementation
  229. *
  230. * For a complex FFT, call first stage as:
  231. * fft(x, y, n, 2, 2);
  232. *
  233. * Parameters
  234. * ----------
  235. * x (float *)
  236. * The input array containing the complex samples with
  237. * real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
  238. * y (float *)
  239. * The output array containing the complex samples with
  240. * real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
  241. * n (int)
  242. * The FFT size, should be a power of 2
  243. * stride (int)
  244. * The number of elements to skip between two successive samples
  245. * tw_stride (int)
  246. * The number of elements to skip between two successive twiddle factors
  247. */
  248. int k;
  249. float t;
  250. #if LARGE_BASE_CASE
  251. // End condition, stop at n=8 to avoid one trivial recursion
  252. if (n == 8)
  253. {
  254. fft8(x, stride, y, 2);
  255. return;
  256. }
  257. #else
  258. // End condition, stop at n=2 to avoid one trivial recursion
  259. if (n == 2)
  260. {
  261. y[0] = x[0] + x[stride];
  262. y[1] = x[1] + x[stride + 1];
  263. y[2] = x[0] - x[stride];
  264. y[3] = x[1] - x[stride + 1];
  265. return;
  266. }
  267. #endif
  268. // Recursion -- Decimation In Time algorithm
  269. fft_primitive(x, y, n / 2, 2 * stride, twiddle_factors, 2 * tw_stride); // even half
  270. fft_primitive(x + stride, y+n, n / 2, 2 * stride, twiddle_factors, 2 * tw_stride); // odd half
  271. // Stitch back together
  272. // We can a few multiplications in the first step
  273. t = y[0];
  274. y[0] = t + y[n];
  275. y[n] = t - y[n];
  276. t = y[1];
  277. y[1] = t + y[n+1];
  278. y[n+1] = t - y[n+1];
  279. for (k = 1 ; k < n / 2 ; k++)
  280. {
  281. float x1r, x1i, x2r, x2i, c, s;
  282. c = twiddle_factors[k * tw_stride];
  283. s = twiddle_factors[k * tw_stride + 1];
  284. x1r = y[2 * k];
  285. x1i = y[2 * k + 1];
  286. x2r = c * y[n + 2 * k] + s * y[n + 2 * k + 1];
  287. x2i = -s * y[n + 2 * k] + c * y[n + 2 * k + 1];
  288. y[2 * k] = x1r + x2r;
  289. y[2 * k + 1] = x1i + x2i;
  290. y[n + 2 * k] = x1r - x2r;
  291. y[n + 2 * k + 1] = x1i - x2i;
  292. }
  293. }
  294. void split_radix_fft(float *x, float *y, int n, int stride, float *twiddle_factors, int tw_stride)
  295. {
  296. /*
  297. * This code will compute the FFT of the input vector x
  298. *
  299. * The input data is assumed to be real/imag interleaved
  300. *
  301. * The size n should be a power of two
  302. *
  303. * y is an output buffer of size 2n to accomodate for complex numbers
  304. *
  305. * Forward fast Fourier transform
  306. * Split-Radix
  307. * DIT, radix-2, out-of-place implementation
  308. *
  309. * For a complex FFT, call first stage as:
  310. * fft(x, y, n, 2, 2);
  311. *
  312. * Parameters
  313. * ----------
  314. * x (float *)
  315. * The input array containing the complex samples with
  316. * real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
  317. * y (float *)
  318. * The output array containing the complex samples with
  319. * real/imaginary parts interleaved [Re(x0), Im(x0), ..., Re(x_n-1), Im(x_n-1)]
  320. * n (int)
  321. * The FFT size, should be a power of 2
  322. * stride (int)
  323. * The number of elements to skip between two successive samples
  324. * twiddle_factors (float *)
  325. * The array of twiddle factors
  326. * tw_stride (int)
  327. * The number of elements to skip between two successive twiddle factors
  328. */
  329. int k;
  330. #if LARGE_BASE_CASE
  331. // End condition, stop at n=2 to avoid one trivial recursion
  332. if (n == 8)
  333. {
  334. fft8(x, stride, y, 2);
  335. return;
  336. }
  337. else if (n == 4)
  338. {
  339. fft4(x, stride, y, 2);
  340. return;
  341. }
  342. #else
  343. // End condition, stop at n=2 to avoid one trivial recursion
  344. if (n == 2)
  345. {
  346. y[0] = x[0] + x[stride];
  347. y[1] = x[1] + x[stride + 1];
  348. y[2] = x[0] - x[stride];
  349. y[3] = x[1] - x[stride + 1];
  350. return;
  351. }
  352. else if (n == 1)
  353. {
  354. y[0] = x[0];
  355. y[1] = x[1];
  356. return;
  357. }
  358. #endif
  359. // Recursion -- Decimation In Time algorithm
  360. split_radix_fft(x, y, n / 2, 2 * stride, twiddle_factors, 2 * tw_stride);
  361. split_radix_fft(x + stride, y + n, n / 4, 4 * stride, twiddle_factors, 4 * tw_stride);
  362. split_radix_fft(x + 3 * stride, y + n + n / 2, n / 4, 4 * stride, twiddle_factors, 4 * tw_stride);
  363. // Stitch together the output
  364. float u1r, u1i, u2r, u2i, x1r, x1i, x2r, x2i;
  365. float t;
  366. // We can save a few multiplications in the first step
  367. u1r = y[0];
  368. u1i = y[1];
  369. u2r = y[n / 2];
  370. u2i = y[n / 2 + 1];
  371. x1r = y[n];
  372. x1i = y[n + 1];
  373. x2r = y[n / 2 + n];
  374. x2i = y[n / 2 + n + 1];
  375. t = x1r + x2r;
  376. y[0] = u1r + t;
  377. y[n] = u1r - t;
  378. t = x1i + x2i;
  379. y[1] = u1i + t;
  380. y[n + 1] = u1i - t;
  381. t = x2i - x1i;
  382. y[n / 2] = u2r - t;
  383. y[n + n / 2] = u2r + t;
  384. t = x1r - x2r;
  385. y[n / 2 + 1] = u2i - t;
  386. y[n + n / 2 + 1] = u2i + t;
  387. for (k = 1 ; k < n / 4 ; k++)
  388. {
  389. float u1r, u1i, u2r, u2i, x1r, x1i, x2r, x2i, c1, s1, c2, s2;
  390. c1 = twiddle_factors[k * tw_stride];
  391. s1 = twiddle_factors[k * tw_stride + 1];
  392. c2 = twiddle_factors[3 * k * tw_stride];
  393. s2 = twiddle_factors[3 * k * tw_stride + 1];
  394. u1r = y[2 * k];
  395. u1i = y[2 * k + 1];
  396. u2r = y[2 * k + n / 2];
  397. u2i = y[2 * k + n / 2 + 1];
  398. x1r = c1 * y[n + 2 * k] + s1 * y[n + 2 * k + 1];
  399. x1i = -s1 * y[n + 2 * k] + c1 * y[n + 2 * k + 1];
  400. x2r = c2 * y[n / 2 + n + 2 * k] + s2 * y[n / 2 + n + 2 * k + 1];
  401. x2i = -s2 * y[n / 2 + n + 2 * k] + c2 * y[n / 2 + n + 2 * k + 1];
  402. t = x1r + x2r;
  403. y[2 * k] = u1r + t;
  404. y[2 * k + n] = u1r - t;
  405. t = x1i + x2i;
  406. y[2 * k + 1] = u1i + t;
  407. y[2 * k + n + 1] = u1i - t;
  408. t = x2i - x1i;
  409. y[2 * k + n / 2] = u2r - t;
  410. y[2 * k + n + n / 2] = u2r + t;
  411. t = x1r - x2r;
  412. y[2 * k + n / 2 + 1] = u2i - t;
  413. y[2 * k + n + n / 2 + 1] = u2i + t;
  414. }
  415. }
  416. void ifft_primitive(float *input, float *output, int n, int stride, float *twiddle_factors, int tw_stride)
  417. {
  418. #if USE_SPLIT_RADIX
  419. split_radix_fft(input, output, n, stride, twiddle_factors, tw_stride);
  420. #else
  421. fft_primitive(input, output, n, stride, twiddle_factors, tw_stride);
  422. #endif
  423. int ks;
  424. int ns = n * stride;
  425. // reverse all coefficients from 1 to n / 2 - 1
  426. for (ks = stride ; ks < ns / 2 ; ks += stride)
  427. {
  428. float t;
  429. t = output[ks];
  430. output[ks] = output[ns-ks];
  431. output[ns-ks] = t;
  432. t = output[ks+1];
  433. output[ks+1] = output[ns-ks+1];
  434. output[ns-ks+1] = t;
  435. }
  436. // Apply normalization
  437. float norm = 1. / n;
  438. for (ks = 0 ; ks < ns ; ks += stride)
  439. {
  440. output[ks] *= norm;
  441. output[ks+1] *= norm;
  442. }
  443. }
  444. inline void fft8(float *input, int stride_in, float *output, int stride_out)
  445. {
  446. /*
  447. * Unrolled implementation of FFT8 for a little more performance
  448. */
  449. float a0r, a1r, a2r, a3r, a4r, a5r, a6r, a7r;
  450. float a0i, a1i, a2i, a3i, a4i, a5i, a6i, a7i;
  451. float b0r, b1r, b2r, b3r, b4r, b5r, b6r, b7r;
  452. float b0i, b1i, b2i, b3i, b4i, b5i, b6i, b7i;
  453. float t;
  454. float sin_pi_4 = 0.7071067812;
  455. a0r = input[0];
  456. a0i = input[1];
  457. a1r = input[stride_in];
  458. a1i = input[stride_in+1];
  459. a2r = input[2*stride_in];
  460. a2i = input[2*stride_in+1];
  461. a3r = input[3*stride_in];
  462. a3i = input[3*stride_in+1];
  463. a4r = input[4*stride_in];
  464. a4i = input[4*stride_in+1];
  465. a5r = input[5*stride_in];
  466. a5i = input[5*stride_in+1];
  467. a6r = input[6*stride_in];
  468. a6i = input[6*stride_in+1];
  469. a7r = input[7*stride_in];
  470. a7i = input[7*stride_in+1];
  471. // Stage 1
  472. b0r = a0r + a4r;
  473. b0i = a0i + a4i;
  474. b1r = a1r + a5r;
  475. b1i = a1i + a5i;
  476. b2r = a2r + a6r;
  477. b2i = a2i + a6i;
  478. b3r = a3r + a7r;
  479. b3i = a3i + a7i;
  480. b4r = a0r - a4r;
  481. b4i = a0i - a4i;
  482. b5r = a1r - a5r;
  483. b5i = a1i - a5i;
  484. // W_8^1 = 1/sqrt(2) - j / sqrt(2)
  485. t = b5r + b5i;
  486. b5i = (b5i - b5r) * sin_pi_4;
  487. b5r = t * sin_pi_4;
  488. // W_8^2 = -j
  489. b6r = a2i - a6i;
  490. b6i = a6r - a2r;
  491. b7r = a3r - a7r;
  492. b7i = a3i - a7i;
  493. // W_8^3 = -1 / sqrt(2) + j / sqrt(2)
  494. t = sin_pi_4 * (b7i - b7r);
  495. b7i = - (b7r + b7i) * sin_pi_4;
  496. b7r = t;
  497. // Stage 2
  498. a0r = b0r + b2r;
  499. a0i = b0i + b2i;
  500. a1r = b1r + b3r;
  501. a1i = b1i + b3i;
  502. a2r = b0r - b2r;
  503. a2i = b0i - b2i;
  504. // * j
  505. a3r = b1i - b3i;
  506. a3i = b3r - b1r;
  507. a4r = b4r + b6r;
  508. a4i = b4i + b6i;
  509. a5r = b5r + b7r;
  510. a5i = b5i + b7i;
  511. a6r = b4r - b6r;
  512. a6i = b4i - b6i;
  513. // * j
  514. a7r = b5i - b7i;
  515. a7i = b7r - b5r;
  516. // Stage 3
  517. // X[0]
  518. output[0] = a0r + a1r;
  519. output[1] = a0i + a1i;
  520. // X[4]
  521. output[4*stride_out] = a0r - a1r;
  522. output[4*stride_out+1] = a0i - a1i;
  523. // X[2]
  524. output[2*stride_out] = a2r + a3r;
  525. output[2*stride_out+1] = a2i + a3i;
  526. // X[6]
  527. output[6*stride_out] = a2r - a3r;
  528. output[6*stride_out+1] = a2i - a3i;
  529. // X[1]
  530. output[stride_out] = a4r + a5r;
  531. output[stride_out+1] = a4i + a5i;
  532. // X[5]
  533. output[5*stride_out] = a4r - a5r;
  534. output[5*stride_out+1] = a4i - a5i;
  535. // X[3]
  536. output[3*stride_out] = a6r + a7r;
  537. output[3*stride_out+1] = a6i + a7i;
  538. // X[7]
  539. output[7*stride_out] = a6r - a7r;
  540. output[7*stride_out+1] = a6i - a7i;
  541. }
  542. inline void fft4(float *input, int stride_in, float *output, int stride_out)
  543. {
  544. /*
  545. * Unrolled implementation of FFT4 for a little more performance
  546. */
  547. float t1, t2;
  548. t1 = input[0] + input[2*stride_in];
  549. t2 = input[stride_in] + input[3*stride_in];
  550. output[0] = t1 + t2;
  551. output[2*stride_out] = t1 - t2;
  552. t1 = input[1] + input[2*stride_in+1];
  553. t2 = input[stride_in+1] + input[3*stride_in+1];
  554. output[1] = t1 + t2;
  555. output[2*stride_out+1] = t1 - t2;
  556. t1 = input[0] - input[2*stride_in];
  557. t2 = input[stride_in+1] - input[3*stride_in+1];
  558. output[stride_out] = t1 + t2;
  559. output[3*stride_out] = t1 - t2;
  560. t1 = input[1] - input[2*stride_in+1];
  561. t2 = input[3*stride_in] - input[stride_in];
  562. output[stride_out+1] = t1 + t2;
  563. output[3*stride_out+1] = t1 - t2;
  564. }