// To compile g++ -std=c++11 -mavx sin_avx.cpp #include #include #include double factorial(int num) { double fact = 1; for (int i = 1; i <= num; i++) { fact *= i; } return fact; } void sinx_avx(int N, int terms, float *x, float *result) { for (int i = 0; i < N; i += 8) { __m256 origx = _mm256_load_ps(&x[i]); __m256 value = origx; __m256 numer = _mm256_mul_ps(origx, _mm256_mul_ps(origx, origx)); // __m256 denom = _mm256_broadcast_ss(&three_fact); int sign = -1; for (int j = 1; j <= terms; j++) { float fact = factorial(2 * j + 1); __m256 denom = _mm256_broadcast_ss(&fact); // value += sign * numer / denom __m256 tmp = _mm256_div_ps(_mm256_mul_ps(_mm256_set1_ps(sign), numer), denom); value = _mm256_add_ps(value, tmp); numer = _mm256_mul_ps(numer, _mm256_mul_ps(origx, origx)); float broad = (2 * j + 2) * (2 * j + 3); denom = _mm256_mul_ps(denom, _mm256_broadcast_ss(&broad)); sign *= -1; } _mm256_store_ps(&result[i], value); } } int main(int argc, const char **argv) { alignas(32) std::array input; alignas(32) std::array result; auto counter = 0; for (auto &inp : input) { inp = counter++; } sinx_avx(8, 20, (float *)&input, (float *)&result); for (int i = 0; i < 8; i++) { std::cout << "sin(" << input[i] << ") = " << result[i] << "\n"; } return 0; }