#include <chrono>
#include <cstddef>
#include <fstream>
#include <iostream>
#include <vector>

// Use the SSE intrinsics when the target provides them; otherwise SimdMulSum
// falls back to a scalar loop that keeps the same four-lane accumulation.
#if defined(__SSE__) || defined(_M_X64) || (defined(_M_IX86_FP) && _M_IX86_FP >= 1)
#include <xmmintrin.h>
#define SIMD_MUL_SUM_USE_SSE 1
#endif

using namespace std;
using namespace std::chrono;

#define SSE_WIDTH 4
#define ALIGNED __attribute__((aligned(16)))

/// @brief Computes the dot product of two float arrays: sum of a[i] * b[i].
///
/// Products are accumulated in SSE_WIDTH (4) independent lanes, four elements
/// per iteration. The `len % SSE_WIDTH` leftover elements are then added to
/// lane 0, and the four lanes are summed to produce the result.
///
/// @param a   First operand; elements [0, len) are read. No alignment is
///            required (unaligned loads are used).
/// @param b   Second operand; elements [0, len) are read. May overlap `a`.
/// @param len Number of elements to process. Values <= 0 yield 0.
/// @return    The accumulated sum of products.
float SimdMulSum( float *a, float *b, int len )
{
    float sum[SSE_WIDTH] = { 0.f, 0.f, 0.f, 0.f };

    // Largest multiple of SSE_WIDTH that does not exceed len.
    const int limit = ( len / SSE_WIDTH ) * SSE_WIDTH;

#ifdef SIMD_MUL_SUM_USE_SSE
    // Intrinsics replace the former inline assembly: the compiler now knows
    // exactly which 16 bytes each load touches and picks the registers itself.
    __m128 acc = _mm_setzero_ps();
    for( int i = 0; i < limit; i += SSE_WIDTH )
    {
        const __m128 va = _mm_loadu_ps( a + i );
        const __m128 vb = _mm_loadu_ps( b + i );
        acc = _mm_add_ps( acc, _mm_mul_ps( va, vb ) );
    }
    _mm_storeu_ps( sum, acc );
#else
    // Scalar fallback with the same lane layout as the SSE path.
    for( int i = 0; i < limit; i += SSE_WIDTH )
    {
        for( int lane = 0; lane < SSE_WIDTH; lane++ )
        {
            sum[lane] += a[i + lane] * b[i + lane];
        }
    }
#endif

    // Tail: elements that did not fill a whole group of SSE_WIDTH.
    for( int i = limit; i < len; i++ )
    {
        sum[0] += a[i] * b[i];
    }

    return sum[0] + sum[1] + sum[2] + sum[3];
}

/// @brief Reads a series from "signal.txt" and times the computation of its
///        shifted product sums with SimdMulSum.
///
/// The file is expected to contain an integer element count followed by that
/// many floating-point values. The series is stored twice back to back, so a
/// window of `size` elements starting at any shift in [0, size) stays inside
/// the buffer and wraps around to the start of the series.
///
/// @return 0 on success, 1 if the file cannot be opened or its contents are
///         malformed.
int main()
{
    // Open the input file; the stream closes itself when it goes out of scope.
    ifstream in_file( "signal.txt" );
    if( !in_file )
    {
        cerr << "Unable to open file signal.txt";
        return 1;
    }

    // Number of elements in the signal series.
    int size = 0;
    if( !( in_file >> size ) || size < 0 )
    {
        // A missing or negative count would otherwise reach the allocation.
        cerr << "Invalid element count in signal.txt" << endl;
        return 1;
    }
    cout << "Number of elements: " << size << endl;

    const size_t count = static_cast<size_t>( size );
    vector<float> A( 2 * count );   // the series followed by a copy of itself
    vector<float> Sums( count );    // one product sum per shift

    // Flush so the progress text is visible before the work starts.
    cout << "Reading series..." << flush;
    for( size_t i = 0; i < count; i++ )
    {
        float value = 0.0f;
        if( !( in_file >> value ) )
        {
            cerr << "Unexpected end of data in signal.txt after " << i
                 << " values" << endl;
            return 1;
        }
        A[i] = value;
        A[i + count] = value;
    }
    cout << "Done." << endl;

    // SIMD implementation of the autocorrelation sum.
    cout << "Calculating sums using SIMD..." << flush;
    const high_resolution_clock::time_point t1 = high_resolution_clock::now();
    for( int shift = 0; shift < size; shift++ )
    {
        Sums[shift] = SimdMulSum( A.data(), A.data() + shift, size );
    }
    const high_resolution_clock::time_point t2 = high_resolution_clock::now();

    // duration<double> defaults to a period of one second.
    const duration<double> time_span = t2 - t1;
    cout << "Done in " << time_span.count() << " seconds." << endl;

    return 0;
}