(a)
/* Element-wise product of two arrays: a[i] = b[i] * c[i] for all 100 doubles. */
double a[100], b[100], c[100];
...
for (i = 0; i < 100; i++) {
/* Each iteration reads only b[i] and c[i] and writes only a[i]; a, b and c
   are distinct arrays, so no iteration uses a value written by another. */
S6: a[i] = b[i] * c[i];
}
(b)
; Packed double-precision loop: a = b * c, two elements per iteration.
; ecx holds a byte offset into _a/_b/_c (not an element index).
; NOTE(review): movapd and the mulpd memory operand need 16-byte-aligned
; addresses; assumes _a, _b and _c are aligned accordingly -- confirm.
xor ecx, ecx ; reset loop counter (byte offset = 0)
L: movapd xmm0, _b[ecx] ; load 2 DP elements from b into xmm0 register
mulpd xmm0, _c[ecx] ; xmm0 = xmm0 * 2 DP elements from c (element-wise)
movapd _a[ecx], xmm0 ; store 2 DP elements from xmm0 register into a
add ecx, 16 ; advance by 16 bytes = 2 DP elements of 8 bytes each
cmp ecx, 800 ; stop at byte offset 800 (100 DP elements * 8 bytes)
jl L ; looping logic (iterates 50 times)
(c)
/* Iteration i writes a[i+2], which iteration i+2 later reads back as a[i]:
   a value is carried between iterations that are two steps apart. */
for (i = 0; i < 8; i++) {
S7: a[i+2] = a[i] * b[i];
}
(d)
/* Running sum: each x[i] is built from the x[i-1] produced one iteration earlier. */
float x[85], y[85];
...
for (i = 1; i < 85; i++) {
/* Reads x[i-1], which the previous iteration wrote (for i == 1 it is the
   initial x[0]), so every iteration depends on the one directly before it. */
S8: x[i] = x[i-1] + y[i];
}
(e)
/* Iteration i reads x[i] and writes x[i-1]; x[i] is only overwritten by the
   next iteration (i+1), so each read sees the value x[i] had before the loop.
   NOTE(review): x and y are presumably the float[85] arrays declared in (d);
   the declarations are not shown for this fragment -- confirm. */
for (i = 1; i < 85; i++) {
S9: x[i-1] = x[i] + y[i];
}
Example 2: Auto-vectorization.
Back to Article