(a)
/* Element-wise product of two 100-element double arrays, stored into a third. */
double a[100], b[100], c[100];
 ...                             
  /* Iteration i reads only b[i] and c[i] and writes only a[i], so no
     iteration consumes a value produced by a different iteration. */
  for (i = 0; i < 100; i++) {   
S6:   a[i] = b[i] * c[i];        
  }                             

(b)
    ; SSE2 packed-double loop: _a[k] = _b[k] * _c[k], two doubles per iteration.
    ; ecx is a BYTE offset into the arrays, not an element index.
    ; movapd (and mulpd with a memory operand) require the addressed memory to
    ; be 16-byte aligned, so _a, _b and _c must start on 16-byte boundaries.
    xor    ecx, ecx      ; reset loop counter
L:  movapd xmm0, _b[ecx]  ; load     2 DP elements from b into xmm0 register
    mulpd  xmm0, _c[ecx]  ; multiply xmm0 by 2 DP elements from c, product stays in xmm0
    movapd _a[ecx], xmm0  ; store    2 DP elements from xmm0 register into a 
    add    ecx, 16       ; advance byte offset by 16 = 2 doubles * 8 bytes each
    cmp    ecx, 800      ; 800 = 100 doubles * 8 bytes = one past the last element
    jl     L             ; looping logic (iterates 50 times)

(c)
      /* Iteration i writes a[i+2], and iteration i+2 later reads that same
         element as a[i]: a loop-carried flow (read-after-write) dependence
         of distance 2. Iterations i and i+1 do not depend on each other. */
      for (i = 0; i < 8; i++) {   
S7:      a[i+2] = a[i] * b[i];        
      }               

(d)
      /* Running sum over y accumulated in place in x, starting from x[0]. */
      float x[85], y[85];
      ...
      /* Iteration i reads x[i-1], which iteration i-1 has just written:
         a loop-carried flow (read-after-write) dependence of distance 1,
         so each iteration needs the result of the one before it. */
      for (i = 1; i < 85; i++) {   
S8:      x[i] = x[i-1] + y[i];        
      }                           

(e)
      /* Iteration i reads x[i] and writes x[i-1]; x[i] is only overwritten
         afterwards, by iteration i+1. This is a loop-carried anti
         (write-after-read) dependence of distance 1: every read of x[i]
         sees the value x[i] held before the loop started. */
      for (i = 1; i < 85; i++) {   
S9:      x[i-1] = x[i] + y[i];        
      }                             

Example 2: Auto-vectorization.

Back to Article