diff --git a/Utilities/otbsiftfast/libsiftfast.cpp b/Utilities/otbsiftfast/libsiftfast.cpp
index a61f5b31bdf47bd67a5882bcafc74cbd1e14c8f7..129492ec9bfd488c4e4366551eef63d6dcabf27d 100644
--- a/Utilities/otbsiftfast/libsiftfast.cpp
+++ b/Utilities/otbsiftfast/libsiftfast.cpp
@@ -63,10 +63,10 @@ using namespace std;
 #define SQRT2 1.4142136f
 
 // if defined, will profile the critical functions and write results to prof.txt
-//#define DVPROFILE 
+//#define DVPROFILE
 
 // if defined will align all image rows to 16 bytes
-// usually aligning is faster (can save ~100ms), however for 1024x768 
+// usually aligning is faster (can save ~100ms), however for 1024x768
 // cache misses with the enlarged rows make it ~400-500ms slower
 //#define ALIGNED_IMAGE_ROWS
@@ -132,7 +132,7 @@ inline u64 GetMicroTime()
 {
 //OTB - 28/11/2014
 /*For mingw LARGE_INTEGER is not defined but timeval struct exists in winsock2.h */
-#if defined(_WIN32) && !defined(_MSC_VER)
+#if defined(_WIN32) && defined(_MSC_VER)
     LARGE_INTEGER count, freq;
     QueryPerformanceCounter(&count);
    QueryPerformanceFrequency(&freq);
@@ -223,7 +223,7 @@ Image CreateImage(int rows, int cols)
     im->rows = rows;
     im->cols = cols;
-    // cannot make 16 byte aligned since 1024x768 images 
+    // cannot make 16 byte aligned since 1024x768 images
 #if defined(ALIGNED_IMAGE_ROWS) && defined(__SSE__)
     im->stride = (cols+3)&~3;
 #else
@@ -265,7 +265,7 @@ Image CreateImageFromMatlabData(double* pdata, int rows, int cols)
             __m128d m2 = _mm_loadu_pd(pf+2*rows);
             __m128d m3 = _mm_loadu_pd(pf+3*rows);
 #endif
-            
+
             __m128 mrows0 = _mm_shuffle_ps(_mm_cvtpd_ps(m0),_mm_cvtpd_ps(m1),0x44);
             __m128 mrows1 = _mm_shuffle_ps(_mm_cvtpd_ps(m2),_mm_cvtpd_ps(m3),0x44);
@@ -302,7 +302,7 @@ Keypoint GetKeypoints(Image porgimage, unsigned int nbScales)
 #ifdef DVPROFILE
     DVProfClear();
 #endif
-    
+
     Image pimage = NULL;
     float fscale = 1.0f;
     Image halfimage = NULL;
@@ -321,11 +321,11 @@ Keypoint GetKeypoints(Image porgimage, unsigned int nbScales)
     }
     else
         pimage = SiftCopyImage(porgimage);
-    
+
     float fnewscale = 1.0f;
     if( !DoubleImSize )
         fnewscale = 0.5f;
-    
+
     if( InitSigma > fnewscale ) {
         GaussianBlur(pimage, pimage, sqrtf(InitSigma*InitSigma - fnewscale*fnewscale));
 //    {
@@ -367,7 +367,7 @@ Keypoint GetKeypoints(Image porgimage, unsigned int nbScales)
 #ifdef DVPROFILE
     DVProfWrite("prof.txt");
 #endif
-    
+
     return keypts;
 }
 
@@ -386,7 +386,7 @@ Image SiftDoubleSize(Image im)
             pdst[newstride+2*j+1] = 0.25f*(psrc[j] + psrc[j+1] + psrc[stride+j] + psrc[stride+j+1]);
         }
     }
-    
+
     return newim;
 }
 
@@ -433,7 +433,7 @@ Keypoint OctaveKeypoints(Image pimage, Image* phalfimage, float fscale, Keypoint
         s_imgaus[i]->rows = rows; s_imgaus[i]->cols = cols; s_imgaus[i]->stride = stride;
         GaussianBlur(s_imgaus[i], s_imgaus[i-1], fincsigma * sigma);
-        
+
         s_imdiff[i-1]->rows = rows; s_imdiff[i-1]->cols = cols; s_imdiff[i-1]->stride = stride;
         SubtractImage(s_imdiff[i-1],s_imgaus[i-1],s_imgaus[i]);
@@ -442,7 +442,7 @@ Keypoint OctaveKeypoints(Image pimage, Image* phalfimage, float fscale, Keypoint
     s_imgrad->rows = rows; s_imgrad->cols = cols; s_imgrad->stride = stride;
     s_imorient->rows = rows; s_imorient->cols = cols; s_imorient->stride = stride;
-    
+
     *phalfimage = s_imgaus[nbScales];
     return FindMaxMin(s_imdiff, s_imgaus, fscale, prevkeypts,nbScales);
 }
@@ -458,7 +458,7 @@ void SubtractImage(Image imgdst, Image img0, Image img1)
         float* pixels0 = _pixels0+j*stride;
         float* pixels1 = _pixels1+j*stride;
         float* pdst = _pdst + j*stride;
-        
+
         for(int k = 0; k < (cols&~7); k += 8) {
             _MM_STORE_ALIGNED(pdst+k,_mm_sub_ps(_MM_LOAD_ALIGNED(pixels0+k),
                                                 _MM_LOAD_ALIGNED(pixels1+k)));
             _MM_STORE_ALIGNED(pdst+k+4,_mm_sub_ps(_MM_LOAD_ALIGNED(pixels0+k+4),
                                                   _MM_LOAD_ALIGNED(pixels1+k+4)));
@@ -483,13 +483,13 @@ void GaussianBlur(Image imgdst, Image image, float fblur)
     DVSTARTPROFILE();
 
     const float GaussTruncate = 4.0f;
-    
+
     int ksize = (int)(2.0f * GaussTruncate * fblur + 1.0f);
     if( ksize < 3 )
         ksize = 3;
-    ksize += !(ksize&1); // make it odd 
-    
+    ksize += !(ksize&1); // make it odd
+
     float* kernel = NULL;
     for( map<float, float* >::iterator it = s_mapkernel.begin(); it != s_mapkernel.end(); ++it) {
         if( fabsf(fblur-it->first) < 0.001f ) {
@@ -503,14 +503,14 @@ void GaussianBlur(Image imgdst, Image image, float fblur)
         // +4 for alignment and padding issues with sse
         kernel = (float*)sift_aligned_malloc((ksize+9)*sizeof(float),16)+1;
-        
+
         int width = (ksize >= 0 ? ksize : ksize-1)>>1;
         for(int i = 0; i <= ksize; ++i) {
             float fweight = expf( - (float)(i-width)*(i-width) / (2.0f*fblur*fblur) );
             faccum += (double)fweight;
             kernel[i] = fweight;
         }
-        
+
         for(int i = 0; i < ksize; ++i) // shouldn't it be <=?
             kernel[i] /= (float)faccum;
         memset(kernel+ksize,0,sizeof(float)*8);
@@ -608,9 +608,9 @@ void ConvHorizontalFast(Image imgdst, Image image, float* kernel, int ksize)
 #ifdef ALIGNED_IMAGE_ROWS
     assert( !(image->stride&3) );
 #endif
-    
+
     DVSTARTPROFILE();
-    
+
     int width = (ksize >= 0 ? ksize : ksize-1)>>1;
     float* _pixels = image->pixels, *_pdst = imgdst->pixels;
@@ -620,7 +620,7 @@ void ConvHorizontalFast(Image imgdst, Image image, float* kernel, int ksize)
         for(LISTBUF::iterator it = s_listconvbuf.begin(); it != s_listconvbuf.end(); ++it)
             sift_aligned_free(*it);
         s_listconvbuf.clear();
-        
+
         // create at least one
         s_listconvbuf.push_back((float*)sift_aligned_malloc(convsize,16));
         s_convbufsize = convsize;
@@ -644,10 +644,10 @@ void ConvHorizontalFast(Image imgdst, Image image, float* kernel, int ksize)
 #pragma omp parallel for schedule(dynamic,16)
     for(int i = 0; i < rows; i++) {
-        
+
 #ifdef _OPENMP
         float* pconvbuf;
-        
+
         // need to get a free buffer
 #pragma omp critical
         {
@@ -664,10 +664,10 @@ void ConvHorizontalFast(Image imgdst, Image image, float* kernel, int ksize)
 #endif
         // get 16 byte aligned array
         myaccum ac;
-        
+
         float* pixels = _pixels+i*stride;
         float* pdst = _pdst + i*stride;
-        
+
         float* buf = pconvbuf+1;
         float f0 = pixels[0], f0e = pixels[cols-1];
         for(int j = 0; j < width; ++j)
@@ -675,15 +675,15 @@ void ConvHorizontalFast(Image imgdst, Image image, float* kernel, int ksize)
         memcpy(buf+width,pixels,cols*sizeof(float));
         for(int j = 0; j < width; ++j)
             buf[cols+width+j] = f0e;
-        
+
         __m128 mkerbase = _mm_and_ps(_mm_loadu_ps(kernel), _mm_load_ps((float*)s_convmask));
-        
+
         for(int j = 0; j < 2*(cols>>2); ++j) {
             int off = 2*j-(j&1);
             buf = pconvbuf+1+off;
             __m128 maccum0 = _mm_mul_ps(_mm_loadu_ps(buf), mkerbase);
             __m128 maccum1 = _mm_mul_ps(_mm_loadu_ps(buf+2), mkerbase);
-            
+
             __m128 mbufprev = _mm_loadu_ps(buf+3);
             for(int k = 3; k < ksize; k += 8) {
                 __m128 mbuf0 = mbufprev;
@@ -726,7 +726,7 @@ void ConvHorizontalFast(Image imgdst, Image image, float* kernel, int ksize)
         for(int j=(cols&~3); j < cols; ++j) {
             buf = pconvbuf+j+1;
             __m128 maccum0 = _mm_mul_ps(_mm_loadu_ps(buf), mkerbase);
-            
+
             for(int k = 3; k < ksize; k += 4) {
                 __m128 mbuf0 = _mm_loadu_ps(buf+k);
                 __m128 mker0 = _mm_load_ps(kernel+k);
@@ -755,7 +755,7 @@ void ConvHorizontalFast(Image imgdst, Image image, float* kernel, int ksize)
 void ConvVerticalFast(Image image, float* kernel, int ksize)
 {
     int rows = image->rows, stride = image->stride;
-    
+
     assert( ksize >= 3); // 3 is cutting it close
 #ifdef ALIGNED_IMAGE_ROWS
     assert( !(image->stride&3) );
@@ -768,9 +768,9 @@ void ConvVerticalFast(Image image, float* kernel, int ksize)
     if( s_listconvbuf.size() == 0 || s_convbufsize < convsize ) {
         for(LISTBUF::iterator it = s_listconvbuf.begin(); it != s_listconvbuf.end(); ++it)
             sift_aligned_free(*it);
-        
+
         s_listconvbuf.clear();
-        
+
         // create at least one
         s_listconvbuf.push_back((float*)sift_aligned_malloc(convsize,16));
         s_convbufsize = convsize;
@@ -789,12 +789,12 @@ void ConvVerticalFast(Image image, float* kernel, int ksize)
 #pragma omp parallel for
     for(int j = 0; j < stride; j += 4) {
-        
+
         float* pixels = _pixels+j;
 #ifndef ALIGNED_IMAGE_ROWS
         myaccum ac;
 #endif
-        
+
 #ifdef _OPENMP
         float* pconvbuf;
@@ -810,9 +810,9 @@ void ConvVerticalFast(Image image, float* kernel, int ksize)
             }
         }
 #endif
-        
+
         __m128 mpprev = _MM_LOAD_ALIGNED(pixels);
-        
+
         __m128 mprev = mpprev;
         __m128 mker0 = _mm_load1_ps(kernel);
         __m128 mker1 = _mm_load1_ps(kernel+1);
@@ -837,7 +837,7 @@ void ConvVerticalFast(Image image, float* kernel, int ksize)
             mprev = mnew;
             buf += 8;
         }
-        
+
         _mm_store_ps(buf,mpprev); buf += 8;
         for(int i = rows-width+2; i < rows; ++i) {
             __m128 mnew = _mm_loadu_ps(pixels+i*stride);
@@ -916,8 +916,8 @@ Keypoint FindMaxMin(Image* imdiff, Image* imgaus, float fscale, Keypoint keypts,
         GradOriImages(imgaus[index],s_imgrad,s_imorient);
 #endif
         assert( imdiff[index]->stride == stride );
-        float* _diffpixels = imdiff[index]->pixels; 
-        
+        float* _diffpixels = imdiff[index]->pixels;
+
 //        for(int i = 0; i < rows; ++i) {
 //            for(int j = 0; j < cols; ++j) {
 //                if( isnan(imgaus[index]->pixels[i*cols+j]) ) {
@@ -939,7 +939,7 @@ Keypoint FindMaxMin(Image* imdiff, Image* imgaus, float fscale, Keypoint keypts,
         Keypoint newkeypts = NULL;
         float* diffpixels = _diffpixels + rowstart*stride;
         for( int colstart = 5; colstart < cols-5; ++colstart ) {
-            
+
             float fval = diffpixels[colstart];
             if( fabsf(fval) > PeakThresh*0.8f ) {
                 if( LocalMaxMin(fval, imdiff[index],rowstart,colstart) &&
@@ -956,7 +956,7 @@ Keypoint FindMaxMin(Image* imdiff, Image* imgaus, float fscale, Keypoint keypts,
             Keypoint lastkeypt = newkeypts;
             while(lastkeypt->next)
                 lastkeypt = lastkeypt->next;;
-            
+
 #pragma omp critical
             {
                 lastkeypt->next = keypts;
@@ -972,7 +972,7 @@ Keypoint FindMaxMin(Image* imdiff, Image* imgaus, float fscale, Keypoint keypts,
 void GradOriImages(Image image, Image imgrad, Image imorient)
 {
     DVSTARTPROFILE();
-    
+
     int rows = image->rows, cols = image->cols, stride = image->stride;
     float* _pixels = image->pixels, *_pfgrad = imgrad->pixels, *_pforient = imorient->pixels;
     float fdiffc, fdiffr;
@@ -1008,18 +1008,18 @@ void GradOriImages(Image image, Image imgrad, Image imorient)
 void GradOriImagesFast(Image image, Image imgrad, Image imorient)
 {
     DVSTARTPROFILE();
-    
+
     int rows = image->rows, cols = image->cols, stride = image->stride;
     float* _pixels = image->pixels, *_pfgrad = imgrad->pixels, *_pforient = imorient->pixels;
     int endcol = ((cols-1)&~3);
-    
+
     { // first row is special 2*(_pixels[0]-_pixels[stride])
         float fdiffc, fdiffr;
         // first and last elt is 2*([1]-[0]), have to improvise for sse
         __m128 mprevj = _mm_set_ps(_pixels[2],_pixels[1],_pixels[0],2.0f*_pixels[0]-_pixels[1]);
-        
+
         for(int j = 0; j < endcol; j += 4) {
             float* pf = _pixels+j;
             __m128 mnewj = _mm_loadu_ps(pf+3);
@@ -1027,16 +1027,16 @@ void GradOriImagesFast(Image image, Image imgrad, Image imorient)
             __m128 mgradc = _mm_sub_ps(_mm_shuffle_ps(mprevj,mnewj,0x4e),mprevj);
             mgradr = _mm_sub_ps(mgradr, _MM_LOAD_ALIGNED(pf+stride));
             mgradr = _mm_add_ps(mgradr,mgradr);
-            
+
             __m128 mrad =
                 _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(mgradr,mgradr),_mm_mul_ps(mgradc,mgradc)));
             __m128 morient = atan2f4(mgradr,mgradc);
-            
+
             _MM_STORE_ALIGNED(_pfgrad+j,mrad);
             mprevj = mnewj;
             _MM_STORE_ALIGNED(_pforient+j,morient);
         }
-        
+
         // compute the rest the old way
         for(int j = endcol; j < cols; ++j) {
             if( j == 0 )
@@ -1070,10 +1070,10 @@ void GradOriImagesFast(Image image, Image imgrad, Image imorient)
             __m128 mgradr = _MM_LOAD_ALIGNED(pf-stride);
             __m128 mgradc = _mm_sub_ps(_mm_shuffle_ps(mprevj,mnewj,0x4e),mprevj);
             mgradr = _mm_sub_ps(mgradr,_MM_LOAD_ALIGNED(pf+stride));
-            
+
             __m128 mrad = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(mgradr,mgradr),_mm_mul_ps(mgradc,mgradc)));
             __m128 morient = atan2f4(mgradr,mgradc);
-            
+
             _MM_STORE_ALIGNED(pfgrad+j,mrad);
             mprevj = mnewj;
             _MM_STORE_ALIGNED(pforient+j,morient);
@@ -1111,10 +1111,10 @@ void GradOriImagesFast(Image image, Image imgrad, Image imorient)
             __m128 mgradc = _mm_sub_ps(_mm_shuffle_ps(mprevj,mnewj,0x4e),mprevj);
             mgradr = _mm_sub_ps(mgradr,_MM_LOAD_ALIGNED(pf));
             mgradr = _mm_add_ps(mgradr,mgradr);
-            
+
             __m128 mrad = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(mgradr,mgradr),_mm_mul_ps(mgradc,mgradc)));
             __m128 morient = atan2f4(mgradr,mgradc);
-            
+
             _MM_STORE_ALIGNED(pfgrad+j,mrad);
             mprevj = mnewj;
             _MM_STORE_ALIGNED(pforient+j,morient);
@@ -1197,7 +1197,7 @@ Keypoint InterpKeyPoint(Image* imdiff, int index, int rowstart, int colstart,
         return InterpKeyPoint(imdiff,index,newrow,newcol,imgrad,imorient,pMaxMinArray,fscale,keypts,steps-1,nbScales);
 
     if(fabsf(X[0]) <= 1.5f && fabsf(X[1]) <= 1.5f && fabsf(X[2]) <= 1.5f && fabsf(fquadvalue) >= PeakThresh ) {
-        
+
         char* pmaxmin = pMaxMinArray + rowstart*imgrad->cols+colstart;
         bool bgetkeypts = false;
 #pragma omp critical
@@ -1207,7 +1207,7 @@ Keypoint InterpKeyPoint(Image* imdiff, int index, int rowstart, int colstart,
                 pmaxmin[0] = 1;
             }
         }
-        
+
         if( bgetkeypts ) {
             float fSize = InitSigma * powf(2.0f,((float)index + X[0])/(float)nbScales);
             return AssignOriHist(imgrad,imorient,fscale,fSize,(float)rowstart+X[1],(float)colstart+X[2],keypts);
@@ -1277,7 +1277,7 @@ void SolveLinearSystem(float* Y, float* H, int dim)
             Y[j] -= Y[i]*f;
         }
     }
-    
+
     // extract solution
     for(int i = dim-1; i >= 0; --i) {
         for(int j = dim-1; j > i; --j)
@@ -1308,7 +1308,7 @@ Keypoint AssignOriHist(Image imgrad, Image imorient, float fscale, float fSize,
             continue;
 
         for( int colcur = colstart-windowsize; colcur <= colstart+windowsize; ++colcur ) {
-            
+
             if( colcur < 0 || colcur >= cols-2 )
                 continue;
@@ -1316,22 +1316,22 @@ Keypoint AssignOriHist(Image imgrad, Image imorient, float fscale, float fSize,
             if( fdx > 0 ) {
                 float fdrow = (float)rowcur-frowstart, fdcol = (float)colcur-fcolstart;
                 float fradius2 = fdrow*fdrow+fdcol*fdcol;
-                
+
                 if( (float)(windowsize*windowsize) + 0.5f > fradius2 ) {
                     float fweight = expf(fradius2*fexpmult);
                     int binindex = (int)(pforient[rowcur*stride+colcur]*fbinmult+fbinadd);
-                    
+
                     // there is a bug in pforient where it could be 2*PI sometimes
                     if( binindex > 36 ) {
                         //if( binindex != 54 ) fprintf(stderr,"bin %d\n",binindex);
                         binindex = 0;
                     }
-                    
+
                     assert( binindex >= 0 && binindex <= 36 );
                     if( binindex == 36 )
                         binindex = 35;
-                    
+
                     hists[binindex] += fdx*fweight;
                 }
             }
@@ -1341,7 +1341,7 @@ Keypoint AssignOriHist(Image imgrad, Image imorient, float fscale, float fSize,
     // pick an orientation with the highest votes
     for(int i = 0; i < 6; ++i)
         SmoothHistogram(hists,36);
-    
+
 #ifdef __SSE__
     float SIFT_ALIGNED16(fmaxval);
     __m128 m0 = _mm_load_ps(&hists[0]);
@@ -1368,19 +1368,19 @@ Keypoint AssignOriHist(Image imgrad, Image imorient, float fscale, float fSize,
             fmaxval =
                 hists[i];
     }
 #endif
-    
+
     fmaxval *= 0.8f;
     const float foriadd = 0.5f*2*PI/36.0f - PI, forimult = 2*PI/36.0f;
-    
+
     int previndex = 35;
     for(int index = 0; index < 36; ++index) {
         if( index != 0 )
             previndex = index-1;
-        
+
         int nextindex = 0;
         if( index != 35 )
             nextindex = index+1;
-        
+
         if( hists[index] <= hists[previndex] || hists[index] <= hists[nextindex] || hists[index] < fmaxval )
             continue;
@@ -1390,7 +1390,7 @@ Keypoint AssignOriHist(Image imgrad, Image imorient, float fscale, float fSize,
         keypts = MakeKeypoint(imgrad,imorient,fscale,fSize,frowstart,fcolstart,forient,keypts);
     }
-    
+
     return keypts;
 }
 
@@ -1439,7 +1439,7 @@ Keypoint MakeKeypoint(Image imgrad, Image imorient, float fscale, float fSize,
     pnewkeypt->col = fscale*fcolstart;
     pnewkeypt->scale = fscale*fSize;
     MakeKeypointSample(pnewkeypt,imgrad,imorient,fSize,frowstart,fcolstart);
-    
+
     return pnewkeypt;
 }
 
@@ -1462,7 +1462,7 @@ void MakeKeypointSample(Keypoint pkeypt, Image imgrad, Image imorient,
         maccum1 = _mm_add_ps(maccum1,_mm_mul_ps(m1,m1));
     }
 
-    maccum0 = _mm_add_ps(maccum0,maccum1); 
+    maccum0 = _mm_add_ps(maccum0,maccum1);
 #ifdef __SSE3__
     maccum0 = _mm_hadd_ps(maccum0,maccum0);
     maccum0 = _mm_hadd_ps(maccum0,maccum0);
@@ -1470,7 +1470,7 @@ void MakeKeypointSample(Keypoint pkeypt, Image imgrad, Image imorient,
     maccum0 = _mm_add_ps(maccum0,_mm_shuffle_ps(maccum0,maccum0,0x4e));
     maccum0 = _mm_add_ss(maccum0,_mm_shuffle_ps(maccum0,maccum0,0x55));
 #endif
-    
+
     float fthresh;
     float SIFT_ALIGNED16(flength2);
     _mm_store_ss(&flength2, maccum0);
@@ -1501,7 +1501,7 @@ void MakeKeypointSample(Keypoint pkeypt, Image imgrad, Image imorient,
 //    float flength = 512.0f/sqrtf(flength2);
 //    maccum0 = _mm_load1_ps(&flength);
 //    unsigned char* pkeydesc = pkeypt->descrip;
-//    
+//
 //    for(int i = 0; i < 128; i += 16 ) {
 //        __m128 m0 = _mm_load_ps(fdesc+i);
 //        __m128 m1 = _mm_load_ps(fdesc+i+4);
@@ -1515,7 +1515,7 @@ void MakeKeypointSample(Keypoint pkeypt, Image imgrad, Image imorient,
 //    }
 #else
     NormalizeVec(fdesc,128);
-    
+
     bool brenormalize = false;
     for(int i = 0; i < 128; ++i) {
         if( fdesc[i] > 0.2f ) {
@@ -1523,7 +1523,7 @@ void MakeKeypointSample(Keypoint pkeypt, Image imgrad, Image imorient,
             brenormalize = true;
         }
     }
-    
+
     if( brenormalize )
         NormalizeVec(fdesc,128);
 #endif
@@ -1551,7 +1551,7 @@ void KeySample(float* fdesc, Keypoint pkeypt, Image imgrad, Image imorient,
     float frealsize = 3.0f*fSize;
     float firealsize = 1.0f/(3.0f*fSize);
     int windowsize = (int)(frealsize*SQRT2*5.0f*0.5f+0.5f);
-    
+
     float fsr = sinang*firealsize, fcr = cosang*firealsize, fdrr = -fdrow*firealsize, fdcr = -fdcol*firealsize;
 
     for(int row = -windowsize; row <= windowsize; ++row) {
@@ -1561,7 +1561,7 @@ void KeySample(float* fdesc, Keypoint pkeypt, Image imgrad, Image imorient,
 //#else
         float* fnewdesc = fdesc;
 //#endif
-        
+
         float frow = (float)row;
         float fcol = -(float)windowsize;
         for(int col = -windowsize; col <= windowsize; ++col, fcol += 1) {
@@ -1569,7 +1569,7 @@ void KeySample(float* fdesc, Keypoint pkeypt, Image imgrad, Image imorient,
             float cpos = fcr*fcol - fsr*frow + fdcr;
             float rx = rpos + (2.0f - 0.5f);
             float cx = cpos + (2.0f - 0.5f);
-            
+
             if( rx > -0.9999f && rx < 3.9999f && cx > -0.9999f && cx < 3.9999f ) {
                 AddSample(fnewdesc, pkeypt, imgrad, imorient, rowstart+row, colstart+col, rpos, cpos, rx, cx);
 //#ifdef _OPENMP
@@ -1605,7 +1605,7 @@ void AddSample(float* fdesc, Keypoint pkeypt, Image imgrad, Image imorient, int
     int rows = imgrad->rows, cols = imgrad->cols, stride = imgrad->stride;
     if( r < 0 || r >= rows || c < 0 || c >= cols )
         return;
-    
+
     float fgrad =
         imgrad->pixels[r*stride+c] * expf(-0.125f*(rpos*rpos+cpos*cpos));
     float forient = imorient->pixels[r*stride+c] - pkeypt->ori;
     while( forient > 2*PI )
@@ -1645,17 +1645,17 @@ void PlaceInIndex(float* fdesc, float mag, float ori, float rx, float cx)
     ofrac = oribin-(float)neworient;
     assert( newrow >= -1 && newrow < 4 && neworient >= 0 && neworient <= 8 && rfrac >= 0 && rfrac < 1);
-    
+
     for(int i = 0; i < 2; ++i) {
         if( (unsigned int)(i+newrow) >= 4 )
             continue;
-        
+
         float frowgrad;
         if( i == 0 )
             frowgrad = mag*(1-rfrac);
         else
             frowgrad = mag*rfrac;
-        
+
         for(int j = 0; j < 2; ++j) {
             if( (unsigned int)(j+newcol) >= 4 )
                 continue;
@@ -1665,7 +1665,7 @@ void PlaceInIndex(float* fdesc, float mag, float ori, float rx, float cx)
                 fcolgrad = frowgrad*(1-cfrac);
             else
                 fcolgrad = frowgrad*cfrac;
-            
+
             float* pfdescorient = fdesc + 8*(4*(i+newrow)+j+newcol);
             for(int k = 0; k < 2; ++k) {
                 float forigrad;
@@ -1711,12 +1711,12 @@ void DestroyAllResources()
 #define DEI_CONST(a,b) static const vec_int4 a = {b,b,b,b};
 
-DEF_CONST(CF4_2414213562373095, 2.414213562373095f) 
-DEF_CONST(CF4_04142135623730950, 0.4142135623730950f) 
-DEF_CONST(CF4_805374449538e_2, 8.05374449538e-2f) 
-DEF_CONST(CF4_138776856032E_1, 1.38776856032E-1f) 
-DEF_CONST(CF4_199777106478E_1, 1.99777106478E-1f) 
-DEF_CONST(CF4_333329491539E_1, 3.33329491539E-1f) 
+DEF_CONST(CF4_2414213562373095, 2.414213562373095f)
+DEF_CONST(CF4_04142135623730950, 0.4142135623730950f)
+DEF_CONST(CF4_805374449538e_2, 8.05374449538e-2f)
+DEF_CONST(CF4_138776856032E_1, 1.38776856032E-1f)
+DEF_CONST(CF4_199777106478E_1, 1.99777106478E-1f)
+DEF_CONST(CF4_333329491539E_1, 3.33329491539E-1f)
 
 #define VEC_F2I(a,b) asm("cvttps2dq %1, %0":"=x" (a) :"x" (b))
 #define VEC_I2F(a,b) asm("cvtdq2ps %1, %0":"=x" (a) :"x" (b))
@@ -1750,14 +1750,14 @@ vec_float4 C-style expressions
 #define vec_nmsub(a,b,c) ((c)-(a)*(b))
 #define vec_splat(x,n) (typeof(x))_mm_shuffle_ps(x,x,_MM_SHUFFLE(n,n,n,n))
 
-DEF_CONST(CF4_0, 0.0f) 
-DEF_CONST(CF4_2, 2.0f) 
+DEF_CONST(CF4_0, 0.0f)
+DEF_CONST(CF4_2, 2.0f)
 DEI_CONST(CI4_SIGN, static_cast<int>(0x80000000u))
-DEF_CONST(CF4__1, -1.0f) 
-DEF_CONST(CF4_1, 1.0f) 
-DEF_CONST(CF4_SMALL, 1.0E-35f) 
-DEF_CONST(CF4_PIO2F, 1.570796326794896619f) 
-DEF_CONST(CF4_PIO4F, 0.7853981633974483096f) 
+DEF_CONST(CF4__1, -1.0f)
+DEF_CONST(CF4_1, 1.0f)
+DEF_CONST(CF4_SMALL, 1.0E-35f)
+DEF_CONST(CF4_PIO2F, 1.570796326794896619f)
+DEF_CONST(CF4_PIO4F, 0.7853981633974483096f)
 DEF_CONST(CF4_PIF, 3.14159265358979323846f)
 
 inline vec_int4 __attribute__((__always_inline__))
@@ -1774,11 +1774,11 @@ inline vec_float4 __attribute__((__always_inline__))
     /* make argument positive and save the sign */
     vec_int4 sign = _signf4( x );
     VEC_XOR(x, sign);
-    
+
     /* range reduction */
     a1 = (vec_int4)VEC_GT (x , CF4_2414213562373095 );
     a2 = (vec_int4)VEC_GT (x , CF4_04142135623730950 );
-    a3 = ~a2; 
+    a3 = ~a2;
     a2 ^= a1;
 
     z1 = CF4__1 / (x+CF4_SMALL);
@@ -1788,7 +1788,7 @@ inline vec_float4 __attribute__((__always_inline__))
     VEC_AND(x, a3);
     VEC_OR(x, z1);
     VEC_OR(x, z2);
-    
+
     y = CF4_PIO2F;
     z1 = CF4_PIO4F;
     VEC_AND(y, a1);