From 6a8c2a84c3683311a9fe5176fa392fb129176f84 Mon Sep 17 00:00:00 2001
From: Julien Michel <julien.michel@cnes.fr>
Date: Wed, 16 Nov 2016 09:55:34 +0100
Subject: [PATCH] PERF: Optimize BCOInterpolateImageFunction

Simplify the code and avoid some allocations to improve performance.

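In the VectorImage specialization the result is now accumulated directly
in the output variable (OutputType) instead of in a separate value
vector of ScalarRealType. I haven't checked the definition of RealType,
so this might arguably affect precision if the two element types differ.
The unit tests still seem to pass, however.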

Benchmarked by 2x upsampling a 10-band 5490x5490 float32 image on an
Intel Celeron J1900 (three runs each; timings below).

There is further room for improvement by eliding the remaining memory
allocations. A container like the small_vector from Boost might be
useful here, but we can't use that since it's not available in older
Boost versions.

before:
661.44s user 9.31s system 304% cpu 3:40.52 total
594.06s user 9.43s system 303% cpu 3:19.00 total
591.15s user 9.43s system 301% cpu 3:19.28 total

after:
443.42s user 10.48s system 294% cpu 2:34.00 total
443.11s user 9.87s system 309% cpu 2:26.27 total
448.50s user 10.50s system 311% cpu 2:27.52 total
---
 .../otbBCOInterpolateImageFunction.txx        | 28 ++++++-------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/Modules/Core/Interpolation/include/otbBCOInterpolateImageFunction.txx b/Modules/Core/Interpolation/include/otbBCOInterpolateImageFunction.txx
index 89572aad83..6f0fd2ad70 100644
--- a/Modules/Core/Interpolation/include/otbBCOInterpolateImageFunction.txx
+++ b/Modules/Core/Interpolation/include/otbBCOInterpolateImageFunction.txx
@@ -142,8 +142,6 @@ BCOInterpolateImageFunction<TInputImage, TCoordRep>
   IndexType baseIndex;
   IndexType neighIndex;
 
-  std::vector<RealType> lineRes(this->m_WinSize, 0.);
-
   RealType value = itk::NumericTraits<RealType>::Zero;
 
   CoefContainerType BCOCoefX = this->EvaluateCoef(index[0]);
@@ -157,6 +155,7 @@ BCOInterpolateImageFunction<TInputImage, TCoordRep>
 
   for(unsigned int i = 0; i < this->m_WinSize; ++i )
     {
+    RealType lineRes = 0.;
     for(unsigned int j = 0; j < this->m_WinSize; ++j )
       {
       // get neighbor index
@@ -179,9 +178,9 @@ BCOInterpolateImageFunction<TInputImage, TCoordRep>
         {
         neighIndex[1] = this->m_StartIndex[1];
         }
-      lineRes[i] += static_cast<RealType>( this->GetInputImage()->GetPixel( neighIndex ) ) * BCOCoefY[j];
+      lineRes += static_cast<RealType>( this->GetInputImage()->GetPixel( neighIndex ) ) * BCOCoefY[j];
       }
-    value += lineRes[i]*BCOCoefX[i];
+    value += lineRes*BCOCoefX[i];
     }
 
 
@@ -211,14 +210,9 @@ BCOInterpolateImageFunction< otb::VectorImage<TPixel, VImageDimension> , TCoordR
   IndexType neighIndex;
 
 
-    
-  std::vector< std::vector<ScalarRealType> >  lineRes ( this->m_WinSize, std::vector<ScalarRealType>( componentNumber, itk::NumericTraits<ScalarRealType>::Zero) );
-  std::vector< ScalarRealType > value(componentNumber,itk::NumericTraits<ScalarRealType>::Zero);
-
-
-  OutputType output;
-
-  output.SetSize(componentNumber);
+  std::vector<ScalarRealType> lineRes(componentNumber);
+  OutputType output(componentNumber);
+  output.Fill(itk::NumericTraits<ScalarRealType>::Zero);
 
   CoefContainerType BCOCoefX = this->EvaluateCoef(index[0]);
   CoefContainerType BCOCoefY = this->EvaluateCoef(index[1]);
@@ -231,6 +225,7 @@ BCOInterpolateImageFunction< otb::VectorImage<TPixel, VImageDimension> , TCoordR
 
   for(unsigned int i = 0; i < this->m_WinSize; ++i )
     {
+    std::fill(lineRes.begin(), lineRes.end(), itk::NumericTraits<ScalarRealType>::Zero);
     for(unsigned int j = 0; j < this->m_WinSize; ++j )
       {
       // get neighbor index
@@ -256,20 +251,15 @@ BCOInterpolateImageFunction< otb::VectorImage<TPixel, VImageDimension> , TCoordR
       const InputPixelType & pixel = this->GetInputImage()->GetPixel( neighIndex );
       for( unsigned int k = 0; k<componentNumber; ++k)
         {
-        lineRes[i][k] += pixel.GetElement(k) * BCOCoefY[j];
+        lineRes[k] += pixel.GetElement(k) * BCOCoefY[j];
         }
       }
     for( unsigned int k = 0; k<componentNumber; ++k)
       {
-      value[k] += lineRes[i][k]*BCOCoefX[i];
+      output[k] += lineRes[k]*BCOCoefX[i];
       }
     }
 
-  for( unsigned int k = 0; k<componentNumber; ++k)
-    {
-    output.SetElement(k, value[k]);
-    }
-
   return ( output );
 }
 
-- 
GitLab