From 6a8c2a84c3683311a9fe5176fa392fb129176f84 Mon Sep 17 00:00:00 2001 From: Julien Michel <julien.michel@cnes.fr> Date: Wed, 16 Nov 2016 09:55:34 +0100 Subject: [PATCH] PERF: Optimize BCOInterpolateImageFunction Simplify the code and avoid some allocations to improve performance. I haven't checked the definition of RealType, but merging the output and value variables might arguably affect precision. The unit tests still seem to pass, however. Tested by 2x upsampling a 10-band 5490x5490 float32 image on an Intel Celeron J1900. There is further room for improvement by eliding the remaining memory allocations. A container like the small_vector from Boost might be useful here, but we can't use that since it's not available in older Boost versions. before: 661.44s user 9.31s system 304% cpu 3:40.52 total 594.06s user 9.43s system 303% cpu 3:19.00 total 591.15s user 9.43s system 301% cpu 3:19.28 total after: 443.42s user 10.48s system 294% cpu 2:34.00 total 443.11s user 9.87s system 309% cpu 2:26.27 total 448.50s user 10.50s system 311% cpu 2:27.52 total --- .../otbBCOInterpolateImageFunction.txx | 28 ++++++------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/Modules/Core/Interpolation/include/otbBCOInterpolateImageFunction.txx b/Modules/Core/Interpolation/include/otbBCOInterpolateImageFunction.txx index 89572aad83..6f0fd2ad70 100644 --- a/Modules/Core/Interpolation/include/otbBCOInterpolateImageFunction.txx +++ b/Modules/Core/Interpolation/include/otbBCOInterpolateImageFunction.txx @@ -142,8 +142,6 @@ BCOInterpolateImageFunction<TInputImage, TCoordRep> IndexType baseIndex; IndexType neighIndex; - std::vector<RealType> lineRes(this->m_WinSize, 0.); - RealType value = itk::NumericTraits<RealType>::Zero; CoefContainerType BCOCoefX = this->EvaluateCoef(index[0]); @@ -157,6 +155,7 @@ BCOInterpolateImageFunction<TInputImage, TCoordRep> for(unsigned int i = 0; i < this->m_WinSize; ++i ) { + RealType lineRes = 0.; for(unsigned int j = 0; j < this->m_WinSize; ++j ) { // get neighbor index @@ -179,9 +178,9 @@ BCOInterpolateImageFunction<TInputImage, TCoordRep> { neighIndex[1] = this->m_StartIndex[1]; } - lineRes[i] += static_cast<RealType>( this->GetInputImage()->GetPixel( neighIndex ) ) * BCOCoefY[j]; + lineRes += static_cast<RealType>( this->GetInputImage()->GetPixel( neighIndex ) ) * BCOCoefY[j]; } - value += lineRes[i]*BCOCoefX[i]; + value += lineRes*BCOCoefX[i]; } @@ -211,14 +210,9 @@ BCOInterpolateImageFunction< otb::VectorImage<TPixel, VImageDimension> , TCoordR IndexType neighIndex; - - std::vector< std::vector<ScalarRealType> > lineRes ( this->m_WinSize, std::vector<ScalarRealType>( componentNumber, itk::NumericTraits<ScalarRealType>::Zero) ); - std::vector< ScalarRealType > value(componentNumber,itk::NumericTraits<ScalarRealType>::Zero); - - - OutputType output; - - output.SetSize(componentNumber); + std::vector<ScalarRealType> lineRes(componentNumber); + OutputType output(componentNumber); + output.Fill(itk::NumericTraits<ScalarRealType>::Zero); CoefContainerType BCOCoefX = this->EvaluateCoef(index[0]); CoefContainerType BCOCoefY = this->EvaluateCoef(index[1]); @@ -231,6 +225,7 @@ BCOInterpolateImageFunction< otb::VectorImage<TPixel, VImageDimension> , TCoordR for(unsigned int i = 0; i < this->m_WinSize; ++i ) { + std::fill(lineRes.begin(), lineRes.end(), itk::NumericTraits<ScalarRealType>::Zero); for(unsigned int j = 0; j < this->m_WinSize; ++j ) { // get neighbor index @@ -256,20 +251,15 @@ BCOInterpolateImageFunction< otb::VectorImage<TPixel, VImageDimension> , TCoordR const InputPixelType & pixel = this->GetInputImage()->GetPixel( neighIndex ); for( unsigned int k = 0; k<componentNumber; ++k) { - lineRes[i][k] += pixel.GetElement(k) * BCOCoefY[j]; + lineRes[k] += pixel.GetElement(k) * BCOCoefY[j]; } } for( unsigned int k = 0; k<componentNumber; ++k) { - value[k] += lineRes[i][k]*BCOCoefX[i]; + output[k] += lineRes[k]*BCOCoefX[i]; } } - for( unsigned int k = 0; k<componentNumber; ++k) - { - output.SetElement(k, value[k]); - } - return ( output ); } -- GitLab