Skip to content
Snippets Groups Projects
Commit 6a8c2a84 authored by Julien Michel
Browse files

PERF: Optimize BCOInterpolateImageFunction

Simplify the code and avoid some allocations to improve performance.

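I haven't checked the definition of RealType, but merging the output and
value variables might arguably affect precision: in the VectorImage
specialization the sum is now accumulated directly in output (OutputType)
instead of in a separate vector of ScalarRealType. The unit tests still
seem to pass, however.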

Tested by 2x upsampling a 10-band 5490x5490 float32 image on an Intel
Celeron J1900.

There is further room for improvement by eliding the remaining memory
allocations. A container like the small_vector from Boost might be
useful here, but we can't use that since it's not available in older
Boost versions.

before:
661.44s user 9.31s system 304% cpu 3:40.52 total
594.06s user 9.43s system 303% cpu 3:19.00 total
591.15s user 9.43s system 301% cpu 3:19.28 total

after:
443.42s user 10.48s system 294% cpu 2:34.00 total
443.11s user 9.87s system 309% cpu 2:26.27 total
448.50s user 10.50s system 311% cpu 2:27.52 total
parent 37b305f9
No related branches found
No related tags found
No related merge requests found
......@@ -142,8 +142,6 @@ BCOInterpolateImageFunction<TInputImage, TCoordRep>
IndexType baseIndex;
IndexType neighIndex;
std::vector<RealType> lineRes(this->m_WinSize, 0.);
RealType value = itk::NumericTraits<RealType>::Zero;
CoefContainerType BCOCoefX = this->EvaluateCoef(index[0]);
......@@ -157,6 +155,7 @@ BCOInterpolateImageFunction<TInputImage, TCoordRep>
for(unsigned int i = 0; i < this->m_WinSize; ++i )
{
RealType lineRes = 0.;
for(unsigned int j = 0; j < this->m_WinSize; ++j )
{
// get neighbor index
......@@ -179,9 +178,9 @@ BCOInterpolateImageFunction<TInputImage, TCoordRep>
{
neighIndex[1] = this->m_StartIndex[1];
}
lineRes[i] += static_cast<RealType>( this->GetInputImage()->GetPixel( neighIndex ) ) * BCOCoefY[j];
lineRes += static_cast<RealType>( this->GetInputImage()->GetPixel( neighIndex ) ) * BCOCoefY[j];
}
value += lineRes[i]*BCOCoefX[i];
value += lineRes*BCOCoefX[i];
}
......@@ -211,14 +210,9 @@ BCOInterpolateImageFunction< otb::VectorImage<TPixel, VImageDimension> , TCoordR
IndexType neighIndex;
std::vector< std::vector<ScalarRealType> > lineRes ( this->m_WinSize, std::vector<ScalarRealType>( componentNumber, itk::NumericTraits<ScalarRealType>::Zero) );
std::vector< ScalarRealType > value(componentNumber,itk::NumericTraits<ScalarRealType>::Zero);
OutputType output;
output.SetSize(componentNumber);
std::vector<ScalarRealType> lineRes(componentNumber);
OutputType output(componentNumber);
output.Fill(itk::NumericTraits<ScalarRealType>::Zero);
CoefContainerType BCOCoefX = this->EvaluateCoef(index[0]);
CoefContainerType BCOCoefY = this->EvaluateCoef(index[1]);
......@@ -231,6 +225,7 @@ BCOInterpolateImageFunction< otb::VectorImage<TPixel, VImageDimension> , TCoordR
for(unsigned int i = 0; i < this->m_WinSize; ++i )
{
std::fill(lineRes.begin(), lineRes.end(), itk::NumericTraits<ScalarRealType>::Zero);
for(unsigned int j = 0; j < this->m_WinSize; ++j )
{
// get neighbor index
......@@ -256,20 +251,15 @@ BCOInterpolateImageFunction< otb::VectorImage<TPixel, VImageDimension> , TCoordR
const InputPixelType & pixel = this->GetInputImage()->GetPixel( neighIndex );
for( unsigned int k = 0; k<componentNumber; ++k)
{
lineRes[i][k] += pixel.GetElement(k) * BCOCoefY[j];
lineRes[k] += pixel.GetElement(k) * BCOCoefY[j];
}
}
for( unsigned int k = 0; k<componentNumber; ++k)
{
value[k] += lineRes[i][k]*BCOCoefX[i];
output[k] += lineRes[k]*BCOCoefX[i];
}
}
for( unsigned int k = 0; k<componentNumber; ++k)
{
output.SetElement(k, value[k]);
}
return ( output );
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment