Skip to content
Snippets Groups Projects
Commit c951c094 authored by Gaëlle USSEGLIO's avatar Gaëlle USSEGLIO
Browse files

Merge branch 'replace_openMP_by_MultiThreader' into 'master'

ENH : Replace OpemMP // by itk::Multithreader into SARCoRegistration

See merge request remote_modules/diapotb!10
parents 414add5e 27fe49ef
No related branches found
No related tags found
1 merge request!10ENH : Replace OpemMP // by itk::Multithreader into SARCoRegistration
...@@ -5,17 +5,9 @@ if(NOT OTB_SOURCE_DIR) ...@@ -5,17 +5,9 @@ if(NOT OTB_SOURCE_DIR)
find_package(OTB REQUIRED) find_package(OTB REQUIRED)
list(APPEND CMAKE_MODULE_PATH ${OTB_CMAKE_DIR}) list(APPEND CMAKE_MODULE_PATH ${OTB_CMAKE_DIR})
include(${OTB_USE_FILE}) include(${OTB_USE_FILE})
if(NOT OTB_USE_OPENMP)
find_package(OpenMP REQUIRED)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()
include(OTBModuleExternal) include(OTBModuleExternal)
else() else()
otb_module_impl() otb_module_impl()
if(NOT OTB_USE_OPENMP)
message(FATAL_ERROR "Diapotb remote module needs OpenMP, compile OTB with OTB_USE_OPENMP=ON")
endif()
endif() endif()
# Execute and Copy during installation # Execute and Copy during installation
......
...@@ -285,11 +285,9 @@ public: ...@@ -285,11 +285,9 @@ public:
/** /**
* Method Process * Method Process
*/ */
void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult) void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult,
int threadId=0)
{ {
// Get current thread id
int threadId = omp_get_thread_num();
// Chekc if the output Tile is cut // Chekc if the output Tile is cut
unsigned int lastInd = outputRegion.GetSize()[0]*outputRegion.GetSize()[1]; unsigned int lastInd = outputRegion.GetSize()[0]*outputRegion.GetSize()[1];
if (lastInd < (m_SizeTiles*m_SizeTiles)) if (lastInd < (m_SizeTiles*m_SizeTiles))
......
...@@ -129,7 +129,10 @@ public: ...@@ -129,7 +129,10 @@ public:
itkGetMacro(SizeTiles, unsigned int); itkGetMacro(SizeTiles, unsigned int);
itkGetMacro(Margin, unsigned int); itkGetMacro(Margin, unsigned int);
itkGetMacro(UseMasterGeo, bool); itkGetMacro(UseMasterGeo, bool);
itkGetMacro(NbTiles, unsigned int);
itkGetMacro(IndLFirstTile, unsigned int);
itkGetMacro(IndCFirstTile, unsigned int);
// Setter/Getter for inputs (image and potentially grid) // Setter/Getter for inputs (image and potentially grid)
/** Connect one of the operands for registration */ /** Connect one of the operands for registration */
void SetGridInput( const GridType* image); void SetGridInput( const GridType* image);
...@@ -183,6 +186,22 @@ protected: ...@@ -183,6 +186,22 @@ protected:
*/ */
GridRegionType OutputRegionToInputGridRegion(const ImageOutRegionType& outputRegion) const; GridRegionType OutputRegionToInputGridRegion(const ImageOutRegionType& outputRegion) const;
/** startIndex and stopIndex represent the indices of tiles to process in thread threadId and index for the
first tile */
virtual void ThreadedGenerateData(unsigned int startTileId, unsigned int stopTileId,
unsigned int firstTileIndexL, unsigned int firstTileIndexC,
itk::ThreadIdType threadId);
/** Static function used as a "callback" by the MultiThreader. The threading
* library will call this routine for each thread, which will delegate the
* control to ThreadedGenerateData(). */
static ITK_THREAD_RETURN_TYPE ThreaderCallback(void *arg);
/** Internal structure used for passing image data into the threading library */
struct ThreadStruct
{
Pointer Filter;
};
/** /**
* \sa ImageToImageFilter::ThreadedGenerateData(), * \sa ImageToImageFilter::ThreadedGenerateData(),
...@@ -229,6 +248,14 @@ protected: ...@@ -229,6 +248,14 @@ protected:
// Step (SLC or SAR geo) for input deformation grid // Step (SLC or SAR geo) for input deformation grid
GridSizeType m_GridStep; GridSizeType m_GridStep;
// Nb Tiles for each Pipeline (each call to GenerateData)
unsigned int m_NbTiles;
unsigned int m_NbTilesCol;
// Index for the first tile of each Pipeline (each call to GenerateData)
unsigned int m_IndLFirstTile;
unsigned int m_IndCFirstTile;
}; };
} // End namespace otb } // End namespace otb
......
...@@ -31,7 +31,6 @@ ...@@ -31,7 +31,6 @@
#include <cmath> #include <cmath>
#include <algorithm> #include <algorithm>
#include <omp.h>
namespace otb namespace otb
{ {
...@@ -41,7 +40,8 @@ namespace otb ...@@ -41,7 +40,8 @@ namespace otb
template <class TImageIn, class TImageOut, class TFunction> template <class TImageIn, class TImageOut, class TFunction>
TilesAnalysisImageFilter< TImageIn, TImageOut, TFunction >::TilesAnalysisImageFilter() TilesAnalysisImageFilter< TImageIn, TImageOut, TFunction >::TilesAnalysisImageFilter()
: m_SizeTiles(50), m_Margin(7), m_UseCache(false), m_MaxMemoryCacheInMB(0), m_MaxShiftInRange(0.), : m_SizeTiles(50), m_Margin(7), m_UseCache(false), m_MaxMemoryCacheInMB(0), m_MaxShiftInRange(0.),
m_MaxShiftInAzimut(0.), m_UseMasterGeo(false) m_MaxShiftInAzimut(0.), m_UseMasterGeo(false), m_NbTiles(0), m_IndLFirstTile(0), m_IndCFirstTile(0),
m_NbTilesCol(0)
{ {
m_ProcessedTilesIndex.clear(); m_ProcessedTilesIndex.clear();
m_ProcessedTiles.clear(); m_ProcessedTiles.clear();
...@@ -428,7 +428,7 @@ namespace otb ...@@ -428,7 +428,7 @@ namespace otb
/** /**
* Method ThreadedGenerateData * Method GenerateData
*/ */
template<class TImageIn, class TImageOut, class TFunction> template<class TImageIn, class TImageOut, class TFunction>
void void
...@@ -439,20 +439,10 @@ namespace otb ...@@ -439,20 +439,10 @@ namespace otb
this->AllocateOutputs(); this->AllocateOutputs();
int nbThreads = this->GetNumberOfThreads(); int nbThreads = this->GetNumberOfThreads();
omp_set_num_threads(nbThreads);
ImageInPointer inputPtr = const_cast< ImageInType * >( this->GetImageInput() );
// Get Output and Input Requested region // Get Output Requested region
ImageInRegionType inputRegion = this->GetImageInput()->GetRequestedRegion();
ImageOutRegionType outputRegion = this->GetOutput()->GetRequestedRegion(); ImageOutRegionType outputRegion = this->GetOutput()->GetRequestedRegion();
// Typedef for Iterators on selected region
typedef itk::ImageRegionIterator<ImageOutType> OutItType;
// Size of the whole output image
int outImage_SizeL = this->GetOutput()->GetLargestPossibleRegion().GetSize()[1];
// Define the number of tiles for processing // Define the number of tiles for processing
int nbTiles_Lines = outputRegion.GetSize()[1]/m_SizeTiles; int nbTiles_Lines = outputRegion.GetSize()[1]/m_SizeTiles;
int nbTiles_Col = outputRegion.GetSize()[0]/m_SizeTiles; int nbTiles_Col = outputRegion.GetSize()[0]/m_SizeTiles;
...@@ -470,145 +460,147 @@ namespace otb ...@@ -470,145 +460,147 @@ namespace otb
// First Tile = index of output requested region // First Tile = index of output requested region
int IndL_FirstTiles = outputRegion.GetIndex()[1]; int IndL_FirstTiles = outputRegion.GetIndex()[1];
int IndC_FirstTiles = outputRegion.GetIndex()[0]; int IndC_FirstTiles = outputRegion.GetIndex()[0];
///////////////////////// Pre Processing /////////////////////// ///////////////////////// Pre Processing ///////////////////////
bool *TilesIsIntoCache = new bool[nbTiles_Lines*nbTiles_Col];
if (m_UseCache)
{
///////////// Clear the map of Processed Tiles if cache used and maxMemory reached //////////////
// Transform maxMemory into elts
long unsigned int nbTilesMax = (1024*1024*m_MaxMemoryCacheInMB)/(sizeof(ImageOutPixelType)*m_SizeTiles*m_SizeTiles);
typedef typename std::map<int, ImageOutPixelType *>::iterator mapIt ;
// Erase all elts
if (m_ProcessedTiles.size() > nbTilesMax)
{
mapIt mapPT = m_ProcessedTiles.begin();
// Free Memory into m_ProcessedTiles
for (long unsigned int k = 0; k <m_ProcessedTiles.size(); k++)
{
delete mapPT->second;
mapPT->second = 0;
++mapPT;
}
// Erase elts
m_ProcessedTiles.clear();
m_ProcessedTilesIndex.clear();
}
///////////// Determine if some tiles are already into our cache //////////////
// For each Tiles
for (int k = 0; k < (nbTiles_Lines*nbTiles_Col); k++)
{
ImageOutIndexType index_current;
int id_L = k/nbTiles_Col; // quotient of Euclidean division (integer division)
int id_C = k - id_L*nbTiles_Col; // remainder of Euclidean division
index_current[1] = IndL_FirstTiles + id_L*m_SizeTiles;
index_current[0] = IndC_FirstTiles + id_C*m_SizeTiles;
TilesIsIntoCache[k] = (std::find(m_ProcessedTilesIndex.begin(), m_ProcessedTilesIndex.end(),
index_current) != m_ProcessedTilesIndex.end());
}
}
// Call Function initialization // Call Function initialization
m_FunctionOnTiles->Initialize(nbThreads); m_FunctionOnTiles->Initialize(nbThreads);
m_NbTiles = nbTiles_Lines*nbTiles_Col;
m_NbTilesCol = nbTiles_Col;
m_IndLFirstTile = IndL_FirstTiles;
m_IndCFirstTile = IndC_FirstTiles;
///////////////////////// Processing /////////////////////// ///////////////////////// Processing ///////////////////////
#pragma omp parallel for schedule(dynamic) // Set up the multithreaded processing
// For each Tiles ThreadStruct str;
for (int k = 0; k < (nbTiles_Lines*nbTiles_Col); k++) str.Filter = this;
{
// Transform Tiles id into index // Setting up multithreader
ImageOutIndexType index_current; this->GetMultiThreader()->SetNumberOfThreads(this->GetNumberOfThreads());
int id_L = k/nbTiles_Col; // quotient of Euclidean division (integer division) this->GetMultiThreader()->SetSingleMethod(this->ThreaderCallback, &str);
int id_C = k - id_L*nbTiles_Col; // remainder of Euclidean division
index_current[1] = IndL_FirstTiles + id_L*m_SizeTiles; // multithread the execution
index_current[0] = IndC_FirstTiles + id_C*m_SizeTiles; this->GetMultiThreader()->SingleMethodExecute();
///////////// Post Processing ////////////
// Call Function terminate
m_FunctionOnTiles->Terminate();
}
template<class TImageIn, class TImageOut, class TFunction>
void
TilesAnalysisImageFilter< TImageIn, TImageOut, TFunction >
::ThreadedGenerateData(unsigned int startTileId, unsigned int stopTileId,
unsigned int firstTileIndexL, unsigned int firstTileIndexC,
itk::ThreadIdType threadId)
{
ImageInPointer inputPtr = const_cast< ImageInType * >( this->GetImageInput() );
// Typedef for Iterators on selected region
typedef itk::ImageRegionIterator<ImageOutType> OutItType;
for (int k = startTileId; k < stopTileId; k++) //<= ??????
{
// Transform Tiles id into index
ImageOutIndexType index_current;
int id_L = k/m_NbTilesCol; // quotient of Euclidean division (integer division)
int id_C = k - id_L*m_NbTilesCol; // remainder of Euclidean division
index_current[1] = firstTileIndexL + id_L*m_SizeTiles;
index_current[0] = firstTileIndexC + id_C*m_SizeTiles;
// Construct the current tile // Construct the current tile
ImageOutIndexType currentOutTilesIndex; ImageOutIndexType currentOutTilesIndex;
ImageOutSizeType currentOutTilesSize; ImageOutSizeType currentOutTilesSize;
// Create regions with index and the given size // Create regions with index and the given size
// Output tiles // Output tiles
currentOutTilesIndex[0] = index_current[0]; currentOutTilesIndex[0] = index_current[0];
currentOutTilesIndex[1] = index_current[1]; currentOutTilesIndex[1] = index_current[1];
currentOutTilesSize[0] = m_SizeTiles; currentOutTilesSize[0] = m_SizeTiles;
currentOutTilesSize[1] = m_SizeTiles; currentOutTilesSize[1] = m_SizeTiles;
ImageOutRegionType outputCurrentRegion; ImageOutRegionType outputCurrentRegion;
outputCurrentRegion.SetIndex(currentOutTilesIndex); outputCurrentRegion.SetIndex(currentOutTilesIndex);
outputCurrentRegion.SetSize(currentOutTilesSize); outputCurrentRegion.SetSize(currentOutTilesSize);
outputCurrentRegion.Crop(this->GetOutput()->GetLargestPossibleRegion()); outputCurrentRegion.Crop(this->GetOutput()->GetLargestPossibleRegion());
// If Tile is not into cache or m_UseCache = false : process the current tile // Process the current tile
if (!TilesIsIntoCache[k] || !m_UseCache) ImageOutPixelType * TilesCache = new ImageOutPixelType[outputCurrentRegion.GetSize()[1]*
{ outputCurrentRegion.GetSize()[0]];
ImageOutPixelType * TilesCache = new ImageOutPixelType[outputCurrentRegion.GetSize()[1]*
outputCurrentRegion.GetSize()[0]]; // Call Function Process :
// Inputs : Ptr Image In and the output tile
// Call Function Process : // Output : Ptr on result array
// Inputs : Ptr Image In and the output tile m_FunctionOnTiles->Process(inputPtr, outputCurrentRegion, TilesCache, threadId);
// Output : Ptr on result array
m_FunctionOnTiles->Process(inputPtr, outputCurrentRegion, TilesCache); int compteur = 0;
// Assignate Output with function result
// Iterators on output
OutItType OutIt(this->GetOutput(), outputCurrentRegion);
OutIt.GoToBegin();
while (!OutIt.IsAtEnd())
{
OutIt.Set(TilesCache[compteur]);
++compteur;
++OutIt;
}
delete [] TilesCache;
TilesCache = 0;
}
}
// Callback routine used by the threading library. This routine just calls
// the ThreadedGenerateData method after setting the correct region for this
// thread.
template<class TImageIn, class TImageOut, class TFunction>
ITK_THREAD_RETURN_TYPE
TilesAnalysisImageFilter< TImageIn, TImageOut, TFunction >
::ThreaderCallback(void *arg)
{
ThreadStruct *str;
int threadId, threadCount;
unsigned int total, start, stop, indLFirstTile, indCFirstTile;
int compteur = 0; threadId = ((itk::MultiThreader::ThreadInfoStruct *) (arg))->ThreadID;
threadCount = ((itk::MultiThreader::ThreadInfoStruct *) (arg))->NumberOfThreads;
str = (ThreadStruct *) (((itk::MultiThreader::ThreadInfoStruct *) (arg))->UserData);
// Assignate Output with function result total = str->Filter->GetNbTiles();
// Iterators on output indLFirstTile = str->Filter->GetIndLFirstTile();
OutItType OutIt(this->GetOutput(), outputCurrentRegion); indCFirstTile = str->Filter->GetIndCFirstTile();
OutIt.GoToBegin();
while (!OutIt.IsAtEnd())
{
OutIt.Set(TilesCache[compteur]);
++compteur;
++OutIt;
}
if (m_UseCache) if (threadId < static_cast<int>(total))
{ {
// Store elts into our two caches : m_ProcessedTiles and m_ProcessedTilesIndex (monothread) start =
#pragma omp critical static_cast<unsigned int>(threadId *
{ std::ceil(static_cast<double>(total)/(static_cast<double>(threadCount))));
int ind = index_current[1] * outImage_SizeL + index_current[0]; stop =
m_ProcessedTiles[ind] = TilesCache; static_cast<unsigned int>((threadId+1) *
m_ProcessedTilesIndex.push_back(index_current); std::ceil(static_cast<double>(total)/(static_cast<double>(threadCount))));
}
} if (stop > total) stop = total;
else
{ // For very small graphs it might occur that start = stop. In this
delete [] TilesCache; // case the vertex at that index will be processed in the next strip.
TilesCache = 0; if (start != stop)
}
}
// Else use the cache
else
{ {
OutItType OutIt(this->GetOutput(), outputCurrentRegion); str->Filter->ThreadedGenerateData(start, stop, indLFirstTile, indCFirstTile, threadId);
OutIt.GoToBegin();
int ind = index_current[1] * outImage_SizeL + index_current[0];
ImageOutPixelType * TilesCache = m_ProcessedTiles[ind];
int compteur = 0;
while (!OutIt.IsAtEnd())
{
OutIt.Set(TilesCache[compteur]);
++compteur;
++OutIt;
}
} }
} }
// else
delete [] TilesIsIntoCache; // {
// otherwise don't use this thread. Sometimes the threads don't
///////////// Post Processing //////////// // break up very well and it is just as efficient to leave a
// Call Function terminate // few threads idle.
m_FunctionOnTiles->Terminate(); // }
return ITK_THREAD_RETURN_VALUE;
} }
......
...@@ -110,7 +110,8 @@ public: ...@@ -110,7 +110,8 @@ public:
void Terminate() {} void Terminate() {}
// Process // Process
void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult) void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult,
int threadId=0)
{ {
// Transpose outputRegion (current tile) into input image // Transpose outputRegion (current tile) into input image
// Input (= output now) // Input (= output now)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment