From 27fe49efe15f29a1c2713a8a8f319b4c490c6b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ga=C3=ABlle=20USSEGLIO?= <gaelle.usseglio@cnes.fr> Date: Thu, 28 Feb 2019 10:02:56 +0000 Subject: [PATCH] ENH : Replace OpemMP // by itk::Multithreader into SARCoRegistration --- CMakeLists.txt | 8 - include/otbSARTilesCoregistrationFunctor.h | 6 +- include/otbTilesAnalysisImageFilter.h | 29 ++- include/otbTilesAnalysisImageFilter.txx | 272 ++++++++++----------- include/otbTilesExampleFunctor.h | 3 +- 5 files changed, 164 insertions(+), 154 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3bf3a64..8b9ad5f 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -5,17 +5,9 @@ if(NOT OTB_SOURCE_DIR) find_package(OTB REQUIRED) list(APPEND CMAKE_MODULE_PATH ${OTB_CMAKE_DIR}) include(${OTB_USE_FILE}) - if(NOT OTB_USE_OPENMP) - find_package(OpenMP REQUIRED) - set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}") - set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}") - endif() include(OTBModuleExternal) else() otb_module_impl() - if(NOT OTB_USE_OPENMP) - message(FATAL_ERROR "Diapotb remote module needs OpenMP, compile OTB with OTB_USE_OPENMP=ON") - endif() endif() # Execute and Copy during installation diff --git a/include/otbSARTilesCoregistrationFunctor.h b/include/otbSARTilesCoregistrationFunctor.h index 9afc034..1ceceb5 100644 --- a/include/otbSARTilesCoregistrationFunctor.h +++ b/include/otbSARTilesCoregistrationFunctor.h @@ -285,11 +285,9 @@ public: /** * Method Process */ - void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult) + void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult, + int threadId=0) { - // Get current thread id - int threadId = omp_get_thread_num(); - // Chekc if the output Tile is cut unsigned int lastInd = outputRegion.GetSize()[0]*outputRegion.GetSize()[1]; if (lastInd < (m_SizeTiles*m_SizeTiles)) diff --git a/include/otbTilesAnalysisImageFilter.h b/include/otbTilesAnalysisImageFilter.h index 38071fd..544a6ee 100644 --- a/include/otbTilesAnalysisImageFilter.h +++ b/include/otbTilesAnalysisImageFilter.h @@ -129,7 +129,10 @@ public: itkGetMacro(SizeTiles, unsigned int); itkGetMacro(Margin, unsigned int); itkGetMacro(UseMasterGeo, bool); - + itkGetMacro(NbTiles, unsigned int); + itkGetMacro(IndLFirstTile, unsigned int); + itkGetMacro(IndCFirstTile, unsigned int); + // Setter/Getter for inputs (image and potentially grid) /** Connect one of the operands for registration */ void SetGridInput( const GridType* image); @@ -183,6 +186,22 @@ protected: */ GridRegionType OutputRegionToInputGridRegion(const ImageOutRegionType& outputRegion) const; + /** startIndex and stopIndex represent the indices of tiles to process in thread threadId and index for the + first tile */ + virtual void ThreadedGenerateData(unsigned int startTileId, unsigned int stopTileId, + unsigned int firstTileIndexL, unsigned int firstTileIndexC, + itk::ThreadIdType threadId); + + /** Static function used as a "callback" by the MultiThreader. The threading + * library will call this routine for each thread, which will delegate the + * control to ThreadedGenerateData(). */ + static ITK_THREAD_RETURN_TYPE ThreaderCallback(void *arg); + + /** Internal structure used for passing image data into the threading library */ + struct ThreadStruct + { + Pointer Filter; + }; /** * \sa ImageToImageFilter::ThreadedGenerateData(), @@ -229,6 +248,14 @@ protected: // Step (SLC or SAR geo) for input deformation grid GridSizeType m_GridStep; + + // Nb Tiles for each Pipeline (each call to GenerateData) + unsigned int m_NbTiles; + unsigned int m_NbTilesCol; + + // Index for the first tile of each Pipeline (each call to GenerateData) + unsigned int m_IndLFirstTile; + unsigned int m_IndCFirstTile; }; } // End namespace otb diff --git a/include/otbTilesAnalysisImageFilter.txx b/include/otbTilesAnalysisImageFilter.txx index 4bcc6c4..5980b1a 100644 --- a/include/otbTilesAnalysisImageFilter.txx +++ b/include/otbTilesAnalysisImageFilter.txx @@ -31,7 +31,6 @@ #include <cmath> #include <algorithm> -#include <omp.h> namespace otb { @@ -41,7 +40,8 @@ namespace otb template <class TImageIn, class TImageOut, class TFunction> TilesAnalysisImageFilter< TImageIn, TImageOut, TFunction >::TilesAnalysisImageFilter() : m_SizeTiles(50), m_Margin(7), m_UseCache(false), m_MaxMemoryCacheInMB(0), m_MaxShiftInRange(0.), - m_MaxShiftInAzimut(0.), m_UseMasterGeo(false) + m_MaxShiftInAzimut(0.), m_UseMasterGeo(false), m_NbTiles(0), m_IndLFirstTile(0), m_IndCFirstTile(0), + m_NbTilesCol(0) { m_ProcessedTilesIndex.clear(); m_ProcessedTiles.clear(); @@ -428,7 +428,7 @@ namespace otb /** - * Method ThreadedGenerateData + * Method GenerateData */ template<class TImageIn, class TImageOut, class TFunction> void @@ -439,20 +439,10 @@ namespace otb this->AllocateOutputs(); int nbThreads = this->GetNumberOfThreads(); - omp_set_num_threads(nbThreads); - - ImageInPointer inputPtr = const_cast< ImageInType * >( this->GetImageInput() ); - // Get Output and Input Requested region - ImageInRegionType inputRegion = this->GetImageInput()->GetRequestedRegion(); + // Get Output Requested region ImageOutRegionType outputRegion = this->GetOutput()->GetRequestedRegion(); - - // Typedef for Iterators on selected region - typedef itk::ImageRegionIterator<ImageOutType> OutItType; - - // Size of the whole output image - int outImage_SizeL = this->GetOutput()->GetLargestPossibleRegion().GetSize()[1]; - + // Define the number of tiles for processing int nbTiles_Lines = outputRegion.GetSize()[1]/m_SizeTiles; int nbTiles_Col = outputRegion.GetSize()[0]/m_SizeTiles; @@ -470,145 +460,147 @@ namespace otb // First Tile = index of output requested region int IndL_FirstTiles = outputRegion.GetIndex()[1]; int IndC_FirstTiles = outputRegion.GetIndex()[0]; - - + ///////////////////////// Pre Processing /////////////////////// - bool *TilesIsIntoCache = new bool[nbTiles_Lines*nbTiles_Col]; - if (m_UseCache) - { - ///////////// Clear the map of Processed Tiles if cache used and maxMemory reached ////////////// - // Transform maxMemory into elts - long unsigned int nbTilesMax = (1024*1024*m_MaxMemoryCacheInMB)/(sizeof(ImageOutPixelType)*m_SizeTiles*m_SizeTiles); - typedef typename std::map<int, ImageOutPixelType *>::iterator mapIt ; - // Erase all elts - if (m_ProcessedTiles.size() > nbTilesMax) - { - mapIt mapPT = m_ProcessedTiles.begin(); - - // Free Memory into m_ProcessedTiles - for (long unsigned int k = 0; k <m_ProcessedTiles.size(); k++) - { - delete mapPT->second; - mapPT->second = 0; - ++mapPT; - } - // Erase elts - m_ProcessedTiles.clear(); - m_ProcessedTilesIndex.clear(); - } - - ///////////// Determine if some tiles are already into our cache ////////////// - // For each Tiles - for (int k = 0; k < (nbTiles_Lines*nbTiles_Col); k++) - { - ImageOutIndexType index_current; - int id_L = k/nbTiles_Col; // quotient of Euclidean division (integer division) - int id_C = k - id_L*nbTiles_Col; // remainder of Euclidean division - index_current[1] = IndL_FirstTiles + id_L*m_SizeTiles; - index_current[0] = IndC_FirstTiles + id_C*m_SizeTiles; - - TilesIsIntoCache[k] = (std::find(m_ProcessedTilesIndex.begin(), m_ProcessedTilesIndex.end(), - index_current) != m_ProcessedTilesIndex.end()); - } - } - // Call Function initialization m_FunctionOnTiles->Initialize(nbThreads); + m_NbTiles = nbTiles_Lines*nbTiles_Col; + m_NbTilesCol = nbTiles_Col; + m_IndLFirstTile = IndL_FirstTiles; + m_IndCFirstTile = IndC_FirstTiles; + ///////////////////////// Processing /////////////////////// -#pragma omp parallel for schedule(dynamic) - // For each Tiles - for (int k = 0; k < (nbTiles_Lines*nbTiles_Col); k++) - { - // Transform Tiles id into index - ImageOutIndexType index_current; - int id_L = k/nbTiles_Col; // quotient of Euclidean division (integer division) - int id_C = k - id_L*nbTiles_Col; // remainder of Euclidean division - index_current[1] = IndL_FirstTiles + id_L*m_SizeTiles; - index_current[0] = IndC_FirstTiles + id_C*m_SizeTiles; + // Set up the multithreaded processing + ThreadStruct str; + str.Filter = this; + + // Setting up multithreader + this->GetMultiThreader()->SetNumberOfThreads(this->GetNumberOfThreads()); + this->GetMultiThreader()->SetSingleMethod(this->ThreaderCallback, &str); + + // multithread the execution + this->GetMultiThreader()->SingleMethodExecute(); + + ///////////// Post Processing //////////// + // Call Function terminate + m_FunctionOnTiles->Terminate(); + } + + + template<class TImageIn, class TImageOut, class TFunction> + void + TilesAnalysisImageFilter< TImageIn, TImageOut, TFunction > + ::ThreadedGenerateData(unsigned int startTileId, unsigned int stopTileId, + unsigned int firstTileIndexL, unsigned int firstTileIndexC, + itk::ThreadIdType threadId) + { + ImageInPointer inputPtr = const_cast< ImageInType * >( this->GetImageInput() ); + + // Typedef for Iterators on selected region + typedef itk::ImageRegionIterator<ImageOutType> OutItType; + + for (int k = startTileId; k < stopTileId; k++) //<= ?????? + { + // Transform Tiles id into index + ImageOutIndexType index_current; + int id_L = k/m_NbTilesCol; // quotient of Euclidean division (integer division) + int id_C = k - id_L*m_NbTilesCol; // remainder of Euclidean division + index_current[1] = firstTileIndexL + id_L*m_SizeTiles; + index_current[0] = firstTileIndexC + id_C*m_SizeTiles; - // Construct the current tile - ImageOutIndexType currentOutTilesIndex; - ImageOutSizeType currentOutTilesSize; + // Construct the current tile + ImageOutIndexType currentOutTilesIndex; + ImageOutSizeType currentOutTilesSize; - // Create regions with index and the given size - // Output tiles - currentOutTilesIndex[0] = index_current[0]; - currentOutTilesIndex[1] = index_current[1]; - currentOutTilesSize[0] = m_SizeTiles; - currentOutTilesSize[1] = m_SizeTiles; - ImageOutRegionType outputCurrentRegion; - outputCurrentRegion.SetIndex(currentOutTilesIndex); - outputCurrentRegion.SetSize(currentOutTilesSize); + // Create regions with index and the given size + // Output tiles + currentOutTilesIndex[0] = index_current[0]; + currentOutTilesIndex[1] = index_current[1]; + currentOutTilesSize[0] = m_SizeTiles; + currentOutTilesSize[1] = m_SizeTiles; + ImageOutRegionType outputCurrentRegion; + outputCurrentRegion.SetIndex(currentOutTilesIndex); + outputCurrentRegion.SetSize(currentOutTilesSize); - outputCurrentRegion.Crop(this->GetOutput()->GetLargestPossibleRegion()); - - // If Tile is not into cache or m_UseCache = false : process the current tile - if (!TilesIsIntoCache[k] || !m_UseCache) - { - ImageOutPixelType * TilesCache = new ImageOutPixelType[outputCurrentRegion.GetSize()[1]* - outputCurrentRegion.GetSize()[0]]; - - // Call Function Process : - // Inputs : Ptr Image In and the output tile - // Output : Ptr on result array - m_FunctionOnTiles->Process(inputPtr, outputCurrentRegion, TilesCache); + outputCurrentRegion.Crop(this->GetOutput()->GetLargestPossibleRegion()); + + // Process the current tile + ImageOutPixelType * TilesCache = new ImageOutPixelType[outputCurrentRegion.GetSize()[1]* + outputCurrentRegion.GetSize()[0]]; + + // Call Function Process : + // Inputs : Ptr Image In and the output tile + // Output : Ptr on result array + m_FunctionOnTiles->Process(inputPtr, outputCurrentRegion, TilesCache, threadId); + + int compteur = 0; + + // Assignate Output with function result + // Iterators on output + OutItType OutIt(this->GetOutput(), outputCurrentRegion); + OutIt.GoToBegin(); + while (!OutIt.IsAtEnd()) + { + OutIt.Set(TilesCache[compteur]); + ++compteur; + ++OutIt; + } + + + delete [] TilesCache; + TilesCache = 0; + + } + } + + + // Callback routine used by the threading library. This routine just calls + // the ThreadedGenerateData method after setting the correct region for this + // thread. + template<class TImageIn, class TImageOut, class TFunction> + ITK_THREAD_RETURN_TYPE + TilesAnalysisImageFilter< TImageIn, TImageOut, TFunction > + ::ThreaderCallback(void *arg) + { + ThreadStruct *str; + int threadId, threadCount; + unsigned int total, start, stop, indLFirstTile, indCFirstTile; - int compteur = 0; + threadId = ((itk::MultiThreader::ThreadInfoStruct *) (arg))->ThreadID; + threadCount = ((itk::MultiThreader::ThreadInfoStruct *) (arg))->NumberOfThreads; + str = (ThreadStruct *) (((itk::MultiThreader::ThreadInfoStruct *) (arg))->UserData); - // Assignate Output with function result - // Iterators on output - OutItType OutIt(this->GetOutput(), outputCurrentRegion); - OutIt.GoToBegin(); - while (!OutIt.IsAtEnd()) - { - OutIt.Set(TilesCache[compteur]); - ++compteur; - ++OutIt; - } + total = str->Filter->GetNbTiles(); + indLFirstTile = str->Filter->GetIndLFirstTile(); + indCFirstTile = str->Filter->GetIndCFirstTile(); - if (m_UseCache) - { - // Store elts into our two caches : m_ProcessedTiles and m_ProcessedTilesIndex (monothread) -#pragma omp critical - { - int ind = index_current[1] * outImage_SizeL + index_current[0]; - m_ProcessedTiles[ind] = TilesCache; - m_ProcessedTilesIndex.push_back(index_current); - } - } - else - { - delete [] TilesCache; - TilesCache = 0; - } - } - // Else use the cache - else + if (threadId < static_cast<int>(total)) + { + start = + static_cast<unsigned int>(threadId * + std::ceil(static_cast<double>(total)/(static_cast<double>(threadCount)))); + stop = + static_cast<unsigned int>((threadId+1) * + std::ceil(static_cast<double>(total)/(static_cast<double>(threadCount)))); + + if (stop > total) stop = total; + + // For very small graphs it might occur that start = stop. In this + // case the vertex at that index will be processed in the next strip. + if (start != stop) { - OutItType OutIt(this->GetOutput(), outputCurrentRegion); - OutIt.GoToBegin(); - - int ind = index_current[1] * outImage_SizeL + index_current[0]; - ImageOutPixelType * TilesCache = m_ProcessedTiles[ind]; - - int compteur = 0; - - while (!OutIt.IsAtEnd()) - { - OutIt.Set(TilesCache[compteur]); - - ++compteur; - ++OutIt; - } + str->Filter->ThreadedGenerateData(start, stop, indLFirstTile, indCFirstTile, threadId); } } - - delete [] TilesIsIntoCache; - - ///////////// Post Processing //////////// - // Call Function terminate - m_FunctionOnTiles->Terminate(); + // else + // { + // otherwise don't use this thread. Sometimes the threads don't + // break up very well and it is just as efficient to leave a + // few threads idle. + // } + + return ITK_THREAD_RETURN_VALUE; } diff --git a/include/otbTilesExampleFunctor.h b/include/otbTilesExampleFunctor.h index c138ac5..4c9f7fe 100644 --- a/include/otbTilesExampleFunctor.h +++ b/include/otbTilesExampleFunctor.h @@ -110,7 +110,8 @@ public: void Terminate() {} // Process - void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult) + void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult, + int threadId=0) { // Transpose outputRegion (current tile) into input image // Input (= output now) -- GitLab