From 27fe49efe15f29a1c2713a8a8f319b4c490c6b11 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Ga=C3=ABlle=20USSEGLIO?= <gaelle.usseglio@cnes.fr>
Date: Thu, 28 Feb 2019 10:02:56 +0000
Subject: [PATCH] ENH : Replace OpemMP // by itk::Multithreader into
 SARCoRegistration

---
 CMakeLists.txt                             |   8 -
 include/otbSARTilesCoregistrationFunctor.h |   6 +-
 include/otbTilesAnalysisImageFilter.h      |  29 ++-
 include/otbTilesAnalysisImageFilter.txx    | 272 ++++++++++-----------
 include/otbTilesExampleFunctor.h           |   3 +-
 5 files changed, 164 insertions(+), 154 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 3bf3a64..8b9ad5f 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -5,17 +5,9 @@ if(NOT OTB_SOURCE_DIR)
     find_package(OTB REQUIRED)
     list(APPEND CMAKE_MODULE_PATH ${OTB_CMAKE_DIR})
     include(${OTB_USE_FILE})
-    if(NOT OTB_USE_OPENMP)
-      find_package(OpenMP REQUIRED)
-      set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
-      set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
-    endif()
     include(OTBModuleExternal)
 else()
     otb_module_impl()
-    if(NOT OTB_USE_OPENMP)
-      message(FATAL_ERROR "Diapotb remote module needs OpenMP, compile OTB with OTB_USE_OPENMP=ON")
-    endif()
 endif()
 
 # Execute and Copy during installation
diff --git a/include/otbSARTilesCoregistrationFunctor.h b/include/otbSARTilesCoregistrationFunctor.h
index 9afc034..1ceceb5 100644
--- a/include/otbSARTilesCoregistrationFunctor.h
+++ b/include/otbSARTilesCoregistrationFunctor.h
@@ -285,11 +285,9 @@ public:
   /**
    * Method Process
    */
-  void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult)
+  void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult, 
+	       int threadId=0)
   {    
-    // Get current thread id
-    int threadId = omp_get_thread_num();
-
     // Chekc if the output Tile is cut 
     unsigned int lastInd = outputRegion.GetSize()[0]*outputRegion.GetSize()[1];
     if (lastInd < (m_SizeTiles*m_SizeTiles))
diff --git a/include/otbTilesAnalysisImageFilter.h b/include/otbTilesAnalysisImageFilter.h
index 38071fd..544a6ee 100644
--- a/include/otbTilesAnalysisImageFilter.h
+++ b/include/otbTilesAnalysisImageFilter.h
@@ -129,7 +129,10 @@ public:
   itkGetMacro(SizeTiles, unsigned int);
   itkGetMacro(Margin, unsigned int);
   itkGetMacro(UseMasterGeo, bool);
-    
+  itkGetMacro(NbTiles, unsigned int);
+  itkGetMacro(IndLFirstTile, unsigned int);
+  itkGetMacro(IndCFirstTile, unsigned int);
+
   // Setter/Getter for inputs (image and potentially grid) 
   /** Connect one of the operands for registration */
   void SetGridInput( const GridType* image);
@@ -183,6 +186,22 @@ protected:
    */
   GridRegionType OutputRegionToInputGridRegion(const ImageOutRegionType& outputRegion) const;
 
+  /** startIndex and stopIndex represent the indices of tiles to process in thread threadId and index for the
+      first tile */
+  virtual void ThreadedGenerateData(unsigned int startTileId, unsigned int stopTileId, 
+				    unsigned int firstTileIndexL, unsigned int firstTileIndexC,
+				    itk::ThreadIdType threadId);
+
+ /** Static function used as a "callback" by the MultiThreader.  The threading
+  * library will call this routine for each thread, which will delegate the
+  * control to ThreadedGenerateData(). */
+  static ITK_THREAD_RETURN_TYPE ThreaderCallback(void *arg);
+
+  /** Internal structure used for passing image data into the threading library */
+  struct ThreadStruct
+  {
+    Pointer Filter;
+  };
 
   /** 
    * \sa ImageToImageFilter::ThreadedGenerateData(),
@@ -229,6 +248,14 @@ protected:
 
   // Step (SLC or SAR geo) for input deformation grid
   GridSizeType m_GridStep;
+
+  // Nb Tiles for each Pipeline (each call to GenerateData)
+  unsigned int m_NbTiles;
+  unsigned int m_NbTilesCol;
+
+  // Index for the first tile of each Pipeline (each call to GenerateData)
+  unsigned int m_IndLFirstTile;
+  unsigned int m_IndCFirstTile;
 };
 
 } // End namespace otb
diff --git a/include/otbTilesAnalysisImageFilter.txx b/include/otbTilesAnalysisImageFilter.txx
index 4bcc6c4..5980b1a 100644
--- a/include/otbTilesAnalysisImageFilter.txx
+++ b/include/otbTilesAnalysisImageFilter.txx
@@ -31,7 +31,6 @@
 
 #include <cmath>
 #include <algorithm>
-#include <omp.h>
 
 namespace otb
 {
@@ -41,7 +40,8 @@ namespace otb
   template <class TImageIn, class TImageOut, class TFunction> 
   TilesAnalysisImageFilter< TImageIn, TImageOut, TFunction >::TilesAnalysisImageFilter()
     :  m_SizeTiles(50), m_Margin(7), m_UseCache(false), m_MaxMemoryCacheInMB(0), m_MaxShiftInRange(0.), 
-       m_MaxShiftInAzimut(0.), m_UseMasterGeo(false)
+       m_MaxShiftInAzimut(0.), m_UseMasterGeo(false), m_NbTiles(0), m_IndLFirstTile(0), m_IndCFirstTile(0),
+       m_NbTilesCol(0)
   {
     m_ProcessedTilesIndex.clear();
     m_ProcessedTiles.clear();
@@ -428,7 +428,7 @@ namespace otb
 
 
   /**
-   * Method ThreadedGenerateData
+   * Method GenerateData
    */
   template<class TImageIn, class TImageOut, class TFunction>
   void
@@ -439,20 +439,10 @@ namespace otb
     this->AllocateOutputs();
 
     int nbThreads = this->GetNumberOfThreads();
-    omp_set_num_threads(nbThreads);
-
-    ImageInPointer  inputPtr = const_cast< ImageInType * >( this->GetImageInput() );
 	
-    // Get Output and Input Requested region
-    ImageInRegionType inputRegion = this->GetImageInput()->GetRequestedRegion();
+    // Get Output Requested region
     ImageOutRegionType outputRegion = this->GetOutput()->GetRequestedRegion();
-
-    // Typedef for Iterators on selected region
-    typedef itk::ImageRegionIterator<ImageOutType> OutItType;
-        
-    // Size of the whole output image
-    int outImage_SizeL = this->GetOutput()->GetLargestPossibleRegion().GetSize()[1];
-    
+            
     // Define the number of tiles for processing
     int nbTiles_Lines = outputRegion.GetSize()[1]/m_SizeTiles; 
     int nbTiles_Col = outputRegion.GetSize()[0]/m_SizeTiles;
@@ -470,145 +460,147 @@ namespace otb
     // First Tile = index of output requested region
     int IndL_FirstTiles = outputRegion.GetIndex()[1];
     int IndC_FirstTiles = outputRegion.GetIndex()[0];
-
-    
+     
     ///////////////////////// Pre Processing ///////////////////////
-    bool *TilesIsIntoCache = new bool[nbTiles_Lines*nbTiles_Col];
-    if (m_UseCache)
-      {
-	///////////// Clear the map of Processed Tiles if cache used and maxMemory reached //////////////
-	// Transform maxMemory into elts
-	long unsigned int nbTilesMax = (1024*1024*m_MaxMemoryCacheInMB)/(sizeof(ImageOutPixelType)*m_SizeTiles*m_SizeTiles);
-	typedef typename std::map<int, ImageOutPixelType *>::iterator mapIt ;
-	// Erase all elts
-	if (m_ProcessedTiles.size() > nbTilesMax)
-	  {
-	    mapIt mapPT = m_ProcessedTiles.begin();
-	
-	    // Free Memory into m_ProcessedTiles 
-	    for (long unsigned int k = 0; k <m_ProcessedTiles.size(); k++)
-	      {
-		delete mapPT->second;
-		mapPT->second = 0;
-		++mapPT;
-	      }
-	    // Erase elts
-	    m_ProcessedTiles.clear();
-	    m_ProcessedTilesIndex.clear();
-	  }
-
-	///////////// Determine if some tiles are already into our cache //////////////
-	// For each Tiles
-	for (int k = 0; k < (nbTiles_Lines*nbTiles_Col); k++) 
-	  {
-	    ImageOutIndexType index_current;
-	    int id_L = k/nbTiles_Col;        // quotient of Euclidean division (integer division)
-	    int id_C = k - id_L*nbTiles_Col; // remainder of Euclidean division
-	    index_current[1] = IndL_FirstTiles + id_L*m_SizeTiles;
-	    index_current[0] = IndC_FirstTiles + id_C*m_SizeTiles;
-
-	    TilesIsIntoCache[k] = (std::find(m_ProcessedTilesIndex.begin(), m_ProcessedTilesIndex.end(), 
-					 index_current) != m_ProcessedTilesIndex.end());
-	  }
-      }
-
     // Call Function initialization
     m_FunctionOnTiles->Initialize(nbThreads);
 
+    m_NbTiles = nbTiles_Lines*nbTiles_Col;
+    m_NbTilesCol = nbTiles_Col;
+    m_IndLFirstTile = IndL_FirstTiles;
+    m_IndCFirstTile = IndC_FirstTiles;
+
     ///////////////////////// Processing ///////////////////////
-#pragma omp parallel for schedule(dynamic)
-    // For each Tiles
-    for (int k = 0; k < (nbTiles_Lines*nbTiles_Col); k++) 
-      {
-	// Transform Tiles id into index
-	ImageOutIndexType index_current;
-	int id_L = k/nbTiles_Col;        // quotient of Euclidean division (integer division)
-	int id_C = k - id_L*nbTiles_Col; // remainder of Euclidean division
-	index_current[1] = IndL_FirstTiles + id_L*m_SizeTiles;
-	index_current[0] = IndC_FirstTiles + id_C*m_SizeTiles;
+    // Set up the multithreaded processing
+    ThreadStruct str;
+    str.Filter = this;
+
+    // Setting up multithreader
+    this->GetMultiThreader()->SetNumberOfThreads(this->GetNumberOfThreads());
+    this->GetMultiThreader()->SetSingleMethod(this->ThreaderCallback, &str);
+
+    // multithread the execution
+    this->GetMultiThreader()->SingleMethodExecute();
+    
+    ///////////// Post Processing ////////////
+    // Call Function terminate
+    m_FunctionOnTiles->Terminate();
+  }
+
+
+ template<class TImageIn, class TImageOut, class TFunction>
+  void
+  TilesAnalysisImageFilter< TImageIn, TImageOut, TFunction >
+ ::ThreadedGenerateData(unsigned int startTileId, unsigned int stopTileId, 
+			unsigned int firstTileIndexL, unsigned int firstTileIndexC, 
+			itk::ThreadIdType threadId)
+ {
+   ImageInPointer  inputPtr = const_cast< ImageInType * >( this->GetImageInput() );
+
+   // Typedef for Iterators on selected region
+   typedef itk::ImageRegionIterator<ImageOutType> OutItType;
+
+   for (int k = startTileId; k < stopTileId; k++)  //<= ??????
+     {
+       // Transform Tiles id into index
+       ImageOutIndexType index_current;
+       int id_L = k/m_NbTilesCol;        // quotient of Euclidean division (integer division)
+       int id_C = k - id_L*m_NbTilesCol; // remainder of Euclidean division
+       index_current[1] = firstTileIndexL + id_L*m_SizeTiles;
+       index_current[0] = firstTileIndexC + id_C*m_SizeTiles;
 	    
-	// Construct the current tile
-	ImageOutIndexType currentOutTilesIndex;
-	ImageOutSizeType currentOutTilesSize;
+       // Construct the current tile
+       ImageOutIndexType currentOutTilesIndex;
+       ImageOutSizeType currentOutTilesSize;
 	    
-	// Create regions with index and the given size
-	// Output tiles
-	currentOutTilesIndex[0] = index_current[0];
-	currentOutTilesIndex[1] = index_current[1];
-	currentOutTilesSize[0] = m_SizeTiles;
-	currentOutTilesSize[1] = m_SizeTiles;
-	ImageOutRegionType outputCurrentRegion;
-	outputCurrentRegion.SetIndex(currentOutTilesIndex);
-	outputCurrentRegion.SetSize(currentOutTilesSize);
+       // Create regions with index and the given size
+       // Output tiles
+       currentOutTilesIndex[0] = index_current[0];
+       currentOutTilesIndex[1] = index_current[1];
+       currentOutTilesSize[0] = m_SizeTiles;
+       currentOutTilesSize[1] = m_SizeTiles;
+       ImageOutRegionType outputCurrentRegion;
+       outputCurrentRegion.SetIndex(currentOutTilesIndex);
+       outputCurrentRegion.SetSize(currentOutTilesSize);
 	
-	outputCurrentRegion.Crop(this->GetOutput()->GetLargestPossibleRegion());		
-	
-	// If Tile is not into cache or m_UseCache = false : process the current tile
-	if (!TilesIsIntoCache[k] || !m_UseCache)
-	  {
-	    ImageOutPixelType * TilesCache = new ImageOutPixelType[outputCurrentRegion.GetSize()[1]*
-								   outputCurrentRegion.GetSize()[0]];
-
-	    // Call Function Process : 
-	    // Inputs : Ptr Image In and the output tile
-	    // Output : Ptr on result array
-	    m_FunctionOnTiles->Process(inputPtr, outputCurrentRegion, TilesCache);
+       outputCurrentRegion.Crop(this->GetOutput()->GetLargestPossibleRegion());		
+
+       // Process the current tile
+       ImageOutPixelType * TilesCache = new ImageOutPixelType[outputCurrentRegion.GetSize()[1]*
+							      outputCurrentRegion.GetSize()[0]];
+
+       // Call Function Process : 
+       // Inputs : Ptr Image In and the output tile
+       // Output : Ptr on result array
+       m_FunctionOnTiles->Process(inputPtr, outputCurrentRegion, TilesCache, threadId);
+
+       int compteur = 0;
+
+       // Assignate Output with function result
+       // Iterators on output
+       OutItType OutIt(this->GetOutput(), outputCurrentRegion);
+       OutIt.GoToBegin();
+       while (!OutIt.IsAtEnd())
+	 {
+	   OutIt.Set(TilesCache[compteur]);
+	   ++compteur; 
+	   ++OutIt;
+	 }
+
+	   
+       delete [] TilesCache;
+       TilesCache = 0;
+	   
+     }
+ }
+
+
+  // Callback routine used by the threading library. This routine just calls
+  // the ThreadedGenerateData method after setting the correct region for this
+  // thread.
+  template<class TImageIn, class TImageOut, class TFunction>
+  ITK_THREAD_RETURN_TYPE
+  TilesAnalysisImageFilter< TImageIn, TImageOut, TFunction >
+  ::ThreaderCallback(void *arg)
+  {
+    ThreadStruct *str;
+    int           threadId, threadCount;
+    unsigned int  total, start, stop, indLFirstTile, indCFirstTile;
 
-	    int compteur = 0;
+    threadId = ((itk::MultiThreader::ThreadInfoStruct *) (arg))->ThreadID;
+    threadCount = ((itk::MultiThreader::ThreadInfoStruct *) (arg))->NumberOfThreads;
+    str = (ThreadStruct *) (((itk::MultiThreader::ThreadInfoStruct *) (arg))->UserData);
 
-	    // Assignate Output with function result
-	    // Iterators on output
-	    OutItType OutIt(this->GetOutput(), outputCurrentRegion);
-	    OutIt.GoToBegin();
-	    while (!OutIt.IsAtEnd())
-	      {
-		OutIt.Set(TilesCache[compteur]);
-		++compteur; 
-		++OutIt;
-	      }
+    total = str->Filter->GetNbTiles();
+    indLFirstTile = str->Filter->GetIndLFirstTile();
+    indCFirstTile = str->Filter->GetIndCFirstTile();
 
-	    if (m_UseCache)
-	      {
-		// Store elts into our two caches : m_ProcessedTiles and m_ProcessedTilesIndex (monothread)
-#pragma omp critical
-		{
-		  int ind = index_current[1] * outImage_SizeL + index_current[0];
-		  m_ProcessedTiles[ind] = TilesCache;
-		  m_ProcessedTilesIndex.push_back(index_current);
-		}
-	      }
-	    else
-	      {
-		delete [] TilesCache;
-		TilesCache = 0;
-	      }
-	  }
-	// Else use the cache
-	else
+    if (threadId < static_cast<int>(total))
+      {
+	start =
+	  static_cast<unsigned int>(threadId * 
+				    std::ceil(static_cast<double>(total)/(static_cast<double>(threadCount))));
+	stop =
+	  static_cast<unsigned int>((threadId+1) * 
+				    std::ceil(static_cast<double>(total)/(static_cast<double>(threadCount))));
+
+	if (stop > total) stop = total;
+
+	// For very small graphs it might occur that start = stop. In this
+	// case the vertex at that index will be processed in the next strip.
+	if (start != stop)
 	  {
-	    OutItType OutIt(this->GetOutput(), outputCurrentRegion);
-	    OutIt.GoToBegin();
-		
-	    int ind = index_current[1] * outImage_SizeL + index_current[0];
-	    ImageOutPixelType * TilesCache = m_ProcessedTiles[ind];
-
-	    int compteur = 0;
-
-	    while (!OutIt.IsAtEnd())
-	      {
-		OutIt.Set(TilesCache[compteur]);
-		   		   
-		++compteur; 
-		++OutIt;
-	      }
+	    str->Filter->ThreadedGenerateData(start, stop, indLFirstTile, indCFirstTile, threadId);
 	  }
       }
-
-    delete [] TilesIsIntoCache;
-
-    ///////////// Post Processing ////////////
-    // Call Function terminate
-    m_FunctionOnTiles->Terminate();
+    // else
+    //   {
+    //   otherwise don't use this thread. Sometimes the threads don't
+    //   break up very well and it is just as efficient to leave a
+    //   few threads idle.
+    //   }
+
+    return ITK_THREAD_RETURN_VALUE;
   }
 
 
diff --git a/include/otbTilesExampleFunctor.h b/include/otbTilesExampleFunctor.h
index c138ac5..4c9f7fe 100644
--- a/include/otbTilesExampleFunctor.h
+++ b/include/otbTilesExampleFunctor.h
@@ -110,7 +110,8 @@ public:
   void Terminate() {}
 
   // Process
-  void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult)
+  void Process(ImageInPointer inputPtr, ImageOutRegionType outputRegion, ImageOutPixelType * TilesResult, 
+	       int threadId=0)
   {
     // Transpose outputRegion (current tile) into input image
     // Input (= output now)
-- 
GitLab