From c31a797fedb537605c8fb392fb7098a0fb66ac4b Mon Sep 17 00:00:00 2001
From: Julien Malik <julien.malik@c-s.fr>
Date: Mon, 14 Feb 2011 13:49:38 +0100
Subject: [PATCH] ADD: add a GPU implementation of
 StreamingStatisticsVectorImageFilter based on CuBLAS

---
 CMakeLists.txt                                |  13 +-
 Code/BasicFilters/CMakeLists.txt              |   1 +
 Code/BasicFilters/GPUCuda/CMakeLists.txt      |  13 +
 Code/BasicFilters/GPUCuda/foo.cxx             |   0
 ...blasStreamingStatisticsVectorImageFilter.h | 283 +++++++++++++++++
 ...asStreamingStatisticsVectorImageFilter.txx | 296 ++++++++++++++++++
 Code/CMakeLists.txt                           |   1 +
 Testing/CMakeLists.txt                        |  68 ++--
 ...asStreamingStatisticsVectorImageFilter.cxx |  64 ++++
 Testing/otbHyperCublasTests1.cxx              |  29 ++
 10 files changed, 746 insertions(+), 22 deletions(-)
 create mode 100644 Code/BasicFilters/GPUCuda/CMakeLists.txt
 create mode 100644 Code/BasicFilters/GPUCuda/foo.cxx
 create mode 100644 Code/BasicFilters/GPUCuda/otbCublasStreamingStatisticsVectorImageFilter.h
 create mode 100644 Code/BasicFilters/GPUCuda/otbCublasStreamingStatisticsVectorImageFilter.txx
 create mode 100644 Testing/otbCublasStreamingStatisticsVectorImageFilter.cxx
 create mode 100644 Testing/otbHyperCublasTests1.cxx

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 6689a36b95..8fac526c04 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -11,6 +11,8 @@ ENDIF(NOT EXECUTABLE_OUTPUT_PATH)
 
 INCLUDE_REGULAR_EXPRESSION("^.*$")
 
+OPTION(BUILD_SHARED_LIBS "Build OTB with shared libraries." ON)
+
 # Find OTB and load its settings.
 FIND_PACKAGE(OTB)
   IF(OTB_FOUND)
@@ -19,8 +21,16 @@ FIND_PACKAGE(OTB)
     MESSAGE(FATAL_ERROR "OTB not found.  Please set OTB_DIR")
 ENDIF(OTB_FOUND)
 
+OPTION(OTB_USE_CUBLAS "Use Cuda CUBLAS library." ON)
+IF(OTB_USE_CUBLAS)
+  FIND_PACKAGE(CUDA)
+  include_directories(${CUDA_INCLUDE_DIRS})
+ENDIF(OTB_USE_CUBLAS)
+
 INCLUDE_DIRECTORIES(
 ${Hyperspectral_SOURCE_DIR}/Code/BasicFilters
+${Hyperspectral_SOURCE_DIR}/Code/BasicFilters/GPUCuda
+${Hyperspectral_SOURCE_DIR}/Code/CudaMath
 ${Hyperspectral_SOURCE_DIR}/Code/Hyperspectral
 ${Hyperspectral_SOURCE_DIR}/Code/Vahine
 ${Hyperspectral_SOURCE_DIR}/Code/RX
@@ -31,6 +41,3 @@ ADD_SUBDIRECTORY(Application)
 
 ENABLE_TESTING()
 ADD_SUBDIRECTORY(Testing)
-
-
-
diff --git a/Code/BasicFilters/CMakeLists.txt b/Code/BasicFilters/CMakeLists.txt
index e69de29bb2..5429779534 100644
--- a/Code/BasicFilters/CMakeLists.txt
+++ b/Code/BasicFilters/CMakeLists.txt
@@ -0,0 +1 @@
+ADD_SUBDIRECTORY(GPUCuda)
diff --git a/Code/BasicFilters/GPUCuda/CMakeLists.txt b/Code/BasicFilters/GPUCuda/CMakeLists.txt
new file mode 100644
index 0000000000..0c8e46cc76
--- /dev/null
+++ b/Code/BasicFilters/GPUCuda/CMakeLists.txt
@@ -0,0 +1,13 @@
+# Sources of non-templated classes.
+FILE(GLOB OTBBasicFiltersGPUCuda_SRCS "*.cxx" )
+
+ADD_LIBRARY(OTBBasicFiltersGPUCuda ${OTBBasicFiltersGPUCuda_SRCS})
+TARGET_LINK_LIBRARIES (OTBBasicFiltersGPUCuda OTBCommon OTBBasicFilters ITKBasicFilters )
+
+IF(OTB_LIBRARY_PROPERTIES)
+  SET_TARGET_PROPERTIES(OTBBasicFiltersGPUCuda PROPERTIES ${OTB_LIBRARY_PROPERTIES})
+ENDIF(OTB_LIBRARY_PROPERTIES)
+
+IF(OTB_USE_CUBLAS)
+  CUDA_ADD_CUBLAS_TO_TARGET( OTBBasicFiltersGPUCuda )
+ENDIF(OTB_USE_CUBLAS)
diff --git a/Code/BasicFilters/GPUCuda/foo.cxx b/Code/BasicFilters/GPUCuda/foo.cxx
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/Code/BasicFilters/GPUCuda/otbCublasStreamingStatisticsVectorImageFilter.h b/Code/BasicFilters/GPUCuda/otbCublasStreamingStatisticsVectorImageFilter.h
new file mode 100644
index 0000000000..a791bf623a
--- /dev/null
+++ b/Code/BasicFilters/GPUCuda/otbCublasStreamingStatisticsVectorImageFilter.h
@@ -0,0 +1,283 @@
+/*=========================================================================
+
+  Program:   ORFEO Toolbox
+  Language:  C++
+  Date:      $Date$
+  Version:   $Revision$
+
+
+  Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+  See OTBCopyright.txt for details.
+
+  Some parts of this code are derived from ITK. See ITKCopyright.txt
+  for details.
+
+
+     This software is distributed WITHOUT ANY WARRANTY; without even
+     the implied warranty of MERCHANT2ABILITY or FITNESS FOR A PARTICULAR
+     PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#ifndef __otbCublasStreamingStatisticsVectorImageFilter_h
+#define __otbCublasStreamingStatisticsVectorImageFilter_h
+
+#include "otbPersistentImageFilter.h"
+#include "otbPersistentFilterStreamingDecorator.h"
+#include "itkNumericTraits.h"
+#include "itkArray.h"
+#include "itkSimpleDataObjectDecorator.h"
+#include "itkImageRegionSplitter.h"
+#include "itkVariableSizeMatrix.h"
+#include "itkVariableLengthVector.h"
+
+#include "cublas.h"
+
+namespace otb
+{
+
+/** \class PersistentCublasStreamingStatisticsVectorImageFilter
+ * \brief Compute mean, covariance & correlation of a large image using streaming
+ *
+ *  This filter persists its temporary data. It means that if you Update it n times on n different
+ * requested regions, the output statistics will be the statitics of the whole set of n regions.
+ *
+ * To reset the temporary data, one should call the Reset() function.
+ *
+ * To get the statistics once the regions have been processed via the pipeline, use the Synthetize() method.
+ *
+ * \sa PersistentImageFilter
+ * \ingroup Streamed
+ * \ingroup Multithreaded
+ * \ingroup MathematicalStatisticsImageFilters
+ *
+ */
+template<class TInputImage>
+class ITK_EXPORT PersistentCublasStreamingStatisticsVectorImageFilter :
+  public PersistentImageFilter<TInputImage, TInputImage>
+{
+public:
+  /** Standard Self typedef */
+  typedef PersistentCublasStreamingStatisticsVectorImageFilter           Self;
+  typedef PersistentImageFilter<TInputImage, TInputImage> Superclass;
+  typedef itk::SmartPointer<Self>                         Pointer;
+  typedef itk::SmartPointer<const Self>                   ConstPointer;
+
+  /** Method for creation through the object factory. */
+  itkNewMacro(Self);
+
+  /** Runtime information support. */
+  itkTypeMacro(PersistentCublasStreamingStatisticsVectorImageFilter, PersistentImageFilter);
+
+  /** Image related typedefs. */
+  typedef TInputImage                             ImageType;
+  typedef typename TInputImage::Pointer           InputImagePointer;
+  typedef typename TInputImage::RegionType        RegionType;
+  typedef typename TInputImage::SizeType          SizeType;
+  typedef typename TInputImage::IndexType         IndexType;
+  typedef typename TInputImage::PixelType         PixelType;
+  typedef typename TInputImage::InternalPixelType InternalPixelType;
+
+  itkStaticConstMacro(InputImageDimension, unsigned int, TInputImage::ImageDimension);
+
+  /** Image related typedefs. */
+  itkStaticConstMacro(ImageDimension, unsigned int, TInputImage::ImageDimension);
+
+  /** Type to use for computations. */
+  //typedef typename itk::NumericTraits<InternalPixelType>::RealType RealType;
+  typedef float RealType;
+  typedef itk::VariableLengthVector<RealType>                      RealPixelType;
+
+  /** Smart Pointer type to a DataObject. */
+  typedef typename itk::DataObject::Pointer DataObjectPointer;
+
+  /** Type of DataObjects used for scalar outputs */
+  typedef itk::VariableSizeMatrix<RealType>    MatrixType;
+  typedef std::vector<MatrixType>              ArrayMatrixType;
+  typedef itk::Array<long>                     ArrayLongPixelType;
+  typedef std::vector<RealPixelType>           ArrayRealPixelType;
+  typedef std::vector<PixelType>               ArrayPixelType;
+  typedef itk::SimpleDataObjectDecorator<RealPixelType> RealPixelObjectType;
+  typedef itk::SimpleDataObjectDecorator<PixelType>     PixelObjectType;
+  typedef itk::SimpleDataObjectDecorator<MatrixType>    MatrixObjectType;
+
+  /** Return the computed Mean. */
+  RealPixelType GetMean() const
+  {
+    return this->GetMeanOutput()->Get();
+  }
+  RealPixelObjectType* GetMeanOutput();
+  const RealPixelObjectType* GetMeanOutput() const;
+
+  /** Return the computed Covariance. */
+  MatrixType GetCovariance() const
+  {
+    return this->GetCovarianceOutput()->Get();
+  }
+  MatrixObjectType* GetCovarianceOutput();
+  const MatrixObjectType* GetCovarianceOutput() const;
+
+  /** Return the computed Covariance. */
+  MatrixType GetCorrelation() const
+  {
+    return this->GetCorrelation()->Get();
+  }
+  MatrixObjectType* GetCorrelationOutput();
+  const MatrixObjectType* GetCorrelationOutput() const;
+
+  /** Make a DataObject of the correct type to be used as the specified
+   * output.
+   */
+  virtual DataObjectPointer MakeOutput(unsigned int idx);
+
+  /** Pass the input through unmodified. Do this by Grafting in the
+   *  AllocateOutputs method.
+   */
+  virtual void AllocateOutputs();
+  virtual void GenerateOutputInformation();
+  virtual void Synthetize(void);
+  virtual void Reset(void);
+
+protected:
+  PersistentCublasStreamingStatisticsVectorImageFilter();
+  virtual ~PersistentCublasStreamingStatisticsVectorImageFilter();
+
+  virtual void PrintSelf(std::ostream& os, itk::Indent indent) const;
+  /** Multi-thread version GenerateData. */
+  //void  ThreadedGenerateData(const RegionType& outputRegionForThread, int threadId);
+  void  GenerateData();
+
+private:
+  PersistentCublasStreamingStatisticsVectorImageFilter(const Self &); //purposely not implemented
+  void operator =(const Self&); //purposely not implemented
+
+  RealPixelType m_FirstOrderAccumulator;
+  MatrixType    m_SecondOrderAccumulator;
+
+  float*   m_GPUImage;
+  SizeType m_GPUImageSize;
+  float*   m_GPUSecondOrderAccumulator;
+
+}; // end of class PersistentCublasStreamingStatisticsVectorImageFilter
+
+
+/** \class CublasStreamingStatisticsVectorImageFilter
+ * \brief This class streams the whole input image through the PersistentCublasStreamingStatisticsVectorImageFilter.
+ *
+ * This way, it allows to compute the first and second order global statistics of this image. It calls the
+ * Reset() method of the PersistentCublasStreamingStatisticsVectorImageFilter before streaming the image and the
+ * Synthetize() method of the PersistentCublasStreamingStatisticsVectorImageFilter after having streamed the image
+ * to compute the statistics. The accessor on the results are wrapping the accessors of the
+ * internal PersistentCublasStreamingStatisticsVectorImageFilter.
+ *
+ * \sa PersistentCublasStreamingStatisticsVectorImageFilter
+ * \sa PersistentImageFilter
+ * \sa PersistentFilterStreamingDecorator
+ * \sa StreamingImageVirtualWriter
+ * \ingroup Streamed
+ * \ingroup Multithreaded
+ * \ingroup MathematicalStatisticsImageFilters
+ */
+
+template<class TInputImage>
+class ITK_EXPORT CublasStreamingStatisticsVectorImageFilter :
+  public PersistentFilterStreamingDecorator<PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage> >
+{
+public:
+  /** Standard Self typedef */
+  typedef CublasStreamingStatisticsVectorImageFilter Self;
+  typedef PersistentFilterStreamingDecorator
+  <PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage> > Superclass;
+  typedef itk::SmartPointer<Self>       Pointer;
+  typedef itk::SmartPointer<const Self> ConstPointer;
+
+  /** Type macro */
+  itkNewMacro(Self);
+
+  /** Creation through object factory macro */
+  itkTypeMacro(CublasStreamingStatisticsVectorImageFilter, PersistentFilterStreamingDecorator);
+
+  typedef TInputImage                                 InputImageType;
+  typedef typename Superclass::FilterType             StatFilterType;
+  typedef typename StatFilterType::PixelType          PixelType;
+  typedef typename StatFilterType::RealType           RealType;
+  typedef typename StatFilterType::RealPixelType      RealPixelType;
+  typedef typename StatFilterType::MatrixType         MatrixType;
+  typedef typename StatFilterType::ArrayMatrixType    ArrayMatrixType;
+  typedef typename StatFilterType::ArrayLongPixelType ArrayLongPixelType;
+  typedef typename StatFilterType::ArrayRealPixelType ArrayRealPixelType;
+  typedef typename StatFilterType::ArrayPixelType     ArrayPixelType;
+
+  /** Type of DataObjects used for scalar outputs */
+  typedef typename StatFilterType::RealPixelObjectType RealPixelObjectType;
+  typedef typename StatFilterType::PixelObjectType     PixelObjectType;
+  typedef typename StatFilterType::MatrixObjectType    MatrixObjectType;
+
+  void SetInput(InputImageType * input)
+  {
+    this->GetFilter()->SetInput(input);
+  }
+  const InputImageType * GetInput()
+  {
+    return this->GetFilter()->GetInput();
+  }
+
+  /** Return the computed Mean. */
+  RealPixelType GetMean() const
+  {
+    return this->GetFilter()->GetMeanOutput()->Get();
+  }
+  RealPixelObjectType* GetMeanOutput()
+  {
+    return this->GetFilter()->GetMeanOutput();
+  }
+  const RealPixelObjectType* GetMeanOutput() const
+  {
+    return this->GetFilter()->GetMeanOutput();
+  }
+
+  /** Return the computed Covariance. */
+  MatrixType GetCovariance() const
+  {
+    return this->GetFilter()->GetCovarianceOutput()->Get();
+  }
+  MatrixObjectType* GetCovarianceOutput()
+  {
+    return this->GetFilter()->GetCovarianceOutput();
+  }
+  const MatrixObjectType* GetCovarianceOutput() const
+  {
+    return this->GetFilter()->GetCovarianceOutput();
+  }
+
+  /** Return the computed Covariance. */
+  MatrixType GetCorrelation() const
+  {
+    return this->GetFilter()->GetCorrelationOutput()->Get();
+  }
+  MatrixObjectType* GetCorrelationOutput()
+  {
+    return this->GetFilter()->GetCorrelationOutput();
+  }
+  const MatrixObjectType* GetCorrelationOutput() const
+  {
+    return this->GetFilter()->GetCorrelationOutput();
+  }
+
+protected:
+  /** Constructor */
+  CublasStreamingStatisticsVectorImageFilter() {};
+  /** Destructor */
+  virtual ~CublasStreamingStatisticsVectorImageFilter() {}
+
+private:
+  CublasStreamingStatisticsVectorImageFilter(const Self &); //purposely not implemented
+  void operator =(const Self&); //purposely not implemented
+};
+
+} // end namespace otb
+
+#ifndef OTB_MANUAL_INSTANTIATION
+#include "otbCublasStreamingStatisticsVectorImageFilter.txx"
+#endif
+
+#endif
diff --git a/Code/BasicFilters/GPUCuda/otbCublasStreamingStatisticsVectorImageFilter.txx b/Code/BasicFilters/GPUCuda/otbCublasStreamingStatisticsVectorImageFilter.txx
new file mode 100644
index 0000000000..4e1d82a7a5
--- /dev/null
+++ b/Code/BasicFilters/GPUCuda/otbCublasStreamingStatisticsVectorImageFilter.txx
@@ -0,0 +1,296 @@
+/*=========================================================================
+
+ Program:   ORFEO Toolbox
+ Language:  C++
+ Date:      $Date$
+ Version:   $Revision$
+
+
+ Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+ See OTBCopyright.txt for details.
+
+ Some parts of this code are derived from ITK. See ITKCopyright.txt
+ for details.
+
+
+ This software is distributed WITHOUT ANY WARRANTY; without even
+ the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+ PURPOSE.  See the above copyright notices for more information.
+
+ =========================================================================*/
+#ifndef __otbCublasStreamingStatisticsVectorImageFilter_txx
+#define __otbCublasStreamingStatisticsVectorImageFilter_txx
+
+#include "otbCublasStreamingStatisticsVectorImageFilter.h"
+
+#include "itkImageRegionIterator.h"
+#include "itkImageRegionConstIteratorWithIndex.h"
+#include "itkNumericTraits.h"
+#include "itkProgressReporter.h"
+#include "otbMacro.h"
+
+namespace otb
+{
+
+template<class TInputImage>
+PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::PersistentCublasStreamingStatisticsVectorImageFilter()
+{
+  // first output is a copy of the image, DataObject created by
+  // superclass
+  //
+  // allocate the data objects for the outputs which are
+  // just decorators around vector/matric types
+
+  this->itk::ProcessObject::SetNthOutput(1, this->MakeOutput(1).GetPointer());
+  this->itk::ProcessObject::SetNthOutput(2, this->MakeOutput(2).GetPointer());
+  this->itk::ProcessObject::SetNthOutput(3, this->MakeOutput(3).GetPointer());
+
+  cublasStatus status = cublasInit();
+  if (status != CUBLAS_STATUS_SUCCESS)
+    {
+    printf("cublasInit failed");
+    }
+
+  m_GPUImage = 0;
+  m_GPUImageSize.Fill(0);
+  m_GPUSecondOrderAccumulator = 0;
+}
+
+template<class TInputImage>
+PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::~PersistentCublasStreamingStatisticsVectorImageFilter()
+{
+  cublasStatus status = cublasShutdown();
+  if (status != CUBLAS_STATUS_SUCCESS)
+    {
+    printf("cublasShutdown failed");
+    }
+}
+
+template<class TInputImage>
+itk::DataObject::Pointer PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::MakeOutput(
+                                                                                                       unsigned int output)
+{
+  switch (output)
+    {
+    case 0:
+      return static_cast<itk::DataObject*> (TInputImage::New().GetPointer());
+      break;
+    case 1:
+      return static_cast<itk::DataObject*> (RealPixelObjectType::New().GetPointer());
+      break;
+    case 2:
+    case 3:
+      return static_cast<itk::DataObject*> (MatrixObjectType::New().GetPointer());
+      break;
+    default:
+      // might as well make an image
+      return static_cast<itk::DataObject*> (TInputImage::New().GetPointer());
+      break;
+    }
+}
+
+template<class TInputImage>
+typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::RealPixelObjectType*
+PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetMeanOutput()
+{
+  return static_cast<RealPixelObjectType*> (this->itk::ProcessObject::GetOutput(1));
+}
+
+template<class TInputImage>
+const typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::RealPixelObjectType*
+PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetMeanOutput() const
+{
+  return static_cast<const RealPixelObjectType*> (this->itk::ProcessObject::GetOutput(1));
+}
+
+template<class TInputImage>
+typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::MatrixObjectType*
+PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetCorrelationOutput()
+{
+  return static_cast<MatrixObjectType*> (this->itk::ProcessObject::GetOutput(2));
+}
+
+template<class TInputImage>
+const typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::MatrixObjectType*
+PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetCorrelationOutput() const
+{
+  return static_cast<const MatrixObjectType*> (this->itk::ProcessObject::GetOutput(2));
+}
+
+template<class TInputImage>
+typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::MatrixObjectType*
+PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetCovarianceOutput()
+{
+  return static_cast<MatrixObjectType*> (this->itk::ProcessObject::GetOutput(3));
+}
+
+template<class TInputImage>
+const typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::MatrixObjectType*
+PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetCovarianceOutput() const
+{
+  return static_cast<const MatrixObjectType*> (this->itk::ProcessObject::GetOutput(3));
+}
+
+template<class TInputImage>
+void PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GenerateOutputInformation()
+{
+  Superclass::GenerateOutputInformation();
+  if (this->GetInput())
+    {
+    this->GetOutput()->CopyInformation(this->GetInput());
+    this->GetOutput()->SetLargestPossibleRegion(this->GetInput()->GetLargestPossibleRegion());
+
+    if (this->GetOutput()->GetRequestedRegion().GetNumberOfPixels() == 0)
+      {
+      this->GetOutput()->SetRequestedRegion(this->GetOutput()->GetLargestPossibleRegion());
+      }
+    }
+}
+
+template<class TInputImage>
+void PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::AllocateOutputs()
+{
+  // This is commented to prevent the streaming of the whole image for the first stream strip
+  // It shall not cause any problem because the output image of this filter is not intended to be used.
+  //InputImagePointer image = const_cast< TInputImage * >( this->GetInput() );
+  //this->GraftOutput( image );
+  // Nothing that needs to be allocated for the remaining outputs
+}
+
+template<class TInputImage>
+void PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::Reset()
+{
+  TInputImage * inputPtr = const_cast<TInputImage *> (this->GetInput());
+  inputPtr->UpdateOutputInformation();
+
+  unsigned int numberOfComponent = inputPtr->GetNumberOfComponentsPerPixel();
+
+  RealPixelType zeroRealPixel;
+  zeroRealPixel.SetSize(numberOfComponent);
+  zeroRealPixel.Fill(itk::NumericTraits<RealType>::Zero);
+  this->GetMeanOutput()->Set(zeroRealPixel);
+
+  MatrixType zeroMatrix;
+  zeroMatrix.SetSize(numberOfComponent, numberOfComponent);
+  zeroMatrix.Fill(itk::NumericTraits<RealType>::Zero);
+  this->GetCovarianceOutput()->Set(zeroMatrix);
+  this->GetCorrelationOutput()->Set(zeroMatrix);
+
+  m_FirstOrderAccumulator = zeroRealPixel;
+  m_SecondOrderAccumulator = zeroMatrix;
+
+  if (m_GPUSecondOrderAccumulator == 0)
+    {
+    otbMsgDevMacro( << "Allocating " << numberOfComponent << " * " << numberOfComponent << " matrix on GPU (" <<
+        numberOfComponent * numberOfComponent * sizeof(float) / 1024 / 1024 << " Mo)" );
+
+    cublasStatus status;
+    status = cublasAlloc(numberOfComponent * numberOfComponent, sizeof(float), (void**) &m_GPUSecondOrderAccumulator);
+
+    // TODO : check status, throw exception on error & clean up GPU memory
+    if (status != CUBLAS_STATUS_SUCCESS)
+      {
+      otbMsgDevMacro( "cublasAlloc m_GPUSecondOrderAccumulator failed" );
+      }
+
+    status = cublasSetMatrix(numberOfComponent, numberOfComponent, sizeof(float),
+                             m_SecondOrderAccumulator.GetVnlMatrix().data_block(), numberOfComponent,
+                             m_GPUSecondOrderAccumulator, numberOfComponent);
+
+    // TODO : check status, throw exception on error & clean up GPU memory
+    if (status != CUBLAS_STATUS_SUCCESS)
+      {
+      otbMsgDevMacro( "cublasSetMatrix m_GPUSecondOrderAccumulator failed" );
+      }
+
+    }
+
+  cublasFree(m_GPUImage);
+  cublasFree(m_GPUSecondOrderAccumulator);
+}
+
+template<class TInputImage>
+void PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::Synthetize()
+{
+  TInputImage * inputPtr = const_cast<TInputImage *> (this->GetInput());
+  const unsigned int numberOfComponent = inputPtr->GetNumberOfComponentsPerPixel();
+  const unsigned int nbPixels = inputPtr->GetBufferedRegion().GetNumberOfPixels();
+
+  MatrixType cpuSecondOrderAccumulator(numberOfComponent, numberOfComponent);
+  cublasStatus status;
+  status = cublasGetMatrix(numberOfComponent, numberOfComponent, sizeof(float), m_GPUSecondOrderAccumulator,
+                           numberOfComponent, cpuSecondOrderAccumulator.GetVnlMatrix().data_block(), numberOfComponent);
+  // TODO : check status, throw exception on error & clean up GPU memory
+  if (status != CUBLAS_STATUS_SUCCESS)
+    {
+    otbMsgDevMacro( "cublasGetMatrix m_GPUSecondOrderAccumulator failed");
+    }
+
+  MatrixType cor = cpuSecondOrderAccumulator / nbPixels;
+  this->GetCorrelationOutput()->Set(cor);
+
+}
+
+template<class TInputImage>
+void PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GenerateData()
+{
+  // Support progress methods/callbacks
+  //  itk::ProgressReporter progress(this, threadId, outputRegionForThread.GetNumberOfPixels());
+
+  // Grab the input
+  InputImagePointer inputPtr = const_cast<TInputImage *> (this->GetInput());
+  const unsigned int nbPixels = inputPtr->GetBufferedRegion().GetNumberOfPixels();
+  otbMsgDevMacro( "buffered region : " << inputPtr->GetBufferedRegion() << " (" << nbPixels << " pixels)" );
+  const unsigned int numberOfComponent = inputPtr->GetNumberOfComponentsPerPixel();
+
+  cublasStatus status;
+
+  if (inputPtr->GetBufferedRegion().GetSize() != m_GPUImageSize)
+    {
+    otbMsgDevMacro( "Allocating " << nbPixels << " pixels on GPU (" << numberOfComponent * nbPixels * sizeof(float) / 1024
+        / 1024 << " Mo)" );
+
+
+    status = cublasAlloc(numberOfComponent * nbPixels, sizeof(float), (void**) &m_GPUImage);
+
+    // TODO : check status, throw exception on error & clean up GPU memory
+    if (status != CUBLAS_STATUS_SUCCESS)
+      {
+      otbMsgDevMacro( "cublasAlloc m_GPUImage failed" );
+      }
+
+    m_GPUImageSize = inputPtr->GetBufferedRegion().GetSize();
+
+    status = cublasSetMatrix(numberOfComponent, inputPtr->GetBufferedRegion().GetNumberOfPixels(), sizeof(float),
+                             inputPtr->GetBufferPointer(), numberOfComponent, m_GPUImage, numberOfComponent);
+    if (status != CUBLAS_STATUS_SUCCESS)
+      {
+      otbMsgDevMacro( "cublasSetMatrix m_GPUImage failed" );
+      }
+
+    }
+
+  cublasSsyrk('u', 'n', numberOfComponent, inputPtr->GetBufferedRegion().GetNumberOfPixels(), 1.0f, m_GPUImage,
+              numberOfComponent, 1, m_GPUSecondOrderAccumulator, numberOfComponent);
+
+  status = cublasGetError();
+  // TODO : check status, throw exception on error & clean up GPU memory
+  if (status != CUBLAS_STATUS_SUCCESS)
+    {
+    otbMsgDevMacro( "cublasSsyrk failed" );
+    }
+
+}
+
+template<class TImage>
+void PersistentCublasStreamingStatisticsVectorImageFilter<TImage>::PrintSelf(std::ostream& os, itk::Indent indent) const
+{
+  Superclass::PrintSelf(os, indent);
+
+  //os << indent << "Mean: "        << this->GetMeanOutput()->Get()        << std::endl;
+  //os << indent << "Covariance: "  << this->GetCovarianceOutput()->Get()  << std::endl;
+  os << indent << "Correlation: " << this->GetCorrelationOutput()->Get() << std::endl;
+}
+
+} // end namespace otb
+#endif
diff --git a/Code/CMakeLists.txt b/Code/CMakeLists.txt
index fb33973661..3f113a51de 100644
--- a/Code/CMakeLists.txt
+++ b/Code/CMakeLists.txt
@@ -1,3 +1,4 @@
 ADD_SUBDIRECTORY(BasicFilters)
+ADD_SUBDIRECTORY(Hyperspectral)
 ADD_SUBDIRECTORY(Vahine)
 ADD_SUBDIRECTORY(RX)
diff --git a/Testing/CMakeLists.txt b/Testing/CMakeLists.txt
index 6156805b45..b5e0577dd3 100644
--- a/Testing/CMakeLists.txt
+++ b/Testing/CMakeLists.txt
@@ -16,9 +16,7 @@ SET( otbHyperTests1_SRC
 )
 
 ADD_EXECUTABLE(otbHyperTests1 ${otbHyperTests1_SRC} otbHyperTests1.cxx)
-TARGET_LINK_LIBRARIES(otbHyperTests1 OTBCommon OTBIO OTBBasicFilters OTBTesting )
-
-
+TARGET_LINK_LIBRARIES(otbHyperTests1 OTBCommon OTBIO OTBBasicFilters OTBTesting)
 
 ADD_TEST(vahineVCA1
          ${TESTEXE_DIR}/otbHyperTests1
@@ -30,7 +28,7 @@ ADD_TEST(vahineVCA1
 ADD_TEST(vahineVCA2
          ${TESTEXE_DIR}/otbHyperTests1
          vahineVCA
-         ${DATA}/SYNTHETIC/hsi_cube.tif
+         ${DATA}/SYNTHETIC100/hsi_cube.tif
          ${TEMP}/synthetic_unmix_5endmembers.tif
          5)
 
@@ -49,18 +47,27 @@ ADD_TEST(bfTvStreamingStatisticsVectorImageFilter
          ${DATA}/AVIRIS/extract128/extract128_goodbands.hdr
          ${TEMP}/extract128_stats_otb.txt)
 
-ADD_TEST(bfStreamingStatisticsVectorImageFilterNew2
+ADD_TEST(bfTuStreamingStatisticsVectorImageFilterNew2
          ${TESTEXE_DIR}/otbHyperTests1
          otbStreamingStatisticsVectorImageFilter2NewTest)
 
+#ADD_TEST(bfTvStreamingStatisticsVectorImageFilter_AVIRIS512
+#         ${TESTEXE_DIR}/otbHyperTests1
+#         otbStreamingStatisticsVectorImageFilterTest
+#         ${DATA}/AVIRIS/extract512/f970619t01p02_r02_sc01_goodbands.hdr
+#         ${TEMP}/aviris512_stats_otb.txt)
+       
 ADD_TEST(bfTvStreamingStatisticsVectorImageFilter2
          ${TESTEXE_DIR}/otbHyperTests1
          otbStreamingStatisticsVectorImageFilter2Test
          ${DATA}/AVIRIS/extract128/extract128_goodbands.hdr
          ${TEMP}/extract128_stats_rt.txt)
 
-
-
+ADD_TEST(bfTvStreamingStatisticsVectorImageFilter2_AVIRIS512
+         ${TESTEXE_DIR}/otbHyperTests1
+         otbStreamingStatisticsVectorImageFilter2Test
+         ${DATA}/AVIRIS/extract512/f970619t01p02_r02_sc01_goodbands.hdr
+         ${TEMP}/aviris512_stats_rt.txt)
 
 ADD_TEST(bfTuMatrixMultiplyImageFilterNew
          ${TESTEXE_DIR}/otbHyperTests1
@@ -73,8 +80,8 @@ ADD_TEST(bfTvMatrixMultiplyImageFilter
 ADD_TEST(bfTvMatrixMultiplyImageFilterRealImageTest
          ${TESTEXE_DIR}/otbHyperTests1
          otbMatrixMultiplyImageFilterTest
-         ${DATA}/SYNTHETIC/hsi_cube.tif
-         ${DATA}/SYNTHETIC/endmembers.tif
+         ${DATA}/SYNTHETIC100/hsi_cube.tif
+         ${DATA}/SYNTHETIC100/endmembers.tif
          ${TEMP}/bfMatrixMultiplyImageFilter.tif)
 
 
@@ -86,8 +93,8 @@ ADD_TEST(bfTuUnConstrainedLeastSquareImageFilterNew
 ADD_TEST(bfTvUnConstrainedLeastSquareImageFilter
          ${TESTEXE_DIR}/otbHyperTests1
          otbUnConstrainedLeastSquareImageFilterTest
-         ${DATA}/SYNTHETIC/hsi_cube.tif
-         ${DATA}/SYNTHETIC/endmembers.tif
+         ${DATA}/SYNTHETIC100/hsi_cube.tif
+         ${DATA}/SYNTHETIC100/endmembers.tif
          ${TEMP}/bfUnConstrainedLeastSquareImageFilter.tif)
 
 
@@ -125,39 +132,62 @@ ADD_TEST(hyTvEigenvalueLikelihoodMaximization_AVIRIS2
 ADD_TEST(hyTvEigenvalueLikelihoodMaximization2
          ${TESTEXE_DIR}/otbHyperTests1
          otbEigenvalueLikelihoodMaximizationTest
-         ${DATA}/SYNTHETIC/hsi_cube.tif
+         ${DATA}/SYNTHETIC100/hsi_cube.tif
          ${TEMP}/synthetic_elm.txt)
 
 ADD_TEST(testProjectiveProjection
          ${TESTEXE_DIR}/otbHyperTests1
          otbProjectiveProjectionTestHighSNR
-         ${DATA}/SYNTHETIC/hsi_cube.tif
+         ${DATA}/SYNTHETIC100/hsi_cube.tif
          8
          ${TEMP}/hsi_cube_projectiveprojected_8.tif )
 
 
-
-
-ADD_TEST(hyVCA_Synthetic
+ADD_TEST(hyTvVCA_Synthetic
          ${TESTEXE_DIR}/otbHyperTests1
          otbVCATestHighSNR
-         ${DATA}/SYNTHETIC/hsi_cube.tif
+         ${DATA}/SYNTHETIC100/hsi_cube.tif
          ${TEMP}/synthetic_vca_5.hdr
          5 )
 
 
-ADD_TEST(hyVCA_AVIRIS
+ADD_TEST(hyTvVCA_AVIRIS
          ${TESTEXE_DIR}/otbHyperTests1
          otbVCATestHighSNR
          ${DATA}/AVIRIS/extract512/f970619t01p02_r02_sc01_goodbands.hdr
          ${TEMP}/aviris_vca_5.hdr
          13 )
 
+###################
+## Cublas
+###################
+
+SET( otbHyperCublasTests1_SRC
+  otbCublasStreamingStatisticsVectorImageFilter.cxx
+)
+
+ADD_EXECUTABLE(otbHyperCublasTests1 ${otbHyperCublasTests1_SRC} otbHyperCublasTests1.cxx)
+TARGET_LINK_LIBRARIES(otbHyperCublasTests1 OTBCommon OTBIO OTBBasicFilters OTBTesting OTBBasicFiltersGPUCuda)
 
+ADD_TEST(bfTuCublasStreamingStatisticsVectorImageFilterNew
+         ${TESTEXE_DIR}/otbHyperCublasTests1
+         otbCublasStreamingStatisticsVectorImageFilterNewTest)
+
+ADD_TEST(bfTvCublasStreamingStatisticsVectorImageFilter
+         ${TESTEXE_DIR}/otbHyperCublasTests1
+         otbCublasStreamingStatisticsVectorImageFilterTest
+         ${DATA}/AVIRIS/extract128/extract128_goodbands.hdr
+         ${TEMP}/extract128_stats_cublas.txt)
+
+ADD_TEST(bfTvCublasStreamingStatisticsVectorImageFilter_AVIRIS512
+         ${TESTEXE_DIR}/otbHyperCublasTests1
+         otbCublasStreamingStatisticsVectorImageFilterTest
+         ${DATA}/AVIRIS/extract512/f970619t01p02_r02_sc01_goodbands.hdr
+         ${TEMP}/aviris512_stats_cublas.txt)
 
 
 ###################
-##Anomaly detection
+## Anomaly detection
 ###################
 
 SET( otbAnomalyTests1_SRC
diff --git a/Testing/otbCublasStreamingStatisticsVectorImageFilter.cxx b/Testing/otbCublasStreamingStatisticsVectorImageFilter.cxx
new file mode 100644
index 0000000000..4eabd41900
--- /dev/null
+++ b/Testing/otbCublasStreamingStatisticsVectorImageFilter.cxx
@@ -0,0 +1,64 @@
+/*=========================================================================
+
+  Program:   ORFEO Toolbox
+  Language:  C++
+  Date:      $Date$
+  Version:   $Revision$
+
+
+  Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+  See OTBCopyright.txt for details.
+
+
+  This software is distributed WITHOUT ANY WARRANTY; without even
+  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+  PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#include "itkExceptionObject.h"
+
+#include "otbCublasStreamingStatisticsVectorImageFilter.h"
+#include "otbImageFileReader.h"
+#include "otbVectorImage.h"
+#include <fstream>
+#include "otbStreamingTraits.h"
+
+const unsigned int Dimension = 2;
+typedef float PixelType;
+
+typedef otb::VectorImage<PixelType, Dimension> ImageType;
+typedef otb::ImageFileReader<ImageType> ReaderType;
+typedef otb::CublasStreamingStatisticsVectorImageFilter<ImageType> StreamingStatisticsVectorImageFilterType;
+
+int otbCublasStreamingStatisticsVectorImageFilterNewTest(int argc, char * argv[])
+{
+  StreamingStatisticsVectorImageFilterType::Pointer filter = StreamingStatisticsVectorImageFilterType::New();
+  std::cout << filter << std::endl;
+  return EXIT_SUCCESS;
+}
+
+int otbCublasStreamingStatisticsVectorImageFilterTest(int argc, char * argv[])
+{
+  const char * infname = argv[1];
+  const char * outfname = argv[2];
+
+  StreamingStatisticsVectorImageFilterType::Pointer filter = StreamingStatisticsVectorImageFilterType::New();
+
+  ReaderType::Pointer reader = ReaderType::New();
+  reader->SetFileName(infname);
+
+  //filter->GetStreamer()->SetStreamingMode(otb::SET_NUMBER_OF_STREAM_DIVISIONS);
+  filter->GetStreamer()->SetBufferMemorySize(100 * 1024 * 1024);
+  filter->SetInput(reader->GetOutput());
+  filter->Update();
+
+  std::ofstream file;
+  file.open(outfname);
+  file.precision(5);
+  //file << "Mean: " << filter->GetMean() << std::endl;
+  //file << "Covariance: " << filter->GetCovariance() << std::endl;
+  file << "Correlation: " << filter->GetCorrelation() << std::endl;
+  file.close();
+
+  return EXIT_SUCCESS;
+}
diff --git a/Testing/otbHyperCublasTests1.cxx b/Testing/otbHyperCublasTests1.cxx
new file mode 100644
index 0000000000..0f6d19be70
--- /dev/null
+++ b/Testing/otbHyperCublasTests1.cxx
@@ -0,0 +1,29 @@
+/*=========================================================================
+
+  Program:   ORFEO Toolbox
+  Language:  C++
+  Date:      $Date$
+  Version:   $Revision$
+
+
+  Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+  See OTBCopyright.txt for details.
+
+
+     This software is distributed WITHOUT ANY WARRANTY; without even
+     the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+     PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+
+// this file defines the otbCommonTest for the test driver
+// and all it expects is that you have a function called RegisterTests
+#include <iostream>
+#include "otbTestMain.h"
+
+void RegisterTests()
+{
+  REGISTER_TEST(otbCublasStreamingStatisticsVectorImageFilterNewTest);
+  REGISTER_TEST(otbCublasStreamingStatisticsVectorImageFilterTest);
+//  REGISTER_TEST(otbCudaCorrelationNoStreamingTest);
+}
-- 
GitLab