Skip to content
Snippets Groups Projects
Commit 57cb034c authored by Julien Malik's avatar Julien Malik
Browse files

ENH: remove GPU Cuda code for now

parent 011961d7
No related branches found
No related tags found
No related merge requests found
Showing with 0 additions and 1520 deletions
# Sources of non-templated classes.
FILE(GLOB OTBBasicFiltersGPUCuda_SRCS "*.cxx" )
ADD_LIBRARY(OTBBasicFiltersGPUCuda ${OTBBasicFiltersGPUCuda_SRCS})
TARGET_LINK_LIBRARIES (OTBBasicFiltersGPUCuda OTBCommon OTBBasicFilters ITKBasicFilters )
IF(OTB_LIBRARY_PROPERTIES)
SET_TARGET_PROPERTIES(OTBBasicFiltersGPUCuda PROPERTIES ${OTB_LIBRARY_PROPERTIES})
ENDIF(OTB_LIBRARY_PROPERTIES)
IF(OTB_USE_CUDA)
CUDA_ADD_CUBLAS_TO_TARGET( OTBBasicFiltersGPUCuda )
ENDIF(OTB_USE_CUDA)
/*=========================================================================
Program: ORFEO Toolbox
Language: C++
Date: $Date$
Version: $Revision$
Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
See OTBCopyright.txt for details.
Some parts of this code are derived from ITK. See ITKCopyright.txt
for details.
This software is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANT2ABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the above copyright notices for more information.
=========================================================================*/
#ifndef __otbCublasStreamingStatisticsVectorImageFilter_h
#define __otbCublasStreamingStatisticsVectorImageFilter_h
#include "otbMacro.h"
#include "otbPersistentImageFilter.h"
#include "otbPersistentFilterStreamingDecorator.h"
#include "itkNumericTraits.h"
#include "itkArray.h"
#include "itkSimpleDataObjectDecorator.h"
#include "itkImageRegionSplitter.h"
#include "itkVariableSizeMatrix.h"
#include "itkVariableLengthVector.h"
#include "cublas.h"
namespace otb
{
/** \class PersistentCublasStreamingStatisticsVectorImageFilter
* \brief Compute mean, covariance & correlation of a large image using streaming
*
* This filter persists its temporary data. It means that if you Update it n times on n different
* requested regions, the output statistics will be the statitics of the whole set of n regions.
*
* To reset the temporary data, one should call the Reset() function.
*
* To get the statistics once the regions have been processed via the pipeline, use the Synthetize() method.
*
* \sa PersistentImageFilter
* \ingroup Streamed
* \ingroup Multithreaded
* \ingroup MathematicalStatisticsImageFilters
*
*/
template<class TInputImage>
class ITK_EXPORT PersistentCublasStreamingStatisticsVectorImageFilter :
public PersistentImageFilter<TInputImage, TInputImage>
{
public:
/** Standard Self typedef */
typedef PersistentCublasStreamingStatisticsVectorImageFilter Self;
typedef PersistentImageFilter<TInputImage, TInputImage> Superclass;
typedef itk::SmartPointer<Self> Pointer;
typedef itk::SmartPointer<const Self> ConstPointer;
/** Method for creation through the object factory. */
itkNewMacro(Self);
/** Runtime information support. */
itkTypeMacro(PersistentCublasStreamingStatisticsVectorImageFilter, PersistentImageFilter);
/** Image related typedefs. */
typedef TInputImage ImageType;
typedef typename TInputImage::Pointer InputImagePointer;
typedef typename TInputImage::RegionType RegionType;
typedef typename TInputImage::SizeType SizeType;
typedef typename TInputImage::IndexType IndexType;
typedef typename TInputImage::PixelType PixelType;
typedef typename TInputImage::InternalPixelType InternalPixelType;
itkStaticConstMacro(InputImageDimension, unsigned int, TInputImage::ImageDimension);
/** Image related typedefs. */
itkStaticConstMacro(ImageDimension, unsigned int, TInputImage::ImageDimension);
/** Type to use for computations. */
//typedef typename itk::NumericTraits<InternalPixelType>::RealType RealType;
typedef float RealType;
typedef itk::VariableLengthVector<RealType> RealPixelType;
/** Smart Pointer type to a DataObject. */
typedef typename itk::DataObject::Pointer DataObjectPointer;
/** Type of DataObjects used for scalar outputs */
typedef itk::VariableSizeMatrix<RealType> MatrixType;
typedef std::vector<MatrixType> ArrayMatrixType;
typedef itk::Array<long> ArrayLongPixelType;
typedef std::vector<RealPixelType> ArrayRealPixelType;
typedef std::vector<PixelType> ArrayPixelType;
typedef itk::SimpleDataObjectDecorator<RealPixelType> RealPixelObjectType;
typedef itk::SimpleDataObjectDecorator<PixelType> PixelObjectType;
typedef itk::SimpleDataObjectDecorator<MatrixType> MatrixObjectType;
/** Return the computed Mean. */
RealPixelType GetMean() const
{
return this->GetMeanOutput()->Get();
}
RealPixelObjectType* GetMeanOutput();
const RealPixelObjectType* GetMeanOutput() const;
/** Return the computed Covariance. */
MatrixType GetCovariance() const
{
return this->GetCovarianceOutput()->Get();
}
MatrixObjectType* GetCovarianceOutput();
const MatrixObjectType* GetCovarianceOutput() const;
/** Return the computed Covariance. */
MatrixType GetCorrelation() const
{
return this->GetCorrelation()->Get();
}
MatrixObjectType* GetCorrelationOutput();
const MatrixObjectType* GetCorrelationOutput() const;
/** Make a DataObject of the correct type to be used as the specified
* output.
*/
virtual DataObjectPointer MakeOutput(unsigned int idx);
/** Pass the input through unmodified. Do this by Grafting in the
* AllocateOutputs method.
*/
virtual void AllocateOutputs();
virtual void GenerateOutputInformation();
virtual void Synthetize(void);
virtual void Reset(void);
protected:
PersistentCublasStreamingStatisticsVectorImageFilter();
virtual ~PersistentCublasStreamingStatisticsVectorImageFilter();
virtual void PrintSelf(std::ostream& os, itk::Indent indent) const;
/** Multi-thread version GenerateData. */
//void ThreadedGenerateData(const RegionType& outputRegionForThread, int threadId);
void GenerateData();
private:
PersistentCublasStreamingStatisticsVectorImageFilter(const Self &); //purposely not implemented
void operator =(const Self&); //purposely not implemented
RealPixelType m_FirstOrderAccumulator;
MatrixType m_SecondOrderAccumulator;
float* m_GPUImage;
SizeType m_GPUImageSize;
float* m_GPUOnesVector;
float* m_GPUFirstOrderAccumulator;
float* m_GPUSecondOrderAccumulator;
}; // end of class PersistentCublasStreamingStatisticsVectorImageFilter
/** \class CublasStreamingStatisticsVectorImageFilter
* \brief This class streams the whole input image through the PersistentCublasStreamingStatisticsVectorImageFilter.
*
* This way, it allows to compute the first and second order global statistics of this image. It calls the
* Reset() method of the PersistentCublasStreamingStatisticsVectorImageFilter before streaming the image and the
* Synthetize() method of the PersistentCublasStreamingStatisticsVectorImageFilter after having streamed the image
* to compute the statistics. The accessor on the results are wrapping the accessors of the
* internal PersistentCublasStreamingStatisticsVectorImageFilter.
*
* \sa PersistentCublasStreamingStatisticsVectorImageFilter
* \sa PersistentImageFilter
* \sa PersistentFilterStreamingDecorator
* \sa StreamingImageVirtualWriter
* \ingroup Streamed
* \ingroup Multithreaded
* \ingroup MathematicalStatisticsImageFilters
*/
template<class TInputImage>
class ITK_EXPORT CublasStreamingStatisticsVectorImageFilter :
public PersistentFilterStreamingDecorator<PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage> >
{
public:
/** Standard Self typedef */
typedef CublasStreamingStatisticsVectorImageFilter Self;
typedef PersistentFilterStreamingDecorator
<PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage> > Superclass;
typedef itk::SmartPointer<Self> Pointer;
typedef itk::SmartPointer<const Self> ConstPointer;
/** Type macro */
itkNewMacro(Self);
/** Creation through object factory macro */
itkTypeMacro(CublasStreamingStatisticsVectorImageFilter, PersistentFilterStreamingDecorator);
typedef TInputImage InputImageType;
typedef typename Superclass::FilterType StatFilterType;
typedef typename StatFilterType::PixelType PixelType;
typedef typename StatFilterType::RealType RealType;
typedef typename StatFilterType::RealPixelType RealPixelType;
typedef typename StatFilterType::MatrixType MatrixType;
typedef typename StatFilterType::ArrayMatrixType ArrayMatrixType;
typedef typename StatFilterType::ArrayLongPixelType ArrayLongPixelType;
typedef typename StatFilterType::ArrayRealPixelType ArrayRealPixelType;
typedef typename StatFilterType::ArrayPixelType ArrayPixelType;
/** Type of DataObjects used for scalar outputs */
typedef typename StatFilterType::RealPixelObjectType RealPixelObjectType;
typedef typename StatFilterType::PixelObjectType PixelObjectType;
typedef typename StatFilterType::MatrixObjectType MatrixObjectType;
void SetInput(InputImageType * input)
{
this->GetFilter()->SetInput(input);
}
const InputImageType * GetInput()
{
return this->GetFilter()->GetInput();
}
/** Return the computed Mean. */
RealPixelType GetMean() const
{
return this->GetFilter()->GetMeanOutput()->Get();
}
RealPixelObjectType* GetMeanOutput()
{
return this->GetFilter()->GetMeanOutput();
}
const RealPixelObjectType* GetMeanOutput() const
{
return this->GetFilter()->GetMeanOutput();
}
/** Return the computed Covariance. */
MatrixType GetCovariance() const
{
return this->GetFilter()->GetCovarianceOutput()->Get();
}
MatrixObjectType* GetCovarianceOutput()
{
return this->GetFilter()->GetCovarianceOutput();
}
const MatrixObjectType* GetCovarianceOutput() const
{
return this->GetFilter()->GetCovarianceOutput();
}
/** Return the computed Covariance. */
MatrixType GetCorrelation() const
{
return this->GetFilter()->GetCorrelationOutput()->Get();
}
MatrixObjectType* GetCorrelationOutput()
{
return this->GetFilter()->GetCorrelationOutput();
}
const MatrixObjectType* GetCorrelationOutput() const
{
return this->GetFilter()->GetCorrelationOutput();
}
protected:
/** Constructor */
CublasStreamingStatisticsVectorImageFilter() {};
/** Destructor */
virtual ~CublasStreamingStatisticsVectorImageFilter() {}
private:
CublasStreamingStatisticsVectorImageFilter(const Self &); //purposely not implemented
void operator =(const Self&); //purposely not implemented
};
} // end namespace otb
#ifndef OTB_MANUAL_INSTANTIATION
#include "otbCublasStreamingStatisticsVectorImageFilter.txx"
#endif
#endif
/*=========================================================================
Program: ORFEO Toolbox
Language: C++
Date: $Date$
Version: $Revision$
Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
See OTBCopyright.txt for details.
Some parts of this code are derived from ITK. See ITKCopyright.txt
for details.
This software is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the above copyright notices for more information.
=========================================================================*/
#ifndef __otbCublasStreamingStatisticsVectorImageFilter_txx
#define __otbCublasStreamingStatisticsVectorImageFilter_txx
#include "otbCublasStreamingStatisticsVectorImageFilter.h"
#include "itkNumericTraits.h"
#include "itkProgressReporter.h"
#include "otbMacro.h"
namespace otb
{
template<class TInputImage>
PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::PersistentCublasStreamingStatisticsVectorImageFilter()
{
// first output is a copy of the image, DataObject created by
// superclass
//
// allocate the data objects for the outputs which are
// just decorators around vector/matrix types
this->itk::ProcessObject::SetNthOutput(1, this->MakeOutput(1).GetPointer());
this->itk::ProcessObject::SetNthOutput(2, this->MakeOutput(2).GetPointer());
this->itk::ProcessObject::SetNthOutput(3, this->MakeOutput(3).GetPointer());
cublasStatus status = cublasInit();
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasInit failed" );
}
m_GPUImage = 0;
m_GPUImageSize.Fill(0);
m_GPUSecondOrderAccumulator = 0;
}
template<class TInputImage>
PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::~PersistentCublasStreamingStatisticsVectorImageFilter()
{
cublasStatus status = cublasShutdown();
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasShutdown failed" );
}
}
template<class TInputImage>
itk::DataObject::Pointer PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::MakeOutput(
unsigned int output)
{
switch (output)
{
case 0:
return static_cast<itk::DataObject*> (TInputImage::New().GetPointer());
break;
case 1:
return static_cast<itk::DataObject*> (RealPixelObjectType::New().GetPointer());
break;
case 2:
case 3:
return static_cast<itk::DataObject*> (MatrixObjectType::New().GetPointer());
break;
default:
// might as well make an image
return static_cast<itk::DataObject*> (TInputImage::New().GetPointer());
break;
}
}
template<class TInputImage>
typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::RealPixelObjectType*
PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetMeanOutput()
{
return static_cast<RealPixelObjectType*> (this->itk::ProcessObject::GetOutput(1));
}
template<class TInputImage>
const typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::RealPixelObjectType*
PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetMeanOutput() const
{
return static_cast<const RealPixelObjectType*> (this->itk::ProcessObject::GetOutput(1));
}
template<class TInputImage>
typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::MatrixObjectType*
PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetCorrelationOutput()
{
return static_cast<MatrixObjectType*> (this->itk::ProcessObject::GetOutput(2));
}
template<class TInputImage>
const typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::MatrixObjectType*
PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetCorrelationOutput() const
{
return static_cast<const MatrixObjectType*> (this->itk::ProcessObject::GetOutput(2));
}
template<class TInputImage>
typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::MatrixObjectType*
PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetCovarianceOutput()
{
return static_cast<MatrixObjectType*> (this->itk::ProcessObject::GetOutput(3));
}
template<class TInputImage>
const typename PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::MatrixObjectType*
PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GetCovarianceOutput() const
{
return static_cast<const MatrixObjectType*> (this->itk::ProcessObject::GetOutput(3));
}
template<class TInputImage>
void PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GenerateOutputInformation()
{
Superclass::GenerateOutputInformation();
if (this->GetInput())
{
this->GetOutput()->CopyInformation(this->GetInput());
this->GetOutput()->SetLargestPossibleRegion(this->GetInput()->GetLargestPossibleRegion());
if (this->GetOutput()->GetRequestedRegion().GetNumberOfPixels() == 0)
{
this->GetOutput()->SetRequestedRegion(this->GetOutput()->GetLargestPossibleRegion());
}
}
}
template<class TInputImage>
void PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::AllocateOutputs()
{
// This is commented to prevent the streaming of the whole image for the first stream strip
// It shall not cause any problem because the output image of this filter is not intended to be used.
//InputImagePointer image = const_cast< TInputImage * >( this->GetInput() );
//this->GraftOutput( image );
// Nothing that needs to be allocated for the remaining outputs
}
template<class TInputImage>
void PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::Reset()
{
TInputImage * inputPtr = const_cast<TInputImage *> (this->GetInput());
inputPtr->UpdateOutputInformation();
unsigned int numberOfComponent = inputPtr->GetNumberOfComponentsPerPixel();
RealPixelType zeroRealPixel;
zeroRealPixel.SetSize(numberOfComponent);
zeroRealPixel.Fill(itk::NumericTraits<RealType>::Zero);
this->GetMeanOutput()->Set(zeroRealPixel);
MatrixType zeroMatrix;
zeroMatrix.SetSize(numberOfComponent, numberOfComponent);
zeroMatrix.Fill(itk::NumericTraits<RealType>::Zero);
this->GetCovarianceOutput()->Set(zeroMatrix);
this->GetCorrelationOutput()->Set(zeroMatrix);
m_FirstOrderAccumulator = zeroRealPixel;
m_SecondOrderAccumulator = zeroMatrix;
if (m_GPUFirstOrderAccumulator == 0)
{
cublasStatus status;
status = cublasAlloc(numberOfComponent, sizeof(float), (void**) &m_GPUFirstOrderAccumulator);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasAlloc m_GPUFirstOrderAccumulator failed" );
}
status = cublasSetVector(numberOfComponent, sizeof(float),
m_FirstOrderAccumulator.GetDataPointer(), 1,
m_GPUFirstOrderAccumulator, 1);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasSetMatrix m_GPUFirstOrderAccumulator failed" );
}
}
if (m_GPUSecondOrderAccumulator == 0)
{
cublasStatus status;
status = cublasAlloc(numberOfComponent * numberOfComponent, sizeof(float),
(void**) &m_GPUSecondOrderAccumulator);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasAlloc m_GPUSecondOrderAccumulator failed" );
}
status = cublasSetMatrix(numberOfComponent, numberOfComponent, sizeof(float),
m_SecondOrderAccumulator.GetVnlMatrix().data_block(), numberOfComponent,
m_GPUSecondOrderAccumulator, numberOfComponent);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasSetMatrix m_GPUSecondOrderAccumulator failed" );
}
}
}
template<class TInputImage>
void PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::Synthetize()
{
TInputImage * inputPtr = const_cast<TInputImage *> (this->GetInput());
const unsigned int numberOfComponent = inputPtr->GetNumberOfComponentsPerPixel();
const unsigned int nbPixels = inputPtr->GetLargestPossibleRegion().GetNumberOfPixels();
RealPixelType cpuFirstOrderAccumulator(numberOfComponent);
MatrixType cpuSecondOrderAccumulator(numberOfComponent, numberOfComponent);
cublasStatus status;
status = cublasGetMatrix(numberOfComponent, numberOfComponent, sizeof(float), m_GPUSecondOrderAccumulator,
numberOfComponent, cpuSecondOrderAccumulator.GetVnlMatrix().data_block(), numberOfComponent);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasGetMatrix m_GPUSecondOrderAccumulator failed");
}
status = cublasGetVector(numberOfComponent, sizeof(float), m_GPUFirstOrderAccumulator, 1,
const_cast<float*>(cpuFirstOrderAccumulator.GetDataPointer()), 1);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasGetMatrix m_GPUSecondOrderAccumulator failed");
}
if (m_GPUImage)
{
status = cublasFree(m_GPUImage);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasFree m_GPUImage failed" );
}
}
if (m_GPUFirstOrderAccumulator)
{
status = cublasFree(m_GPUFirstOrderAccumulator);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasFree m_GPUFirstOrderAccumulator failed" );
}
}
if (m_GPUOnesVector)
{
status = cublasFree(m_GPUOnesVector);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasFree m_GPUOnesVector failed" );
}
}
if (m_GPUSecondOrderAccumulator)
{
status = cublasFree(m_GPUSecondOrderAccumulator);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasFree m_GPUSecondOrderAccumulator failed" );
}
}
this->GetMeanOutput()->Set(cpuFirstOrderAccumulator / nbPixels);
const RealPixelType& mean = this->GetMeanOutput()->Get();
MatrixType cor = cpuSecondOrderAccumulator / nbPixels;
for (unsigned int r = 0; r < numberOfComponent; ++r)
{
for (unsigned int c = r + 1; c < numberOfComponent; ++c)
{
cor(r, c) = cor(c, r);
}
}
this->GetCorrelationOutput()->Set(cor);
const float regul = static_cast<float>(nbPixels) / (nbPixels - 1);
MatrixType cov(numberOfComponent, numberOfComponent);
for (unsigned int r = 0; r < numberOfComponent; ++r)
{
for (unsigned int c = 0; c < numberOfComponent; ++c)
{
cov(r, c) = regul * (cor(r, c) - mean[r] * mean[c]);
}
}
this->GetCovarianceOutput()->Set(cov);
}
template<class TInputImage>
void PersistentCublasStreamingStatisticsVectorImageFilter<TInputImage>::GenerateData()
{
// Support progress methods/callbacks
// itk::ProgressReporter progress(this, threadId, outputRegionForThread.GetNumberOfPixels());
// Grab the input
InputImagePointer inputPtr = const_cast<TInputImage *> (this->GetInput());
const unsigned int nbPixels = inputPtr->GetBufferedRegion().GetNumberOfPixels();
otbMsgDevMacro( "buffered region : " << inputPtr->GetBufferedRegion() << " (" << nbPixels << " pixels)" );
const unsigned int numberOfComponent = inputPtr->GetNumberOfComponentsPerPixel();
cublasStatus status;
if (inputPtr->GetBufferedRegion().GetSize() != m_GPUImageSize)
{
if (m_GPUImage)
{
status = cublasFree(m_GPUImage);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasFree m_GPUImage failed" );
}
}
if (m_GPUOnesVector)
{
status = cublasFree(m_GPUOnesVector);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasFree m_GPUOnesVector failed" );
}
}
otbMsgDevMacro( "Allocating " << nbPixels << " pixels on GPU (" << numberOfComponent * nbPixels * sizeof(float) / 1024
/ 1024 << " Mo)" );
status = cublasAlloc(numberOfComponent * nbPixels, sizeof(float), (void**) &m_GPUImage);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasAlloc m_GPUImage failed" );
}
m_GPUImageSize = inputPtr->GetBufferedRegion().GetSize();
status = cublasAlloc(nbPixels, sizeof(float), (void**) &m_GPUOnesVector);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasAlloc m_GPUOnesVector failed" );
}
RealPixelType onesRealPixel;
onesRealPixel.SetSize(nbPixels);
onesRealPixel.Fill(itk::NumericTraits<RealType>::One);
status = cublasSetVector(nbPixels, sizeof(float),
onesRealPixel.GetDataPointer(), 1,
m_GPUOnesVector, 1);
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasSetVector m_GPUOnesVector failed" );
}
}
status = cublasSetMatrix(numberOfComponent, nbPixels, sizeof(float),
inputPtr->GetBufferPointer(), numberOfComponent, m_GPUImage, numberOfComponent);
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasSetMatrix m_GPUImage failed" );
}
cublasSgemv('n', numberOfComponent, nbPixels, 1.0f, m_GPUImage, numberOfComponent, m_GPUOnesVector, 1, 1.0f,
m_GPUFirstOrderAccumulator, 1);
status = cublasGetError();
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasSgemv failed" );
}
cublasSsyrk('u', 'n', numberOfComponent, nbPixels, 1.0f, m_GPUImage,
numberOfComponent, 1, m_GPUSecondOrderAccumulator, numberOfComponent);
status = cublasGetError();
// TODO : check status, throw exception on error & clean up GPU memory
if (status != CUBLAS_STATUS_SUCCESS)
{
otbMsgDevMacro( "cublasSsyrk failed" );
}
}
template<class TImage>
void PersistentCublasStreamingStatisticsVectorImageFilter<TImage>::PrintSelf(std::ostream& os, itk::Indent indent) const
{
Superclass::PrintSelf(os, indent);
os << indent << "Mean: " << this->GetMeanOutput()->Get() << std::endl;
os << indent << "Covariance: " << this->GetCovarianceOutput()->Get() << std::endl;
os << indent << "Correlation: " << this->GetCorrelationOutput()->Get() << std::endl;
}
} // end namespace otb
#endif
FILE(GLOB OTBHyperspectralGPUCuda_CUDASRCS "*.cu" )
CUDA_ADD_LIBRARY(OTBHyperspectralGPUCuda_nvcc ${OTBHyperspectralGPUCuda_CUDASRCS})
FILE(GLOB OTBHyperspectralGPUCuda_CPPSRCS "*.cxx" )
ADD_LIBRARY(OTBHyperspectralGPUCuda ${OTBHyperspectralGPUCuda_CPPSRCS})
TARGET_LINK_LIBRARIES (OTBHyperspectralGPUCuda OTBHyperspectralGPUCuda_nvcc OTBCommon OTBBasicFilters ITKBasicFilters )
IF(OTB_LIBRARY_PROPERTIES)
SET_TARGET_PROPERTIES(OTBHyperspectralGPUCuda PROPERTIES ${OTB_LIBRARY_PROPERTIES})
ENDIF(OTB_LIBRARY_PROPERTIES)
/*
* otbCudaFCLSFilter.cu
*
*/
#include <cstdio>
#include <cuda.h>
#include <driver_types.h>
#include <vector_types.h>
//Block thread size
#define BLOCK_SIZE (16*16)
//#define num_bands 224
//#define num_endmembers 5
#define CUDA_SAFE_CALL( call ) \
do \
{ \
cudaError_t status = call; \
check_error(status); \
} while(0)
void check_error(cudaError_t error)
{
#define ELSEIF_CUDA_HANDLE_ERROR_CODE( code ) \
else if(error == code) \
printf("Cuda error : %d %s\n", error, #code);
if(error == cudaSuccess) {}
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorMissingConfiguration)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorMemoryAllocation)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInitializationError)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorLaunchFailure)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorPriorLaunchFailure)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorLaunchTimeout)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorLaunchOutOfResources)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidDeviceFunction)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidConfiguration)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidDevice)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidValue)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidPitchValue)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidSymbol)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorMapBufferObjectFailed)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorUnmapBufferObjectFailed)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidHostPointer)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidDevicePointer)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidTexture)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidTextureBinding)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidChannelDescriptor)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidMemcpyDirection)
ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorAddressOfConstant)
#undef ELSEIF_HANDLE_ERROR_CODE
}
__global__ void UnconstrainedKernel(float* d_image_vector,
float* d_image_unmixed,
float* d_endmembersInv,
int numSamples,
int num_endmembers,
int num_bands)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
if (x < numSamples)
{
// Unconstrained
for(int e = 0; e < num_endmembers; e++)
{
d_image_unmixed[e + num_endmembers*x] = 0;
for (int t = 0; t < num_bands; t++)
{
d_image_unmixed[e + num_endmembers*x] += d_endmembersInv[t + e*num_bands] * d_image_vector[ t + num_bands*x ];
}
}
}
}
__global__ void FCLSUKernel(float* d_image_unmixed,int numSamples,
int num_endmembers)
{
float sum;
int x = blockIdx.x * blockDim.x + threadIdx.x;
if (x < numSamples)
{
sum = 0;
for(int i = 0; i < num_endmembers; i++)
{
sum += d_image_unmixed[ i + num_endmembers*x ];
}
for (int k = 0; k < num_endmembers; k++)
{
d_image_unmixed[k + num_endmembers*x] = d_image_unmixed[k + num_endmembers*x ] / sum;
}
}
}
__global__ void UnconstrainedISRAKernel(float* d_image_vector,
float* d_image_unmixed,
float* d_image_unmixed_tmp,
float* d_endmembers,
float* d_endmembersT,
float* d_endmembersInv,
int numSamples,
int num_endmembers,
int num_bands,
int maxiter)
{
int x = blockIdx.x * blockDim.x + threadIdx.x;
float numerator = 0;
float denominator = 0;
float dot = 0;
if (x < numSamples)
{
// Unconstrained
for(int e = 0; e < num_endmembers; e++)
{
d_image_unmixed[e + num_endmembers*x] = 0;
for (int t = 0; t < num_bands; t++)
{
d_image_unmixed[e + num_endmembers*x] += d_endmembersInv[t + e*num_bands] * d_image_vector[ t + num_bands*x ];
}
d_image_unmixed_tmp[e + num_endmembers*x] = d_image_unmixed[e + num_endmembers*x];
}
// ISRA
for(int it = 0; it < maxiter; it++)
{
for(int e = 0; e < num_endmembers; e++)
{
numerator = 0;
denominator = 0;
// For all bands
for (int k = 0; k < num_bands; k++)
{
// numerator = numerator + d_endmembers[k + e*num_bands] * l_pixel[k];
numerator = numerator + d_endmembers[k + e*num_bands] * d_image_vector[ k + num_bands*x ];;
// Calculate dot product
dot = 0;
for (int s = 0; s < num_endmembers; s++)
{
// dot += d_endmembersT[s + k*num_endmembers] * l_abu[s];
dot += d_endmembersT[s + k*num_endmembers] * d_image_unmixed_tmp[s + num_endmembers*x];
}
denominator += dot * d_endmembers[k + e*num_bands];
}
// l_abu[e] = l_abu[e] * (numerator/denominator);
// l_abu[e] = numerator/denominator;
d_image_unmixed[e + num_endmembers*x] = d_image_unmixed_tmp[e + num_endmembers*x] * (numerator/denominator);
}
for(int e = 0; e < num_endmembers; e++)
{
d_image_unmixed_tmp[e + num_endmembers*x] = d_image_unmixed[e + num_endmembers*x];
}
}
}
}
extern "C" void fclsProcessing( float* d_image_vector,
float* d_image_unmixed,
float* d_image_unmixed_tmp,
float* d_endmembers,
float* d_endmembersT,
float* d_endmembersInv,
int numSamples,
int numBands,
int nbEndmembers,
int maxIter,
int blockSize)
{
dim3 dimBlock( blockSize );
dim3 dimGrid ( (numSamples + blockSize) / blockSize );
printf( " %d \n " , (numSamples + blockSize) / blockSize );
UnconstrainedKernel<<<dimGrid,dimBlock>>>(d_image_vector, d_image_unmixed, d_endmembersInv, numSamples, nbEndmembers, numBands);
cudaThreadSynchronize();
//UnconstrainedISRAKernel<<<dimGrid,dimBlock>>>(d_image_vector, d_image_unmixed, d_image_unmixed_tmp, d_endmembers, d_endmembersT, d_endmembersInv, numSamples, nbEndmembers, numBands, maxIter);
//cudaThreadSynchronize();
//FCLSUKernel<<<dimGrid,dimBlock>>>(d_image_unmixed, numSamples);
//cudaThreadSynchronize();
}
extern "C" void fclsMallocEndMembers(
float** d_endmembers,
float** d_endmembersT,
float** d_endmembersInv,
int numBands,
int nbEndmembers)
{
float* d_endmembers_ = 0;
float* d_endmembersT_ = 0;
float* d_endmembersInv_ = 0;
printf( "Allocating %d KB\n", numBands*nbEndmembers*sizeof(float) / 1024 );
CUDA_SAFE_CALL( cudaMalloc((void**) &d_endmembers_, nbEndmembers*numBands*sizeof(float)));
CUDA_SAFE_CALL( cudaMemset(d_endmembers_, 0, nbEndmembers*numBands*sizeof(float)));
printf( "Allocating %d KB\n", numBands*nbEndmembers*sizeof(float) / 1024 );
CUDA_SAFE_CALL( cudaMalloc((void**) &d_endmembersT_, nbEndmembers*numBands*sizeof(float)));
CUDA_SAFE_CALL( cudaMemset(d_endmembersT_, 0, nbEndmembers*numBands*sizeof(float)));
printf( "Allocating %d KB\n", numBands*nbEndmembers*sizeof(float) / 1024 );
CUDA_SAFE_CALL( cudaMalloc((void**) &d_endmembersInv_, nbEndmembers*numBands*sizeof(float)));
CUDA_SAFE_CALL( cudaMemset(d_endmembersInv_, 0, nbEndmembers*numBands*sizeof(float)));
*d_endmembers = d_endmembers_;
*d_endmembersT = d_endmembersT_;
*d_endmembersInv = d_endmembersInv_;
}
extern "C" void fclsMallocImage( float** d_image_vector,
float** d_image_unmixed,
float** d_image_unmixed_tmp,
int imageWidth,
int imageHeight,
int numBands,
int nbEndmembers)
{
printf( "Allocating %d KB\n", numBands*imageWidth*imageHeight*sizeof(float) / 1024);
CUDA_SAFE_CALL(cudaMalloc((void**) d_image_vector, numBands*imageWidth*imageHeight*sizeof(float)));
CUDA_SAFE_CALL( cudaMemset(*d_image_vector, 0, nbEndmembers*numBands*sizeof(float)));
printf( "Allocating %d KB\n", nbEndmembers*imageWidth*imageHeight*sizeof(float) / 1024);
CUDA_SAFE_CALL( cudaMalloc((void**) d_image_unmixed, nbEndmembers*imageWidth*imageHeight*sizeof(float)));
CUDA_SAFE_CALL( cudaMemset(*d_image_unmixed, 0, nbEndmembers*numBands*sizeof(float)));
printf( "Allocating %d KB\n", nbEndmembers*imageWidth*imageHeight*sizeof(float) / 1024);
CUDA_SAFE_CALL( cudaMalloc((void**) d_image_unmixed_tmp, nbEndmembers*imageWidth*imageHeight*sizeof(float)));
CUDA_SAFE_CALL( cudaMemset(*d_image_unmixed_tmp, 0, nbEndmembers*numBands*sizeof(float)));
}
extern "C" void fclsCopyHostToDevice( float* d_ptr,
const float* h_ptr,
int nb_bytes)
{
CUDA_SAFE_CALL( cudaMemcpy(d_ptr, h_ptr, nb_bytes, cudaMemcpyHostToDevice));
}
extern "C" void fclsCopyDeviceToHost( float* h_ptr,
const float* d_ptr,
int nb_bytes)
{
CUDA_SAFE_CALL( cudaMemcpy(h_ptr, d_ptr, nb_bytes, cudaMemcpyDeviceToHost));
}
extern "C" void fclsFree( float* d_ptr)
{
CUDA_SAFE_CALL( cudaFree(d_ptr) );
}
extern "C" void fclsInit( void )
{
if (cuInit(0) != CUDA_SUCCESS)
exit (0);
}
/*=========================================================================
Program: ORFEO Toolbox
Language: C++
Date: $Date$
Version: $Revision$
Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
See OTBCopyright.txt for details.
Some parts of this code are derived from ITK. See ITKCopyright.txt
for details.
This software is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANT2ABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the above copyright notices for more information.
=========================================================================*/
#ifndef __otbCudaFCLSFilter_h
#define __otbCudaFCLSFilter_h
#include "itkMacro.h"
#include "otbUnaryFunctorImageFilter.h"
#include "vnl/algo/vnl_svd.h"
namespace otb
{
namespace Functor
{
template <class TInputPixel, class TOutputPixel>
class Dummy
{
public:
typedef vnl_vector<float> VectorType;
typedef vnl_matrix<float> MatrixType;
Dummy(){}
virtual ~Dummy(){}
void SetMatrix(const MatrixType& U)
{
m_U = U;
m_Ut = m_U.transpose();
m_Uinv = vnl_svd<float>(m_U).inverse();
}
unsigned int GetOutputSize() const
{
return m_U.cols();
}
const MatrixType& GetMatrix() const
{
return m_U;
}
const MatrixType& GetMatrixT() const
{
return m_Ut;
}
const MatrixType& GetMatrixInv() const
{
return m_Uinv;
}
bool operator !=(const Dummy&) const
{
return true;
}
bool operator ==(const Dummy& other) const
{
return !(*this != other);
}
inline TOutputPixel operator ()(const TInputPixel& A) const
{
TOutputPixel B;
return static_cast<TOutputPixel>(B);
}
private:
MatrixType m_U;
MatrixType m_Ut;
MatrixType m_Uinv;
};
}
template<class TInputImage, class TOutputImage>
class ITK_EXPORT CudaFCLSFilter: public otb::UnaryFunctorImageFilter<TInputImage, TOutputImage, Functor::Dummy<
typename TInputImage::PixelType, typename TOutputImage::PixelType> >
{
public:
/** Standard class typedefs. */
typedef CudaFCLSFilter Self;
typedef itk::UnaryFunctorImageFilter<TInputImage, TOutputImage,Functor::Dummy<
typename TInputImage::PixelType, typename TOutputImage::PixelType> > Superclass;
typedef itk::SmartPointer<Self> Pointer;
typedef itk::SmartPointer<const Self> ConstPointer;
/** Method for creation through the object factory. */
itkNewMacro(Self);
/** Run-time type information (and related methods). */
itkTypeMacro(CudaFCLSFilter, UnaryFunctorImageFilter);
typedef TInputImage InputImageType;
typedef TOutputImage OutputImageType;
typedef itk::ImageRegionConstIterator<InputImageType> InputImageConstIterator;
typedef itk::ImageRegionIterator<InputImageType> InputImageIterator;
typedef itk::ImageRegionConstIterator<OutputImageType> OutputImageConstIterator;
typedef itk::ImageRegionIterator<OutputImageType> OutputImageIterator;
typedef typename Superclass::OutputImageRegionType OutputImageRegionType;
typedef typename OutputImageType::RegionType RegionType;
typedef typename OutputImageType::SizeType SizeType;
typedef Functor::Dummy<
typename TInputImage::PixelType,
typename TOutputImage::PixelType
> FunctorType;
typedef typename FunctorType::VectorType VectorType;
typedef typename FunctorType::MatrixType MatrixType;
itkSetMacro(BlockSize, int);
itkGetMacro(BlockSize, int);
itkSetMacro(MaxIter, int);
itkGetMacro(MaxIter, int);
void SetMatrix(const MatrixType& U)
{
this->GetFunctor().SetMatrix(U);
this->Modified();
}
protected:
/// Constructor
CudaFCLSFilter();
/// Destructor
virtual ~CudaFCLSFilter() {}
void ThreadedGenerateData(const OutputImageRegionType& outputRegionForThread,
int threadId );
private:
CudaFCLSFilter(const Self &); //purposely not implemented
void operator =(const Self&); //purposely not implemented
int m_BlockSize;
int m_MaxIter;
float* m_GPUInputImage;
float* m_GPUOutputImage;
float* m_GPUOutputImageTmp;
SizeType m_GPUImageSize;
float* m_GPUU;
float* m_GPUUt;
float* m_GPUUinv;
};
} // end namespace otb
#ifndef OTB_MANUAL_INSTANTIATION
#include "otbCudaFCLSFilter.txx"
#endif
#endif
/*=========================================================================
Program: ORFEO Toolbox
Language: C++
Date: $Date$
Version: $Revision$
Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
See OTBCopyright.txt for details.
Some parts of this code are derived from ITK. See ITKCopyright.txt
for details.
This software is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANT2ABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the above copyright notices for more information.
=========================================================================*/
#ifndef __otbCudaFCLSFilter_txx
#define __otbCudaFCLSFilter_txx
#include "otbCudaFCLSFilter.h"
extern "C" void fclsInit();
extern "C" void fclsMallocEndMembers(
float** d_endmembers,
float** d_endmembersT,
float** d_endmembersInv,
int numBands,
int nbEndmembers);
extern "C" void fclsMallocImage( float** d_image_vector,
float** d_image_unmixed,
float** d_image_unmixed_tmp,
int imageWidth,
int imageHeight,
int numBands,
int nbEndmembers);
extern "C" void fclsProcessing(float* d_image_vector,
float* d_image_unmixed,
float* d_image_unmixed_tmp,
float* d_endmembers,
float* d_endmembersT,
float* d_endmembersInv,
int numSamples,
int numBands,
int nbEndmembers,
int maxIter,
int blockSize);
extern "C" void fclsCopyHostToDevice( float* d_ptr,
const float* h_ptr,
int nb_bytes);
extern "C" void fclsCopyDeviceToHost( float* h_ptr,
const float* d_ptr,
int nb_bytes);
extern "C" void fclsFree( float* d_ptr);
namespace otb
{
template <class TInputPointSet, class TOutputImage>
CudaFCLSFilter<TInputPointSet, TOutputImage>
::CudaFCLSFilter()
{
fclsInit();
m_BlockSize = 512;
m_GPUU = 0;
m_GPUUt = 0;
m_GPUUinv = 0;
m_GPUInputImage = 0;
m_GPUOutputImage = 0;
m_GPUOutputImageTmp = 0;
m_GPUImageSize.Fill(0);
this->SetNumberOfThreads(1);
}
template<class TInputPointSet, class TOutputImage>
void
CudaFCLSFilter<TInputPointSet, TOutputImage>
::ThreadedGenerateData(
const OutputImageRegionType& outputRegionForThread,
int threadId)
{
std::cout << "Region : " << outputRegionForThread << std::endl;
//std::cout << "Region : " << outputRegionForThread << std::endl;
typename InputImageType::Pointer inputPtr = dynamic_cast<InputImageType *> (this->itk::ProcessObject::GetInput(0));
typename OutputImageType::Pointer outputPtr = dynamic_cast<OutputImageType *> (this->itk::ProcessObject::GetOutput(0));
int numBands = inputPtr->GetNumberOfComponentsPerPixel();
int numEndmembers = outputPtr->GetNumberOfComponentsPerPixel();
int imageWidth = outputPtr->GetBufferedRegion().GetSize()[0];
int imageHeight = outputPtr->GetBufferedRegion().GetSize()[1];
float * pix = inputPtr->GetBufferPointer();
float * unmixed = outputPtr->GetBufferPointer();
if( !m_GPUU )
{
fclsMallocEndMembers(&m_GPUU, &m_GPUUt, &m_GPUUinv, numBands, numEndmembers);
fclsCopyHostToDevice(m_GPUU, this->GetFunctor().GetMatrix().data_block(), numBands * numEndmembers * sizeof(float));
fclsCopyHostToDevice(m_GPUUt, this->GetFunctor().GetMatrixT().data_block(), numBands * numEndmembers * sizeof(float));
fclsCopyHostToDevice(m_GPUUinv, this->GetFunctor().GetMatrixInv().data_block(), numBands * numEndmembers * sizeof(float));
}
if (m_GPUImageSize != inputPtr->GetBufferedRegion().GetSize())
{
if(m_GPUInputImage)
{
fclsFree(m_GPUInputImage);
fclsFree(m_GPUOutputImage);
fclsFree(m_GPUOutputImageTmp);
}
fclsMallocImage(&m_GPUInputImage, &m_GPUOutputImage, &m_GPUOutputImageTmp, imageWidth, imageHeight, numBands, numEndmembers);
m_GPUImageSize = inputPtr->GetBufferedRegion().GetSize();
}
fclsCopyHostToDevice(m_GPUInputImage, pix, numBands * imageWidth * imageHeight * sizeof(float));
std::cout << "m_GPUInputImage " << m_GPUInputImage << std::endl
<< "m_GPUOutputImage" << m_GPUOutputImage << std::endl
<< "m_GPUOutputImageTmp" << m_GPUOutputImageTmp << std::endl
<< "m_GPUU" << m_GPUU << std::endl
<< "m_GPUUt" << m_GPUUt << std::endl
<< "m_GPUUinv" << m_GPUUinv << std::endl
<< "imageWidth * imageHeight" << imageWidth* imageHeight << std::endl
<< "numBands" << numBands << std::endl
<< "numEndmembers" << numEndmembers << std::endl
<< "m_MaxIter" << m_MaxIter << std::endl
<< "m_BlockSize" << m_BlockSize << std::endl;
fclsProcessing(m_GPUInputImage,
m_GPUOutputImage,
m_GPUOutputImageTmp,
m_GPUU,
m_GPUUt,
m_GPUUinv,
imageWidth * imageHeight,
numBands,
numEndmembers,
m_MaxIter,
m_BlockSize);
fclsCopyDeviceToHost(unmixed, m_GPUOutputImage, numEndmembers * imageWidth * imageHeight * sizeof(float));
}
}
#endif
/*=========================================================================
Program: ORFEO Toolbox
Language: C++
Date: $Date$
Version: $Revision$
Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
See OTBCopyright.txt for details.
This software is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the above copyright notices for more information.
=========================================================================*/
#include "otbCublasStreamingStatisticsVectorImageFilter.h"
#include "itkExceptionObject.h"
#include "otbImageFileReader.h"
#include "otbVectorImage.h"
#include <fstream>
#include "otbStreamingTraits.h"
const unsigned int Dimension = 2;
typedef float PixelType;
typedef otb::VectorImage<PixelType, Dimension> ImageType;
typedef otb::ImageFileReader<ImageType> ReaderType;
typedef otb::CublasStreamingStatisticsVectorImageFilter<ImageType> StreamingStatisticsVectorImageFilterType;
int otbCublasStreamingStatisticsVectorImageFilterNewTest(int argc, char * argv[])
{
StreamingStatisticsVectorImageFilterType::Pointer filter = StreamingStatisticsVectorImageFilterType::New();
std::cout << filter << std::endl;
return EXIT_SUCCESS;
}
int otbCublasStreamingStatisticsVectorImageFilterTest(int argc, char * argv[])
{
const char * infname = argv[1];
const char * outfname = argv[2];
StreamingStatisticsVectorImageFilterType::Pointer filter = StreamingStatisticsVectorImageFilterType::New();
ReaderType::Pointer reader = ReaderType::New();
reader->SetFileName(infname);
//filter->GetStreamer()->SetStreamingMode(otb::SET_NUMBER_OF_STREAM_DIVISIONS);
//filter->GetStreamer()->SetBufferMemorySize(100 * 1024 * 1024);
//filter->GetStreamer()->SetNumberOfStreamDivisions(2);
filter->SetInput(reader->GetOutput());
filter->Update();
std::ofstream file;
file.open(outfname);
file.precision(5);
file << "Mean: " << filter->GetMean() << std::endl;
file << "Covariance: " << filter->GetCovariance() << std::endl;
file << "Correlation: " << filter->GetCorrelation() << std::endl;
file.close();
return EXIT_SUCCESS;
}
/*=========================================================================
Program: ORFEO Toolbox
Language: C++
Date: $Date$
Version: $Revision$
Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
See OTBCopyright.txt for details.
This software is distributed WITHOUT ANY WARRANTY; without even
the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE. See the above copyright notices for more information.
=========================================================================*/
#include "otbCudaFCLSFilter.h"
#include "otbVectorImage.h"
#include "otbImageFileReader.h"
#include "otbStreamingImageFileWriter.h"
#include "otbVectorImageToMatrixImageFilter.h"
#include "otbStandardWriterWatcher.h"
#include <vnl/vnl_inverse.h>
const unsigned int Dimension = 2;
typedef float PixelType;
typedef otb::VectorImage<PixelType, Dimension> ImageType;
typedef otb::ImageFileReader<ImageType> ReaderType;
typedef otb::CudaFCLSFilter<ImageType,ImageType> FullyConstrainedLeastSquareSolverType;
typedef otb::VectorImageToMatrixImageFilter<ImageType> VectorImageToMatrixImageFilterType;
typedef otb::StreamingImageFileWriter<ImageType> WriterType;
int otbCudaFullyConstrainedLeastSquareImageFilterNewTest(int argc, char * argv[])
{
FullyConstrainedLeastSquareSolverType::Pointer filter = FullyConstrainedLeastSquareSolverType::New();
std::cout << filter << std::endl;
return EXIT_SUCCESS;
}
int otbCudaFullyConstrainedLeastSquareImageFilterTest(int argc, char * argv[])
{
const char * inputImage = argv[1];
const char * inputEndmembers = argv[2];
const char * outputImage = argv[3];
int maxIter = atoi(argv[4]);
int blockSize = atoi(argv[5]);
int streamSizeMB = atoi(argv[6]);
ReaderType::Pointer readerImage = ReaderType::New();
readerImage->SetFileName(inputImage);
ReaderType::Pointer readerEndMembers = ReaderType::New();
readerEndMembers->SetFileName(inputEndmembers);
VectorImageToMatrixImageFilterType::Pointer endMember2Matrix = VectorImageToMatrixImageFilterType::New();
endMember2Matrix->SetInput(readerEndMembers->GetOutput());
endMember2Matrix->Update();
typedef VectorImageToMatrixImageFilterType::MatrixType MatrixType;
MatrixType endMembers = endMember2Matrix->GetMatrix();
MatrixType pinv = vnl_matrix_inverse<PixelType>(endMembers);
FullyConstrainedLeastSquareSolverType::Pointer unmixer = \
FullyConstrainedLeastSquareSolverType::New();
unmixer->SetInput(readerImage->GetOutput());
unmixer->SetMatrix(endMembers);
unmixer->SetMaxIter(maxIter);
unmixer->SetBlockSize(blockSize);
WriterType::Pointer writer = WriterType::New();
writer->SetFileName(outputImage);
writer->SetInput(unmixer->GetOutput());
writer->SetAutomaticStrippedStreaming(streamSizeMB * 1024 * 1024);
otb::StandardWriterWatcher w4(writer,unmixer,"FullyConstrainedLeastSquare");
writer->Update();
return EXIT_SUCCESS;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment