diff --git a/CMakeLists.txt b/CMakeLists.txt index b99a2f814ef782ba94f31ac30dd0733203e7f057..28c2db78a72b9dbe6260c1749ce96e28244e1e89 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -31,6 +31,7 @@ INCLUDE_DIRECTORIES( ${Hyperspectral_SOURCE_DIR}/Code/BasicFilters ${Hyperspectral_SOURCE_DIR}/Code/BasicFilters/GPUCuda ${Hyperspectral_SOURCE_DIR}/Code/Hyperspectral +${Hyperspectral_SOURCE_DIR}/Code/Hyperspectral/GPUCuda ${Hyperspectral_SOURCE_DIR}/Code/Vahine ${Hyperspectral_SOURCE_DIR}/Code/RX ) diff --git a/Code/Hyperspectral/CMakeLists.txt b/Code/Hyperspectral/CMakeLists.txt index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..54297795346c792cf088d84d5ba8dcab3b1fb511 100644 --- a/Code/Hyperspectral/CMakeLists.txt +++ b/Code/Hyperspectral/CMakeLists.txt @@ -0,0 +1 @@ +ADD_SUBDIRECTORY(GPUCuda) diff --git a/Code/Hyperspectral/GPUCuda/CMakeLists.txt b/Code/Hyperspectral/GPUCuda/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..2c935cf55df391efba0e7dea5f5c83cbd637c3a4 --- /dev/null +++ b/Code/Hyperspectral/GPUCuda/CMakeLists.txt @@ -0,0 +1,13 @@ +FILE(GLOB OTBHyperspectralGPUCuda_CUDASRCS "*.cu" ) +CUDA_ADD_LIBRARY(OTBHyperspectralGPUCuda_nvcc ${OTBHyperspectralGPUCuda_CUDASRCS}) + + +FILE(GLOB OTBHyperspectralGPUCuda_CPPSRCS "*.cxx" ) + +ADD_LIBRARY(OTBHyperspectralGPUCuda ${OTBHyperspectralGPUCuda_CPPSRCS}) +TARGET_LINK_LIBRARIES (OTBHyperspectralGPUCuda OTBHyperspectralGPUCuda_nvcc OTBCommon OTBBasicFilters ITKBasicFilters ) + +IF(OTB_LIBRARY_PROPERTIES) + SET_TARGET_PROPERTIES(OTBHyperspectralGPUCuda PROPERTIES ${OTB_LIBRARY_PROPERTIES}) +ENDIF(OTB_LIBRARY_PROPERTIES) + diff --git a/Code/Hyperspectral/GPUCuda/foo.cxx b/Code/Hyperspectral/GPUCuda/foo.cxx new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/Code/Hyperspectral/GPUCuda/otbCudaFCLSFilter.cu b/Code/Hyperspectral/GPUCuda/otbCudaFCLSFilter.cu new file mode 100644 index 0000000000000000000000000000000000000000..ee92148b23cac03d6a51b194417ab3ccbf3359aa --- /dev/null +++ b/Code/Hyperspectral/GPUCuda/otbCudaFCLSFilter.cu @@ -0,0 +1,356 @@ +/* + * otbCudaFCLSFilter.cu + * + */ + +#include <cstdio> +#include <cuda.h> +#include <driver_types.h> +#include <vector_types.h> + +//Block thread size +#define BLOCK_SIZE (16*16) + +#define num_bands 152 +#define num_endmembers 13 + +#define CUDA_SAFE_CALL( call ) \ + do \ + { \ + cudaError_t status = call; \ + check_error(status); \ + } while(0) + +void check_error(cudaError_t error) +{ + +#define ELSEIF_CUDA_HANDLE_ERROR_CODE( code ) \ + else if(error == code) \ + printf("Cuda error : %d %s\n", error, #code); + + if(error == cudaSuccess) {} + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorMissingConfiguration) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorMemoryAllocation) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInitializationError) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorLaunchFailure) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorPriorLaunchFailure) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorLaunchTimeout) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorLaunchOutOfResources) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidDeviceFunction) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidConfiguration) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidDevice) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidValue) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidPitchValue) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidSymbol) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorMapBufferObjectFailed) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorUnmapBufferObjectFailed) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidHostPointer) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidDevicePointer) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidTexture) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidTextureBinding) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidChannelDescriptor) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorInvalidMemcpyDirection) + ELSEIF_CUDA_HANDLE_ERROR_CODE(cudaErrorAddressOfConstant) + +#undef ELSEIF_HANDLE_ERROR_CODE +} + + +__global__ void UnconstrainedKernel(float* d_image_vector, + float* d_image_unmixed, + float* d_endmembersInv, + int numSamples) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + float l_abu[num_endmembers]; + + if (x < numSamples) + { + float l_pixel[num_bands]; + + for (int t=0; t < num_bands; t++) + { + l_pixel[t] = d_image_vector[ t + num_bands*x ]; + } + + for(int e = 0; e < num_endmembers; e++) + { + l_abu[e] = 0; + for (int k = 0; k < num_bands; k++) + { + l_abu[e] += d_endmembersInv[k + e*num_bands] * l_pixel[k]; + } + d_image_unmixed[e + num_endmembers*x] = l_abu[e]; + } + } + //syncthreads(); +} + + +__global__ void ISRAKernel(float* d_image_vector, float* d_image_unmixed, float* d_endmembers, + float* d_endmembersT, int numSamples, int maxiter) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + + if (x < numSamples) + { + float l_pixel[num_bands]; + float l_abu[num_endmembers]; + + float numerator; + float denominator; + float dot = 0.0; + + for (int t=0; t < num_bands; t++) + { + l_pixel[t] = d_image_vector[ t + num_bands*x ]; + } + + for (int t=0; t < num_endmembers; t++) + { + l_abu[t] = d_image_unmixed[t + num_endmembers*x]; + } + + for(int it = 0; it < maxiter; it++) + { + for(int e = 0; e < num_endmembers; e++) + { + numerator = 0; + denominator = 0; + + // For all bands + for (int k = 0; k < num_bands; k++) + { + numerator = numerator + d_endmembers[k + e*num_bands] * l_pixel[k]; + + // Calculate dot product + dot = 0; + for (int s = 0; s < num_endmembers; s++) + { + dot += d_endmembersT[s + k*num_endmembers] * l_abu[s]; + } + + denominator += dot * d_endmembers[k + e*num_bands]; + + } + + l_abu[e] = l_abu[e] * (numerator/denominator); + l_abu[e] = numerator/denominator; + } + } + + for(int k=0; k < num_endmembers;k++) + { + d_image_unmixed[k + num_endmembers*x] = l_abu[k]; + } + } + //syncthreads(); +} + +__global__ void FCLSUKernel(float* d_image_unmixed,int numSamples) +{ + float sum; + // float pixel[num_endmembers]; + + int x = blockIdx.x * blockDim.x + threadIdx.x; + if (x < numSamples) + { + sum = 0; + for(int i = 0; i < num_endmembers; i++) + { + //pixel[i] = d_image_unmixed[i + num_endmembers*x ]; + //sum += pixel[i]; + sum += d_image_unmixed[ i + num_endmembers*x ]; + } + + for (int k = 0; k < num_endmembers; k++) + { + //pixel[k] = pixel[k] / sum; + d_image_unmixed[k + num_endmembers*x] = d_image_unmixed[k + num_endmembers*x ] / sum; + } + } + //syncthreads(); +} + + +__global__ void UnconstrainedISRAKernel(float* d_image_vector, + float* d_image_unmixed, + float* d_endmembers, + float* d_endmembersT, + float* d_endmembersInv, + int numSamples, + int maxiter) +{ + int x = blockIdx.x * blockDim.x + threadIdx.x; + + float l_pixel[num_bands]; + float l_abu[num_endmembers]; + float l_abu2[num_endmembers]; + + float numerator = 0; + float denominator = 0; + float dot = 0; + + if (x < numSamples) + { + // Unconstrained + for (int t=0; t < num_bands; t++) + { + l_pixel[t] = d_image_vector[ t + num_bands*x ]; + } + + for(int e = 0; e < num_endmembers; e++) + { + l_abu[e] = 0; + for (int k1 = 0; k1 < num_bands; k1++) + { + l_abu[e] += d_endmembersInv[k1 + e*num_bands] * l_pixel[k1]; + } + } + + // ISRA + /* + for(int it = 0; it < maxiter; it++) + { + for(int e = 0; e < num_endmembers; e++) + { + numerator = 0; + denominator = 0; + + // For all bands + for (int k = 0; k < num_bands; k++) + { + numerator = numerator + d_endmembers[k + e*num_bands] * l_pixel[k]; + + // Calculate dot product + dot = 0; + for (int s = 0; s < num_endmembers; s++) + { + dot += d_endmembersT[s + k*num_endmembers] * l_abu[s]; + } + + denominator += dot * d_endmembers[k + e*num_bands]; + //denominator = dot; + } + l_abu2[e] = l_abu[e] * (numerator/denominator); + } + + for(int e = 0; e < num_endmembers; e++) + { + l_abu[e] = l_abu2[e]; + } + } + syncthreads(); + */ + + for(int e = 0; e < num_endmembers; e++) + { + d_image_unmixed[e + num_endmembers*x] = l_abu[e]; + } + } +} + +extern "C" void fclsProcessing( float* d_image_vector, + float* d_image_unmixed, + float* d_endmembers, + float* d_endmembersT, + float* d_endmembersInv, + int numSamples, + int numBands, + int nbEndmembers, + int maxIter, + int blockSize) +{ + dim3 dimBlock( blockSize ); + dim3 dimGrid ( (numSamples + blockSize) / blockSize ); + + printf( " %d \n " , (numSamples + blockSize) / blockSize ); + + UnconstrainedISRAKernel<<<dimGrid,dimBlock>>>(d_image_vector, d_image_unmixed, d_endmembers, d_endmembersT, d_endmembersInv, numSamples, maxIter); + cudaThreadSynchronize(); + + //UnconstrainedKernel<<<dimGrid,dimBlock>>>(d_image_vector, d_image_unmixed, d_endmembersInv, numSamples); + //cudaThreadSynchronize(); + + //ISRAKernel<<<dimGrid,dimBlock>>>(d_image_vector, d_image_unmixed, d_endmembers, d_endmembersT, numSamples, maxIter); + //cudaThreadSynchronize(); + + //FCLSUKernel<<<dimGrid,dimBlock>>>(d_image_unmixed, numSamples); + //cudaThreadSynchronize(); +} + + +extern "C" void fclsMallocEndMembers( + float** d_endmembers, + float** d_endmembersT, + float** d_endmembersInv, + int numBands, + int nbEndmembers) +{ + + float* d_endmembers_ = 0; + float* d_endmembersT_ = 0; + float* d_endmembersInv_ = 0; + +// printf( "Allocating %d KB\n", numBands*nbEndmembers*sizeof(float) / 1024 ); + CUDA_SAFE_CALL( cudaMalloc((void**) &d_endmembers_, nbEndmembers*numBands*sizeof(float))); + +// printf( "Allocating %d KB\n", numBands*nbEndmembers*sizeof(float) / 1024 ); + CUDA_SAFE_CALL( cudaMalloc((void**) &d_endmembersT_, nbEndmembers*numBands*sizeof(float))); + +// printf( "Allocating %d KB\n", numBands*nbEndmembers*sizeof(float) / 1024 ); + CUDA_SAFE_CALL( cudaMalloc((void**) &d_endmembersInv_, nbEndmembers*numBands*sizeof(float))); + + + *d_endmembers = d_endmembers_; + *d_endmembersT = d_endmembersT_; + *d_endmembersInv = d_endmembersInv_; +} + +extern "C" void fclsMallocImage( float** d_image_vector, + float** d_image_unmixed, + int imageWidth, + int imageHeight, + int numBands, + int nbEndmembers) +{ + float* d_image_vector_ = 0; + float* d_image_unmixed_ = 0; + +// printf( "Allocating %d KB\n", numBands*imageWidth*imageHeight*sizeof(float) / 1024); + CUDA_SAFE_CALL(cudaMalloc((void**) &d_image_vector_, numBands*imageWidth*imageHeight*sizeof(float))); + +// printf( "Allocating %d KB\n", nbEndmembers*imageWidth*imageHeight*sizeof(float) / 1024 /1024); + CUDA_SAFE_CALL( cudaMalloc((void**) &d_image_unmixed_, nbEndmembers*imageWidth*imageHeight*sizeof(float))); + + *d_image_vector = d_image_vector_; + *d_image_unmixed = d_image_unmixed_; +} + + + +extern "C" void fclsCopyHostToDevice( float* d_ptr, + const float* h_ptr, + int nb_bytes) +{ + CUDA_SAFE_CALL( cudaMemcpy(d_ptr, h_ptr, nb_bytes, cudaMemcpyHostToDevice)); +} + +extern "C" void fclsCopyDeviceToHost( float* h_ptr, + const float* d_ptr, + int nb_bytes) +{ + CUDA_SAFE_CALL( cudaMemcpy(h_ptr, d_ptr, nb_bytes, cudaMemcpyDeviceToHost)); +} + +extern "C" void fclsFree( float* d_ptr) +{ + CUDA_SAFE_CALL( cudaFree(d_ptr) ); +} + +extern "C" void fclsInit( void ) +{ + if (cuInit(0) != CUDA_SUCCESS) + exit (0); +} + diff --git a/Code/Hyperspectral/GPUCuda/otbCudaFCLSFilter.h b/Code/Hyperspectral/GPUCuda/otbCudaFCLSFilter.h new file mode 100644 index 0000000000000000000000000000000000000000..248f1ac743195868fd8154484e09122ba3098863 --- /dev/null +++ b/Code/Hyperspectral/GPUCuda/otbCudaFCLSFilter.h @@ -0,0 +1,176 @@ +/*========================================================================= + + Program: ORFEO Toolbox + Language: C++ + Date: $Date$ + Version: $Revision$ + + + Copyright (c) Centre National d'Etudes Spatiales. All rights reserved. + See OTBCopyright.txt for details. + + Some parts of this code are derived from ITK. See ITKCopyright.txt + for details. + + + This software is distributed WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANT2ABILITY or FITNESS FOR A PARTICULAR + PURPOSE. See the above copyright notices for more information. + +=========================================================================*/ +#ifndef __otbCudaFCLSFilter_h +#define __otbCudaFCLSFilter_h + +#include "itkMacro.h" +#include "otbUnaryFunctorImageFilter.h" +#include "vnl/algo/vnl_svd.h" + +namespace otb +{ +namespace Functor +{ +template <class TInputPixel, class TOutputPixel> +class Dummy +{ +public: + + typedef vnl_vector<float> VectorType; + typedef vnl_matrix<float> MatrixType; + + Dummy(){} + virtual ~Dummy(){} + + void SetMatrix(const MatrixType& U) + { + m_U = U; + m_Ut = m_U.transpose(); + m_Uinv = vnl_svd<float>(m_U).inverse(); + } + + unsigned int GetOutputSize() const + { + return m_U.cols(); + } + + const MatrixType& GetMatrix() const + { + return m_U; + } + + const MatrixType& GetMatrixT() const + { + return m_Ut; + } + + const MatrixType& GetMatrixInv() const + { + return m_Uinv; + } + + bool operator !=(const Dummy&) const + { + return true; + } + bool operator ==(const Dummy& other) const + { + return !(*this != other); + } + + inline TOutputPixel operator ()(const TInputPixel& A) const + { + TOutputPixel B; + return static_cast<TOutputPixel>(B); + } + +private: + MatrixType m_U; + MatrixType m_Ut; + MatrixType m_Uinv; +}; +} + + +template<class TInputImage, class TOutputImage> +class ITK_EXPORT CudaFCLSFilter: public otb::UnaryFunctorImageFilter<TInputImage, TOutputImage, Functor::Dummy< + typename TInputImage::PixelType, typename TOutputImage::PixelType> > +{ +public: + /** Standard class typedefs. */ + typedef CudaFCLSFilter Self; + typedef itk::UnaryFunctorImageFilter<TInputImage, TOutputImage,Functor::Dummy< + typename TInputImage::PixelType, typename TOutputImage::PixelType> > Superclass; + typedef itk::SmartPointer<Self> Pointer; + typedef itk::SmartPointer<const Self> ConstPointer; + + /** Method for creation through the object factory. */ + itkNewMacro(Self); + + /** Run-time type information (and related methods). */ + itkTypeMacro(CudaFCLSFilter, UnaryFunctorImageFilter); + + typedef TInputImage InputImageType; + typedef TOutputImage OutputImageType; + + typedef itk::ImageRegionConstIterator<InputImageType> InputImageConstIterator; + typedef itk::ImageRegionIterator<InputImageType> InputImageIterator; + + typedef itk::ImageRegionConstIterator<OutputImageType> OutputImageConstIterator; + typedef itk::ImageRegionIterator<OutputImageType> OutputImageIterator; + typedef typename Superclass::OutputImageRegionType OutputImageRegionType; + + typedef typename OutputImageType::RegionType RegionType; + typedef typename OutputImageType::SizeType SizeType; + + typedef Functor::Dummy< + typename TInputImage::PixelType, + typename TOutputImage::PixelType + > FunctorType; + + typedef typename FunctorType::VectorType VectorType; + typedef typename FunctorType::MatrixType MatrixType; + + itkSetMacro(BlockSize, int); + itkGetMacro(BlockSize, int); + + itkSetMacro(MaxIter, int); + itkGetMacro(MaxIter, int); + + void SetMatrix(const MatrixType& U) + { + this->GetFunctor().SetMatrix(U); + this->Modified(); + } + +protected: + /// Constructor + CudaFCLSFilter(); + + /// Destructor + virtual ~CudaFCLSFilter() {} + + void ThreadedGenerateData(const OutputImageRegionType& outputRegionForThread, + int threadId ); + +private: + CudaFCLSFilter(const Self &); //purposely not implemented + void operator =(const Self&); //purposely not implemented + + int m_BlockSize; + int m_MaxIter; + + float* m_GPUInputImage; + float* m_GPUOutputImage; + SizeType m_GPUImageSize; + + float* m_GPUU; + float* m_GPUUt; + float* m_GPUUinv; + +}; +} // end namespace otb + +#ifndef OTB_MANUAL_INSTANTIATION +#include "otbCudaFCLSFilter.txx" +#endif + +#endif diff --git a/Code/Hyperspectral/GPUCuda/otbCudaFCLSFilter.txx b/Code/Hyperspectral/GPUCuda/otbCudaFCLSFilter.txx new file mode 100644 index 0000000000000000000000000000000000000000..75a4a2084c6b07bfeab0d35112df074b1cf4fd76 --- /dev/null +++ b/Code/Hyperspectral/GPUCuda/otbCudaFCLSFilter.txx @@ -0,0 +1,139 @@ +/*========================================================================= + + Program: ORFEO Toolbox + Language: C++ + Date: $Date$ + Version: $Revision$ + + + Copyright (c) Centre National d'Etudes Spatiales. All rights reserved. + See OTBCopyright.txt for details. + + Some parts of this code are derived from ITK. See ITKCopyright.txt + for details. + + + This software is distributed WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANT2ABILITY or FITNESS FOR A PARTICULAR + PURPOSE. See the above copyright notices for more information. + +=========================================================================*/ +#ifndef __otbCudaFCLSFilter_txx +#define __otbCudaFCLSFilter_txx + +#include "otbCudaFCLSFilter.h" + +extern "C" void fclsInit(); + +extern "C" void fclsMallocEndMembers( + float** d_endmembers, + float** d_endmembersT, + float** d_endmembersInv, + int numBands, + int nbEndmembers); + +extern "C" void fclsMallocImage( float** d_image_vector, + float** d_image_unmixed, + int imageWidth, + int imageHeight, + int numBands, + int nbEndmembers); + +extern "C" void fclsProcessing(float* d_image_vector, + float* d_image_unmixed, + float* d_endmembers, + float* d_endmembersT, + float* d_endmembersInv, + int numSamples, + int numBands, + int nbEndmembers, + int maxIter, + int blockSize); + +extern "C" void fclsCopyHostToDevice( float* d_ptr, + const float* h_ptr, + int nb_bytes); + +extern "C" void fclsCopyDeviceToHost( float* h_ptr, + const float* d_ptr, + int nb_bytes); + +extern "C" void fclsFree( float* d_ptr); + +namespace otb +{ + +template <class TInputPointSet, class TOutputImage> +CudaFCLSFilter<TInputPointSet, TOutputImage> +::CudaFCLSFilter() +{ + fclsInit(); + m_BlockSize = 512; + m_GPUU = 0; + m_GPUUt = 0; + m_GPUUinv = 0; + m_GPUInputImage = 0; + m_GPUOutputImage = 0; + m_GPUImageSize.Fill(0); + this->SetNumberOfThreads(1); +} + +template<class TInputPointSet, class TOutputImage> +void +CudaFCLSFilter<TInputPointSet, TOutputImage> +::ThreadedGenerateData( + const OutputImageRegionType& outputRegionForThread, + int threadId) +{ + //std::cout << "Region : " << outputRegionForThread << std::endl; + typename InputImageType::Pointer inputPtr = dynamic_cast<InputImageType *> (this->itk::ProcessObject::GetInput(0)); + typename OutputImageType::Pointer outputPtr = dynamic_cast<OutputImageType *> (this->itk::ProcessObject::GetOutput(0)); + + int numBands = inputPtr->GetNumberOfComponentsPerPixel(); + int numEndmembers = outputPtr->GetNumberOfComponentsPerPixel(); + + int imageWidth = outputPtr->GetBufferedRegion().GetSize()[0]; + int imageHeight = outputPtr->GetBufferedRegion().GetSize()[1]; + + float * pix = inputPtr->GetBufferPointer(); + float * unmixed = outputPtr->GetBufferPointer(); + + if( !m_GPUU ) + { + fclsMallocEndMembers(&m_GPUU, &m_GPUUt, &m_GPUUinv, numBands, numEndmembers); + fclsCopyHostToDevice(m_GPUU, this->GetFunctor().GetMatrix().data_block(), numBands * numEndmembers * sizeof(float)); + fclsCopyHostToDevice(m_GPUUt, this->GetFunctor().GetMatrixT().data_block(), numBands * numEndmembers * sizeof(float)); + fclsCopyHostToDevice(m_GPUUinv, this->GetFunctor().GetMatrixInv().data_block(), numBands * numEndmembers * sizeof(float)); + } + + if (m_GPUImageSize != inputPtr->GetBufferedRegion().GetSize()) + { + if(m_GPUInputImage) + { + fclsFree(m_GPUInputImage); + fclsFree(m_GPUOutputImage); + } + + fclsMallocImage(&m_GPUInputImage, &m_GPUOutputImage, imageWidth, imageHeight, numBands, numEndmembers); + m_GPUImageSize = inputPtr->GetBufferedRegion().GetSize(); + } + + fclsCopyHostToDevice(m_GPUInputImage, pix, numBands * imageWidth * imageHeight * sizeof(float)); + + fclsProcessing(m_GPUInputImage, + m_GPUOutputImage, + m_GPUU, + m_GPUUt, + m_GPUUinv, + imageWidth * imageHeight, + numBands, + numEndmembers, + m_MaxIter, + m_BlockSize); + + fclsCopyDeviceToHost(unmixed, m_GPUOutputImage, numEndmembers * imageWidth * imageHeight * sizeof(float)); +} + +} + +#endif diff --git a/Testing/CMakeLists.txt b/Testing/CMakeLists.txt index b34eaa27bf0f76991e95441980351cff94eb9173..abe5aa77fc06925cb9cd8ddf2b88c44ea685f2d1 100644 --- a/Testing/CMakeLists.txt +++ b/Testing/CMakeLists.txt @@ -109,7 +109,7 @@ ADD_TEST(bfTvFullyConstrainedLeastSquareImageFilter_SYNTHETIC otbFullyConstrainedLeastSquareImageFilterTest ${DATA}/SYNTHETIC100/hsi_cube.tif ${DATA}/SYNTHETIC100/endmembers.tif - ${TEMP}/bfFullyConstrainedLeastSquareImageFilter.tif) + ${TEMP}/bfFullyConstrainedLeastSquareImageFilter_SYNTHETIC.tif) ADD_TEST(bfTvFullyConstrainedLeastSquareImageFilter_AVIRIS512 @@ -202,10 +202,11 @@ IF( OTB_USE_CUBLAS ) SET( otbHyperCublasTests1_SRC otbCublasStreamingStatisticsVectorImageFilter.cxx + otbCudaFCLSImageFilter.cxx ) ADD_EXECUTABLE(otbHyperCublasTests1 ${otbHyperCublasTests1_SRC} otbHyperCublasTests1.cxx) -TARGET_LINK_LIBRARIES(otbHyperCublasTests1 OTBCommon OTBIO OTBBasicFilters OTBTesting OTBBasicFiltersGPUCuda) +TARGET_LINK_LIBRARIES(otbHyperCublasTests1 OTBCommon OTBIO OTBBasicFilters OTBTesting OTBBasicFiltersGPUCuda OTBHyperspectralGPUCuda ) ADD_TEST(bfTuCublasStreamingStatisticsVectorImageFilterNew ${TESTEXE_DIR}/otbHyperCublasTests1 @@ -223,6 +224,33 @@ ADD_TEST(bfTvCublasStreamingStatisticsVectorImageFilter_AVIRIS512 ${DATA}/AVIRIS/extract512/f970619t01p02_r02_sc01_goodbands.hdr ${TEMP}/aviris512_stats_cublas.txt) + +ADD_TEST(bfTuCudaFullyConstrainedLeastSquareImageFilterNew + ${TESTEXE_DIR}/otbHyperCublasTests1 + otbCudaFullyConstrainedLeastSquareImageFilterNewTest) + +ADD_TEST(bfTvCudaFullyConstrainedLeastSquareImageFilter_SYNTHETIC + ${TESTEXE_DIR}/otbHyperCublasTests1 + otbCudaFullyConstrainedLeastSquareImageFilterTest + ${DATA}/SYNTHETIC100/hsi_cube.tif + ${DATA}/SYNTHETIC100/endmembers.tif + ${TEMP}/bfCudaFullyConstrainedLeastSquareImageFilter_SYNTHETIC.hdr + 500 # maxiter + 512 # GPU block size + 1 # stream size in MB + ) + +ADD_TEST(bfTvCudaFullyConstrainedLeastSquareImageFilter_AVIRIS512_block512 + ${TESTEXE_DIR}/otbHyperCublasTests1 + otbCudaFullyConstrainedLeastSquareImageFilterTest + ${DATA}/AVIRIS/extract512/f970619t01p02_r02_sc01_goodbands.hdr + ${TEMP}/aviris_vcaimagefilter_13.hdr + ${TEMP}/bfCudaFullyConstrainedLeastSquareImageFilter_AVIRIS512.hdr + 500 # maxiter + 512 # GPU block size + 1 # stream size in MB + ) + ENDIF( OTB_USE_CUBLAS ) ################### diff --git a/Testing/otbCudaFCLSImageFilter.cxx b/Testing/otbCudaFCLSImageFilter.cxx new file mode 100644 index 0000000000000000000000000000000000000000..a611d8a2e787b0690c2dc2cf3354d0e415b4cf6f --- /dev/null +++ b/Testing/otbCudaFCLSImageFilter.cxx @@ -0,0 +1,84 @@ +/*========================================================================= + + Program: ORFEO Toolbox + Language: C++ + Date: $Date$ + Version: $Revision$ + + + Copyright (c) Centre National d'Etudes Spatiales. All rights reserved. + See OTBCopyright.txt for details. + + + This software is distributed WITHOUT ANY WARRANTY; without even + the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR + PURPOSE. See the above copyright notices for more information. + +=========================================================================*/ +#include "otbCudaFCLSFilter.h" + +#include "otbVectorImage.h" +#include "otbImageFileReader.h" +#include "otbStreamingImageFileWriter.h" +#include "otbVectorImageToMatrixImageFilter.h" +#include "otbStandardWriterWatcher.h" +#include <vnl/vnl_inverse.h> + +const unsigned int Dimension = 2; +typedef float PixelType; + +typedef otb::VectorImage<PixelType, Dimension> ImageType; +typedef otb::ImageFileReader<ImageType> ReaderType; +typedef otb::CudaFCLSFilter<ImageType,ImageType> FullyConstrainedLeastSquareSolverType; +typedef otb::VectorImageToMatrixImageFilter<ImageType> VectorImageToMatrixImageFilterType; +typedef otb::StreamingImageFileWriter<ImageType> WriterType; + +int otbCudaFullyConstrainedLeastSquareImageFilterNewTest(int argc, char * argv[]) +{ + FullyConstrainedLeastSquareSolverType::Pointer filter = FullyConstrainedLeastSquareSolverType::New(); + std::cout << filter << std::endl; + return EXIT_SUCCESS; +} + +int otbCudaFullyConstrainedLeastSquareImageFilterTest(int argc, char * argv[]) +{ + const char * inputImage = argv[1]; + const char * inputEndmembers = argv[2]; + const char * outputImage = argv[3]; + int maxIter = atoi(argv[4]); + int blockSize = atoi(argv[5]); + int streamSizeMB = atoi(argv[6]); + + ReaderType::Pointer readerImage = ReaderType::New(); + readerImage->SetFileName(inputImage); + + ReaderType::Pointer readerEndMembers = ReaderType::New(); + readerEndMembers->SetFileName(inputEndmembers); + VectorImageToMatrixImageFilterType::Pointer endMember2Matrix = VectorImageToMatrixImageFilterType::New(); + endMember2Matrix->SetInput(readerEndMembers->GetOutput()); + + endMember2Matrix->Update(); + + typedef VectorImageToMatrixImageFilterType::MatrixType MatrixType; + MatrixType endMembers = endMember2Matrix->GetMatrix(); + MatrixType pinv = vnl_matrix_inverse<PixelType>(endMembers); + + FullyConstrainedLeastSquareSolverType::Pointer unmixer = \ + FullyConstrainedLeastSquareSolverType::New(); + + unmixer->SetInput(readerImage->GetOutput()); + unmixer->SetMatrix(endMembers); + unmixer->SetMaxIter(maxIter); + unmixer->SetBlockSize(blockSize); + + WriterType::Pointer writer = WriterType::New(); + writer->SetFileName(outputImage); + writer->SetInput(unmixer->GetOutput()); + writer->SetBufferMemorySize(streamSizeMB * 1024 * 1024); + + otb::StandardWriterWatcher w4(writer,unmixer,"FullyConstrainedLeastSquare"); + + writer->Update(); + + return EXIT_SUCCESS; +} diff --git a/Testing/otbHyperCublasTests1.cxx b/Testing/otbHyperCublasTests1.cxx index 0f6d19be70f05ad146ebed75ef50e02f4abd686a..c832283500936a948d523c1a388d0abb22f9fe12 100644 --- a/Testing/otbHyperCublasTests1.cxx +++ b/Testing/otbHyperCublasTests1.cxx @@ -25,5 +25,6 @@ void RegisterTests() { REGISTER_TEST(otbCublasStreamingStatisticsVectorImageFilterNewTest); REGISTER_TEST(otbCublasStreamingStatisticsVectorImageFilterTest); -// REGISTER_TEST(otbCudaCorrelationNoStreamingTest); + REGISTER_TEST(otbCudaFullyConstrainedLeastSquareImageFilterNewTest); + REGISTER_TEST(otbCudaFullyConstrainedLeastSquareImageFilterTest); }