diff --git a/Code/FeatureExtraction/CMakeLists.txt b/Code/FeatureExtraction/CMakeLists.txt
index ed0c66843a6e1f35197e6222330d497bf0ed9f53..36ce2adb9327e6a30522758535e994674bc9309f 100644
--- a/Code/FeatureExtraction/CMakeLists.txt
+++ b/Code/FeatureExtraction/CMakeLists.txt
@@ -4,7 +4,7 @@
 FILE(GLOB OTBFeatureExtraction_SRCS "*.cxx" )
 
 ADD_LIBRARY(OTBFeatureExtraction ${OTBFeatureExtraction_SRCS})
-TARGET_LINK_LIBRARIES (OTBFeatureExtraction OTBCommon)
+TARGET_LINK_LIBRARIES (OTBFeatureExtraction OTBCommon otbsiftfast)
 
 INSTALL(TARGETS OTBFeatureExtraction
 RUNTIME DESTINATION ${OTB_INSTALL_BIN_DIR} COMPONENT RuntimeLibraries
diff --git a/Code/FeatureExtraction/otbImageToHessianDeterminantImageFilter.h b/Code/FeatureExtraction/otbImageToHessianDeterminantImageFilter.h
index 6a7341b7d4186dd05bbacf35094d9eeac43b4ef9..da1c141db0978fa2d51f8fc766bc6f6c346ca07d 100644
--- a/Code/FeatureExtraction/otbImageToHessianDeterminantImageFilter.h
+++ b/Code/FeatureExtraction/otbImageToHessianDeterminantImageFilter.h
@@ -56,9 +56,9 @@ namespace otb
      */
     inline TOutput operator()(const TInput& input)
       {
-        return static_cast<TOutput>(input[0]*input[1] - input[2]*input[2]);
-
+	      return static_cast<TOutput>(input[0]*input[1] - input[2]*input[2]);
       }
+    
 
     bool operator !=(const HessianDeterminant) const
       {
diff --git a/Code/FeatureExtraction/otbKeyPointSetsMatchingFilter.h b/Code/FeatureExtraction/otbKeyPointSetsMatchingFilter.h
new file mode 100755
index 0000000000000000000000000000000000000000..449d8a4c722d3acbdf92bc706217baf7b44ea8fc
--- /dev/null
+++ b/Code/FeatureExtraction/otbKeyPointSetsMatchingFilter.h
@@ -0,0 +1,135 @@
+/*=========================================================================
+
+Program:   ORFEO Toolbox
+Language:  C++
+Date:      $Date$
+Version:   $Revision$
+
+
+Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+See OTBCopyright.txt for details.
+
+
+This software is distributed WITHOUT ANY WARRANTY; without even 
+the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#ifndef __otbKeyPointSetsMatchingFilter_h
+#define __otbKeyPointSetsMatchingFilter_h
+
+#include "otbObjectList.h"
+#include "otbObjectListSource.h"
+#include "otbLandmark.h"
+#include "itkEuclideanDistance.h"
+
+namespace otb
+{
+  /** \class KeyPointSetsMatchingFilter
+   *  \brief This class matches two point sets according to their associated data.
+   *
+   *   The matching criteria is that the ratio between the distance to the first nearest neighbor and the 
+   *   second nearest neighbor is lower than the distance threshold. The distance used can be set via the TDistance
+   *   template parameters. It has to implement the Evaluate() method (see \doxygen{EuclideanDistance} for more details).
+   *
+   *   By default, the algorithm tries to match points from pointset 1 to points from pointset 2. If back matching is activated,
+   *   it will aslo try to match points from pointset 2 to points from pointset 2, and discard matches that do not appear both in
+   *   forward and backward matching. 
+   *
+   *   Matches are stored in a landmark object containing both matched points and point data. The landmark data will hold the distance value
+   *   between the data.
+   *
+   *   \sa Landmark
+   *   \sa PointSet
+   *   \sa EuclideanDistance
+   */
+template < class TPointSet, class TDistance = itk::Statistics::EuclideanDistance< typename TPointSet::PixelType > >
+class ITK_EXPORT KeyPointSetsMatchingFilter
+: public ObjectListSource<  ObjectList< Landmark< typename TPointSet::PointType, typename TPointSet::PixelType,double> > >
+  {
+    public:
+    /// standard class typedefs
+    typedef KeyPointSetsMatchingFilter             Self;
+    typedef ObjectListSource<  ObjectList< 
+    Landmark< typename TPointSet::PointType, 
+    typename TPointSet::PixelType,double> > >      Superclass;
+    typedef itk::SmartPointer<Self>                Pointer;
+    typedef itk::SmartPointer<const Self>          ConstPointer;
+
+    /// template typedefs
+    typedef TPointSet                                      PointSetType;
+    typedef typename PointSetType::Pointer                 PointSetPointerType;
+    typedef typename PointSetType::PointType               PointType;
+    typedef typename PointSetType::PixelType               PointDataType;
+    typedef typename PointSetType::PointsContainer         PointsContainerType;
+    typedef typename PointsContainerType::ConstIterator    PointsIteratorType;
+    typedef typename PointSetType::PointDataContainer      PointDataContainerType;
+    typedef typename PointDataContainerType::ConstIterator PointDataIteratorType;
+    typedef TDistance                                      DistanceType;
+    typedef typename DistanceType::Pointer                 DistancePointerType;
+    typedef Landmark< typename TPointSet::PointType, 
+    typename TPointSet::PixelType,double>                  LandmarkType;
+    typedef typename LandmarkType::Pointer                 LandmarkPointerType;
+    typedef ObjectList<LandmarkType>                       LandmarkListType;
+    typedef typename LandmarkListType::Pointer             LandmarkListPointerType;
+    typedef std::pair<unsigned int,double>                 NeighborSearchResultType;
+
+    /// standard macros
+    itkNewMacro(Self);
+    itkTypeMacro(KeyPointSetsMatchingFilter,ObjectListSource);
+
+    /// Accessors
+    itkBooleanMacro(UseBackMatching);
+    itkSetMacro(UseBackMatching,bool);
+    itkGetMacro(UseBackMatching,bool);
+    itkSetMacro(DistanceThreshold,double);
+    itkGetMacro(DistanceThreshold,double);
+    
+    /// Set the first pointset
+    void SetInput1(const PointSetType * pointset);    
+    /// Get the first pointset
+    const PointSetType * GetInput1();
+    /// Set the second pointset
+    void SetInput2(const PointSetType * pointset);    
+    /// Get the second pointset
+    const PointSetType * GetInput2();
+
+    protected:
+    /// Constructor
+    KeyPointSetsMatchingFilter();
+    /// Destructor
+    ~KeyPointSetsMatchingFilter(){};
+    /// PrintSelf method
+    virtual void PrintSelf(std::ostream& os, itk::Indent indent) const;
+
+    /// Generate Data
+    virtual void GenerateData();
+
+    /**
+     * Find the nearest neighbor of data1 in pointset.
+     * \return a pair of (index,distance).
+     */
+    NeighborSearchResultType NearestNeighbor(const PointDataType& data1, const PointSetType * pointset);
+
+
+    private:
+    KeyPointSetsMatchingFilter(const Self&); // purposely not implemented
+    void operator=(const Self&);             // purposely not implemented
+
+    // Find back matches from 2 to 1 to validate them
+    bool m_UseBackMatching;
+
+    // Distance threshold to decide matching
+    double m_DistanceThreshold;
+
+    // Distance calculator
+    DistancePointerType m_DistanceCalculator;
+  };
+
+} // end namespace otb
+
+#ifndef OTB_MANUAL_INSTANTIATION
+#include "otbKeyPointSetsMatchingFilter.txx"
+#endif
+#endif 
+
diff --git a/Code/FeatureExtraction/otbKeyPointSetsMatchingFilter.txx b/Code/FeatureExtraction/otbKeyPointSetsMatchingFilter.txx
new file mode 100755
index 0000000000000000000000000000000000000000..eaca7c29db864a09a59e7ac11f603628489a1a97
--- /dev/null
+++ b/Code/FeatureExtraction/otbKeyPointSetsMatchingFilter.txx
@@ -0,0 +1,233 @@
+/*=========================================================================
+
+Program:   ORFEO Toolbox
+Language:  C++
+Date:      $Date$
+Version:   $Revision$
+
+
+Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+See OTBCopyright.txt for details.
+
+
+This software is distributed WITHOUT ANY WARRANTY; without even 
+the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#ifndef __otbKeyPointSetsMatchingFilter_txx
+#define __otbKeyPointSetsMatchingFilter_txx
+
+#include "otbKeyPointSetsMatchingFilter.h"
+
+namespace otb
+{
+
+template <class TPointSet, class TDistance>
+KeyPointSetsMatchingFilter<TPointSet,TDistance>
+::KeyPointSetsMatchingFilter()
+{
+  this->SetNumberOfRequiredInputs(2);
+  m_UseBackMatching   = false;
+  m_DistanceThreshold = 0.6;
+  // Object used to measure distance
+  m_DistanceCalculator = DistanceType::New();
+}
+
+template <class TPointSet, class TDistance>
+const typename KeyPointSetsMatchingFilter<TPointSet,TDistance>
+::PointSetType *
+KeyPointSetsMatchingFilter<TPointSet,TDistance>
+::GetInput1()
+{
+  return static_cast<const PointSetType *>(this->itk::ProcessObject::GetInput(0));
+}
+
+template <class TPointSet, class TDistance>
+void
+KeyPointSetsMatchingFilter<TPointSet,TDistance>
+::SetInput1(const PointSetType * pointset)
+{
+  this->itk::ProcessObject::SetNthInput(0,const_cast<PointSetType *>(pointset));
+}
+
+template <class TPointSet, class TDistance>
+const typename KeyPointSetsMatchingFilter<TPointSet,TDistance>
+::PointSetType *
+KeyPointSetsMatchingFilter<TPointSet,TDistance>
+::GetInput2()
+{
+  return static_cast<const PointSetType *>(this->itk::ProcessObject::GetInput(1));
+}
+
+template <class TPointSet, class TDistance>
+void
+KeyPointSetsMatchingFilter<TPointSet,TDistance>
+::SetInput2(const PointSetType * pointset)
+{
+  this->itk::ProcessObject::SetNthInput(1,const_cast<PointSetType *>(pointset));
+}
+
+template <class TPointSet, class TDistance>
+void
+KeyPointSetsMatchingFilter<TPointSet,TDistance>
+::GenerateData()
+{
+//   std::cout<<"GenerateData()"<<std::endl;
+
+  // Get the input pointers
+  const PointSetType * ps1 =  this->GetInput1();
+  const PointSetType * ps2 =  this->GetInput2();
+
+  // Check if one of the pointsets is empty
+  if( ps1->GetNumberOfPoints() == 0 || ps2->GetNumberOfPoints() == 0 )
+    {
+      itkExceptionMacro(<<"Empty input pointset !");
+    }
+
+  // Get the output pointer
+  LandmarkListPointerType landmarks = this->GetOutput();
+
+  // Define iterators on points and point data.
+  PointsIteratorType     pIt  = ps1->GetPoints()->Begin();
+  PointDataIteratorType pdIt = ps1->GetPointData()->Begin();
+  
+  // iterate on pointset 1
+  while(pdIt!=ps1->GetPointData()->End()
+	&&pIt!=ps1->GetPoints()->End())
+    {
+      // Get point and point data at current location
+      bool matchFound = false;
+      unsigned int currentIndex = pIt.Index();
+      PointDataType data = pdIt.Value();
+      PointType     point = pIt.Value();
+
+      // These variables will hold the matched point and point data
+      PointDataType dataMatch;
+      PointType pointMatch;
+
+      // call to the matching routine
+      NeighborSearchResultType searchResult1 = NearestNeighbor(data,ps2);
+
+     // Check if the neighbor distance is lower than the threshold
+     if(searchResult1.second < m_DistanceThreshold)
+       {
+	 // Get the matched point and point data
+	 dataMatch = ps2->GetPointData()->GetElement(searchResult1.first);
+	 pointMatch = ps2->GetPoints()->GetElement(searchResult1.first);
+
+	 // If the back matching option is on
+	 if(m_UseBackMatching)
+	   {
+	     // Peform the back search
+	     NeighborSearchResultType searchResult2 = NearestNeighbor(dataMatch,ps1);
+	     
+	     // Test if back search finds the same match
+	     if(currentIndex == searchResult2.first)
+	       {
+		 matchFound = true;
+	       }	     
+	   } 
+	 else // else back matching
+	   {
+	     matchFound = true;
+	   }
+       }
+     
+     // If we found a match, add the proper landmark
+     if(matchFound)
+       {
+	 LandmarkPointerType landmark = LandmarkType::New();
+	 landmark->SetPoint1(point);
+	 landmark->SetPointData1(data);
+	 landmark->SetPoint2(pointMatch);
+	 landmark->SetPointData2(dataMatch);
+	 landmark->SetLandmarkData(searchResult1.second);
+	 
+	 // Add the new landmark to the landmark list
+	 landmarks->PushBack(landmark);
+       }
+     ++pdIt;
+     ++pIt;
+    }
+}
+
+template <class TPointSet, class TDistance>
+typename KeyPointSetsMatchingFilter<TPointSet,TDistance>::NeighborSearchResultType 
+KeyPointSetsMatchingFilter<TPointSet,TDistance>
+::NearestNeighbor(const PointDataType& data1, const PointSetType * pointset)
+{
+//   std::cout<<"Call to NearestNeighbor()"<<std::endl;
+  // Declare the result
+  NeighborSearchResultType result;
+
+  // Define iterators on points and point data.
+  PointDataIteratorType pdIt = pointset->GetPointData()->Begin();
+
+  // local variables
+  unsigned int nearestIndex = 0;
+  double d1 = m_DistanceCalculator->Evaluate(data1,pdIt.Value());
+  ++pdIt;
+  double d2 = m_DistanceCalculator->Evaluate(data1,pdIt.Value());
+  ++pdIt;
+
+  if(d1>d2)
+    {
+      nearestIndex = 1;
+    }
+  // Initialize distances
+  double nearestDistance = std::min(d1,d2);
+  double secondNearestDistance = std::max(d1,d2);
+  double distanceValue;
+
+  // iterate on the pointset
+  while( pdIt != pointset->GetPointData()->End() )
+    {
+      // Evaluate the distance
+      distanceValue = m_DistanceCalculator->Evaluate(data1,pdIt.Value());
+
+//       std::cout<<nearestIndex<<" "<<nearestDistance<<" "<<secondNearestDistance<<std::endl;
+
+      // Check if this point is the nearest neighbor
+      if(distanceValue < nearestDistance)
+	{
+	  secondNearestDistance = nearestDistance;
+	  nearestDistance = distanceValue;
+	  nearestIndex = pdIt.Index();
+	  
+	}
+      // Else check if it is the second nearest neighbor
+      else if(distanceValue < secondNearestDistance)
+	{
+	  secondNearestDistance = distanceValue;
+	}
+      ++pdIt;
+    }
+
+  // Fill results
+  result.first = nearestIndex;
+  if(secondNearestDistance == 0)
+    {
+      result.second = 1;
+    }
+  else
+    {
+      result.second = nearestDistance/secondNearestDistance;
+    }
+
+  // return the result
+  return result;
+
+}
+ 
+template <class TPointSet, class TDistance>
+void
+KeyPointSetsMatchingFilter<TPointSet,TDistance>
+::PrintSelf(std::ostream& os, itk::Indent indent) const
+{
+  Superclass::PrintSelf(os, indent);
+}
+
+} // end namespace otb
+
+#endif 
diff --git a/Code/FeatureExtraction/otbLandmark.h b/Code/FeatureExtraction/otbLandmark.h
new file mode 100755
index 0000000000000000000000000000000000000000..2d447da6e1a73fba4210d2bcebe49167d10f7941
--- /dev/null
+++ b/Code/FeatureExtraction/otbLandmark.h
@@ -0,0 +1,94 @@
+/*=========================================================================
+
+Program:   ORFEO Toolbox
+Language:  C++
+Date:      $Date$
+Version:   $Revision$
+
+
+Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+See OTBCopyright.txt for details.
+
+
+This software is distributed WITHOUT ANY WARRANTY; without even 
+the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#ifndef __otbLandmark_h
+#define __otbLandmark_h
+
+#include "itkDataObject.h"
+#include "otbMacro.h"
+#include "itkObjectFactory.h"
+
+namespace otb
+{
+  /** \class Landmark
+   *  \brief This class represent point and point data binary matching.
+   *
+   * It is used to represent match between keypoint like SIFT keypoint for instance.
+   *
+   * The class TLandmarkData can be used to store any information on the matching.
+   */
+  template <class TPoint, class TPointData, class TLandmarkData=TPointData>
+    class ITK_EXPORT Landmark 
+    : public itk::DataObject
+    {
+      public:
+      /// standard class typedefs
+      typedef Landmark                      Self;
+      typedef itk::DataObject               Superclass;
+      typedef itk::SmartPointer<Self>       Pointer;
+      typedef itk::SmartPointer<const Self> ConstPointer;
+
+      /// Standard macros
+      itkNewMacro(Self);
+      itkTypeMacro(Landmark,DataObject);
+    
+      /// template typedefs 
+      typedef TPoint        PointType;
+      typedef TPointData    PointDataType;
+      typedef TLandmarkData LandmarkDataType;
+      
+      /// Accessors
+      itkSetMacro(Point1,PointType);
+      itkGetConstReferenceMacro(Point1,PointType);
+      itkSetMacro(PointData1,PointDataType);
+      itkGetConstReferenceMacro(PointData1,PointDataType);
+      itkSetMacro(Point2,PointType);
+      itkGetConstReferenceMacro(Point2,PointType);
+      itkSetMacro(PointData2,PointDataType);
+      itkGetConstReferenceMacro(PointData2,PointDataType);
+      itkSetMacro(LandmarkData,LandmarkDataType);
+      itkGetConstReferenceMacro(LandmarkData,LandmarkDataType);
+
+      protected:
+      /// Constructor 
+      Landmark(){}
+      /// Destructor 
+      ~Landmark(){}
+      /// PrintSelf method 
+      virtual void PrintSelf(std::ostream& os, itk::Indent indent) const
+      {
+	Superclass::PrintSelf(os,indent);
+	os<<indent<<"Landmark: P1= "<<m_Point1<<" P2= "<<m_Point2<<std::endl;
+      }
+
+      private:
+      Landmark(const Self&); //purposely not implemented
+      void operator=(const Self&); //purposely not implemented
+      
+      /// First landmark point
+      PointType        m_Point1;
+      /// Second landmark point
+      PointType        m_Point2;
+      /// First landmark point data
+      PointDataType    m_PointData1;
+      /// Second landmark point data
+      PointDataType    m_PointData2;
+      /// Landmark data
+      LandmarkDataType m_LandmarkData;
+    };
+} // end namespace otb
+#endif
diff --git a/Code/FeatureExtraction/otbSiftFastImageFilter.h b/Code/FeatureExtraction/otbSiftFastImageFilter.h
new file mode 100755
index 0000000000000000000000000000000000000000..451155c0a176ffa567e091bb200a3e95415f0372
--- /dev/null
+++ b/Code/FeatureExtraction/otbSiftFastImageFilter.h
@@ -0,0 +1,93 @@
+/*=========================================================================
+
+Program:   ORFEO Toolbox
+Language:  C++
+Date:      $Date$
+Version:   $Revision$
+
+
+Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+See OTBCopyright.txt for details.
+
+
+This software is distributed WITHOUT ANY WARRANTY; without even 
+the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#ifndef __otbSiftFastImageFilter_h
+#define __otbSiftFastImageFilter_h
+
+#include "otbImageToPointSetFilter.h"
+#include "itkRescaleIntensityImageFilter.h"
+#include "otbImage.h"
+
+namespace otb
+{
+
+
+  /** \class SiftFastImageFilter
+   *  \brief
+   *
+   */
+  template <class TInputImage, class TOutputPointSet>
+    class ITK_EXPORT SiftFastImageFilter
+    : public ImageToPointSetFilter<TInputImage,TOutputPointSet>
+    {
+    public:
+      /** Standard typedefs */
+      typedef SiftFastImageFilter                                Self;
+      typedef ImageToPointSetFilter<TInputImage,TOutputPointSet> Superclass;
+      typedef itk::SmartPointer<Self>                            Pointer;
+      typedef itk::SmartPointer<const Self>                      ConstPointer;
+  
+      /** Creation through object factory macro */
+      itkNewMacro(Self);  
+
+      /** Type macro */
+      itkTypeMacro(SiftFastImageFilter,ImageToPointSetFilter);
+  
+      /** Template parameters typedefs */
+      typedef TInputImage InputImageType;
+      typedef typename TInputImage::Pointer InputImagePointerType;
+      typedef typename TInputImage::PixelType PixelType;
+
+      typedef TOutputPointSet OutputPointSetType;
+      typedef typename TOutputPointSet::Pointer OutputPointSetPointerType;
+      typedef typename TOutputPointSet::PixelType OutputPixelType;
+      typedef typename TOutputPointSet::PointType OutputPointType;
+      typedef typename TOutputPointSet::PointIdentifier OutputPointIdentifierType;
+      
+      typedef otb::Image<float,2> FloatImageType;
+      
+      // Used to rescale data in the [0,1] range
+      typedef itk::RescaleIntensityImageFilter<InputImageType,FloatImageType> RescalerType;
+
+      itkSetMacro(NumberOfScales,unsigned int);
+      itkGetMacro(NumberOfScales,unsigned int);
+
+    protected:
+      /** Actually process the input */
+      virtual void GenerateData();
+      
+      /** Constructor */
+      SiftFastImageFilter();
+      
+      /** Destructor */
+      virtual ~SiftFastImageFilter() {}
+      
+      /** PrintSelf method */
+      virtual void PrintSelf(std::ostream& os, itk::Indent indent) const;
+
+    private:
+      /** The number of scales */
+      unsigned int m_NumberOfScales;
+
+
+    };
+}// End namespace otb
+#ifndef OTB_MANUAL_INSTANTIATION
+#include "otbSiftFastImageFilter.txx"
+#endif
+
+#endif
diff --git a/Code/FeatureExtraction/otbSiftFastImageFilter.txx b/Code/FeatureExtraction/otbSiftFastImageFilter.txx
new file mode 100755
index 0000000000000000000000000000000000000000..91f36d124197d2ea1e56edeb0a72c11908753164
--- /dev/null
+++ b/Code/FeatureExtraction/otbSiftFastImageFilter.txx
@@ -0,0 +1,116 @@
+/*=========================================================================
+
+Program:   ORFEO Toolbox
+Language:  C++
+Date:      $Date$
+Version:   $Revision$
+
+
+Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+See OTBCopyright.txt for details.
+
+
+This software is distributed WITHOUT ANY WARRANTY; without even 
+the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#include "otbSiftFastImageFilter.h"
+
+#include "siftfast.h"
+#include "itkContinuousIndex.h"
+#include "itkImageRegionIterator.h"
+
+#include "otbImageFileWriter.h"
+
+namespace otb
+{
+  /**
+   * Constructor
+   */
+  template <class TInputImage, class TOutputPointSet>
+  SiftFastImageFilter<TInputImage,TOutputPointSet>
+  ::SiftFastImageFilter()
+  {  }
+  
+  template <class TInputImage, class TOutputPointSet>
+  void
+  SiftFastImageFilter<TInputImage,TOutputPointSet>
+  ::GenerateData()
+  {
+    // Get the input image pointer
+    const InputImageType *     inputPtr       = this->GetInput(0);
+    OutputPointSetPointerType  outputPointSet = this->GetOutput();
+
+    typename InputImageType::SizeType size = inputPtr->GetLargestPossibleRegion().GetSize();
+
+    // Rescale data in the [0,1] range
+    typename RescalerType::Pointer rescaler = RescalerType::New();
+    rescaler->SetInput(inputPtr);
+    rescaler->SetOutputMinimum(0);
+    rescaler->SetOutputMaximum(1);
+    rescaler->Update();
+
+    typedef otb::ImageFileWriter<FloatImageType> WriterType;
+    WriterType::Pointer writer = WriterType::New();
+    writer->SetInput(rescaler->GetOutput());
+    writer->SetFileName("qb_RoadExtract.tif");
+    writer->Update();
+
+    SiftFastImage siftInputImage = CreateImage(size[1],size[0]);
+    itk::ImageRegionIterator<FloatImageType> inIt(rescaler->GetOutput(),rescaler->GetOutput()->GetLargestPossibleRegion());
+
+    unsigned int index =0;
+
+    for(inIt.GoToBegin();!inIt.IsAtEnd();++inIt)
+      {
+	siftInputImage->pixels[index]=inIt.Get();
+	++index;
+      }
+
+    Keypoint keypts = GetKeypoints(siftInputImage,m_NumberOfScales);
+    
+    Keypoint key = keypts;
+
+    unsigned int numkeys = 0;
+
+    while(key) 
+      {
+	// Get the key location
+	itk::ContinuousIndex<float,2> keyContIndex;
+	keyContIndex[0]=key->col;
+	keyContIndex[1]=key->row;
+
+	OutputPointType point;
+	inputPtr->TransformContinuousIndexToPhysicalPoint(keyContIndex,point);
+	
+	// Get the key descriptor
+	OutputPixelType data;
+	data.SetSize(128);
+	for(int i = 0; i < 128; ++i) 
+	  {
+	    data[i]=key->descrip[i];
+	
+	  }
+	outputPointSet->SetPoint(numkeys,point);
+	outputPointSet->SetPointData(numkeys,data);
+
+	// go to next key
+	numkeys++;
+	key = key->next;
+      }  
+    FreeKeypoints(keypts);
+    DestroyAllResources();
+  }
+  /*
+   * PrintSelf Method
+   */
+  template <class TInputImage, class TOutputPointSet>
+  void
+  SiftFastImageFilter<TInputImage,TOutputPointSet>
+  ::PrintSelf(std::ostream& os, itk::Indent indent) const
+  {          
+    Superclass::PrintSelf(os, indent);
+  }
+
+} // End namespace otb
diff --git a/Code/IO/otbGDALImageIO.cxx b/Code/IO/otbGDALImageIO.cxx
index fec8bbed91788acbd4114b196d32fcfdefdfb534..afa980d28a5d79227448324abea122877f8e4e4b 100644
--- a/Code/IO/otbGDALImageIO.cxx
+++ b/Code/IO/otbGDALImageIO.cxx
@@ -148,7 +148,6 @@ namespace otb
                "GDALOpen failed - %d\n%s\n",
                CPLGetLastErrorNo(), CPLGetLastErrorMsg() );
 
-      GDALDumpOpenDatasets( stderr );
       GDALDestroyDriverManager();
       CPLDumpSharedList( NULL );
       itkDebugMacro(<<"No dataset ");
diff --git a/Examples/IO/CMakeLists.txt b/Examples/IO/CMakeLists.txt
index 3eac84ca5a79a6b8df45cf72d4ea0307053b7b94..3816f54951b047ddfb83b3da014d3417fc1e157b 100644
--- a/Examples/IO/CMakeLists.txt
+++ b/Examples/IO/CMakeLists.txt
@@ -142,7 +142,7 @@ ADD_TEST(ExtractROITest ${EXE_TESTS1}
 ADD_TEST(DEMToImageGeneratorTest ${EXE_TESTS1}
 --compare-image ${TOL}  ${BASELINE}/DEMToImageGenerator.tif
                         ${TEMP}/DEMToImageGenerator.tif
-        DEMToImageGeneratorTest        
+        DEMToImageGeneratorTest
          ${TEMP}/DEMToImageGenerator.tif
 	 ${TEMP}/pretty_DEMToImageGenerator.png
          6.5
@@ -150,7 +150,7 @@ ADD_TEST(DEMToImageGeneratorTest ${EXE_TESTS1}
          500
          500
 	 0.002
-	 -0.002	
+	 -0.002
 	  ${INPUTDATA}/DEM_srtm
          )
 
@@ -166,6 +166,17 @@ ADD_TEST(LidarToImageExampleTest ${EXE_TESTS1}
          5
          4
          )
+ADD_TEST(LidarToImageExample2Test ${EXE_TESTS1}
+--compare-image ${TOL}  ${BASELINE}/lidar-image-8.hdr
+                        ${TEMP}/lidar-image-8.hdr
+         LidarToImageExampleTest
+         ${DATA}/TO_core_last_zoom.las
+         ${TEMP}/lidar-image-8.hdr
+	 ${TEMP}/lidar-image-8.png
+         1.0
+         5
+         8
+         )
 ENDIF(ITK_USE_REVIEW)
 
 ADD_EXECUTABLE(otbIOExamplesTests1 otbIOExamplesTests1.cxx)
diff --git a/Examples/Projections/CMakeLists.txt b/Examples/Projections/CMakeLists.txt
index cb57a1f404d92ef9af05e68451d8f512f772620f..66e5b2a36ca040504557e9654897e918cfba0d3f 100644
--- a/Examples/Projections/CMakeLists.txt
+++ b/Examples/Projections/CMakeLists.txt
@@ -10,8 +10,6 @@ IF(CMAKE_COMPILER_IS_GNUCXX)
 ENDIF(CMAKE_COMPILER_IS_GNUCXX)
 
 
-SET(PROJECTIONS_EXAMPLES ${CXX_TEST_PATH}/otbProjectionsExamplesTests)
-
 ADD_EXECUTABLE(SensorModelExample SensorModelExample.cxx )
 TARGET_LINK_LIBRARIES(SensorModelExample OTBProjections OTBCommon OTBIO ITKCommon ITKIO)
 
@@ -22,8 +20,8 @@ ADD_EXECUTABLE(OrthoRectificationExample OrthoRectificationExample.cxx )
 TARGET_LINK_LIBRARIES(OrthoRectificationExample OTBProjections OTBCommon OTBIO ITKCommon ITKIO)
 
 IF( OTB_USE_CURL )
-ADD_EXECUTABLE(PlaceNameToLonLat PlaceNameToLonLat.cxx )
-TARGET_LINK_LIBRARIES(PlaceNameToLonLat OTBProjections OTBCommon OTBIO ITKCommon ITKIO ${CURL_LIBRARY} tinyXML)
+ADD_EXECUTABLE(PlaceNameToLonLatExample PlaceNameToLonLatExample.cxx )
+TARGET_LINK_LIBRARIES(PlaceNameToLonLatExample OTBProjections OTBCommon OTBIO ITKCommon ITKIO ${CURL_LIBRARY} tinyXML)
 ENDIF( OTB_USE_CURL )
 
 ADD_EXECUTABLE(VectorDataProjectionExample VectorDataProjectionExample.cxx )
@@ -33,7 +31,7 @@ IF( NOT OTB_DISABLE_CXX_TESTING AND BUILD_TESTING )
 
 SET(BASELINE ${OTB_DATA_ROOT}/Baseline/Examples/Projections)
 
-SET(INPUTDATA ${OTB_DATA_ROOT}/Examples)
+SET(INPUTDATA ${OTB_DATA_ROOT}/Input)
 #Remote sensing images (large images )
 IF(OTB_DATA_USE_LARGEINPUT)
   SET(INPUTLARGEDATA ${OTB_DATA_LARGEINPUT_ROOT} )
@@ -83,7 +81,39 @@ ADD_TEST(OrthoRectificationExampleXSTest ${EXE_TESTS}
 )
 ENDIF(OTB_DATA_USE_LARGEINPUT)
 
-ADD_EXECUTABLE(otbProjectionsExamplesTests otbProjectionsExamplesTests.cxx)
-TARGET_LINK_LIBRARIES(otbProjectionsExamplesTests gdal ITKIO ITKAlgorithms ITKStatistics ITKNumerics ITKCommon OTBBasicFilters OTBCommon OTBDisparityMap OTBIO OTBSpatialReasoning OTBChangeDetection OTBFeatureExtraction  OTBLearning  OTBMultiScale OTBFusion OTBProjections)
+ADD_TEST(MapProjectionExampleTest ${EXE_TESTS}
+	--compare-ascii ${TOL}
+	${BASELINE}/mapProjectionExample.txt
+	${TEMP}/mapProjectionExample.txt
+    MapProjectionExampleTest
+    	${TEMP}/mapProjectionExample.txt
+)
+
+IF(OTB_DATA_USE_LARGEINPUT)
+ADD_TEST(VectorDataProjectionExampleTest ${EXE_TESTS}
+	--compare-binary
+	${BASELINE}/vectorDataProjectionExample.shp
+	${TEMP}/vectorDataProjectionExample.shp
+    VectorDataProjectionExampleTest
+        ${INPUTDATA}/Capitole-Shadows.kml
+        ${INPUTLARGEDATA}/TOULOUSE/QuickBird/000000128955_01_P001_PAN/02APR01105228-P1BS-000000128955_01_P001.TIF
+    	${TEMP}/vectorDataProjectionExample.shp
+)
+ENDIF(OTB_DATA_USE_LARGEINPUT)
+
+IF( OTB_USE_CURL )
+ADD_TEST(PlaceNameToLonLatExampleTest ${EXE_TESTS}
+    PlaceNameToLonLatExampleTest
+        Toulouse
+)
+ENDIF( OTB_USE_CURL )
+
+IF( OTB_USE_CURL )
+  ADD_EXECUTABLE(otbProjectionsExamplesTests otbProjectionsExamplesTests.cxx)
+  TARGET_LINK_LIBRARIES(otbProjectionsExamplesTests OTBCommon OTBIO OTBProjections ${CURL_LIBRARY} tinyXML)
+ELSE( OTB_USE_CURL )
+  ADD_EXECUTABLE(otbProjectionsExamplesTests otbProjectionsExamplesTests.cxx)
+  TARGET_LINK_LIBRARIES(otbProjectionsExamplesTests OTBCommon OTBIO OTBProjections)
+ENDIF( OTB_USE_CURL )
 
 ENDIF( NOT OTB_DISABLE_CXX_TESTING AND BUILD_TESTING )
diff --git a/Examples/Projections/MapProjectionExample.cxx b/Examples/Projections/MapProjectionExample.cxx
index d162c0cf0991f5038b7920ca52fdf0366885c946..7f67a3a27663501fb4a5a20cc8a5bdba109d0dd1 100644
--- a/Examples/Projections/MapProjectionExample.cxx
+++ b/Examples/Projections/MapProjectionExample.cxx
@@ -19,29 +19,219 @@
 #pragma warning ( disable : 4786 )
 #endif
 
+//  Software Guide : BeginCommandLineArgs
+//    OUTPUTS: {mapProjectionExample-output.txt}
+//    OUTPUTS: {dummy.png}
+//    1.4835345  43.55968261
+//  Software Guide : EndCommandLineArgs
+
+//These two dependencies are just to produce the dummy image
+#include "otbImage.h"
+#include "otbImageFileWriter.h"
+
+
+// Software Guide : BeginLatex
+//
+// Map projection is an important issue when working with satellite images. In the
+// orthorectification process, converting between geographic and cartographic
+// coordinates is a key step. In this process, everything is integrated and you
+// don't need to know the details.
+//
+// However, sometimes, you need to go hands-on and find out the nitty-gritty details.
+// This example shows you how to play with map projections in OTB and how to convert
+// coordinates. In most cases, the underlying work is done by ossim.
+//
+// First, we start by including the otbMapProjections header. In this file, over 30
+// projections are defined and ready to use. It is easy to add new one.
+//
+// The otbGenericMapProjection enables you to instanciate a map projection from a
+// WKT (Well Known Text) string, which is popular with OGR for example.
+//
+// Software Guide : EndLatex
+
+// Software Guide : BeginCodeSnippet
 #include "otbMapProjections.h"
+#include "otbGenericMapProjection.h"
+// Software Guide : EndCodeSnippet
 
 int main( int argc, char* argv[] )
 {
-  if(argc < 3  )
+  if(argc < 2  )
   {
-    std::cout << argv[0] <<" <lon> <lat> <outputfile>"  << std::endl;
+    std::cout << argv[0] <<" <outputfile> "  << std::endl;
 
     return EXIT_FAILURE;
   }
 
-  double lon = atof(argv[1]);
-  double lat = atof(argv[2]);
-  const char * outFileName = argv[3];
+  // Software Guide : BeginLatex
+  //
+  // We retrieve the command line parameters and put them in the correct variables. The
+  // transforms are going to work with an \doxygen{itk}{Point}.
+  //
+  // Software Guide : EndLatex
+
+  // Software Guide : BeginCodeSnippet
+  const char * outFileName = argv[1];
+
+  itk::Point<double,2> point;
+  point[0]=1.4835345;
+  point[1]=43.55968261;
+  // Software Guide : EndCodeSnippet
 
-  std::ofstream file;
-  file.open(outFileName);
-  file << std::setprecision(15);
 
-  otb::UtmForwardProjection::Pointer lUtmProjection = otb::UtmForwardProjection::New();
 
+  // Software Guide : BeginLatex
+  //
+  // The output of this program will be save in a text file. We also want
+  // to make sure the precision of the digit will be enough.
+  //
+  // Software Guide : EndLatex
 
+  // Software Guide : BeginCodeSnippet
+  std::ofstream file;
+  file.open(outFileName);
+  file << std::setprecision(15);
+  // Software Guide : EndCodeSnippet
+
+  file << "\\begin{verbatim}" << std::endl;
+
+  // Software Guide : BeginLatex
+  //
+  // We can now instanciate our first map projection. Here, it is a UTM projection
+  // we also need to provide the information concerning the zone and the hemisphere
+  // for the projection. These are specific to UTM projection.
+  //
+  // Software Guide : EndLatex
+
+
+  // Software Guide : BeginCodeSnippet
+  otb::UtmForwardProjection::Pointer utmProjection = otb::UtmForwardProjection::New();
+  utmProjection->SetZone(31);
+  utmProjection->SetHemisphere('N');
+  // Software Guide : EndCodeSnippet
+
+  // Software Guide : BeginLatex
+  //
+  // The TransformPoint() method return the coordinates of the point in the
+  // new projection.
+  //
+  // Software Guide : EndLatex
+
+
+  // Software Guide : BeginCodeSnippet
+  file << "Forward UTM projection: " << std::endl;
+  file << point << " -> ";
+  file << utmProjection->TransformPoint(point);
+  file << std::endl << std::endl;
+  // Software Guide : EndCodeSnippet
+
+  // Software Guide : BeginLatex
+  //
+  // We follow the same path for the Lambert93 projection:
+  //
+  // Software Guide : EndLatex
+
+  // Software Guide : BeginCodeSnippet
+  otb::Lambert93ForwardProjection::Pointer lambertProjection = otb::Lambert93ForwardProjection::New();
+
+  file << "Forward Lambert93 projection: " << std::endl;
+  file << point << " -> ";
+  file << lambertProjection->TransformPoint(point);
+  file << std::endl << std::endl;
+  // Software Guide : EndCodeSnippet
+
+
+  // Software Guide : BeginLatex
+  //
+  // If you follow carefully the previous examples, you've noticed that the target
+  // projection have been directly coded, which mean that they can't be changed at
+  // run-time. What happens if you don't know the target projection when you're writing
+  // the program? It can depends on some input provided by the user (image, shapefile).
+  //
+  // In this situation, you can use the \doxygen{otb}{GenericMapProjection}. It will
+  // accept a string to set the projection. This string should be in the WKT format.
+  //
+  // For example:
+  //
+  // Software Guide : EndLatex
+
+  // Software Guide : BeginCodeSnippet
+  std::string projectionRefWkt ="PROJCS[\"UTM Zone 31, Northern Hemisphere\","
+      "GEOGCS[\"WGS 84\",DATUM[\"WGS_1984\",SPHEROID[\"WGS 84\",6378137,298.257223563,"
+      "AUTHORITY[\"EPSG\",\"7030\"]],TOWGS84[0,0,0,0,0,0,0],"
+      "AUTHORITY[\"EPSG\",\"6326\"]],PRIMEM[\"Greenwich\",0,AUTHORITY[\"EPSG\",\"8901\"]],"
+      "UNIT[\"degree\",0.0174532925199433,AUTHORITY[\"EPSG\",\"9108\"]],"
+      "AXIS[\"Lat\",NORTH],AXIS[\"Long\",EAST],"
+      "AUTHORITY[\"EPSG\",\"4326\"]],PROJECTION[\"Transverse_Mercator\"],"
+      "PARAMETER[\"latitude_of_origin\",0],PARAMETER[\"central_meridian\",3],"
+      "PARAMETER[\"scale_factor\",0.9996],PARAMETER[\"false_easting\",500000],"
+      "PARAMETER[\"false_northing\",0],UNIT[\"Meter\",1]]";
+  // Software Guide : EndCodeSnippet
+
+  // Software Guide : BeginLatex
+  //
+  // This string is then passed to the projection using the SetWkt() method.
+  //
+  // Software Guide : EndLatex
+
+  // Software Guide : BeginCodeSnippet
+  typedef otb::GenericMapProjection<otb::FORWARD> GenericMapProjection;
+  GenericMapProjection::Pointer genericMapProjection = GenericMapProjection::New();
+  genericMapProjection->SetWkt(projectionRefWkt);
+
+  file << "Forward gerenic projection: " << std::endl;
+  file << point << " -> ";
+  file << genericMapProjection ->TransformPoint(point);
+  file << std::endl << std::endl;
+  // Software Guide : EndCodeSnippet
+
+  file << "\\end{verbatim}" << std::endl;
+
+  // Software Guide : BeginLatex
+  //
+  // And of course, we don't forget to close the file:
+  //
+  // Software Guide : EndLatex
+
+  // Software Guide : BeginCodeSnippet
+  file.close();
+  // Software Guide : EndCodeSnippet
+
+
+  // Software Guide : BeginLatex
+  //
+  // The final output of the program should be:
+  //
+  // \input{Art/Generated/mapProjectionExample-output.txt}
+  //
+  // %\includegraphics[width=0.40\textwidth]{dummy.eps}
+  //
+  // Software Guide : EndLatex
+
+  //this is just to trigger the dependancy for the software guide, not
+  //directly related to this example
+  if( argc > 2 )
+  {
 
+    char * dummyfilename = argv[2];
+    typedef otb::Image< unsigned char, 2 > ImageType;
+    ImageType::Pointer image = ImageType::New();
+    ImageType::IndexType start;
+    start[0] = 0; start[1] = 0;
+    ImageType::SizeType  size;
+    size[0] = 1; size[1] = 1;
+    ImageType::RegionType region;
+    region.SetSize( size );
+    region.SetIndex( start );
+    image->SetRegions( region );
+    image->Allocate();
+
+    typedef otb::ImageFileWriter<ImageType> FileWriterType;
+    FileWriterType::Pointer writer = FileWriterType::New();
+    writer->SetFileName(dummyfilename);
+    writer->SetInput(image);
+    writer->Update();
+  }
 
   return EXIT_SUCCESS;
 }
diff --git a/Examples/Projections/PlaceNameToLonLat.cxx b/Examples/Projections/PlaceNameToLonLat.cxx
deleted file mode 100644
index 57d6ffb7f279573255aec59ac4f72ba59ef226d7..0000000000000000000000000000000000000000
--- a/Examples/Projections/PlaceNameToLonLat.cxx
+++ /dev/null
@@ -1,48 +0,0 @@
-/*=========================================================================
-
-  Program:   ORFEO Toolbox
-  Language:  C++
-  Date:      $Date$
-  Version:   $Revision$
-
-
-  Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
-  See OTBCopyright.txt for details.
-
-
-     This software is distributed WITHOUT ANY WARRANTY; without even
-     the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
-     PURPOSE.  See the above copyright notices for more information.
-
-=========================================================================*/
-#if defined(_MSC_VER)
-#pragma warning ( disable : 4786 )
-#endif
-
-#include "otbPlaceNameToLonLat.h"
-
-int main( int argc, char* argv[] )
-{
-
-  if(argc!=2)
-  {
-    std::cout << argv[0] <<" <place name> "
-        << std::endl;
-
-    return EXIT_FAILURE;
-  }
-
-
-  otb::PlaceNameToLonLat::Pointer pn2LL = otb::PlaceNameToLonLat::New();
-  pn2LL->SetPlaceName(std::string(argv[1]));
-  pn2LL->Evaluate();
-
-  double lon = pn2LL->GetLon();
-  double lat = pn2LL->GetLat();
-
-  std::cout << "Latitude: " << lat << std::endl;
-  std::cout << "Longitude: " << lon << std::endl;
-
-  return EXIT_SUCCESS;
-
-}
diff --git a/Examples/Projections/PlaceNameToLonLatExample.cxx b/Examples/Projections/PlaceNameToLonLatExample.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..d312d7b172e49ef604ecff5cf52bf0b225f21a82
--- /dev/null
+++ b/Examples/Projections/PlaceNameToLonLatExample.cxx
@@ -0,0 +1,89 @@
+/*=========================================================================
+
+  Program:   ORFEO Toolbox
+  Language:  C++
+  Date:      $Date$
+  Version:   $Revision$
+
+
+  Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+  See OTBCopyright.txt for details.
+
+
+     This software is distributed WITHOUT ANY WARRANTY; without even
+     the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+     PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#if defined(_MSC_VER)
+#pragma warning ( disable : 4786 )
+#endif
+
+// Software Guide : BeginLatex
+//
+// This example will show how to retrieve the longitude and latitude from
+// a place using the name of the city or the address. For that, we will
+// use the \doxygen{otb}{PlaceNameToLonLat} class.
+//
+// Software Guide : EndLatex
+
+// Software Guide : BeginCodeSnippet
+#include "otbPlaceNameToLonLat.h"
+// Software Guide : EndCodeSnippet
+
+int main( int argc, char* argv[] )
+{
+
+  if(argc!=2)
+  {
+    std::cout << argv[0] <<" <place name> "
+        << std::endl;
+
+    return EXIT_FAILURE;
+  }
+
+  // Software Guide : BeginLatex
+  //
+  // You instantiate the class and pass the name you want to look for as a
+  // std::string to the SetPlaceName method.
+  //
+  // The call to evaluate will trigger the retrival process.
+  //
+  // Software Guide : EndLatex
+
+  // Software Guide : BeginCodeSnippet
+  otb::PlaceNameToLonLat::Pointer pn2LL = otb::PlaceNameToLonLat::New();
+  pn2LL->SetPlaceName(std::string(argv[1]));
+  pn2LL->Evaluate();
+  // Software Guide : EndCodeSnippet
+
+  // Software Guide : BeginLatex
+  //
+  // To get the data, you can simply call the GetLon and GetLat methods.
+  //
+  // Software Guide : EndLatex
+
+  // Software Guide : BeginCodeSnippet
+  double lon = pn2LL->GetLon();
+  double lat = pn2LL->GetLat();
+
+  std::cout << "Latitude: " << lat << std::endl;
+  std::cout << "Longitude: " << lon << std::endl;
+  // Software Guide : EndCodeSnippet
+
+
+  // Software Guide : BeginLatex
+  //
+  // If you tried with a string such as "Toulouse", you should obtain something
+  // like:
+  //
+  // \begin{verbatim}
+  // Latitude: 43.6044
+  // Longitude: 1.44295
+  // \end{verbatim}
+  //
+  // Software Guide : EndLatex
+
+  return EXIT_SUCCESS;
+
+}
diff --git a/Examples/Projections/otbProjectionsExamplesTests.cxx b/Examples/Projections/otbProjectionsExamplesTests.cxx
index 1e7a26e018f5f7699e1dbf74d60958a9bb3c836f..57df32860016d74f38f18eb2e272cd4bd646602d 100644
--- a/Examples/Projections/otbProjectionsExamplesTests.cxx
+++ b/Examples/Projections/otbProjectionsExamplesTests.cxx
@@ -27,9 +27,23 @@
 void RegisterTests()
 {
   REGISTER_TEST(OrthoRectificationExampleTest);
+  REGISTER_TEST(MapProjectionExampleTest);
+  REGISTER_TEST(VectorDataProjectionExampleTest);
+  REGISTER_TEST(PlaceNameToLonLatExampleTest);
 }
 
 #undef main
 #define main OrthoRectificationExampleTest
 #include "OrthoRectificationExample.cxx"
 
+#undef main
+#define main MapProjectionExampleTest
+#include "MapProjectionExample.cxx"
+
+#undef main
+#define main VectorDataProjectionExampleTest
+#include "VectorDataProjectionExample.cxx"
+
+#undef main
+#define main PlaceNameToLonLatExampleTest
+#include "PlaceNameToLonLatExample.cxx"
diff --git a/Testing/Code/BasicFilters/otbPolygonCompacityFunctor.cxx b/Testing/Code/BasicFilters/otbPolygonCompacityFunctor.cxx
index c57c749380443c8b3e846891902cf7f120975b65..9787580080326ebecb06774cfdf1bbf3b9bee021 100755
--- a/Testing/Code/BasicFilters/otbPolygonCompacityFunctor.cxx
+++ b/Testing/Code/BasicFilters/otbPolygonCompacityFunctor.cxx
@@ -36,17 +36,13 @@ int otbPolygonCompacityFunctor( int argc, char * argv[] )
 {
 
   if(argc !=3 )
-  {
+    {
     std::cout << "Usage: " << argv[0] ;
     std::cout << " inputImage outputFile" << std::endl;
     return 1;
-  }
+    }
 
-// In the 3.10.1 ITK version, the itk::ConnectedComponentImageFilter filter has changed.
-// TODO: change reference output 
-//  typedef itk::RGBPixel<unsigned char> InputPixelType;
   typedef unsigned char  InputPixelType;
-
   typedef unsigned short LabelPixelType;//FIXME doesn't seem to work with long int (64 bits problem ?)
 
   typedef otb::Image<InputPixelType,2> InputImageType;
@@ -72,7 +68,7 @@ int otbPolygonCompacityFunctor( int argc, char * argv[] )
   PolygonListType::Pointer polygonList = PolygonListType::New();
 
   for(LabelPixelType label = 1; label<=connectedComponentsFilter->GetObjectCount();++label)
-  {
+    {
     std::cerr << ".";
     PolygonFilterType::Pointer polygonFilter = PolygonFilterType::New();
     polygonFilter->SetInput(connectedComponentsFilter->GetOutput());
@@ -80,7 +76,7 @@ int otbPolygonCompacityFunctor( int argc, char * argv[] )
     polygonFilter->Update();
 
     polygonList->PushBack(polygonFilter->GetOutput());
-  }
+    }
 
   typedef otb::PolygonCompacityFunctor<PolygonType::Pointer> CompatityFunctorType;
   typedef otb::UnaryFunctorObjectListBooleanFilter<PolygonListType,PolygonListType,CompatityFunctorType> CompatityFilterType;
@@ -100,16 +96,16 @@ int otbPolygonCompacityFunctor( int argc, char * argv[] )
   for(PolygonListIteratorType pIt = compacityFilter->GetOutput()->Begin();
       pIt!=compacityFilter->GetOutput()->End();
       ++pIt)
-  {
+    {
     file<< "--- New Polygon ---" << std::endl;
     PolygonType::Pointer polygon=pIt.Get();
     IteratorType it;
     for(it=polygon->GetVertexList()->Begin();it!=polygon->GetVertexList()->End();++it)
-    {
+      {
       file<<it.Value()<<std::endl;
 
+      }
     }
-  }
 
   file.close();
 
diff --git a/Testing/Code/FeatureExtraction/CMakeLists.txt b/Testing/Code/FeatureExtraction/CMakeLists.txt
index 59c66b541ee32ef9e1cd62a9732fce47f3c6246f..2868e9a5c59b7d590114bd43dd567249a43292c4 100644
--- a/Testing/Code/FeatureExtraction/CMakeLists.txt
+++ b/Testing/Code/FeatureExtraction/CMakeLists.txt
@@ -869,6 +869,7 @@ ADD_TEST(feTvImageToSURFKeyPointSetFilterSceneOutputAscii ${FEATUREEXTRACTION_TE
 # -------            otb::ImageFittingPolygonListFilter   -------------
 ADD_TEST(feTuImageFittingPolygonListFilterNew ${FEATUREEXTRACTION_TESTS9} 
          otbImageFittingPolygonListFilterNew)
+
 ADD_TEST(feTvImageFittingPolygonListFilter ${FEATUREEXTRACTION_TESTS9}
 --compare-ascii ${EPS}
 		${BASELINE_FILES}/feTvImageFittingPolygonListFilter_Output.kml
@@ -880,8 +881,49 @@ ADD_TEST(feTvImageFittingPolygonListFilter ${FEATUREEXTRACTION_TESTS9}
 		5 10
 )
 
+# -------            otb::ImageToFastSIFTKeyPointSetFilter   -------------
+
+ADD_TEST(feTuImageToFastSIFTKeyPointSetFilterNew ${FEATUREEXTRACTION_TESTS9} 
+         otbImageToFastSIFTKeyPointSetFilterNew)
+
+ADD_TEST(feTvImageToFastSIFTKeyPointSetFilterSceneOutputImage ${FEATUREEXTRACTION_TESTS9} 
+--compare-image ${EPSILON}  
+		${BASELINE}/feTvImageToFastSIFTKeyPointSetFilterSceneImageOutput.png
+		${TEMP}/feTvImageToFastSIFTKeyPointSetFilterSceneImageOutput.png
+         otbImageToFastSIFTKeyPointSetFilterOutputImage
+		${INPUTDATA}/scene.png
+		${TEMP}/feTvImageToFastSIFTKeyPointSetFilterSceneImageOutput.png
+		6  
+)	
+
+ADD_TEST(feTvImageToFastSIFTKeyPointSetFilterSceneOutputAscii ${FEATUREEXTRACTION_TESTS9}
+--compare-ascii ${EPS}
+		${BASELINE_FILES}/feTvImageToFastSIFTKeyPointSetFilterSceneKeysOutput.txt
+		${TEMP}/feTvImageToFastSIFTKeyPointSetFilterSceneKeysOutput.txt
+         otbImageToFastSIFTKeyPointSetFilterOutputAscii
+		${INPUTDATA}/scene.png
+		${TEMP}/feTvImageToFastSIFTKeyPointSetFilterSceneKeysOutput.txt
+		6  
+)
+
+
+
+# --------- MatchingFilter ----------------
+ADD_TEST(KeyPointSetsMatchingFilterNew ${FEATUREEXTRACTION_TESTS9} 
+	otbKeyPointSetsMatchingFilterNew)
+
+ADD_TEST(KeyPointSetsMatchingFilter ${FEATUREEXTRACTION_TESTS9}
+--compare-ascii ${EPS}				   
+	${BASELINE}/feTvKeyPointSetsMatchingFilterOutputAscii.txt
+	${TEMP}/feTvKeyPointSetsMatchingFilterOutputAscii.txt	
+    otbKeyPointSetsMatchingFilter
+	${TEMP}/feTvKeyPointSetsMatchingFilterOutputAscii.txt
+	0.6 0
+)
 	
-				     
+#--------- LandMark
+ADD_TEST(LandmarkNew ${FEATUREEXTRACTION_TESTS9} 
+	otbLandmarkNew)			     
 
 # A enrichir
 SET(BasicFeatureExtraction_SRCS1
@@ -992,6 +1034,12 @@ otbImageFittingPolygonListFilterNew.cxx
 otbImageToSURFKeyPointSetFilterNew.cxx	
 otbImageToSURFKeyPointSetFilterOutputImage.cxx
 otbImageToSURFKeyPointSetFilterOutputAscii.cxx
+otbImageToFastSIFTKeyPointSetFilterNew.cxx
+otbImageToFastSIFTKeyPointSetFilterOutputImage.cxx
+otbImageToFastSIFTKeyPointSetFilterOutputAscii.cxx
+otbKeyPointSetsMatchingFilterNew.cxx
+otbKeyPointSetsMatchingFilter.cxx
+otbLandmarkNew.cxx
 )
 
 INCLUDE_DIRECTORIES("${OTBTesting_BINARY_DIR}")
diff --git a/Testing/Code/FeatureExtraction/otbFeatureExtractionTests9.cxx b/Testing/Code/FeatureExtraction/otbFeatureExtractionTests9.cxx
index 71c377316acbc924a892e50d34205ebb543e8f2d..158296d9ac21f1a9c67d673550af56784a7bc6eb 100644
--- a/Testing/Code/FeatureExtraction/otbFeatureExtractionTests9.cxx
+++ b/Testing/Code/FeatureExtraction/otbFeatureExtractionTests9.cxx
@@ -40,4 +40,10 @@ REGISTER_TEST(otbImageFittingPolygonListFilterNew);
 REGISTER_TEST(otbImageToSURFKeyPointSetFilterNew);
 REGISTER_TEST(otbImageToSURFKeyPointSetFilterOutputImage);
 REGISTER_TEST(otbImageToSURFKeyPointSetFilterOutputAscii);
+REGISTER_TEST(otbImageToFastSIFTKeyPointSetFilterNew);
+REGISTER_TEST(otbImageToFastSIFTKeyPointSetFilterOutputImage);
+REGISTER_TEST(otbImageToFastSIFTKeyPointSetFilterOutputAscii);
+REGISTER_TEST(otbKeyPointSetsMatchingFilterNew);
+REGISTER_TEST(otbKeyPointSetsMatchingFilter);
+REGISTER_TEST(otbLandmarkNew);
 }
diff --git a/Testing/Code/FeatureExtraction/otbImageToFastSIFTKeyPointSetFilterNew.cxx b/Testing/Code/FeatureExtraction/otbImageToFastSIFTKeyPointSetFilterNew.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..25893652e139170eddabcdff93dd47f6232b2bb1
--- /dev/null
+++ b/Testing/Code/FeatureExtraction/otbImageToFastSIFTKeyPointSetFilterNew.cxx
@@ -0,0 +1,37 @@
+/*=========================================================================
+
+Program:   ORFEO Toolbox
+Language:  C++
+Date:      $Date$
+Version:   $Revision$
+
+
+Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+See OTBCopyright.txt for details.
+
+
+This software is distributed WITHOUT ANY WARRANTY; without even
+the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#include "otbSiftFastImageFilter.h"
+#include "otbImage.h"
+#include "itkPointSet.h"
+#include "itkVariableLengthVector.h"
+
+int otbImageToFastSIFTKeyPointSetFilterNew(int argc, char * argv[])
+{
+  typedef float RealType;
+  const unsigned int Dimension =2;
+
+  typedef otb::Image<RealType,Dimension> ImageType;
+  typedef itk::VariableLengthVector<RealType> RealVectorType;
+  typedef itk::PointSet<RealVectorType,Dimension> PointSetType;
+  typedef otb::SiftFastImageFilter<ImageType,PointSetType> ImageToFastSIFTKeyPointSetFilterType;
+  
+  // Instantiating object
+  ImageToFastSIFTKeyPointSetFilterType::Pointer object = ImageToFastSIFTKeyPointSetFilterType::New();
+  
+  return EXIT_SUCCESS;
+}
diff --git a/Testing/Code/FeatureExtraction/otbImageToFastSIFTKeyPointSetFilterOutputAscii.cxx b/Testing/Code/FeatureExtraction/otbImageToFastSIFTKeyPointSetFilterOutputAscii.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..9e58dc90273024d6e85f08f36941c318079e5110
--- /dev/null
+++ b/Testing/Code/FeatureExtraction/otbImageToFastSIFTKeyPointSetFilterOutputAscii.cxx
@@ -0,0 +1,87 @@
+/*=========================================================================
+
+Program:   ORFEO Toolbox
+Language:  C++
+Date:      $Date$
+Version:   $Revision$
+
+
+Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+See OTBCopyright.txt for details.
+
+
+This software is distributed WITHOUT ANY WARRANTY; without even
+the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#include "otbSiftFastImageFilter.h"
+#include "otbImage.h"
+#include "otbImageFileReader.h"
+#include "otbImageFileWriter.h"
+#include "itkPointSet.h"
+#include "itkVariableLengthVector.h"
+#include "otbRationalQuotientResampleImageFilter.h"
+#include "itkRGBPixel.h"
+#include "itkImageRegionIterator.h"
+
+#include <iostream>
+#include <fstream>
+
+int otbImageToFastSIFTKeyPointSetFilterOutputAscii(int argc, char * argv[])
+{
+  const char * infname = argv[1];
+  const char * outfname = argv[2];
+
+  const unsigned int scales = atoi(argv[3]);
+
+  typedef float RealType;
+  const unsigned int Dimension =2;
+
+  typedef otb::Image<RealType,Dimension> ImageType;
+  typedef itk::VariableLengthVector<RealType> RealVectorType;
+  typedef otb::ImageFileReader<ImageType> ReaderType;
+  typedef itk::PointSet<RealVectorType,Dimension> PointSetType;
+  typedef otb::SiftFastImageFilter<ImageType,PointSetType> ImageToFastSIFTKeyPointSetFilterType;
+  
+  // PointSet iterator types
+  typedef PointSetType::PointsContainer PointsContainerType;
+  typedef PointsContainerType::Iterator PointsIteratorType;
+  typedef PointSetType::PointDataContainer PointDataContainerType;
+  typedef PointDataContainerType::Iterator PointDataIteratorType;
+
+  // Instantiating object
+  ReaderType::Pointer reader = ReaderType::New();
+  ImageToFastSIFTKeyPointSetFilterType::Pointer filter = ImageToFastSIFTKeyPointSetFilterType::New();
+
+  reader->SetFileName(infname);
+  filter->SetInput(0,reader->GetOutput());
+  filter->SetNumberOfScales(scales);
+  filter->Update();
+
+  PointsIteratorType pIt = filter->GetOutput()->GetPoints()->Begin();
+  PointDataIteratorType pDataIt = filter->GetOutput()->GetPointData()->Begin();
+
+  std::ofstream outfile(outfname);
+
+  outfile << "Number of scales: "<<scales << std::endl;
+  outfile << "Number of SIFT key points: " << filter->GetOutput()->GetNumberOfPoints() << std::endl;
+  while( pIt!=filter->GetOutput()->GetPoints()->End() )
+    {
+      outfile << "[" << std::fixed << std::setprecision(2) << pIt.Value()[0] << ", " << std::setprecision(2) << pIt.Value()[1] << "][";
+
+      unsigned int lIterDesc=0;
+      while (lIterDesc < pDataIt.Value().Size())
+	{
+	  outfile << std::setprecision(3) << pDataIt.Value()[lIterDesc] << " ";
+	  lIterDesc++;
+	}
+      outfile << "]" << std::endl;
+      ++pIt;
+      ++pDataIt;
+    }
+
+  outfile.close();
+
+  return EXIT_SUCCESS;
+}
diff --git a/Testing/Code/FeatureExtraction/otbImageToFastSIFTKeyPointSetFilterOutputImage.cxx b/Testing/Code/FeatureExtraction/otbImageToFastSIFTKeyPointSetFilterOutputImage.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..3f08c2426d6387cfc4a072f9f4553e9e2d430c43
--- /dev/null
+++ b/Testing/Code/FeatureExtraction/otbImageToFastSIFTKeyPointSetFilterOutputImage.cxx
@@ -0,0 +1,162 @@
+/*=========================================================================
+
+Program:   ORFEO Toolbox
+Language:  C++
+Date:      $Date$
+Version:   $Revision$
+
+
+Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+See OTBCopyright.txt for details.
+
+
+This software is distributed WITHOUT ANY WARRANTY; without even
+the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#include "otbSiftFastImageFilter.h"
+#include "otbImage.h"
+#include "otbImageFileReader.h"
+#include "otbImageFileWriter.h"
+#include "itkPointSet.h"
+#include "itkVariableLengthVector.h"
+#include "otbRationalQuotientResampleImageFilter.h"
+#include "itkRGBPixel.h"
+#include "itkImageRegionIterator.h"
+
+#include <iostream>
+#include <fstream>
+
+int otbImageToFastSIFTKeyPointSetFilterOutputImage(int argc, char * argv[])
+{
+  const char * infname = argv[1];
+  const char * outputImageFilename = argv[2];
+
+  const unsigned int scales = atoi(argv[3]);
+
+  typedef float RealType;
+  const unsigned int Dimension =2;
+
+  typedef otb::Image<RealType,Dimension> ImageType;
+  typedef itk::VariableLengthVector<RealType> RealVectorType;
+  typedef otb::ImageFileReader<ImageType> ReaderType;
+  typedef itk::PointSet<RealVectorType,Dimension> PointSetType;
+  typedef otb::SiftFastImageFilter<ImageType,PointSetType> ImageToFastSIFTKeyPointSetFilterType;
+  
+  // PointSet iterator types
+  typedef PointSetType::PointsContainer PointsContainerType;
+  typedef PointsContainerType::Iterator PointsIteratorType;
+
+  // Instantiating object
+  ReaderType::Pointer reader = ReaderType::New();
+  ImageToFastSIFTKeyPointSetFilterType::Pointer filter = ImageToFastSIFTKeyPointSetFilterType::New();
+
+  reader->SetFileName(infname);
+  filter->SetInput(0,reader->GetOutput());
+  filter->SetNumberOfScales(scales);
+  filter->Update();
+
+  ImageType::OffsetType t = {{ 0, 1}};
+  ImageType::OffsetType b = {{ 0,-1}};
+  ImageType::OffsetType l = {{ 1, 0}};
+  ImageType::OffsetType r = {{-1, 0}};
+
+  typedef unsigned char PixelType;
+  typedef otb::Image<PixelType,2> UCharImageType;
+
+  typedef itk::RGBPixel<PixelType> RGBPixelType;
+  typedef otb::Image<RGBPixelType, 2> OutputImageType;
+
+  typedef otb::ImageFileWriter<OutputImageType> WriterType;
+  OutputImageType::Pointer outputImage = OutputImageType::New();
+  OutputImageType::RegionType region;
+
+  OutputImageType::SizeType outputSize;
+  outputSize[0] = reader->GetOutput()->GetLargestPossibleRegion().GetSize()[0];
+  outputSize[1] = reader->GetOutput()->GetLargestPossibleRegion().GetSize()[1];
+  region.SetSize(outputSize);
+
+  OutputImageType::IndexType indexStart;
+  indexStart[0] = 0;
+  indexStart[1] = 0;
+  region.SetIndex(indexStart);
+
+  outputImage->SetRegions(region);
+  outputImage->Allocate();
+
+  itk::ImageRegionIterator<OutputImageType> iterOutput(outputImage,
+						       outputImage->GetLargestPossibleRegion());
+  itk::ImageRegionIterator<ImageType> iterInput(reader->GetOutput(),
+						reader->GetOutput()->GetLargestPossibleRegion());
+
+  for (iterOutput.GoToBegin(), iterInput.GoToBegin();
+       !iterOutput.IsAtEnd();
+       ++iterOutput, ++iterInput)
+    {
+      OutputImageType::PixelType rgbPixel;
+      rgbPixel.SetRed( static_cast<PixelType>(iterInput.Get()) );
+      rgbPixel.SetGreen( static_cast<PixelType>(iterInput.Get()) );
+      rgbPixel.SetBlue( static_cast<PixelType>(iterInput.Get()) );
+
+      iterOutput.Set(rgbPixel);
+    }
+
+  WriterType::Pointer writerTmp = WriterType::New();
+  writerTmp->SetFileName(outputImageFilename);
+  writerTmp->SetInput(outputImage);
+  writerTmp->Update();
+
+  std::cout << "Copy Input image in Output image" << std::endl;
+
+  PointsIteratorType pIt = filter->GetOutput()->GetPoints()->Begin();
+  ImageType::SpacingType spacing = reader->GetOutput()->GetSpacing();
+  ImageType::PointType origin = reader->GetOutput()->GetOrigin();
+  OutputImageType::SizeType size = outputImage->GetLargestPossibleRegion().GetSize();
+
+  while( pIt != filter->GetOutput()->GetPoints()->End() )
+    {
+      ImageType::IndexType index;
+
+      index[0] = (unsigned int)
+	(vcl_floor
+	 ((double)((pIt.Value()[0]-origin[0])/spacing[0]+0.5)));
+
+      index[1] = (unsigned int)
+	(vcl_floor
+	 ((double)((pIt.Value()[1]-origin[1])/spacing[1]+0.5)));
+
+      OutputImageType::PixelType keyPixel;
+      keyPixel.SetRed(0);
+      keyPixel.SetGreen(255);
+      keyPixel.SetBlue(0);
+
+      if (outputImage->GetLargestPossibleRegion().IsInside(index))
+	{
+	  outputImage->SetPixel(index,keyPixel);
+
+	  if (outputImage->GetLargestPossibleRegion().IsInside(index+t))
+	    outputImage->SetPixel(index+t,keyPixel);
+
+	  if (outputImage->GetLargestPossibleRegion().IsInside(index+b))
+	    outputImage->SetPixel(index+b,keyPixel);
+
+	  if (outputImage->GetLargestPossibleRegion().IsInside(index+l))
+	    outputImage->SetPixel(index+l,keyPixel);
+
+	  if (outputImage->GetLargestPossibleRegion().IsInside(index+r))
+	    outputImage->SetPixel(index+r,keyPixel);
+	}
+      ++pIt;
+    }
+
+  std::cout << "Copy sift key" << std::endl;
+
+  WriterType::Pointer writer = WriterType::New();
+  writer->SetFileName(outputImageFilename);
+  writer->SetInput(outputImage);
+  writer->Update();
+
+  std::cout << "Write image" << std::endl;
+  return EXIT_SUCCESS;
+}
diff --git a/Testing/Code/FeatureExtraction/otbKeyPointSetsMatchingFilter.cxx b/Testing/Code/FeatureExtraction/otbKeyPointSetsMatchingFilter.cxx
new file mode 100755
index 0000000000000000000000000000000000000000..57ed0cc12c85eefac51cec122668cec3279f3311
--- /dev/null
+++ b/Testing/Code/FeatureExtraction/otbKeyPointSetsMatchingFilter.cxx
@@ -0,0 +1,107 @@
+/*=========================================================================
+
+  Program:   ORFEO Toolbox
+  Language:  C++
+  Date:      $Date$
+  Version:   $Revision$
+
+
+  Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+  See OTBCopyright.txt for details.
+
+
+  This software is distributed WITHOUT ANY WARRANTY; without even 
+  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+  PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#if defined(_MSC_VER)
+#pragma warning ( disable : 4786 )
+#endif
+
+#include "otbKeyPointSetsMatchingFilter.h"
+
+#include "itkVariableLengthVector.h"
+#include "itkPointSet.h"
+
+#include <iostream>
+#include <fstream>
+
+int otbKeyPointSetsMatchingFilter(int argc, char* argv[])
+{
+
+  const char * outfname = argv[1];
+  const double thresh        = atof(argv[2]);
+  const bool useBackMatching = atoi(argv[3]);
+  
+  typedef itk::VariableLengthVector<double> PointDataType;
+  typedef itk::PointSet<PointDataType,2>    PointSetType;
+  typedef PointSetType::PointType           PointType;
+  typedef otb::KeyPointSetsMatchingFilter<PointSetType> MatchingFilterType;
+  typedef MatchingFilterType::LandmarkListType LandmarkListType;
+
+
+  // instantiation
+  MatchingFilterType::Pointer filter = MatchingFilterType::New();
+
+  filter->SetUseBackMatching(useBackMatching);
+  filter->SetDistanceThreshold(thresh);
+
+  // Building two pointsets
+  PointSetType::Pointer ps1 = PointSetType::New();
+  PointSetType::Pointer ps2 = PointSetType::New();
+  
+  PointType p1,p2,p3;
+  
+  p1.Fill(1);
+  p2.Fill(2);
+  p3.Fill(3);
+
+  PointDataType d1(3),d2(3),d3(3),d1b(3),d2b(3),d3b(3);
+
+  d1.Fill(1);
+  d1b.Fill(1);
+
+  d2.Fill(0);
+  d2[0]=10;
+  d2b.Fill(0);
+  d2b[1]=10;
+
+  d3.Fill(2);
+  d3b.Fill(10);
+
+  ps1->SetPoint(0,p1);
+  ps1->SetPoint(1,p2);
+  ps1->SetPoint(2,p3);
+
+  ps2->SetPoint(0,p1);
+  ps2->SetPoint(1,p2);
+  ps2->SetPoint(2,p3);
+  
+  ps1->SetPointData(0,d1);
+  ps1->SetPointData(1,d2);
+  ps1->SetPointData(2,d3);
+
+  ps2->SetPointData(0,d1b);
+  ps2->SetPointData(1,d2b);
+  ps2->SetPointData(2,d3b);
+
+  filter->SetInput1(ps1);
+  filter->SetInput2(ps2);
+  
+  filter->Update();
+
+  LandmarkListType * matching = filter->GetOutput();
+  
+  std::ofstream outfile(outfname);
+  outfile <<"Matches: "<<std::endl;
+  
+  for(LandmarkListType::Iterator it = matching->Begin(); it != matching->End();++it)
+    {
+      outfile<<"Matching: "<<it.Get()->GetPoint1()<< " " << it.Get()->GetPointData1()<< " <- "<<it.Get()->GetLandmarkData() << " -> "<<it.Get()->GetPoint2()<< " " << it.Get()->GetPointData2() << std::endl;
+    }
+  
+  outfile.close();
+
+  return EXIT_SUCCESS;
+}
diff --git a/Testing/Code/FeatureExtraction/otbKeyPointSetsMatchingFilterNew.cxx b/Testing/Code/FeatureExtraction/otbKeyPointSetsMatchingFilterNew.cxx
new file mode 100644
index 0000000000000000000000000000000000000000..2377d866cfc029292bd9207d10ac166cf29cc6a9
--- /dev/null
+++ b/Testing/Code/FeatureExtraction/otbKeyPointSetsMatchingFilterNew.cxx
@@ -0,0 +1,35 @@
+/*=========================================================================
+
+Program:   ORFEO Toolbox
+Language:  C++
+Date:      $Date$
+Version:   $Revision$
+
+
+Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+See OTBCopyright.txt for details.
+
+
+This software is distributed WITHOUT ANY WARRANTY; without even
+the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
+PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#include "itkPointSet.h"
+#include "itkVariableLengthVector.h"
+#include "otbKeyPointSetsMatchingFilter.h"
+
+int otbKeyPointSetsMatchingFilterNew(int argc, char * argv[])
+{
+  typedef float RealType;
+  const unsigned int Dimension =2;
+
+  typedef itk::VariableLengthVector<RealType> RealVectorType;
+  typedef itk::PointSet<RealVectorType,Dimension> PointSetType;
+  typedef otb::KeyPointSetsMatchingFilter<PointSetType> EuclideanDistanceMatchingFilterType;
+
+  // Instantiating object
+   EuclideanDistanceMatchingFilterType::Pointer object = EuclideanDistanceMatchingFilterType::New();
+  
+  return EXIT_SUCCESS;
+}
diff --git a/Testing/Code/FeatureExtraction/otbLandmarkNew.cxx b/Testing/Code/FeatureExtraction/otbLandmarkNew.cxx
new file mode 100755
index 0000000000000000000000000000000000000000..835213f667b15d5ceadc1be8d6b4abf9d4ab3640
--- /dev/null
+++ b/Testing/Code/FeatureExtraction/otbLandmarkNew.cxx
@@ -0,0 +1,37 @@
+/*=========================================================================
+
+  Program:   ORFEO Toolbox
+  Language:  C++
+  Date:      $Date$
+  Version:   $Revision$
+
+
+  Copyright (c) Centre National d'Etudes Spatiales. All rights reserved.
+  See OTBCopyright.txt for details.
+
+
+  This software is distributed WITHOUT ANY WARRANTY; without even 
+  the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR 
+  PURPOSE.  See the above copyright notices for more information.
+
+=========================================================================*/
+#if defined(_MSC_VER)
+#pragma warning ( disable : 4786 )
+#endif
+
+#include "otbLandmark.h"
+
+#include "itkVariableLengthVector.h"
+#include "itkPoint.h"
+
+int otbLandmarkNew(int argc, char* argv[])
+{
+  typedef itk::Point<double,2> PointType;
+  typedef itk::VariableLengthVector<double> PointDataType;
+  typedef otb::Landmark<PointType,PointDataType,double> LandmarkType;
+
+  // instantiation
+  LandmarkType::Pointer landmark = LandmarkType::New();
+
+  return EXIT_SUCCESS;
+}
diff --git a/Testing/Utilities/CMakeLists.txt b/Testing/Utilities/CMakeLists.txt
index 50fe2046c9082b6f633c30294fde62757f4601dd..eb83a25bad160d9f98254a25b1849fa3282cbf68 100644
--- a/Testing/Utilities/CMakeLists.txt
+++ b/Testing/Utilities/CMakeLists.txt
@@ -463,6 +463,7 @@ kmlprettykml.cc
 kmlprintgeometry.cc
 kmlhellofeatures.cc
 kmlprint.cc
+siftfast.cpp
 )
 
 IF(OTB_COMPILE_JPEG2000)
@@ -498,7 +499,7 @@ IF(NOT BUILD_SHARED_LIBS)
 ENDIF(NOT BUILD_SHARED_LIBS)
 
 ADD_EXECUTABLE(otbUtilitiesTests otbUtilitiesTests.cxx ${UtilitiesTests_SRCS})
-TARGET_LINK_LIBRARIES(otbUtilitiesTests OTBIO OTBCommon gdal ITKIO ITKAlgorithms ITKStatistics ITKCommon otbossim otbsvm otb6S tinyXML otbkml )
+TARGET_LINK_LIBRARIES(otbUtilitiesTests OTBIO OTBCommon gdal ITKIO ITKAlgorithms ITKStatistics ITKCommon otbossim otbsvm otb6S tinyXML otbkml otbsiftfast)
 
 IF(OTB_COMPILE_JPEG2000)
   TARGET_LINK_LIBRARIES(otbUtilitiesTests otbopenjpeg)
@@ -516,7 +517,6 @@ IF(UNIX)
     TARGET_LINK_LIBRARIES (otbUtilitiesTests m)
 ENDIF(UNIX)
 
-
 # -------       CXX and EXECUTABLES files for GALIB Tests -----------------------------------
 #FOREACH(loop_var RANGE 1 27 1)
 #        ADD_EXECUTABLE(otbGalibTests${loop_var} galibTests${loop_var}.cxx)
diff --git a/Testing/Utilities/otbUtilitiesTests.cxx b/Testing/Utilities/otbUtilitiesTests.cxx
index c9db5821bbab67aa2353cd3926cabdc97474ee82..f8f3175d1b205cbe86db1c1458dcfdace95d162f 100644
--- a/Testing/Utilities/otbUtilitiesTests.cxx
+++ b/Testing/Utilities/otbUtilitiesTests.cxx
@@ -66,5 +66,6 @@ REGISTER_TEST(kmlhellohref);
 REGISTER_TEST(kmlhellokmz);
 REGISTER_TEST(kmlhelloregion);
 REGISTER_TEST(kmlprettykml);
+REGISTER_TEST(SiftFast);
 }
 
diff --git a/Testing/Utilities/siftfast.cpp b/Testing/Utilities/siftfast.cpp
new file mode 100755
index 0000000000000000000000000000000000000000..53d822573fc635c9a916079a10301085fe3e460f
--- /dev/null
+++ b/Testing/Utilities/siftfast.cpp
@@ -0,0 +1,157 @@
+// exact C++ implementation of lowe's sift program
+// author: zerofrog(@gmail.com), Sep 2008
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+//This program is distributed in the hope that it will be useful,
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//Lesser GNU General Public License for more details.
+//
+//You should have received a copy of the GNU Lesser General Public License
+//along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+// This source code was carefully calibrated to match David Lowe's SIFT features program
+#include "siftfast.h"
+
+#include <iostream>
+
+#include <sys/timeb.h>    // ftime(), struct timeb
+
+#ifdef _WIN32
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+#include <cstdlib>
+#include <assert.h>
+
+using namespace std;
+
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+inline u32 timeGetTime()
+{
+#ifdef _WIN32
+    _timeb t;
+    _ftime(&t);
+#else
+    timeb t;
+    ftime(&t);
+#endif
+
+    return (u32)(t.time*1000+t.millitm);
+}
+
+// code from david lowe
+void SkipComments(FILE *fp)
+{
+    int ch;
+
+    fscanf(fp," "); // Skip white space.
+    while ((ch = fgetc(fp)) == '#') {
+        while ((ch = fgetc(fp)) != '\n'  &&  ch != EOF)
+            ;
+        fscanf(fp," ");
+    }
+    ungetc(ch, fp); // Replace last character read.
+}
+
+// code from david lowe
+SiftFastImage ReadPGM(FILE *fp)
+{
+    int char1, char2, width, height, max, c1, c2, c3, r, c;
+    SiftFastImage image, nextimage;
+
+    char1 = fgetc(fp);
+    char2 = fgetc(fp);
+    SkipComments(fp);
+    c1 = fscanf(fp, "%d", &width);
+    SkipComments(fp);
+    c2 = fscanf(fp, "%d", &height);
+    SkipComments(fp);
+    c3 = fscanf(fp, "%d", &max);
+
+    if (char1 != 'P' || char2 != '5' || c1 != 1 || c2 != 1 || c3 != 1 || max > 255) {
+        cerr << "Input is not a standard raw 8-bit PGM file." << endl
+             << "Use xv or pnmdepth to convert file to 8-bit PGM format." << endl;
+        exit(1);
+    }
+
+    fgetc(fp);  // Discard exactly one byte after header.
+
+    // Create floating point image with pixels in range [0,1].
+    image = CreateImage(height, width);
+    for (r = 0; r < height; r++)
+        for (c = 0; c < width; c++)
+            image->pixels[r*image->stride+c] = ((float) fgetc(fp)) / 255.0;
+
+    //Check if there is another image in this file, as the latest PGM
+    // standard allows for multiple images.
+    SkipComments(fp);
+    if (getc(fp) == 'P') {
+        cerr << "ignoring other images" << endl;
+        ungetc('P', fp);
+        nextimage = ReadPGM(fp);
+        //image->next = nextimage;
+    }
+    return image;
+}
+
+int SiftFast(int argc, char *argv[])
+{
+#ifdef _WIN32
+    // have to set to binary
+     _setmode(_fileno(stdin),_O_BINARY);
+#endif
+
+    SiftFastImage image = ReadPGM(stdin);
+    Keypoint keypts;
+    float fproctime;
+
+    cerr << "Finding keypoints (image " << image->cols << "x" << image->rows << ")..." << endl;
+    {
+        u32 basetime = timeGetTime();
+        keypts = GetKeypoints(image,3);
+        fproctime = (timeGetTime()-basetime)*0.001f;
+    }
+
+    // write the keys to the output
+    int numkeys = 0;
+    Keypoint key = keypts;
+    while(key) {
+        numkeys++;
+        key = key->next;
+    }
+
+    cerr << numkeys << " keypoints found in " << fproctime << " seconds." << endl;
+
+    cout << numkeys << " " << 128 << endl;
+    key = keypts;
+    while(key) {
+        cout << key->row << " " << key->col << " " << key->scale << " " << key->ori << endl;
+
+        for(int i = 0; i < 128; ++i) {
+            int intdesc = (int)(key->descrip[i]*512.0f);
+            assert( intdesc >= 0 );
+            
+            if( intdesc > 255 )
+                intdesc = 255;
+            cout << intdesc << " ";
+            if( (i&15)==15 )
+                cout << endl;
+        }
+        cout << endl;
+        key = key->next;
+    }
+
+    FreeKeypoints(keypts);
+    DestroyAllResources();
+
+    return 0;
+}
diff --git a/Utilities/CMakeLists.txt b/Utilities/CMakeLists.txt
index 81215d88ef77b1eb875093b48fd6494474c8257b..23d431b7c92bd79bc66e9bb3336468264951b96e 100644
--- a/Utilities/CMakeLists.txt
+++ b/Utilities/CMakeLists.txt
@@ -27,7 +27,7 @@ ENDIF(NOT OTB_USE_EXTERNAL_EXPAT)
 
 #Supress libraries not used by the 2.2.0 version
 #SUBDIRS(BGL otbsvm dxflib InsightJournal otbossim otb6S otbgeotiff tinyXMLlib otbgalib otbkml)
-SUBDIRS(BGL otbsvm dxflib InsightJournal otbossim otbossimplugins otb6S otbgeotiff tinyXMLlib otbkml otbliblas otbedison)
+SUBDIRS(BGL otbsvm dxflib InsightJournal otbossim otbossimplugins otb6S otbgeotiff tinyXMLlib otbkml otbliblas otbedison otbsiftfast)
 
 IF(BUILD_TESTING)
         SUBDIRS( Dart )
diff --git a/Utilities/otbsiftfast/AUTHORS b/Utilities/otbsiftfast/AUTHORS
new file mode 100755
index 0000000000000000000000000000000000000000..d645f229f298a58cf43f1e0a119379b8640f58ea
--- /dev/null
+++ b/Utilities/otbsiftfast/AUTHORS
@@ -0,0 +1 @@
+zerofrog(@gmail.com)
\ No newline at end of file
diff --git a/Utilities/otbsiftfast/CMakeLists.txt b/Utilities/otbsiftfast/CMakeLists.txt
new file mode 100755
index 0000000000000000000000000000000000000000..fdbf2a4fdd6603cd3a6999020668bccc07dc0140
--- /dev/null
+++ b/Utilities/otbsiftfast/CMakeLists.txt
@@ -0,0 +1,114 @@
+cmake_minimum_required (VERSION 2.4.7)
+project (otbsiftfast)
+set( CMAKE_ALLOW_LOOSE_LOOP_CONSTRUCTS TRUE )
+
+include(CheckIncludeFile)
+include(CheckLibraryExists)
+include(CheckCXXSourceRuns)
+include(CheckCXXCompilerFlag)
+
+# check for SSE extensions
+if( CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX )
+  set(SSE_FLAGS)
+
+  set(CMAKE_REQUIRED_FLAGS "-msse2")
+  check_cxx_source_runs("
+    #include <emmintrin.h>
+
+    int main()
+    {
+        __m128d a, b;
+        double vals[2] = {0};
+        a = _mm_loadu_pd(vals);
+        b = _mm_add_pd(a,a);
+        _mm_storeu_pd(vals,b);
+        return 0;
+     }"
+     HAS_SSE2_EXTENSIONS)
+
+  set(CMAKE_REQUIRED_FLAGS "-msse")
+  check_cxx_source_runs("
+    #include <xmmintrin.h>
+    int main()
+    {
+        __m128 a, b;
+        float vals[4] = {0};
+        a = _mm_loadu_ps(vals);
+        b = a;
+        b = _mm_add_ps(a,b);
+        _mm_storeu_ps(vals,b);
+        return 0;
+    }"
+    HAS_SSE_EXTENSIONS)
+
+  set(CMAKE_REQUIRED_FLAGS)
+
+  if(HAS_SSE2_EXTENSIONS)
+    message(STATUS "Using SSE2 extensions")
+    set(SSE_FLAGS "-msse2 -mfpmath=sse")
+  elseif(HAS_SSE_EXTENSIONS)
+    message(STATUS "Using SSE extensions")
+    set(SSE_FLAGS "-msse -mfpmath=sse")
+  endif()
+
+  add_definitions(${SSE_FLAGS})
+elseif(MSVC)
+  check_cxx_source_runs("
+    #include <emmintrin.h>
+
+    int main()
+    {
+        __m128d a, b;
+        double vals[2] = {0};
+        a = _mm_loadu_pd(vals);
+        b = _mm_add_pd(a,a);
+        _mm_storeu_pd(vals,b);
+        return 0;
+     }"
+     HAS_SSE2_EXTENSIONS)
+  if( HAS_SSE2_EXTENSIONS )
+    message(STATUS "Using SSE2 extensions")
+    add_definitions( "/arch:SSE2 /fp:fast -D__SSE__ -D__SSE2__" )
+  endif()
+endif()
+
+add_library(otbsiftfast libsiftfast.cpp)
+
+# check for OpenMP
+if( NOT DEFINED USE_OPENMP OR USE_OPENMP  )
+
+  if( WIN32 )
+    CHECK_INCLUDE_FILE(omp.h HAVE_OMP_H)
+    if( HAVE_OMP_H )
+      message(STATUS "Using OpenMP")
+      check_cxx_compiler_flag(/openmp HAVE_OPENMP)
+
+      if( HAVE_OPENMP )
+        add_definitions("/openmp")
+      endif()
+    endif()
+  elseif(CMAKE_COMPILER_IS_GNUCC OR CMAKE_COMPILER_IS_GNUCXX)
+
+    # check if compilers supports -fopenmp
+    INCLUDE(CheckCCompilerFlag)
+    check_cxx_compiler_flag(-fopenmp HAVE_OPENMP)
+    check_library_exists(gomp omp_get_num_threads "" HAS_GOMP_LIB)
+
+    if( HAVE_OPENMP AND HAS_GOMP_LIB )
+      add_definitions("-fopenmp")
+      target_link_libraries(otbsiftfast gomp)
+      set(OPENMP_LFLAGS "-lgomp")
+    endif()
+  endif()
+endif()
+
+#Instal TARGET & FILES for otb-lib
+
+INSTALL(TARGETS otbsiftfast
+RUNTIME DESTINATION ${OTB_INSTALL_BIN_DIR} COMPONENT RuntimeLibraries
+LIBRARY DESTINATION ${OTB_INSTALL_LIB_DIR} COMPONENT RuntimeLibraries
+ARCHIVE DESTINATION ${OTB_INSTALL_LIB_DIR} COMPONENT Development)
+
+INSTALL(FILES siftfast.h 
+    DESTINATION ${OTB_INSTALL_INCLUDE_DIR}/Utilities/otbsiftfast
+    COMPONENT Development)
diff --git a/Utilities/otbsiftfast/INSTALL b/Utilities/otbsiftfast/INSTALL
new file mode 100755
index 0000000000000000000000000000000000000000..c543c1c02e65f25223200516f3ca3359a439145a
--- /dev/null
+++ b/Utilities/otbsiftfast/INSTALL
@@ -0,0 +1,43 @@
+libsiftfast: author zerofrog(@gmail.com)
+----------------------------------------
+
+The compilation system uses a cross-platform tool called cmake.
+
+Linux/Mac OSX Instructions:
+---------------------------
+
+Just type
+
+> make
+
+This will set the project to be installed in /usr/local. To change the install directory type
+
+> make prefix=/my/new/dir
+
+To change other environment variables that cmake uses after initially making, type "cmake build"
+
+OpenMP:
+
+cmake will use OpenMP if it exists in your system. Note that OpenMP is available only on gcc versions >= 4.2. CMake checks this automatically, but you can force usage or disabling of it by adding "-DUSE_OPENMP=OFF" when manually running cmake. Use ON to force enabling.
+
+Sometimes siftfast might fail to compile with OpenMP because libgomp.so is not setup properly to be used by shared objects. Checkout this tutorial here on how to compile the correct libgomp:
+
+http://openrave.programmingvision.com/index.php?title=Misc:MatlabOpenMP
+
+Basically the problem is that the default libgomp might be compiled with nodlopen flag refusing it to be dynamically loaded. The only way around this is to compile your own libgomp library and make sure libsiftfast is linking to it.
+
+Matlab:
+
+Read this if you are interested in using siftfast.m for matlab on Linux. When compiling a matlab mex file,  you might get a message saying the gcc version is too high. If so, matlab will have a hard time locating the correct libstdc++.so file. In this case, go into /usr/local/share/sys/os/glnx86
+
+and make libgcc_s and libstdc++ point to the /usr/lib versions
+sudo mv libgcc_s.so.1 libgcc_s.so.1.back
+sudo ln -s /lib/libgcc_s.so.1 libgcc_s.so.1
+sudo rm libstdc++.so.6 (this was already a symbolic link)
+sudo ln -s /usr/lib/libstdc++.so.6.0.9 libstdc++.so.6
+
+
+Windows Instructions:
+---------------------
+
+Download cmake (http://www.cmake.org/), make sure to install it in the PATH. Then run runcmake.bat, that should generate visual studio files in the build folder. Open libsiftfast.sln and compile.
diff --git a/Utilities/otbsiftfast/Makefile b/Utilities/otbsiftfast/Makefile
new file mode 100755
index 0000000000000000000000000000000000000000..ea0b05197429b951f9d726da8f99b9b5369474cb
--- /dev/null
+++ b/Utilities/otbsiftfast/Makefile
@@ -0,0 +1,24 @@
+all:
+	@mkdir -p build
+	@if [ $(prefix) ]; then \
+		cd build && cmake -DCMAKE_INSTALL_PREFIX=$(prefix) -DCMAKE_BUILD_TYPE=Release ..; \
+	else \
+		cd build && cmake -DCMAKE_VERBOSE_MAKEFILE=OFF -DCMAKE_BUILD_TYPE=Release ..; \
+	fi
+	cd build && make
+
+install:
+	cd build && make install
+
+uninstall:
+	cd build && make uninstall
+
+test: all
+	cd build && make test
+
+test-future: all
+	cd build && make test-future
+
+clean:
+	-cd build && make clean
+	rm -rf build
diff --git a/Utilities/otbsiftfast/README b/Utilities/otbsiftfast/README
new file mode 100755
index 0000000000000000000000000000000000000000..c12068bf604fad19e833001427ad17a8fa638e99
--- /dev/null
+++ b/Utilities/otbsiftfast/README
@@ -0,0 +1,42 @@
+libsiftfast: author zerofrog(@gmail.com)
+----------------------------------------
+Various utilities to compute the SIFT features of a greyscale image. Packages offered:
+
+*libsiftfast.so - main library that contains the sift code
+
+*siftfast - really fast SIFT implementation using SSE and OpenMP (also fixes some bugs from lowe's code, so outputs are similar, but not exact). The usage is very similar to David Lowe's sift program. To input a greyscale image test.pgm and get the SIFT keys test.key use
+
+> siftfast < test.pgm > test.key
+
+The format of test.key is pretty self explanatory:
+
+#keys #key_dimension
+(for #keys) [x y scale orientation (for #key_dimension)[value] ]
+
+
+*siftfast.m - matlab/octave mex file that uses sift_fast, just do [frames,descr]=sift_mex(grayscale_image);
+           frames is a 4xN matrix of [X,Y,scale,orientation],
+           descr is a 128xN matrix of normalized descriptors
+           To use the mex files, libsift_fast.so, Octave/Matlab need to be able to load it. A way to do it is to add its path to your LD_LIBRARY_PATH in your ~/.bashrc file, or in matlab with:
+           setenv('LD_LIBRARY_PATH',[getenv('LD_LIBRARY_PATH') ':' libsift_directory]);
+           
+
+SIFT is based on
+David G. Lowe, "Distinctive image features from scale-invariant keypoints,
+"International Journal of Computer Vision, 60, 2 (2004), pp. 91-110.
+
+Using the Octave/Matlab mex files
+---------------------------------
+
+the octave and matlab mex files are installed in $prefix/share/siftfast/octave and $prefix/share/siftfast/matlab where $prefix is the installation directory you've provided (via CMAKE_INSTALL_PREFIX). The default value for $prefix is /usr/local
+
+Add the path via addpath('$prefix/share/siftfast/matlab'), and then in octave/matlab type:
+
+> help siftfast
+
+this should give a help file on how to use it.
+
+Comparisons with other SIFT Code
+--------------------------------
+
+The default setting of siftfast produce the same output as Lowe's free sift program. On a quad-core Core2Duo machine with OpenMP, siftfast goes about 6x faster than lowe's sift program for 640x480 images.
diff --git a/Utilities/otbsiftfast/cmake_uninstall.cmake.in b/Utilities/otbsiftfast/cmake_uninstall.cmake.in
new file mode 100755
index 0000000000000000000000000000000000000000..df95fb9d82dd84e040f52b7944abe750c2e10cf7
--- /dev/null
+++ b/Utilities/otbsiftfast/cmake_uninstall.cmake.in
@@ -0,0 +1,21 @@
+IF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
+  MESSAGE(FATAL_ERROR "Cannot find install manifest: \"@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt\"")
+ENDIF(NOT EXISTS "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt")
+
+FILE(READ "@CMAKE_CURRENT_BINARY_DIR@/install_manifest.txt" files)
+STRING(REGEX REPLACE "\n" ";" files "${files}")
+FOREACH(file ${files})
+  MESSAGE(STATUS "Uninstalling \"$ENV{DESTDIR}${file}\"")
+  IF(EXISTS "$ENV{DESTDIR}${file}")
+    EXEC_PROGRAM(
+      "@CMAKE_COMMAND@" ARGS "-E remove \"$ENV{DESTDIR}${file}\""
+      OUTPUT_VARIABLE rm_out
+      RETURN_VALUE rm_retval
+      )
+    IF(NOT "${rm_retval}" STREQUAL 0)
+      MESSAGE(FATAL_ERROR "Problem when removing \"$ENV{DESTDIR}${file}\"")
+    ENDIF(NOT "${rm_retval}" STREQUAL 0)
+  ELSE(EXISTS "$ENV{DESTDIR}${file}")
+    MESSAGE(STATUS "File \"$ENV{DESTDIR}${file}\" does not exist.")
+  ENDIF(EXISTS "$ENV{DESTDIR}${file}")
+ENDFOREACH(file)
diff --git a/Utilities/otbsiftfast/libsiftfast.cpp b/Utilities/otbsiftfast/libsiftfast.cpp
new file mode 100755
index 0000000000000000000000000000000000000000..6c2ee7ca6117513d093fadcff10f365896e5d4d8
--- /dev/null
+++ b/Utilities/otbsiftfast/libsiftfast.cpp
@@ -0,0 +1,1806 @@
+// exact C++ implementation of lowe's sift program
+// author: zerofrog(@gmail.com), Sep 2008
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+//This program is distributed in the hope that it will be useful,
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//Lesser GNU General Public License for more details.
+//
+//You should have received a copy of the GNU Lesser General Public License
+//along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+// This source code was carefully calibrated to match David Lowe's SIFT features program
+#include <stdlib.h>
+#include <math.h>
+#include <assert.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include <vector>
+#include <iostream>
+#include <map>
+#include <list>
+
+#include <sys/timeb.h>    // ftime(), struct timeb
+
+#ifndef _MSC_VER
+#include <sys/time.h>
+#else
+#define WIN32_LEAN_AND_MEAN
+#include <winsock2.h>
+#endif
+
+#if defined(__SSE3__)
+#include <pmmintrin.h>
+#elif defined(__SSE2__)
+#include <emmintrin.h>
+#elif defined(__SSE__)
+#include <xmmintrin.h>
+#else
+#ifndef _MSC_VER
+#warning "Not compiling with SSE extenions"
+#endif
+#endif
+
+#ifdef _OPENMP
+#include <omp.h>
+#else
+#ifndef _MSC_VER
+#warning "OpenMP not enabled. Use -fopenmp (>=gcc-4.2) or -openmp (icc) for speed enhancements on SMP machines."
+#endif
+#endif
+
+using namespace std;
+
+#define PI 3.141592654f
+#define SQRT2 1.4142136f
+
+// if defined, will profile the critical functions and write results to prof.txt
+//#define DVPROFILE 
+
+// if defined will align all image rows to 16 bytes
+// usually aligning is faster (can save ~100ms), however for 1024x768 
+// cache misses with the enlarged rows make it ~400-500ms slower
+//#define ALIGNED_IMAGE_ROWS
+
+#ifdef ALIGNED_IMAGE_ROWS
+#define _MM_LOAD_ALIGNED _mm_load_ps
+#define _MM_STORE_ALIGNED _mm_store_ps
+#else
+#define _MM_LOAD_ALIGNED _mm_loadu_ps
+#define _MM_STORE_ALIGNED _mm_storeu_ps
+#endif
+
+#ifdef DVPROFILE
+#include "profiler.h"
+#else
+#define DVSTARTPROFILE()
+#endif
+
+typedef struct ImageSt {
+    int rows, cols;          // Dimensions of image.
+    float *pixels;          // 2D array of image pixels.
+    int stride;             // how many floats until the next row
+                            // (used to add padding to make rows aligned to 16 bytes)
+} *Image;
+
+
+// Data structure for a keypoint.  Lists of keypoints are linked by the "next" field.
+typedef struct KeypointSt {
+    float row, col;             // Subpixel location of keypoint.
+    float scale, ori;           // Scale and orientation (range [-PI,PI])
+    float descrip[128];     // Vector of descriptor values
+    struct KeypointSt *next;    // Pointer to next keypoint in list.
+} *Keypoint;
+
+
+int DoubleImSize = 1;
+// was int Scales = 3
+int Scales = 6;
+float InitSigma = 1.6f;
+float PeakThresh;
+
+static list<Keypoint> s_listKeypoints;
+
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+#if defined(__SSE__) && !defined(SIMDMATH_H)
+
+#ifdef _MSC_VER
+typedef __m128           vec_int4;
+typedef __m128           vec_float4;
+#else
+// need an implementation of atan2 in SSE
+typedef int             vec_int4   __attribute__ ((vector_size (16)));
+typedef float           vec_float4 __attribute__ ((vector_size (16)));
+#endif
+
+inline vec_float4 atanf4(  vec_float4 x );
+inline vec_float4 atan2f4( vec_float4 y, vec_float4 x );
+
+#endif
+
+inline u64 GetMicroTime()
+{
+#ifdef _WIN32
+    LARGE_INTEGER count, freq;
+    QueryPerformanceCounter(&count);
+    QueryPerformanceFrequency(&freq);
+    return (count.QuadPart * 1000000) / freq.QuadPart;
+#else
+    struct timeval t;
+    gettimeofday(&t, NULL);
+    return (u64)t.tv_sec*1000000+t.tv_usec;
+#endif
+}
+
+// aligned malloc and free
+inline void* sift_aligned_malloc(size_t size, size_t align)
+{
+    assert( align < 0x10000 );
+	char* p = (char*)malloc(size+align);
+	int off = 2+align - ((int)(size_t)(p+2) % align);
+
+	p += off;
+	*(u16*)(p-2) = off;
+
+	return p;
+}
+
+void sift_aligned_free(void* pmem)
+{
+    if( pmem != NULL ) {
+        char* p = (char*)pmem;
+        free(p - (int)*(u16*)(p-2));
+    }
+}
+
+#if defined(_MSC_VER)
+#define SIFT_ALIGNED16(x) __declspec(align(16)) x
+#else
+#define SIFT_ALIGNED16(x) x __attribute((aligned(16)))
+#endif
+
+extern "C" {
+Image CreateImage(int rows, int cols);
+Image CreateImageFromMatlabData(double* pdata, int rows, int cols);
+void DestroyAllImages();
+Keypoint GetKeypoints(Image porgimage,unsigned int nbScales);
+Image SiftDoubleSize(Image p);
+Image SiftCopyImage(Image p);
+Image HalfImageSize(Image curimage);
+Keypoint OctaveKeypoints(Image pimage, Image* phalfimage, float fscale, Keypoint prevkeypts,unsigned int nbScales);
+void SubtractImage(Image imgdst, Image img0, Image img1);
+void GaussianBlur(Image imgdst, Image image, float fblur);
+void ConvHorizontal(Image imgdst, Image image, float* kernel, int ksize);
+void ConvHorizontalFast(Image imgdst, Image image, float* kernel, int ksize);
+void ConvVertical(Image image, float* kernel, int ksize);
+void ConvVerticalFast(Image image, float* kernel, int ksize);
+void ConvBuffer(float* buf, float* kernel, int cols, int ksize);
+Keypoint FindMaxMin(Image* imdiff, Image* imgaus, float fscale, Keypoint prevkeypts, unsigned int nbScales);
+void GradOriImages(Image imgaus, Image imgrad, Image imorient);
+void GradOriImagesFast(Image imgaus, Image imgrad, Image imorient);
+int LocalMaxMin(float fval, Image imdiff, int row, int col);
+int NotOnEdge(Image imdiff, int row, int col);
+Keypoint InterpKeyPoint(Image* imdiff, int index, int rowstart, int colstart,
+                        Image imgrad, Image imorient, char* pMaxMinArray, float fscale,Keypoint keypts, int steps,unsigned int nbScales);
+float FitQuadratic(float* X, Image* imdiff, int index, int rowstart, int colstart);
+void SolveLinearSystem(float* Y, float* H, int dim);
+Keypoint AssignOriHist(Image imgrad, Image imorient, float fscale, float fSize,
+                       float frowstart, float fcolstart, Keypoint keypts);
+float InterpPeak(float f0, float f1, float f2);
+void SmoothHistogram(float* phist, int numbins);
+Keypoint MakeKeypoint(Image imgrad, Image imorient, float fscale, float fSize,
+                      float frowstart,float fcolstart,float forient, Keypoint keypts);
+void MakeKeypointSample(Keypoint pnewkeypt, Image imgrad, Image imorient,
+                        float fSize, float frowstart, float fcolstart);
+void NormalizeVec(float* pf, int num);
+void KeySample(float* fdesc, Keypoint pnewkeypt, Image imgrad, Image imorient,
+               float fSize, float frowstart, float fcolstart);
+void AddSample(float* fdesc, Keypoint pkeypt, Image imgrad, Image imorient, int r, int c,
+               float rpos, float cpos, float rx, float cx);
+void PlaceInIndex(float* fdesc, float fgrad, float forient, float fnewrow, float fnewcol);
+void FreeKeypoints(Keypoint keypt);
+void DestroyAllResources();
+}
+
+static list<Image> s_listImages;
+Image CreateImage(int rows, int cols)
+{
+    Image im;
+
+    im = (Image) sift_aligned_malloc(sizeof(struct ImageSt),16);
+    im->rows = rows;
+    im->cols = cols;
+
+    // cannot make 16 byte aligned since 1024x768 images 
+#if defined(ALIGNED_IMAGE_ROWS) && defined(__SSE__)
+    im->stride = (cols+3)&~3;
+#else
+    im->stride = cols;
+#endif
+
+    im->pixels = (float*)sift_aligned_malloc(rows*im->stride*sizeof(float)+16,128); // add padding (for sse)
+    s_listImages.push_back(im);
+    return im;
+}
+
+void DestroyAllImages()
+{
+    for(list<Image>::iterator it = s_listImages.begin(); it != s_listImages.end(); ++it) {
+        sift_aligned_free((*it)->pixels);
+        sift_aligned_free(*it);
+    }
+    s_listImages.clear();
+}
+
+Image CreateImageFromMatlabData(double* pdata, int rows, int cols)
+{
+    Image image = CreateImage(rows,cols);
+    float* pixels = image->pixels;
+    
+#ifdef __SSE2__
+    for(int i = 0; i < (rows&~1); i += 2, pixels+=2*image->stride) {
+        for(int j = 0; j < cols; j += 4) {
+            double* pf = &pdata[i+j*rows];
+#ifdef ALIGNED_IMAGE_ROWS
+            __m128d m0 = _mm_load_pd(pf);
+            __m128d m1 = _mm_load_pd(pf+rows);
+            __m128d m2 = _mm_load_pd(pf+2*rows);
+            __m128d m3 = _mm_load_pd(pf+3*rows);
+#else
+            __m128d m0 = _mm_loadu_pd(pf);
+            __m128d m1 = _mm_loadu_pd(pf+rows);
+            __m128d m2 = _mm_loadu_pd(pf+2*rows);
+            __m128d m3 = _mm_loadu_pd(pf+3*rows);
+#endif
+            
+            __m128 mrows0 = _mm_shuffle_ps(_mm_cvtpd_ps(m0),_mm_cvtpd_ps(m1),0x44);
+            __m128 mrows1 = _mm_shuffle_ps(_mm_cvtpd_ps(m2),_mm_cvtpd_ps(m3),0x44);
+
+            _mm_storeu_ps(pixels+j,_mm_shuffle_ps(mrows0,mrows1,0x88));
+            _mm_storeu_ps(pixels+j+image->stride,_mm_shuffle_ps(mrows0,mrows1,0xdd));
+        }
+    }
+
+    if( rows & 1 ) {
+        for(int j = 0; j < cols; ++j)
+            pixels[j] = (float)pdata[rows-1+j*rows];
+    }
+#else
+    for(int i = 0; i < rows; ++i, pixels+=image->stride) {
+        for(int j = 0; j < cols; ++j)
+            pixels[j] = (float)pdata[i+j*rows];
+    }
+#endif
+
+    return image;
+}
+
+static Image* s_imgaus = NULL, *s_imdiff = NULL;
+static Image s_imgrad = NULL, s_imorient = NULL;
+static char* s_MaxMinArray = NULL;
+
+Keypoint GetKeypoints(Image porgimage, unsigned int nbScales)
+{
+    DVSTARTPROFILE();
+
+    PeakThresh = 0.04f/(float)nbScales;
+    s_imgaus = new Image[((27 + 4*nbScales)&0xfffffff0)/4];
+    s_imdiff = new Image[((23 + 4*nbScales)&0xfffffff0)/4];
+
+    Image pimage = NULL;
+    float fscale = 1.0f;
+    Image halfimage = NULL;
+    Keypoint keypts = 0;
+
+    if( DoubleImSize ) {
+        pimage = SiftDoubleSize(porgimage);
+        fscale = 0.5f;
+    }
+    else
+        pimage = SiftCopyImage(porgimage);
+    
+    float fnewscale = 1.0f;
+    if( !DoubleImSize )
+        fnewscale = 0.5f;
+    
+    if( InitSigma > fnewscale ) {
+        GaussianBlur(pimage, pimage, sqrtf(InitSigma*InitSigma - fnewscale*fnewscale));
+    }
+
+    // create the images
+    s_imgaus[0] = pimage;
+    for(int i = 1; i < (int)nbScales+3; ++i)
+        s_imgaus[i] = CreateImage(pimage->rows,pimage->cols);
+    for(int i = 0; i < (int)nbScales+2; ++i)
+        s_imdiff[i] = CreateImage(pimage->rows,pimage->cols);
+    s_imgrad = CreateImage(pimage->rows,pimage->cols);
+    s_imorient = CreateImage(pimage->rows,pimage->cols);
+    s_MaxMinArray = (char*)sift_aligned_malloc(pimage->rows*pimage->cols,16);
+
+    while( pimage->rows > 12 && pimage->cols > 12 ) {
+        keypts = OctaveKeypoints(pimage, &halfimage, fscale, keypts,nbScales);
+        pimage = HalfImageSize(halfimage);
+        fscale += fscale;
+    }
+
+    delete[] s_imgaus;
+    delete[] s_imdiff;
+    sift_aligned_free(s_MaxMinArray);
+
+#ifdef DVPROFILE
+    DVProfWrite("prof.txt");
+#endif
+    
+    return keypts;
+}
+
+Image SiftDoubleSize(Image im)
+{
+    int rows = im->rows, cols = im->cols;
+    int newrows = 2*rows-2, newcols = 2*cols-2;
+    Image newim = CreateImage(newrows, newcols);
+    float* psrc = im->pixels, *pdst = newim->pixels;
+    int stride = im->stride, newstride = newim->stride;
+    for(int i = 0; i < rows-1; ++i, psrc += stride, pdst += 2*newstride) {
+        for(int j = 0; j < cols-1; ++j) {
+            pdst[2*j] = psrc[j];
+            pdst[newstride+2*j] = 0.5f*(psrc[j] + psrc[stride+j]);
+            pdst[2*j+1] = 0.5f*(psrc[j] + psrc[j+1]);
+            pdst[newstride+2*j+1] = 0.25f*(psrc[j] + psrc[j+1] + psrc[stride+j] + psrc[stride+j+1]);
+        }
+    }
+    
+    return newim;
+}
+
+Image SiftCopyImage(Image im)
+{
+    DVSTARTPROFILE();
+    Image newim = CreateImage(im->rows,im->cols);
+    memcpy(newim->pixels, im->pixels, sizeof(float)*im->rows*im->stride);
+    return newim;
+}
+
+Image HalfImageSize(Image curimage)
+{
+    int cols = curimage->cols;
+    int newrows = (curimage->rows+(curimage->rows<0))>>1;
+    int newcols = (cols+(curimage->cols<0))>>1;
+
+    Image halfimage = CreateImage(newrows, newcols);
+    float* psrc = curimage->pixels, *pdst = halfimage->pixels;
+    int stride = curimage->stride, newstride = halfimage->stride;
+
+
+    for(int halfrow = 0; halfrow < newrows; ++halfrow, pdst += newstride, psrc += 2*stride ) {
+        for(int halfcol = 0; halfcol < newcols; ++halfcol) {
+            pdst[halfcol] = psrc[halfcol*2];
+        }
+    }
+
+    return halfimage;
+}
+
+Keypoint OctaveKeypoints(Image pimage, Image* phalfimage, float fscale, Keypoint prevkeypts,unsigned int nbScales)
+{
+    DVSTARTPROFILE();
+
+    float fwidth = powf(2.0f,1.0f / (float)nbScales);
+    float fincsigma = sqrtf(fwidth * fwidth - 1.0f);
+
+    int rows = pimage->rows, cols = pimage->cols, stride = pimage->stride;
+
+    s_imgaus[0] = pimage;
+    float sigma = InitSigma;
+    for(int i = 1; i < (int)nbScales+3; ++i) {
+
+        s_imgaus[i]->rows = rows; s_imgaus[i]->cols = cols; s_imgaus[i]->stride = stride;
+        GaussianBlur(s_imgaus[i], s_imgaus[i-1], fincsigma * sigma);
+        
+        s_imdiff[i-1]->rows = rows; s_imdiff[i-1]->cols = cols; s_imdiff[i-1]->stride = stride;
+        SubtractImage(s_imdiff[i-1],s_imgaus[i-1],s_imgaus[i]);
+
+        sigma *= fwidth;
+    }
+
+    s_imgrad->rows = rows; s_imgrad->cols = cols; s_imgrad->stride = stride;
+    s_imorient->rows = rows; s_imorient->cols = cols; s_imorient->stride = stride;
+    
+    *phalfimage = s_imgaus[nbScales];
+    return FindMaxMin(s_imdiff, s_imgaus, fscale, prevkeypts,nbScales);
+}
+
+// imgdst = img0 - img1
+void SubtractImage(Image imgdst, Image img0, Image img1)
+{
+    int rows = imgdst->rows, cols = imgdst->cols, stride = imgdst->stride;
+    float* _pixels0 = img0->pixels, *_pixels1 = img1->pixels, *_pdst = imgdst->pixels;
+#ifdef __SSE__
+    #pragma omp parallel for schedule(dynamic,32)
+    for(int j = 0; j < rows; ++j ) {
+        float* pixels0 = _pixels0+j*stride;
+        float* pixels1 = _pixels1+j*stride;
+        float* pdst = _pdst + j*stride;
+                    
+        for(int k = 0; k < (cols&~7); k += 8) {
+            _MM_STORE_ALIGNED(pdst+k,_mm_sub_ps(_MM_LOAD_ALIGNED(pixels0+k), _MM_LOAD_ALIGNED(pixels1+k)));
+            _MM_STORE_ALIGNED(pdst+k+4,_mm_sub_ps(_MM_LOAD_ALIGNED(pixels0+k+4), _MM_LOAD_ALIGNED(pixels1+k+4)));
+        }
+
+        for(int k = (cols&~7); k < cols; ++k)
+            pdst[k] = pixels0[k]-pixels1[k];
+    }
+#else
+    for(int j = 0; j < rows; ++j, _pixels0 += stride, _pixels1 += stride, _pdst += stride ) {
+        for(int k = 0; k < cols; ++k) {
+            _pdst[k] = _pixels0[k]-_pixels1[k];
+        }
+    }
+#endif
+}
+
+static map<float, float* > s_mapkernel; // assumes GaussTruncate doesn't change!, if freeing second, subtract 4 bytes
+
+void GaussianBlur(Image imgdst, Image image, float fblur)
+{
+    DVSTARTPROFILE();
+
+    const float GaussTruncate = 4.0f;
+    
+
+    int ksize = (int)(2.0f * GaussTruncate * fblur + 1.0f);
+    if( ksize < 3 )
+        ksize = 3;
+    ksize += !(ksize&1); // make it odd    
+        
+    float* kernel = NULL;
+    for( map<float, float* >::iterator it = s_mapkernel.begin(); it != s_mapkernel.end(); ++it) {
+        if( fabsf(fblur-it->first) < 0.001f ) {
+            kernel = it->second;
+            break;
+        }
+    }
+
+    if( kernel == NULL ) { // have to create a new one
+        double faccum = 0;
+
+        // +4 for alignment and padding issues with sse
+        kernel = (float*)sift_aligned_malloc((ksize+9)*sizeof(float),16)+1;
+        
+        int width = (ksize >= 0 ? ksize : ksize-1)>>1;
+        for(int i = 0; i <= ksize; ++i) {
+            float fweight = expf( - (float)(i-width)*(i-width) / (2.0f*fblur*fblur) );
+            faccum += (double)fweight;
+            kernel[i] = fweight;
+        }
+        
+        for(int i = 0; i < ksize; ++i) // shouldn't it be <=?
+            kernel[i] /= (float)faccum;
+        memset(kernel+ksize,0,sizeof(float)*8);
+
+        s_mapkernel[fblur] = kernel;
+    }
+
+#ifdef __SSE__
+    if( image->cols < 12 )
+        ConvHorizontal(imgdst, image,kernel,ksize);
+    else
+        ConvHorizontalFast(imgdst, image,kernel,ksize);
+
+    ConvVerticalFast(imgdst,kernel,ksize);
+#else
+    ConvHorizontal(imgdst, image,kernel,ksize);
+    ConvVertical(imgdst,kernel,ksize);
+#endif
+}
+
+void ConvHorizontal(Image imgdst, Image image, float* kernel, int ksize)
+{
+    DVSTARTPROFILE();
+
+    static vector<float> _buf; //TODO, make 16 byte aligned
+    _buf.resize(image->cols + ksize);
+    float* buf = &_buf[0];
+    int rows = image->rows, cols = image->cols, stride = image->stride;
+    int width = (ksize >= 0 ? ksize : ksize-1)>>1;
+    float* pixels = image->pixels;
+    float* pdst = imgdst->pixels;
+
+    for(int i = 0; i < rows; ++i, pixels += stride, pdst += stride) {
+        for(int j = 0; j < width; ++j)
+            buf[j] = pixels[0];
+        for(int j = 0; j < cols; ++j)
+            buf[width+j] = pixels[j];
+        for(int j = 0; j < width; ++j)
+            buf[cols+width+j] = pixels[cols-1];
+        ConvBuffer(buf,kernel,cols,ksize);
+
+        memcpy(pdst,buf,sizeof(float)*cols);
+    }
+}
+
+void ConvVertical(Image image, float* kernel, int ksize)
+{
+    DVSTARTPROFILE();
+
+    static vector<float> _buf; //TODO, make 16 byte aligned
+    _buf.resize(image->rows + ksize);
+    float* buf = &_buf[0];
+    int rows = image->rows, cols = image->cols, stride = image->stride;
+    int width = (ksize >= 0 ? ksize : ksize-1)>>1;
+    float* pixels = image->pixels;
+
+    for(int j = 0; j < cols; ++j, pixels += 1) {
+        for(int i = 0; i < width; ++i)
+            buf[i] = pixels[0];
+        for(int i = 0; i < rows; ++i)
+            buf[width+i] = pixels[i*stride];
+        for(int i = 0; i < width; ++i)
+            buf[rows+width+i] = pixels[(rows-1)*stride];
+        ConvBuffer(buf,kernel,rows,ksize);
+
+        for(int i = 0; i < rows; ++i)
+            pixels[i*stride] = buf[i];
+    }
+}
+
+void ConvBuffer(float* buf, float* kernel, int bufsize, int ksize)
+{
+    for(int i = 0; i < bufsize; ++i) {
+        float faccum = 0;
+        for(int j = 0; j < ksize; ++j)
+            faccum += buf[i+j]*kernel[j];
+        buf[i] = (float)faccum;
+    }
+}
+
+#ifdef __SSE__
+
+typedef vector<float*> LISTBUF;
+static LISTBUF  s_listconvbuf; //TODO, make 16 byte aligned
+static int s_convbufsize = 0; // the size of all the buffers in s_listconvbuf
+static int SIFT_ALIGNED16(s_convmask[4]) = {0xffffffff,0xffffffff,0xffffffff,0};
+
+struct myaccum { float SIFT_ALIGNED16(faccum[2][4]); };
+
+void ConvHorizontalFast(Image imgdst, Image image, float* kernel, int ksize)
+{
+    int rows = image->rows, cols = image->cols, stride = image->stride;
+    assert( ksize >= 3 && cols >= 3 ); // 3 is cutting it close
+
+#ifdef ALIGNED_IMAGE_ROWS
+    assert( !(image->stride&3) );
+#endif
+    
+    DVSTARTPROFILE();
+    
+    int width = (ksize >= 0 ? ksize : ksize-1)>>1;
+    float* _pixels = image->pixels, *_pdst = imgdst->pixels;
+
+    int convsize = max(100000,4*(cols + ksize)+36);
+
+    if( s_listconvbuf.size() == 0 || s_convbufsize < convsize ) {
+        for(LISTBUF::iterator it = s_listconvbuf.begin(); it != s_listconvbuf.end(); ++it)
+            sift_aligned_free(*it);
+        s_listconvbuf.clear();
+        
+        // create at least one
+        s_listconvbuf.push_back((float*)sift_aligned_malloc(convsize,16));
+        s_convbufsize = convsize;
+    }
+#ifdef _OPENMP
+    else {
+        for(LISTBUF::iterator it = s_listconvbuf.begin(); it != s_listconvbuf.end(); ++it)
+            memset(*it+cols+ksize+1,0,32);
+    }
+#endif
+
+#ifdef _OPENMP
+    for(int i = s_listconvbuf.size(); i < omp_get_max_threads(); ++i) {
+        s_listconvbuf.push_back((float*)sift_aligned_malloc(convsize,16));
+        memset(s_listconvbuf.back()+cols+ksize+1,0,32);
+    }
+#else
+    float* pconvbuf = s_listconvbuf.back();
+    memset(pconvbuf+cols+ksize+1,0,32);
+#endif
+
+    #pragma omp parallel for schedule(dynamic,16)
+    for(int i = 0; i < rows; i++) {
+    
+#ifdef _OPENMP
+        float* pconvbuf;
+        
+        // need to get a free buffer
+        #pragma omp critical
+        {
+            if( s_listconvbuf.size() == 0 ) {
+                // for some reason, crashes if this is ever executed....
+                pconvbuf = (float*)sift_aligned_malloc(convsize,16);
+                memset(pconvbuf+cols+ksize+1,0,32);
+            }
+            else {
+                pconvbuf = s_listconvbuf.back();
+                s_listconvbuf.pop_back();
+            }
+        }
+#endif
+        // get 16 byte aligned array
+        myaccum ac;
+        
+        float* pixels = _pixels+i*stride;
+        float* pdst = _pdst + i*stride;
+        
+        float* buf = pconvbuf+1;
+        float f0 = pixels[0], f0e = pixels[cols-1];
+        for(int j = 0; j < width; ++j)
+            buf[j] = f0;
+        memcpy(buf+width,pixels,cols*sizeof(float));
+        for(int j = 0; j < width; ++j)
+            buf[cols+width+j] = f0e;
+        
+        __m128 mkerbase = _mm_and_ps(_mm_loadu_ps(kernel), _mm_load_ps((float*)s_convmask));
+        
+        for(int j = 0; j < 2*(cols>>2); ++j) {
+            int off = 2*j-(j&1);
+            buf = pconvbuf+1+off;
+            __m128 maccum0 = _mm_mul_ps(_mm_loadu_ps(buf), mkerbase);
+            __m128 maccum1 = _mm_mul_ps(_mm_loadu_ps(buf+2), mkerbase);
+            
+            __m128 mbufprev = _mm_loadu_ps(buf+3);
+            for(int k = 3; k < ksize; k += 8) {
+                __m128 mbuf0 = mbufprev;
+                __m128 mker0 = _mm_load_ps(kernel+k);
+                __m128 mbuf1 = _mm_loadu_ps(buf+k+4);
+                __m128 mker1 = _mm_load_ps(kernel+k+4);
+
+                // accumulate the first row
+                maccum0 = _mm_add_ps(maccum0,_mm_mul_ps(mbuf0,mker0));
+                maccum0 = _mm_add_ps(maccum0,_mm_mul_ps(mbuf1,mker1));
+
+                mbufprev = _mm_loadu_ps(buf+k+8); // load new
+                mbuf0 = _mm_shuffle_ps(mbufprev,mbuf0,0xe4); // remove first 2 elts
+
+                // arrange kernel to match mbuf0 and mbuf1 for second row
+                __m128 mkertemp = mker0;
+                mker0 = _mm_shuffle_ps(mker1,mker0,0x4e);
+                mker1 = _mm_shuffle_ps(mkertemp,mker1,0x4e);
+
+                // accumulate the second row
+                maccum1 = _mm_add_ps(maccum1,_mm_mul_ps(mbuf0,mker0));
+                maccum1 = _mm_add_ps(maccum1,_mm_mul_ps(mbuf1,mker1));
+            }
+
+#ifdef __SSE3__
+            maccum0 = _mm_hadd_ps(maccum0,maccum1);
+            maccum0 = _mm_hadd_ps(maccum0,maccum0);
+            _mm_storel_pi((__m64*)ac.faccum[0],maccum0);
+            pdst[off] = ac.faccum[0][0];
+            pdst[off+2] = ac.faccum[0][1];
+#else
+            _mm_store_ps(ac.faccum[0],maccum0);
+            _mm_store_ps(ac.faccum[1],maccum1);
+            pdst[off] = ac.faccum[0][0]+ac.faccum[0][1]+ac.faccum[0][2]+ac.faccum[0][3];
+            pdst[off+2] = ac.faccum[1][0]+ac.faccum[1][1]+ac.faccum[1][2]+ac.faccum[1][3];
+#endif
+        }
+
+        // take care of the left over columns
+        for(int j=(cols&~3); j < cols; ++j) {
+            buf = pconvbuf+j+1;
+            __m128 maccum0 = _mm_mul_ps(_mm_loadu_ps(buf), mkerbase);
+                
+            for(int k = 3; k < ksize; k += 4) {
+                __m128 mbuf0 = _mm_loadu_ps(buf+k);
+                __m128 mker0 = _mm_load_ps(kernel+k);
+                maccum0 = _mm_add_ps(maccum0,_mm_mul_ps(mbuf0,mker0));
+            }
+
+#ifdef __SSE3__
+            maccum0 = _mm_hadd_ps(maccum0,maccum0);
+            maccum0 = _mm_hadd_ps(maccum0,maccum0);
+            _mm_store_ss(&pdst[j],maccum0);
+#else
+            _mm_store_ps(ac.faccum[0],maccum0);
+            pdst[j] = ac.faccum[0][0]+ac.faccum[0][1]+ac.faccum[0][2]+ac.faccum[0][3];
+#endif
+        }
+
+#ifdef _OPENMP
+        #pragma omp critical
+        {
+            s_listconvbuf.push_back(pconvbuf);
+        }
+#endif
+    }
+}
+
+void ConvVerticalFast(Image image, float* kernel, int ksize)
+{
+    int rows = image->rows, stride = image->stride;
+    
+    assert( ksize >= 3); // 3 is cutting it close
+#ifdef ALIGNED_IMAGE_ROWS
+    assert( !(image->stride&3) );
+#endif
+
+    DVSTARTPROFILE();
+
+    int convsize = max(100000,32*(image->rows + ksize));
+
+    if( s_listconvbuf.size() == 0 || s_convbufsize < convsize ) {
+        for(LISTBUF::iterator it = s_listconvbuf.begin(); it != s_listconvbuf.end(); ++it)
+            sift_aligned_free(*it);
+        
+        s_listconvbuf.clear();
+        
+        // create at least one
+        s_listconvbuf.push_back((float*)sift_aligned_malloc(convsize,16));
+        s_convbufsize = convsize;
+    }
+
+
+#ifdef _OPENMP
+    for(int i = s_listconvbuf.size(); i < omp_get_max_threads(); ++i) {
+        s_listconvbuf.push_back((float*)sift_aligned_malloc(convsize,16));
+    }
+#else
+    float* pconvbuf = s_listconvbuf.back();
+#endif
+
+    int width = (ksize >= 0 ? ksize : ksize-1)>>1;
+    float* _pixels = image->pixels;
+
+    #pragma omp parallel for
+    for(int j = 0; j < stride; j += 4) {
+        
+        float* pixels = _pixels+j;
+#ifndef ALIGNED_IMAGE_ROWS
+        myaccum ac;
+#endif
+        
+#ifdef _OPENMP
+        float* pconvbuf;
+        
+        // need to get a free buffer
+        #pragma omp critical
+        {
+            if( s_listconvbuf.size() == 0 ) {
+                pconvbuf = (float*)sift_aligned_malloc(convsize,16);
+            }
+            else {
+                pconvbuf = s_listconvbuf.back();
+                s_listconvbuf.pop_back();
+            }
+        }
+#endif
+        
+        __m128 mpprev = _MM_LOAD_ALIGNED(pixels);
+        
+        __m128 mprev = mpprev;
+        __m128 mker0 = _mm_load1_ps(kernel);
+        __m128 mker1 = _mm_load1_ps(kernel+1);
+        __m128 mker2 = _mm_load1_ps(kernel+2);
+
+        float* buf = pconvbuf;
+        for(int i = 2; i <= width; ++i) {
+            _mm_store_ps(buf,mpprev);
+            __m128 maccum = _mm_add_ps(_mm_mul_ps(mpprev,mker0), _mm_mul_ps(mprev,mker1));
+            maccum = _mm_add_ps(maccum,_mm_mul_ps(mprev,mker2));
+            _mm_store_ps(buf+4,maccum);
+            buf += 8;
+        }
+        for(int i = 1; i < rows-width+2; ++i) {
+            __m128 mnew = _MM_LOAD_ALIGNED(pixels+i*stride);
+
+            _mm_store_ps(buf,mpprev);
+            __m128 maccum = _mm_add_ps(_mm_mul_ps(mpprev,mker0), _mm_mul_ps(mprev,mker1));
+            maccum = _mm_add_ps(maccum,_mm_mul_ps(mnew,mker2));
+            mpprev = mprev;
+            _mm_store_ps(buf+4,maccum);
+            mprev = mnew;
+            buf += 8;
+        }
+        
+        _mm_store_ps(buf,mpprev); buf += 8;
+        for(int i = rows-width+2; i < rows; ++i) {
+            __m128 mnew = _mm_loadu_ps(pixels+i*stride);
+            _mm_store_ps(buf,mprev);
+            mprev = mnew;
+            buf += 8;
+        }
+
+        // mprev points to the last row
+        for(int i = 0; i < width; ++i) {
+            _mm_store_ps(buf,mprev);
+            buf += 8;
+        }
+
+        //// finally convolve
+        buf = pconvbuf;
+        for(int i = 0; i < rows; ++i, buf += 8) {
+            __m128 maccum = _mm_load_ps(buf+4);
+            if( ksize > 3 ) {
+                for(int j = 3; j < ksize; j += 4) {
+                    float* psrc = buf + 8*j;
+                    __m128 mkerall = _mm_load_ps(kernel+j);
+                    __m128 mnew0 = _mm_load_ps(psrc);
+                    mker0 = _mm_shuffle_ps(mkerall,mkerall,0);
+                    __m128 mnew1 = _mm_load_ps(psrc + 8);
+                    mker1 = _mm_shuffle_ps(mkerall,mkerall,0x55);
+                    __m128 mnew2 = _mm_load_ps(psrc + 16);
+                    maccum = _mm_add_ps(maccum,_mm_mul_ps(mker0,mnew0));
+                    maccum = _mm_add_ps(maccum,_mm_mul_ps(mker1,mnew1));
+                    __m128 mnew3 = _mm_load_ps(psrc + 24);
+                    mker2 = _mm_shuffle_ps(mkerall,mkerall,0xaa);
+                    maccum = _mm_add_ps(maccum,_mm_mul_ps(mker2,mnew2));
+                    maccum = _mm_add_ps(maccum,_mm_mul_ps(_mm_shuffle_ps(mkerall,mkerall,0xff),mnew3));
+                }
+            }
+
+#ifdef ALIGNED_IMAGE_ROWS
+            _mm_store_ps(pixels+i*stride,maccum);
+#else
+            if( j <= stride-4 )
+                _mm_storeu_ps(pixels+i*stride,maccum);
+            else {
+                _mm_store_ps(ac.faccum[0],maccum);
+                for(int k = 0; k < ((stride-j)&3); ++k)
+                    pixels[i*stride+k] = ac.faccum[0][k];
+            }
+#endif
+        }
+
+#ifdef _OPENMP
+        #pragma omp critical
+        {
+            s_listconvbuf.push_back(pconvbuf);
+        }
+#endif
+    }
+}
+
+#endif
+
+Keypoint FindMaxMin(Image* imdiff, Image* imgaus, float fscale, Keypoint keypts,unsigned int nbScales)
+{
+    DVSTARTPROFILE();
+
+    int rows = imdiff[0]->rows, cols = imdiff[0]->cols, stride = imdiff[0]->stride;
+    memset(s_MaxMinArray,0,rows*cols);
+
+    for( int index = 1; index < (int)nbScales+1; ++index) {
+        
+#if !defined(_MSC_VER) && defined(__SSE__)
+        GradOriImagesFast(imgaus[index],s_imgrad,s_imorient);
+#else
+        GradOriImages(imgaus[index],s_imgrad,s_imorient);
+#endif
+        assert( imdiff[index]->stride == stride );
+        float* _diffpixels = imdiff[index]->pixels;
+        
+//        for(int i = 0; i < rows; ++i) {
+//            for(int j = 0; j < cols; ++j) {
+//                if( isnan(imgaus[index]->pixels[i*cols+j]) ) {
+//                    fprintf(stderr, "gaus: %d %d %d %d %d %f %f\n", index,i,j,rows,cols,s_imgrad->pixels[i*cols+j],s_imorient->pixels[i*cols+j]);
+//                    //exit(0);
+//                }
+////                if( isnan(s_imorient->pixels[i*cols+j]) ) {
+////                    //GradOriImagesFast(imgaus[index],s_imgrad,s_imorient);
+////                    fprintf(stderr,"rc %d %d\n",rows,cols);
+////                    fprintf(stderr,"wtf %d %d %f %f %f %f\n",i,j, s_imgrad->pixels[i*cols+j], imgaus[index]->pixels[i*cols+j], imgaus[index]->pixels[i*cols+j-1], imgaus[index]->pixels[i*cols+j+1]);
+////                    fprintf(stderr,"%f %f\n",imgaus[index]->pixels[(i-1)*cols+j], imgaus[index]->pixels[(i+1)*cols+j]);
+////                    exit(0);
+////                }
+//            }
+//        }
+        
+        #pragma omp parallel for schedule(dynamic,8)
+        for( int rowstart = 5; rowstart < rows-5; ++rowstart ) {
+            Keypoint newkeypts = NULL;
+            float* diffpixels = _diffpixels + rowstart*stride;
+            for( int colstart = 5; colstart < cols-5; ++colstart ) {
+                   
+                float fval = diffpixels[colstart];
+                if( fabsf(fval) > PeakThresh*0.8f ) {
+                    if( LocalMaxMin(fval, imdiff[index],rowstart,colstart) &&
+                        LocalMaxMin(fval, imdiff[index-1],rowstart,colstart) &&
+                        LocalMaxMin(fval, imdiff[index+1],rowstart,colstart) &&
+                        NotOnEdge(imdiff[index],rowstart,colstart) ) {
+                        newkeypts = InterpKeyPoint(imdiff,index,rowstart,colstart,s_imgrad,s_imorient,s_MaxMinArray,fscale,newkeypts,5,nbScales);
+                    }
+                }
+            }
+
+            if( newkeypts != NULL ) {
+                // find the last keypoint
+                Keypoint lastkeypt = newkeypts;
+                while(lastkeypt->next)
+                    lastkeypt = lastkeypt->next;;
+            
+                #pragma omp critical
+                {
+                    lastkeypt->next = keypts;
+                    keypts = newkeypts;
+                }
+            }
+        }
+    }
+
+    return keypts;
+}
+
+void GradOriImages(Image image, Image imgrad, Image imorient)
+{
+    DVSTARTPROFILE();
+    
+    int rows = image->rows, cols = image->cols, stride = image->stride;
+    float* _pixels = image->pixels, *_pfgrad = imgrad->pixels, *_pforient = imorient->pixels;
+    float fdiffc, fdiffr;
+
+    #pragma omp parallel for schedule(dynamic,16) // might crash Matlab mex files
+    for(int i = 0; i < rows; ++i) {
+        float* pixels = _pixels + i*stride;
+        float* pfgrad = _pfgrad + i*stride;
+        float* pforient = _pforient + i*stride;
+
+        for(int j = 0; j < cols; ++j) {
+            if( j == 0 )
+                fdiffc = 2.0f*(pixels[1]-pixels[0]);
+            else if( j == cols-1 )
+                fdiffc = 2.0f*(pixels[j]-pixels[j-1]);
+            else
+                fdiffc = pixels[j+1] - pixels[j-1];
+
+            if( i == 0 )
+                fdiffr = 2.0f*(pixels[j] - pixels[stride+j]);
+            else if( i == rows-1 )
+                fdiffr = 2.0f*(pixels[-stride+j] - pixels[j]);
+            else
+                fdiffr = pixels[-stride+j] - pixels[stride+j];
+
+            pfgrad[j] = sqrtf(fdiffc*fdiffc + fdiffr*fdiffr);
+            pforient[j] = atan2f(fdiffr,fdiffc);
+        }
+    }
+}
+
+#if !defined(_MSC_VER) && defined(__SSE__)
+void GradOriImagesFast(Image image, Image imgrad, Image imorient)
+{
+    DVSTARTPROFILE();
+    
+    int rows = image->rows, cols = image->cols, stride = image->stride;
+    float* _pixels = image->pixels, *_pfgrad = imgrad->pixels, *_pforient = imorient->pixels;
+    int endcol = ((cols-1)&~3);
+    
+    {
+        // first row is special 2*(_pixels[0]-_pixels[stride])
+        float fdiffc, fdiffr;
+
+        // first and last elt is 2*([1]-[0]), have to improvise for sse
+        __m128 mprevj = _mm_set_ps(_pixels[2],_pixels[1],_pixels[0],2.0f*_pixels[0]-_pixels[1]);
+        
+        for(int j = 0; j < endcol; j += 4) {
+            float* pf = _pixels+j;
+            __m128 mnewj = _mm_loadu_ps(pf+3);
+            __m128 mgradr = _MM_LOAD_ALIGNED(pf);
+            __m128 mgradc = _mm_sub_ps(_mm_shuffle_ps(mprevj,mnewj,0x4e),mprevj);
+            mgradr = _mm_sub_ps(mgradr, _MM_LOAD_ALIGNED(pf+stride));
+            mgradr = _mm_add_ps(mgradr,mgradr);
+            
+            __m128 mrad = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(mgradr,mgradr),_mm_mul_ps(mgradc,mgradc)));
+            __m128 morient = atan2f4(mgradr,mgradc);
+            
+            _MM_STORE_ALIGNED(_pfgrad+j,mrad);
+            mprevj = mnewj;
+            _MM_STORE_ALIGNED(_pforient+j,morient);
+        }
+
+        
+        // compute the rest the old way
+        for(int j = endcol; j < cols; ++j) {
+            if( j == 0 )
+                fdiffc = 2.0f*(_pixels[1]-_pixels[0]);
+            else if( j == cols-1 )
+                fdiffc = 2.0f*(_pixels[j]-_pixels[j-1]);
+            else
+                fdiffc = _pixels[j+1] - _pixels[j-1];
+
+            fdiffr = 2.0f*(_pixels[j] - _pixels[stride+j]);
+
+            _pfgrad[j] = sqrtf(fdiffc*fdiffc + fdiffr*fdiffr);
+            _pforient[j] = atan2f(fdiffr,fdiffc);
+        }
+    }
+
+    #pragma omp parallel for schedule(dynamic,16) // might crash Matlab mex files
+    for(int i = 1; i < rows-1; ++i) {
+
+        float fdiffc, fdiffr;
+        float* pixels = _pixels + i*stride;
+        float* pfgrad = _pfgrad + i*stride;
+        float* pforient = _pforient + i*stride;
+
+        // first and last elt is 2*([1]-[0]), have to improvise for sse
+        __m128 mprevj = _mm_set_ps(pixels[2],pixels[1],pixels[0],2.0f*pixels[0]-pixels[1]);
+
+        for(int j = 0; j < endcol; j += 4) {
+            float* pf = pixels+j;
+            __m128 mnewj = _mm_loadu_ps(pf+3);
+            __m128 mgradr = _MM_LOAD_ALIGNED(pf-stride);
+            __m128 mgradc = _mm_sub_ps(_mm_shuffle_ps(mprevj,mnewj,0x4e),mprevj);
+            mgradr = _mm_sub_ps(mgradr,_MM_LOAD_ALIGNED(pf+stride));
+            
+            __m128 mrad = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(mgradr,mgradr),_mm_mul_ps(mgradc,mgradc)));
+            __m128 morient = atan2f4(mgradr,mgradc);
+            
+            _MM_STORE_ALIGNED(pfgrad+j,mrad);
+            mprevj = mnewj;
+            _MM_STORE_ALIGNED(pforient+j,morient);
+        }
+
+        assert( i != 0 && i != rows-1 );
+        // compute the rest the old way
+        for(int j = endcol; j < cols; ++j) {
+            if( j == cols-1 )
+                fdiffc = 2.0f*(pixels[j]-pixels[j-1]);
+            else
+                fdiffc = pixels[j+1] - pixels[j-1];
+
+            fdiffr = pixels[-stride+j] - pixels[stride+j];
+
+            pfgrad[j] = sqrtf(fdiffc*fdiffc + fdiffr*fdiffr);
+            pforient[j] = atan2f(fdiffr,fdiffc);
+        }
+    }
+
+    {
+        float fdiffc, fdiffr;
+        float* pixels = _pixels + (rows-1)*stride;
+        float* pfgrad = _pfgrad + (rows-1)*stride;
+        float* pforient = _pforient + (rows-1)*stride;
+
+        // last row is special 2*(pixels[stride*(cols-1)]-pixels[stride*(cols-2)])
+        // first and last elt is 2*([1]-[0]), have to improvise for sse
+        __m128 mprevj = _mm_set_ps(pixels[2],pixels[1],pixels[0],2.0f*pixels[0]-pixels[1]);
+
+        for(int j = 0; j < endcol; j += 4) {
+            float* pf = pixels+j;
+            __m128 mnewj = _mm_loadu_ps(pf+3);
+            __m128 mgradr = _MM_LOAD_ALIGNED(pf-stride);
+            __m128 mgradc = _mm_sub_ps(_mm_shuffle_ps(mprevj,mnewj,0x4e),mprevj);
+            mgradr = _mm_sub_ps(mgradr,_MM_LOAD_ALIGNED(pf));
+            mgradr = _mm_add_ps(mgradr,mgradr);
+            
+            __m128 mrad = _mm_sqrt_ps(_mm_add_ps(_mm_mul_ps(mgradr,mgradr),_mm_mul_ps(mgradc,mgradc)));
+            __m128 morient = atan2f4(mgradr,mgradc);
+            
+            _MM_STORE_ALIGNED(pfgrad+j,mrad);
+            mprevj = mnewj;
+            _MM_STORE_ALIGNED(pforient+j,morient);
+        }
+
+        // compute the rest the old way
+        for(int j = endcol; j < cols; ++j) {
+            if( j == cols-1 )
+                fdiffc = 2.0f*(pixels[j]-pixels[j-1]);
+            else
+                fdiffc = pixels[j+1] - pixels[j-1];
+
+            fdiffr = 2.0f*(pixels[-stride+j] - pixels[j]);
+
+            pfgrad[j] = sqrtf(fdiffc*fdiffc + fdiffr*fdiffr);
+            pforient[j] = atan2f(fdiffr,fdiffc);
+        }
+    }
+}
+#endif
+
+int LocalMaxMin(float fval, Image imdiff, int rowstart, int colstart)
+{
+    int stride = imdiff->stride;
+    float* pixels = imdiff->pixels;
+
+    if( fval > 0 ) {
+        for( int row = rowstart-1; row <= rowstart+1; ++row) {
+            float* pf = pixels + row*stride + colstart - 1;
+            if( pf[0] > fval || pf[1] > fval || pf[2] > fval )
+                return 0;
+        }
+    }
+    else {
+        for( int row = rowstart-1; row <= rowstart+1; ++row) {
+            float* pf = pixels + row*stride + colstart-1;
+            if( fval > pf[0] || fval > pf[1] || fval > pf[2] )
+                return 0;
+        }
+    }
+
+    return 1;
+}
+
+int NotOnEdge(Image imdiff, int row, int col)
+{
+    // probably wrong, something to do with plane normals?
+    int stride = imdiff->stride;
+    float* pixels = imdiff->pixels + row*stride;
+    float f1 = pixels[-stride+col] - pixels[col]*2 + pixels[stride+col];
+    float f2 = pixels[col-1] - pixels[col]*2 + pixels[col+1];
+    float f3 = pixels[stride+col+1] - pixels[stride+col-1];
+    float f4 = pixels[-stride+col+1] - pixels[-stride+col-1];
+    float f5 = (f3 - f4)*0.25f;
+    float f6 = f1*f2 - f5*f5;
+    float f8 = f1+f2;
+    return f6*11*11 > f8*f8*10;
+}
+
+Keypoint InterpKeyPoint(Image* imdiff, int index, int rowstart, int colstart,
+                        Image imgrad, Image imorient, char* pMaxMinArray, float fscale,Keypoint keypts, int steps, unsigned int nbScales)
+{
+    float X[3];
+    float fquadvalue = FitQuadratic(X, imdiff, index, rowstart, colstart);
+
+    int newrow = rowstart;
+    int newcol = colstart;
+    if( X[1] > 0.6f && rowstart < imdiff[0]->rows-3 )
+        newrow++;
+    if( X[1] < -0.6f && rowstart>3 )
+        newrow--;
+    if( X[2] > 0.6f && colstart < imdiff[0]->cols-3 )
+        newcol++;
+    if( X[2] < -0.6f && colstart>3 )
+        newcol--;
+
+    // check if local min/max is stable at (newrow,newcol). If not and steps haven't surprassed,
+    // recompute at new location
+    if( steps > 0 && (newrow != rowstart || newcol != colstart) )
+        return InterpKeyPoint(imdiff,index,newrow,newcol,imgrad,imorient,pMaxMinArray,fscale,keypts,steps-1,nbScales);
+
+    if(fabsf(X[0]) <= 1.5f && fabsf(X[1]) <= 1.5f && fabsf(X[2]) <= 1.5f && fabsf(fquadvalue) >= PeakThresh ) {
+        
+        char* pmaxmin = pMaxMinArray + rowstart*imgrad->cols+colstart;
+        bool bgetkeypts = false;
+        #pragma omp critical
+        {
+            if( !pmaxmin[0] ) {
+                bgetkeypts = true;
+                pmaxmin[0] = 1;
+            }
+        }
+        
+        if( bgetkeypts ) {
+            float fSize = InitSigma * powf(2.0f,((float)index + X[0])/(float)nbScales);
+            return AssignOriHist(imgrad,imorient,fscale,fSize,(float)rowstart+X[1],(float)colstart+X[2],keypts);
+        }
+    }
+
+    return keypts;
+}
+
+// fits a quadratic to a 3x3x3 box, returns the value of the quadratic at the center
+float FitQuadratic(float* X, Image* imdiff, int index, int r, int c)
+{
+    float H[9];
+    int stride = imdiff[index-1]->stride;
+    assert( stride == imdiff[index]->stride && stride == imdiff[index+1]->stride );
+    float* pixels0 = imdiff[index-1]->pixels + r*stride;
+    float* pixels1 = imdiff[index]->pixels + r*stride;
+    float* pixels2 = imdiff[index+1]->pixels + r*stride;
+
+    float Y[3];
+    Y[0] = 0.5f*(pixels2[c] - pixels0[c]);
+    Y[1] = 0.5f*(pixels1[stride+c] - pixels1[-stride+c]);
+    Y[2] = 0.5f*(pixels1[c+1] - pixels1[c-1]);
+    H[0] = pixels0[c] - 2.0f*pixels1[c] + pixels2[c];
+    H[4] = pixels1[-stride+c] - 2.0f*pixels1[c] + pixels1[stride+c];
+    H[8] = pixels1[c-1] - 2.0f*pixels1[c] + pixels1[c+1];
+    H[3] = H[1] = 0.25f*((pixels2[stride+c] - pixels2[-stride+c]) - (pixels0[stride+c] - pixels0[-stride+c]));
+    H[6] = H[2] = 0.25f*((pixels2[c+1] - pixels2[c-1]) - (pixels0[c+1] - pixels0[c-1]));
+    H[7] = H[5] = 0.25f*((pixels1[stride+c+1] - pixels1[stride+c-1]) - (pixels1[-stride+c+1] - pixels1[-stride+c-1]));
+
+    X[0] = -Y[0]; X[1] = -Y[1]; X[2] = -Y[2];
+    SolveLinearSystem(X,H,3); // writes answer to X?
+    return pixels1[c] + 0.5f * (X[0]*Y[0] + X[1]*Y[1] + X[2]*Y[2]);
+}
+
+// solve for X in H*X = Y using Gauss-Jordan method
+// write the result back in Y
+void SolveLinearSystem(float* Y, float* H, int dim)
+{
+    float fmax;
+    int bestj = 0;
+
+    for(int i = 0; i < dim-1; ++i) {
+
+        fmax = -1;
+        for(int j = i; j < dim; ++j) {
+            //if( i < dim ) {
+            float f = H[j*dim+i];
+            if( f < 0 )
+                f = -f;
+            if( f > fmax ) {
+                fmax = f;
+                bestj = j;
+            }
+        }
+
+        if( bestj != i ) {
+            for(int j = 0; j < dim; ++j)
+                swap(H[bestj*dim+j], H[i*dim+j]);
+            swap(Y[bestj],Y[i]);
+        }
+
+        for(int j = i+1; j < dim; ++j) {
+            float f = H[j*dim+i]/H[i*dim+i];
+            for(int k = i; k < dim; ++k)
+                H[j*dim+k] -= f*H[i*dim+k];
+            Y[j] -= Y[i]*f;
+        }
+    }
+    
+    // extract solution
+    for(int i = dim-1; i >= 0; --i) {
+        for(int j = dim-1; j > i; --j)
+            Y[i] -= Y[j]*H[i*dim+j];
+        Y[i] /= H[i*dim+i];
+    }
+}
+
+Keypoint AssignOriHist(Image imgrad, Image imorient, float fscale, float fSize,
+                       float frowstart, float fcolstart, Keypoint keypts)
+{
+    int rowstart = (int)(frowstart+0.5f);
+    int colstart = (int)(fcolstart+0.5f);
+    int rows = imgrad->rows, cols = imgrad->cols, stride = imgrad->stride;
+    float SIFT_ALIGNED16(hists[36]);
+    float fexpmult = -1.0f / (2.0f*1.5f*1.5f*fSize*fSize);
+    memset(hists,0,sizeof(hists));
+
+    const float fbinmult = 36.0f/(2*PI);
+    const float fbinadd = (float)(PI+0.001f)*fbinmult;
+
+    // gather votes for orientation
+    int windowsize = (int)(fSize*1.5f*3.0f);
+    float* pfgrad = imgrad->pixels + (rowstart-windowsize)*stride, *pforient = imorient->pixels;
+    for( int rowcur = rowstart-windowsize; rowcur <= rowstart+windowsize; ++rowcur, pfgrad += stride ) {
+
+        if( rowcur < 0 || rowcur >= rows-2 )
+            continue;
+
+        for( int colcur = colstart-windowsize; colcur <= colstart+windowsize; ++colcur ) {
+            
+            if( colcur < 0 || colcur >= cols-2 )
+                continue;
+
+            float fdx = pfgrad[colcur];
+            if( fdx > 0 ) {
+                float fdrow = (float)rowcur-frowstart, fdcol = (float)colcur-fcolstart;
+                float fradius2 = fdrow*fdrow+fdcol*fdcol;
+            
+                if( (float)(windowsize*windowsize) + 0.5f > fradius2 ) {
+                    float fweight = expf(fradius2*fexpmult);
+                    int binindex = (int)(pforient[rowcur*stride+colcur]*fbinmult+fbinadd);
+                    
+                    // there is a bug in pforient where it could be 2*PI sometimes
+                    if( binindex > 36 ) {
+                        //if( binindex != 54 )
+                            fprintf(stderr,"bin %d\n",binindex);
+                        binindex = 0;
+                    }
+                        
+                    assert( binindex >= 0 && binindex <= 36 );
+                    if( binindex == 36 )
+                        binindex = 35;
+                
+                    hists[binindex] += fdx*fweight;
+                }
+            }
+        }
+    }
+
+    // pick an orientation with the highest votes
+    for(int i = 0; i < 6; ++i)
+        SmoothHistogram(hists,36);
+    
+#ifdef __SSE__
+    float SIFT_ALIGNED16(fmaxval);
+    __m128 m0 = _mm_load_ps(&hists[0]);
+    __m128 m1 = _mm_load_ps(&hists[4]);
+    __m128 m2 = _mm_load_ps(&hists[8]);
+    __m128 m3 = _mm_load_ps(&hists[12]);
+    m0 = _mm_max_ps(m0,m1);
+    m2 = _mm_max_ps(m2,m3);
+    __m128 m4 = _mm_load_ps(&hists[16]);
+    __m128 m5 = _mm_load_ps(&hists[20]);
+    __m128 m6 = _mm_load_ps(&hists[24]);
+    __m128 m7 = _mm_load_ps(&hists[28]);
+    m4 = _mm_max_ps(m4,m5);
+    m6 = _mm_max_ps(m6,m7);
+    m0 = _mm_max_ps(m0,_mm_load_ps(&hists[32]));
+    m2 = _mm_max_ps(m2,_mm_max_ps(m4,m6));
+    m0 = _mm_max_ps(m0,m2);
+    m0 = _mm_max_ps(m0, _mm_shuffle_ps(m0,m0,0x4e));
+    _mm_store_ss(&fmaxval,_mm_max_ps(m0, _mm_shuffle_ps(m0,m0,0x11)));
+#else
+    float fmaxval = 0;
+    for(int i = 0; i < 36; ++i) {
+        if( hists[i] > fmaxval )
+            fmaxval = hists[i];
+    }
+#endif
+    
+    fmaxval *= 0.8f;
+    const float foriadd = 0.5f*2*PI/36.0f - PI, forimult = 2*PI/36.0f;
+    
+    int previndex = 35;
+    for(int index = 0; index < 36; ++index) {
+        if( index != 0 )
+            previndex = index-1;
+    
+        int nextindex = 0;
+        if( index != 35 )
+            nextindex = index+1;
+    
+        if( hists[index] <= hists[previndex] || hists[index] <= hists[nextindex] || hists[index] < fmaxval )
+            continue;
+
+        float fpeak = InterpPeak(hists[previndex],hists[index],hists[nextindex]);
+        float forient = (index + fpeak)*forimult + foriadd;
+        assert( forient >= -PI && forient <= PI ); // error, bad orientation
+
+        keypts = MakeKeypoint(imgrad,imorient,fscale,fSize,frowstart,fcolstart,forient,keypts);
+    }
+    
+    return keypts;
+}
+
+float InterpPeak(float f0, float f1, float f2)
+{
+    if( f1 < 0 ) {
+        f0 = -f0;
+        f1 = -f1;
+        f2 = -f2;
+    }
+    assert( f1 >= f0 && f1 > f2 );
+    return 0.5f*(f0 - f2) / (f0 - 2.0f*f1 + f2);
+}
+
+void SmoothHistogram(float* phist, int numbins)
+{
+    float ffirst = phist[0];
+    float fprev = phist[numbins-1];
+    for(int i = 0; i < numbins-1; ++i) {
+        float forg = phist[i];
+        phist[i] = (fprev + forg + phist[i+1])*0.33333333f;
+        fprev = forg;
+    }
+
+    // TODO: replace phist[0] with ffirst
+    phist[numbins-1] = (fprev+phist[numbins-1]+ffirst)*0.3333333f;
+}
+
+Keypoint MakeKeypoint(Image imgrad, Image imorient, float fscale, float fSize,
+                      float frowstart,float fcolstart,float forient, Keypoint keypts)
+{
+    Keypoint pnewkeypt;
+    #pragma omp critical
+    {
+        if( s_listKeypoints.size() > 0 ) {
+            pnewkeypt = s_listKeypoints.back();
+            s_listKeypoints.pop_back();
+        }
+        else
+            pnewkeypt = (Keypoint)sift_aligned_malloc(sizeof(KeypointSt),16);
+    }
+
+    pnewkeypt->next = keypts;
+    pnewkeypt->ori = forient;
+    pnewkeypt->row = fscale*frowstart;
+    pnewkeypt->col = fscale*fcolstart;
+    pnewkeypt->scale = fscale*fSize;
+    MakeKeypointSample(pnewkeypt,imgrad,imorient,fSize,frowstart,fcolstart);
+    
+    return pnewkeypt;
+}
+
+void MakeKeypointSample(Keypoint pkeypt, Image imgrad, Image imorient,
+                        float fSize, float frowstart, float fcolstart)
+{
+    float* fdesc = pkeypt->descrip;
+    memset(fdesc,0,sizeof(float)*128);
+    KeySample(fdesc, pkeypt, imgrad, imorient, fSize, frowstart, fcolstart);
+
+#ifdef __SSE__
+    __m128 maccum0 = _mm_load_ps(fdesc);
+    __m128 maccum1 = _mm_load_ps(fdesc+4);
+    maccum0 = _mm_mul_ps(maccum0,maccum0);
+    maccum1 = _mm_mul_ps(maccum1,maccum1);
+    for(int i = 8; i < 128; i += 8 ) {
+        __m128 m0 = _mm_load_ps(fdesc+i);
+        __m128 m1 = _mm_load_ps(fdesc+i+4);
+        maccum0 = _mm_add_ps(maccum0,_mm_mul_ps(m0,m0));
+        maccum1 = _mm_add_ps(maccum1,_mm_mul_ps(m1,m1));
+    }
+
+    maccum0 = _mm_add_ps(maccum0,maccum1);    
+#ifdef __SSE3__
+    maccum0 = _mm_hadd_ps(maccum0,maccum0);
+    maccum0 = _mm_hadd_ps(maccum0,maccum0);
+#else
+    maccum0 = _mm_add_ps(maccum0,_mm_shuffle_ps(maccum0,maccum0,0x4e));
+    maccum0 = _mm_add_ss(maccum0,_mm_shuffle_ps(maccum0,maccum0,0x55));
+#endif
+    
+    float fthresh;
+    float SIFT_ALIGNED16(flength2);
+    _mm_store_ss(&flength2, maccum0);
+    fthresh = 0.2f*sqrtf(flength2);
+
+    for(int i = 0; i < 128; ++i) {
+        if( fdesc[i] > fthresh ) {
+            flength2 += fthresh*fthresh-fdesc[i]*fdesc[i];
+            fdesc[i] = fthresh;
+        }
+    }
+
+    // normalizing
+    float flength = 1.0f/sqrtf(flength2);
+    maccum0 = _mm_load1_ps(&flength);
+    for(int i = 0; i < 128; i += 16 ) {
+        __m128 m0 = _mm_load_ps(fdesc+i);
+        __m128 m1 = _mm_load_ps(fdesc+i+4);
+        __m128 m2 = _mm_load_ps(fdesc+i+8);
+        __m128 m3 = _mm_load_ps(fdesc+i+12);
+        _mm_store_ps(fdesc+i,_mm_mul_ps(m0,maccum0));
+        _mm_store_ps(fdesc+i+4,_mm_mul_ps(m1,maccum0));
+        _mm_store_ps(fdesc+i+8,_mm_mul_ps(m2,maccum0));
+        _mm_store_ps(fdesc+i+12,_mm_mul_ps(m3,maccum0));
+    }
+
+    // converting to unsigned char
+//    float flength = 512.0f/sqrtf(flength2);
+//    maccum0 = _mm_load1_ps(&flength);
+//    unsigned char* pkeydesc = pkeypt->descrip;
+//    
+//    for(int i = 0; i < 128; i += 16 ) {
+//        __m128 m0 = _mm_load_ps(fdesc+i);
+//        __m128 m1 = _mm_load_ps(fdesc+i+4);
+//        __m128 m2 = _mm_load_ps(fdesc+i+8);
+//        __m128 m3 = _mm_load_ps(fdesc+i+12);
+//        __m128i mi0 = _mm_cvttps_epi32(_mm_mul_ps(m0,maccum0));
+//        __m128i mi1 = _mm_cvttps_epi32(_mm_mul_ps(m1,maccum0));
+//        __m128i mi2 = _mm_cvttps_epi32(_mm_mul_ps(m2,maccum0));
+//        __m128i mi3 = _mm_cvttps_epi32(_mm_mul_ps(m3,maccum0));
+//        _mm_store_si128((__m128i*)(pkeydesc+i), _mm_packus_epi16(_mm_packs_epi32(mi0,mi1),_mm_packs_epi32(mi2,mi3)));
+//    }
+#else
+    NormalizeVec(fdesc,128);
+    
+    bool brenormalize = false;
+    for(int i = 0; i < 128; ++i) {
+        if( fdesc[i] > 0.2f ) {
+            fdesc[i] = 0.2f;
+            brenormalize = true;
+        }
+    }
+    
+    if( brenormalize )
+        NormalizeVec(fdesc,128);
+#endif
+}
+
+void NormalizeVec(float* pf, int num)
+{
+    assert( (num&3) == 0 );
+    float faccum = 0;
+    for(int i = 0; i < num; ++i)
+        faccum += pf[i]*pf[i];
+    faccum = 1/sqrtf(faccum);
+    for(int i = 0; i < num; ++i)
+        pf[i] *= faccum;
+}
+
+void KeySample(float* fdesc, Keypoint pkeypt, Image imgrad, Image imorient,
+               float fSize, float frowstart, float fcolstart)
+{
+    int rowstart = (int)(frowstart+0.5f);
+    int colstart = (int)(fcolstart+0.5f);
+    float sinang = sinf(pkeypt->ori), cosang = cosf(pkeypt->ori);
+    float fdrow = frowstart-(float)rowstart;
+    float fdcol = fcolstart-(float)colstart;
+    float frealsize = 3.0f*fSize;
+    float firealsize = 1.0f/(3.0f*fSize);
+    int windowsize = (int)(frealsize*SQRT2*5.0f*0.5f+0.5f);
+    
+    float fsr = sinang*firealsize, fcr = cosang*firealsize, fdrr = -fdrow*firealsize, fdcr = -fdcol*firealsize;
+
+    for(int row = -windowsize; row <= windowsize; ++row) {
+//#ifdef _OPENMP
+//        float SIFT_ALIGNED16(fnewdesc[128]) = {0};
+//        bool badd = false;
+//#else
+        float* fnewdesc = fdesc;
+//#endif
+        
+        float frow = (float)row;
+        float fcol = -(float)windowsize;
+        for(int col = -windowsize; col <= windowsize; ++col, fcol += 1) {
+            float rpos = fsr*fcol + fcr*frow + fdrr;
+            float cpos = fcr*fcol - fsr*frow + fdcr;
+            float rx = rpos + (2.0f - 0.5f);
+            float cx = cpos + (2.0f - 0.5f);
+            
+            if( rx > -0.9999f && rx < 3.9999f && cx > -0.9999f && cx < 3.9999f ) {
+                AddSample(fnewdesc, pkeypt, imgrad, imorient, rowstart+row, colstart+col, rpos, cpos, rx, cx);
+//#ifdef _OPENMP
+//                badd = true;
+//#endif
+            }
+        }
+
+//#ifdef _OPENMP
+//        if( badd ) {
+//            #pragma omp critical
+//            {
+//#ifdef __SSE__
+//                for(int j = 0; j < 128; j += 16) {
+//                    _mm_store_ps(&fdesc[j], _mm_add_ps(_mm_load_ps(&fdesc[j]), _mm_load_ps(&fnewdesc[j])));
+//                    _mm_store_ps(&fdesc[j+4], _mm_add_ps(_mm_load_ps(&fdesc[j+4]), _mm_load_ps(&fnewdesc[j+4])));
+//                    _mm_store_ps(&fdesc[j+8], _mm_add_ps(_mm_load_ps(&fdesc[j+8]), _mm_load_ps(&fnewdesc[j+8])));
+//                    _mm_store_ps(&fdesc[j+12], _mm_add_ps(_mm_load_ps(&fdesc[j+12]), _mm_load_ps(&fnewdesc[j+12])));
+//                }
+//#else
+//                for(int j = 0; j < 128; ++j)
+//                    fdesc[j] += fnewdesc[j];
+//#endif
+//            }
+//        }
+//#endif
+    }
+}
+
+void AddSample(float* fdesc, Keypoint pkeypt, Image imgrad, Image imorient, int r, int c,
+               float rpos, float cpos, float rx, float cx)
+{
+    int rows = imgrad->rows, cols = imgrad->cols, stride = imgrad->stride;
+    if( r < 0 || r >= rows || c < 0 || c >= cols )
+        return;
+    
+    float fgrad = imgrad->pixels[r*stride+c] * expf(-0.125f*(rpos*rpos+cpos*cpos));
+    float forient = imorient->pixels[r*stride+c] - pkeypt->ori;
+    while( forient > 2*PI )
+        forient -= 2*PI;
+    while( forient < 0 )
+        forient += 2*PI;
+
+//    if( isnan(forient) )
+//        fprintf(stderr,"%f %f (%d,%d,%d,%d)\n", imorient->pixels[r*stride+c],pkeypt->ori,r,c,rows,cols);
+
+    PlaceInIndex(fdesc, fgrad, forient, rx, cx);
+}
+
+void PlaceInIndex(float* fdesc, float mag, float ori, float rx, float cx)
+{
+    float oribin = ori*(8.0f/(2*(float)PI));
+    int newrow, newcol, neworient;
+    float rfrac, cfrac, ofrac;
+    if( rx < 0 ) {
+        newrow = (int)(rx-1);
+    }
+    else {
+        newrow = (int)rx;
+    }
+    rfrac = rx-(float)newrow;
+
+    if( cx < 0 )
+        newcol = (int)(cx-1);
+    else
+        newcol = (int)cx;
+    cfrac = cx - (float)newcol;
+
+    if( oribin < 0 )
+        neworient = (int)(oribin-1);
+    else
+        neworient = (int)oribin;
+    ofrac = oribin-(float)neworient;
+
+    assert( newrow >= -1 && newrow < 4 && neworient >= 0 && neworient <= 8 && rfrac >= 0 && rfrac < 1);
+    
+    for(int i = 0; i < 2; ++i) {
+        if( (unsigned int)(i+newrow) >= 4 )
+            continue;
+        
+        float frowgrad;
+        if( i == 0 )
+            frowgrad = mag*(1-rfrac);
+        else
+            frowgrad = mag*rfrac;
+        
+        for(int j = 0; j < 2; ++j) {
+            if( (unsigned int)(j+newcol) >= 4 )
+                continue;
+
+            float fcolgrad;
+            if( j == 0 )
+                fcolgrad = frowgrad*(1-cfrac);
+            else
+                fcolgrad = frowgrad*cfrac;
+            
+            float* pfdescorient = fdesc + 8*(4*(i+newrow)+j+newcol);
+            for(int k = 0; k < 2; ++k) {
+                float forigrad;
+                if( k == 0 )
+                    forigrad = fcolgrad*(1-ofrac);
+                else
+                    forigrad = fcolgrad*ofrac;
+
+                pfdescorient[(neworient+k)&7] += forigrad;
+            }
+        }
+    }
+}
+
+void FreeKeypoints(Keypoint keypt)
+{
+    while(keypt != NULL) {
+        s_listKeypoints.push_back(keypt);
+        keypt = keypt->next;
+    }
+}
+
+void DestroyAllResources()
+{
+    DestroyAllImages();
+    for( map<float, float* >::iterator it = s_mapkernel.begin(); it != s_mapkernel.end(); ++it)
+        sift_aligned_free(it->second-1);
+    s_mapkernel.clear();
+#ifdef __SSE__
+    for(LISTBUF::iterator it = s_listconvbuf.begin(); it != s_listconvbuf.end(); ++it)
+        sift_aligned_free(*it);
+    s_listconvbuf.clear();
+    s_convbufsize = 0;
+#endif
+    for(list<Keypoint>::iterator it = s_listKeypoints.begin(); it != s_listKeypoints.end(); ++it)
+        sift_aligned_free(*it);
+    s_listKeypoints.clear();
+}
+
+#if !defined(_MSC_VER) && defined(__SSE__) && !defined(SIMDMATH_H) // copied from libsimdmath
+
+#define DEF_CONST(a,b) static  const vec_float4 a = {b,b,b,b};
+#define DEI_CONST(a,b) static  const vec_int4   a = {b,b,b,b};
+
+
+DEF_CONST(CF4_2414213562373095, 2.414213562373095f) 
+DEF_CONST(CF4_04142135623730950, 0.4142135623730950f) 
+DEF_CONST(CF4_805374449538e_2,      8.05374449538e-2f) 
+DEF_CONST(CF4_138776856032E_1,      1.38776856032E-1f) 
+DEF_CONST(CF4_199777106478E_1,      1.99777106478E-1f) 
+DEF_CONST(CF4_333329491539E_1,      3.33329491539E-1f) 
+
+#define VEC_F2I(a,b)   asm("cvttps2dq %1, %0":"=x" (a) :"x" (b))
+#define VEC_I2F(a,b)   asm("cvtdq2ps  %1, %0":"=x" (a) :"x" (b))
+/* definitions for a &= b; etc
+gcc generates very slow code for corresponding
+vec_float4 C-style expressions
+*/
+#define VEC_AND(a,b)   asm ("andps %2, %1":"=x" (a) :"0" (a),"x" (b))
+#define VEC_NAND(a,b)  asm ("andnps %2, %1":"=x" (a) :"0" (a),"x" (b))
+#define VEC_NAND3(a,b,c)  a=(typeof(a))_mm_andnot_ps((vec_float4)(c),(vec_float4)(b))
+#define VEC_OR(a,b)    asm ("orps  %2, %1":"=x" (a) :"0" (a),"x" (b))
+#define VEC_XOR(a,b)   asm ("xorps %2, %1":"=x" (a) :"0" (a),"x" (b))
+#define VEC_SUB(a,b)   asm ("subps %2, %1":"=x" (a) :"0" (a),"x" (b))
+
+#define VEC_GT(a,b)     __builtin_ia32_cmpgtps((vec_float4)a,(vec_float4)b)
+#define VEC_LT(a,b)     __builtin_ia32_cmpltps(a,b)
+#define VEC_EQ(a,b)     __builtin_ia32_cmpeqps(a,b)
+#define VEC_NE(a,b)     __builtin_ia32_cmpneqps(a,b)
+#define VEC_GE(a,b)     __builtin_ia32_cmpgeps(a,b)
+#define VEC_SR(v,n)     _mm_srli_epi32(v,n)
+#define VEC_SL(v,n)     _mm_slli_epi32(v,n)
+
+
+#define vec_re(x)             _mm_rcp_ps(x)
+#define vec_sr(x,y)           _mm_srli_epi32(x,y)
+#define vec_sl(x,y)           _mm_slli_epi32(x,y)
+#define vec_or(a,b)           ((a)|(b))
+#define vec_and(a,b)          ((a)&(b))
+#define vec_add(a,b)          ((a)+(b))
+#define vec_madd(a,b,c)       ((a)*(b)+(c))
+#define vec_nmsub(a,b,c)      ((c)-(a)*(b))
+#define vec_splat(x,n)        (typeof(x))_mm_shuffle_ps(x,x,_MM_SHUFFLE(n,n,n,n))
+
+DEF_CONST(CF4_0,        0.0f) 
+DEF_CONST(CF4_2,        2.0f) 
+DEI_CONST(CI4_SIGN,     0x80000000u)
+DEF_CONST(CF4__1,       -1.0f) 
+DEF_CONST(CF4_1,        1.0f) 
+DEF_CONST(CF4_SMALL,    1.0E-35f) 
+DEF_CONST(CF4_PIO2F,    1.570796326794896619f) 
+DEF_CONST(CF4_PIO4F,    0.7853981633974483096f) 
+DEF_CONST(CF4_PIF,      3.14159265358979323846f)
+
+inline vec_int4  __attribute__((__always_inline__))
+      _signf4( vec_float4 x ){
+      vec_int4 a = CI4_SIGN;
+      VEC_AND(a, x);
+      return a;
+}
+
+inline vec_float4 __attribute__((__always_inline__))
+            _atanf4( vec_float4 x ){
+      vec_float4 y, z,z1,z2;
+      vec_int4 a1, a2, a3;
+      /* make argument positive and save the sign */
+      vec_int4 sign = _signf4( x );
+      VEC_XOR(x, sign);
+      
+      /* range reduction */
+      a1 = VEC_GT (x , CF4_2414213562373095 );
+      a2 = VEC_GT (x , CF4_04142135623730950 );
+      a3 = ~a2; 
+      a2 ^= a1;
+
+      z1 = CF4__1 / (x+CF4_SMALL);
+      z2 = (x-CF4_1)/(x+CF4_1);
+      VEC_AND(z1, a1);
+      VEC_AND(z2, a2);
+      VEC_AND(x, a3);
+      VEC_OR(x, z1);
+      VEC_OR(x, z2);
+      
+      y = CF4_PIO2F;
+      z1 = CF4_PIO4F;
+      VEC_AND(y, a1);
+      VEC_AND(z1, a2);
+      VEC_OR(y, z1);
+
+      z = x * x;
+      y +=
+            ((( CF4_805374449538e_2 * z
+            - CF4_138776856032E_1) * z
+            + CF4_199777106478E_1) * z
+            - CF4_333329491539E_1) * z * x
+            + x;
+
+      VEC_XOR(y, sign);
+      return y;
+}
+
+inline vec_float4  __attribute__((__always_inline__))
+      atan2f4( vec_float4 y, vec_float4 x ){
+      vec_float4 z, w;
+      vec_float4 x_neg_PI    = CF4_PIF;
+      VEC_AND(x_neg_PI, VEC_GT( CF4_0, x ));
+      vec_float4 y_negativ_2 = CF4_2;
+      VEC_AND(y_negativ_2, VEC_GT( CF4_0, y ));
+
+      vec_int4 i_x_zero  = VEC_EQ ( CF4_0, x );
+      vec_int4 i_y_zero  = VEC_EQ ( CF4_0, y );
+      vec_float4 x_zero_PIO2 = CF4_PIO2F;
+      VEC_AND(x_zero_PIO2, i_x_zero);
+      vec_float4 y_zero    = CF4_1;
+      VEC_AND(y_zero, i_y_zero);
+
+
+      w = x_neg_PI *  ( CF4_1  - y_negativ_2 );
+
+      z = _atanf4( y / (x+x_zero_PIO2));
+      VEC_AND(z, ~(i_x_zero|i_y_zero));
+
+      return w + z + x_zero_PIO2 * ( CF4_1 - y_zero - y_negativ_2 );
+}
+
+#endif
diff --git a/Utilities/otbsiftfast/makerelease.sh b/Utilities/otbsiftfast/makerelease.sh
new file mode 100755
index 0000000000000000000000000000000000000000..9797c91466c41fa870d2b2c5a54de055c834e5a1
--- /dev/null
+++ b/Utilities/otbsiftfast/makerelease.sh
@@ -0,0 +1,9 @@
+version=1.0
+rm -rf libsiftfast-$version libsiftfast-$version-src
+make prefix=`pwd`/libsiftfast-$version
+make install
+cp README libsiftfast-$version/
+tar czf libsiftfast-$version-linux-i386.tgz libsiftfast-$version
+svn export . libsiftfast-$version-src/
+tar czf libsiftfast-$version-src.tgz libsiftfast-$version-src
+rm -rf libsiftfast-$version libsiftfast-$version-src
\ No newline at end of file
diff --git a/Utilities/otbsiftfast/profiler.cpp b/Utilities/otbsiftfast/profiler.cpp
new file mode 100755
index 0000000000000000000000000000000000000000..f38cb62b1ab7bbd713723878bf0e57ab965f8495
--- /dev/null
+++ b/Utilities/otbsiftfast/profiler.cpp
@@ -0,0 +1,247 @@
+#include "profiler.h"
+
+////////////////////
+// Small profiler //
+////////////////////
+#include <list>
+#include <string>
+#include <map>
+#include <sys/timeb.h>    // ftime(), struct timeb
+#include <sys/time.h>
+
+using namespace std;
+
+int g_bWriteProfile=1;
+
+static u64 luPerfFreq = 1000000;
+inline u64 GET_PROFILE_TIME()
+{
+    struct timeval t;
+    gettimeofday(&t, NULL);
+    return (u64)t.tv_sec*1000000+t.tv_usec;
+}
+
+
+struct DVPROFSTRUCT;
+
+struct DVPROFSTRUCT
+{
+    struct DATA
+    {
+        DATA(u64 time, u32 user = 0) : dwTime(time), dwUserData(user) {}
+        DATA() : dwTime(0), dwUserData(0) {}
+        
+        u64 dwTime;
+        u32 dwUserData;
+    };
+
+    ~DVPROFSTRUCT() {
+        list<DVPROFSTRUCT*>::iterator it = listpChild.begin();
+        while(it != listpChild.end() ) {
+            delete *it; *it = NULL;
+            ++it;
+        }
+    }
+
+    list<DATA> listTimes;       // before DVProfEnd is called, contains the global time it started
+                                // after DVProfEnd is called, contains the time it lasted
+                                // the list contains all the tracked times 
+    char pname[256];
+
+    list<DVPROFSTRUCT*> listpChild;     // other profilers called during this profiler period
+};
+
+struct DVPROFTRACK
+{
+    u32 dwUserData;
+    DVPROFSTRUCT::DATA* pdwTime;
+    DVPROFSTRUCT* pprof;
+};
+
+list<DVPROFTRACK> g_listCurTracking;    // the current profiling functions, the back element is the
+                                        // one that will first get popped off the list when DVProfEnd is called
+                                        // the pointer is an element in DVPROFSTRUCT::listTimes
+list<DVPROFSTRUCT> g_listProfilers;         // the current profilers, note that these are the parents
+                                            // any profiler started during the time of another is held in
+                                            // DVPROFSTRUCT::listpChild
+list<DVPROFSTRUCT*> g_listAllProfilers;     // ignores the hierarchy, pointer to elements in g_listProfilers
+
+void DVProfRegister(const char* pname)
+{
+    if( !g_bWriteProfile )
+        return;
+    list<DVPROFSTRUCT*>::iterator it = g_listAllProfilers.begin();
+    
+//  while(it != g_listAllProfilers.end() ) {
+//
+//      if( _tcscmp(pname, (*it)->pname) == 0 ) {
+//          (*it)->listTimes.push_back(timeGetTime());
+//          DVPROFTRACK dvtrack;
+//          dvtrack.pdwTime = &(*it)->listTimes.back();
+//          dvtrack.pprof = *it;
+//          g_listCurTracking.push_back(dvtrack);
+//          return;
+//      }
+//
+//      ++it;
+//  }
+
+    // else add in a new profiler to the appropriate parent profiler
+    DVPROFSTRUCT* pprof = NULL;
+    
+    if( g_listCurTracking.size() > 0 ) {
+        assert( g_listCurTracking.back().pprof != NULL );
+        g_listCurTracking.back().pprof->listpChild.push_back(new DVPROFSTRUCT());
+        pprof = g_listCurTracking.back().pprof->listpChild.back();
+    }
+    else {
+        g_listProfilers.push_back(DVPROFSTRUCT());
+        pprof = &g_listProfilers.back();
+    }
+
+    strncpy(pprof->pname, pname, 256);
+
+    // setup the profiler for tracking
+    pprof->listTimes.push_back(DVPROFSTRUCT::DATA(GET_PROFILE_TIME()));
+
+    DVPROFTRACK dvtrack;
+    dvtrack.pdwTime = &pprof->listTimes.back();
+    dvtrack.pprof = pprof;
+    dvtrack.dwUserData = 0;
+
+    g_listCurTracking.push_back(dvtrack);
+
+    // add to all profiler list
+    g_listAllProfilers.push_back(pprof);
+}
+
+void DVProfEnd(u32 dwUserData)
+{
+    if( !g_bWriteProfile )
+        return;
+    if( g_listCurTracking.size() == 0 )
+        return;
+
+    DVPROFTRACK dvtrack = g_listCurTracking.back();
+
+    assert( dvtrack.pdwTime != NULL && dvtrack.pprof != NULL );
+
+    dvtrack.pdwTime->dwTime = GET_PROFILE_TIME()- dvtrack.pdwTime->dwTime;
+    dvtrack.pdwTime->dwUserData= dwUserData;
+
+    g_listCurTracking.pop_back();
+}
+
+struct DVTIMEINFO
+{
+    DVTIMEINFO() : uInclusive(0), uExclusive(0) {}
+    u64 uInclusive, uExclusive;
+};
+
+map<string, DVTIMEINFO> mapAggregateTimes;
+
+u64 DVProfWriteStruct(FILE* f, DVPROFSTRUCT* p, int ident)
+{
+    fprintf(f, "%*s%s - ", ident, "", p->pname);
+
+    list<DVPROFSTRUCT::DATA>::iterator ittime = p->listTimes.begin();
+
+    u64 utime = 0;
+
+    while(ittime != p->listTimes.end() ) {
+//        if( ittime->dwTime > luPerfFreq*10 ) {
+//            ++ittime;
+//            continue;
+//        }
+        
+        utime += ittime->dwTime;
+        
+        if( ittime->dwUserData ) 
+            fprintf(f, "time: %d, user: 0x%8.8x", (u32)ittime->dwTime, ittime->dwUserData);
+        else
+            fprintf(f, "time: %d", (u32)ittime->dwTime);
+        ++ittime;
+    }
+
+    // yes this is necessary, maps have problems with constructors on their type
+    map<string, DVTIMEINFO>::iterator ittimes = mapAggregateTimes.find(p->pname);
+    if( ittimes == mapAggregateTimes.end() ) {
+        ittimes = mapAggregateTimes.insert(map<string, DVTIMEINFO>::value_type(p->pname, DVTIMEINFO())).first;
+        ittimes->second.uExclusive = 0;
+        ittimes->second.uInclusive = 0;
+    }
+
+    ittimes->second.uInclusive += utime;
+
+    fprintf(f, "\n");
+
+    list<DVPROFSTRUCT*>::iterator itprof = p->listpChild.begin();
+
+    u64 uex = utime;
+    while(itprof != p->listpChild.end() ) {
+        uex -= DVProfWriteStruct(f, *itprof, ident+4);
+        ++itprof;
+    }
+
+    if( uex > utime ) {
+        uex = 0;
+        //RAVEPRINT(L"profiling precision error!\n");
+    }
+
+    ittimes->second.uExclusive += uex;
+    return utime;
+}
+
+void DVProfWrite(const char* pfilename, u32 frames)
+{
+    assert( pfilename != NULL );
+    FILE* f = fopen(pfilename, "w");
+
+    // pop back any unused
+    mapAggregateTimes.clear();
+    list<DVPROFSTRUCT>::iterator it = g_listProfilers.begin();
+
+    while(it != g_listProfilers.end() ) {
+        DVProfWriteStruct(f, &(*it), 0);
+        ++it;
+    }
+
+    {
+        map<string, DVTIMEINFO>::iterator it;
+        fprintf(f, "\n\n--------------------------------------------------------------\n\n");
+
+        u64 uTotal[2]; uTotal[0] = uTotal[1] = 0;
+        double fiTotalTime[2]; fiTotalTime[0] = fiTotalTime[1] = 0;
+
+        for(it = mapAggregateTimes.begin(); it != mapAggregateTimes.end(); ++it) {
+            uTotal[0] += it->second.uExclusive;
+            uTotal[1] += it->second.uInclusive;
+        }
+
+        fprintf(f, "total times (%d): ex: %Lu ", frames, 1000000*uTotal[0]/(luPerfFreq*(u64)frames));
+        fprintf(f, "inc: %Lu\n", 1000000 * uTotal[1]/(luPerfFreq*(u64)frames));
+
+        if( uTotal[0] > 0 )
+            fiTotalTime[0] = 1.0 / (double)uTotal[0];
+        if( uTotal[1] > 0 )
+            fiTotalTime[1] = 1.0 / (double)uTotal[1];
+
+        // output the combined times
+        for(it = mapAggregateTimes.begin(); it != mapAggregateTimes.end(); ++it) {
+            fprintf(f, "%s - ex: %f%% (%fs) inc: %f%%\n", it->first.c_str(),
+                    100.0f*(float)((double)it->second.uExclusive * fiTotalTime[0]),
+                    (double)it->second.uExclusive/(double)luPerfFreq,
+                    100.0f*(float)((double)it->second.uInclusive * fiTotalTime[1]));
+        }
+    }
+
+
+    fclose(f);
+}
+
+void DVProfClear()
+{
+    g_listCurTracking.clear();
+    g_listProfilers.clear();
+    g_listAllProfilers.clear();
+}
diff --git a/Utilities/otbsiftfast/profiler.h b/Utilities/otbsiftfast/profiler.h
new file mode 100755
index 0000000000000000000000000000000000000000..e7b7a2ee1137aa6da9ef4ebd10b4be3ba07ecd0b
--- /dev/null
+++ b/Utilities/otbsiftfast/profiler.h
@@ -0,0 +1,58 @@
+#ifndef SMALL_PROFILER_H
+#define SMALL_PROFILER_H
+
+typedef unsigned int u32;
+typedef unsigned long long u64;
+typedef signed long long s64;
+
+#include <assert.h>
+
+////
+// profiling
+///
+
+extern int g_bWriteProfile; // global variable to enable/disable profiling (if DVPROFILE is defined)
+
+// IMPORTANT: For every Reigster there must be an End
+void DVProfRegister(const char* pname);			// first checks if this profiler exists in g_listProfilers
+void DVProfEnd(u32 dwUserData);
+void DVProfWrite(const char* pfilename, u32 frames = 1);
+void DVProfClear();						// clears all the profilers
+
+#if defined(DVPROFILE)
+
+#ifdef _MSC_VER
+
+#ifndef __PRETTY_FUNCTION__
+#define __PRETTY_FUNCTION__ __FUNCTION__
+#endif
+
+#endif
+
+#define DVSTARTPROFILE() DVProfileFunc _pf(__FUNCTION__);
+
+class DVProfileFunc
+{
+public:
+	u32 dwUserData;
+	DVProfileFunc(const char* pname) { DVProfRegister(pname); dwUserData = 0; }
+	DVProfileFunc(const char* pname, u32 dwUserData) : dwUserData(dwUserData) { DVProfRegister(pname); }
+	~DVProfileFunc() { DVProfEnd(dwUserData); }
+};
+
+#else
+
+#define DVSTARTPROFILE()
+
+class DVProfileFunc
+{
+public:
+	u32 dwUserData;
+    inline DVProfileFunc(const char* pname) {}
+    inline DVProfileFunc(const char* pname, u32 dwUserData) { }
+	~DVProfileFunc() {}
+};
+
+#endif
+
+#endif
diff --git a/Utilities/otbsiftfast/runcmake.bat b/Utilities/otbsiftfast/runcmake.bat
new file mode 100755
index 0000000000000000000000000000000000000000..f233dcb29e7261d1ace71fc2a80b15e3292dcd58
--- /dev/null
+++ b/Utilities/otbsiftfast/runcmake.bat
@@ -0,0 +1,3 @@
+mkdir build
+cd build
+cmake ..
diff --git a/Utilities/otbsiftfast/siftfast.cpp b/Utilities/otbsiftfast/siftfast.cpp
new file mode 100755
index 0000000000000000000000000000000000000000..f3806514c73ddc87553be8cb3acdf6c3c617138f
--- /dev/null
+++ b/Utilities/otbsiftfast/siftfast.cpp
@@ -0,0 +1,157 @@
+// exact C++ implementation of lowe's sift program
+// author: zerofrog(@gmail.com), Sep 2008
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+//This program is distributed in the hope that it will be useful,
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//Lesser GNU General Public License for more details.
+//
+//You should have received a copy of the GNU Lesser General Public License
+//along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+// This source code was carefully calibrated to match David Lowe's SIFT features program
+#include "siftfast.h"
+
+#include <iostream>
+
+#include <sys/timeb.h>    // ftime(), struct timeb
+
+#ifdef _WIN32
+#include <io.h>
+#include <fcntl.h>
+#endif
+
+#include <cstdlib>
+#include <assert.h>
+
+using namespace std;
+
+typedef unsigned short u16;
+typedef unsigned int u32;
+typedef unsigned long long u64;
+
+inline u32 timeGetTime()
+{
+#ifdef _WIN32
+    _timeb t;
+    _ftime(&t);
+#else
+    timeb t;
+    ftime(&t);
+#endif
+
+    return (u32)(t.time*1000+t.millitm);
+}
+
+// code from david lowe
+void SkipComments(FILE *fp)
+{
+    int ch;
+
+    fscanf(fp," "); // Skip white space.
+    while ((ch = fgetc(fp)) == '#') {
+        while ((ch = fgetc(fp)) != '\n'  &&  ch != EOF)
+            ;
+        fscanf(fp," ");
+    }
+    ungetc(ch, fp); // Replace last character read.
+}
+
+// code from david lowe
+SiftFastImage ReadPGM(FILE *fp)
+{
+    int char1, char2, width, height, max, c1, c2, c3, r, c;
+    SiftFastImage image, nextimage;
+
+    char1 = fgetc(fp);
+    char2 = fgetc(fp);
+    SkipComments(fp);
+    c1 = fscanf(fp, "%d", &width);
+    SkipComments(fp);
+    c2 = fscanf(fp, "%d", &height);
+    SkipComments(fp);
+    c3 = fscanf(fp, "%d", &max);
+
+    if (char1 != 'P' || char2 != '5' || c1 != 1 || c2 != 1 || c3 != 1 || max > 255) {
+        cerr << "Input is not a standard raw 8-bit PGM file." << endl
+             << "Use xv or pnmdepth to convert file to 8-bit PGM format." << endl;
+        exit(1);
+    }
+
+    fgetc(fp);  // Discard exactly one byte after header.
+
+    // Create floating point image with pixels in range [0,1].
+    image = CreateImage(height, width);
+    for (r = 0; r < height; r++)
+        for (c = 0; c < width; c++)
+            image->pixels[r*image->stride+c] = ((float) fgetc(fp)) / 255.0;
+
+    //Check if there is another image in this file, as the latest PGM
+    // standard allows for multiple images.
+    SkipComments(fp);
+    if (getc(fp) == 'P') {
+        cerr << "ignoring other images" << endl;
+        ungetc('P', fp);
+        nextimage = ReadPGM(fp);
+        //image->next = nextimage;
+    }
+    return image;
+}
+
+int main(int argc, char **argv)
+{
+#ifdef _WIN32
+    // have to set to binary
+     _setmode(_fileno(stdin),_O_BINARY);
+#endif
+
+    SiftFastImage image = ReadPGM(stdin);
+    Keypoint keypts;
+    float fproctime;
+
+    cerr << "Finding keypoints (image " << image->cols << "x" << image->rows << ")..." << endl;
+    {
+        u32 basetime = timeGetTime();
+        keypts = GetKeypoints(image,3);
+        fproctime = (timeGetTime()-basetime)*0.001f;
+    }
+
+    // write the keys to the output
+    int numkeys = 0;
+    Keypoint key = keypts;
+    while(key) {
+        numkeys++;
+        key = key->next;
+    }
+
+    cerr << numkeys << " keypoints found in " << fproctime << " seconds." << endl;
+
+    cout << numkeys << " " << 128 << endl;
+    key = keypts;
+    while(key) {
+        cout << key->row << " " << key->col << " " << key->scale << " " << key->ori << endl;
+
+        for(int i = 0; i < 128; ++i) {
+            int intdesc = (int)(key->descrip[i]*512.0f);
+            assert( intdesc >= 0 );
+            
+            if( intdesc > 255 )
+                intdesc = 255;
+            cout << intdesc << " ";
+            if( (i&15)==15 )
+                cout << endl;
+        }
+        cout << endl;
+        key = key->next;
+    }
+
+    FreeKeypoints(keypts);
+    DestroyAllResources();
+
+    return 0;
+}
diff --git a/Utilities/otbsiftfast/siftfast.h b/Utilities/otbsiftfast/siftfast.h
new file mode 100755
index 0000000000000000000000000000000000000000..6b2d84553b5929d93fa95517d9d6d1eb7c7f7185
--- /dev/null
+++ b/Utilities/otbsiftfast/siftfast.h
@@ -0,0 +1,33 @@
+#ifndef SIFT_FAST_H
+#define SIFT_FAST_H
+
+typedef struct ImageSt {
+    int rows, cols;          // Dimensions of image.
+    float *pixels;          // 2D array of image pixels.
+    int stride;             // how many floats until the next row
+                            // (used to add padding to make rows aligned to 16 bytes)
+} *SiftFastImage;
+
+typedef struct KeypointSt {
+    float row, col;             // Subpixel location of keypoint.
+    float scale, ori;           // Scale and orientation (range [-PI,PI])
+    float descrip[128];     // Vector of descriptor values
+    struct KeypointSt *next;    // Pointer to next keypoint in list.
+} *Keypoint;
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+Keypoint GetKeypoints(SiftFastImage porgimage,unsigned int nbScales);
+SiftFastImage CreateImage(int rows, int cols);
+SiftFastImage CreateImageFromMatlabData(double* pdata, int rows, int cols);
+void DestroyAllImages();
+void DestroyAllResources();
+void FreeKeypoints(Keypoint keypt);
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif
diff --git a/Utilities/otbsiftfast/siftfast.m b/Utilities/otbsiftfast/siftfast.m
new file mode 100755
index 0000000000000000000000000000000000000000..7f9b64389071f72a73ce21b4961929c715c5a0de
--- /dev/null
+++ b/Utilities/otbsiftfast/siftfast.m
@@ -0,0 +1,7 @@
+% [frames, descriptors] = sift_fast(I)
+%
+% Returns the sift feature of a grayscale image
+% I - grayscale in real format, all values have to be in [0,1]
+% frames - 4xN matrix of the sift locations each column is (X,Y,Scale,Orientation)
+% descriptors - KxN matrix (usually K=128) of the sift descriptors
+function [frames, descriptors] = sift_fast(I)
diff --git a/Utilities/otbsiftfast/siftmex.cpp b/Utilities/otbsiftfast/siftmex.cpp
new file mode 100755
index 0000000000000000000000000000000000000000..73e4e7838194c0eb15184f8c3bf31f4a8b5c67a8
--- /dev/null
+++ b/Utilities/otbsiftfast/siftmex.cpp
@@ -0,0 +1,86 @@
+// author: zerofrog(@gmail.com), Sep 2008
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU Lesser General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// at your option) any later version.
+//
+//This program is distributed in the hope that it will be useful,
+//but WITHOUT ANY WARRANTY; without even the implied warranty of
+//MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//Lesser GNU General Public License for more details.
+//
+//You should have received a copy of the GNU Lesser General Public License
+//along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+#include "mex.h"
+#include <stdio.h>
+#include <string.h>
+
+#include <list>
+#include <string>
+using namespace std;
+
+#ifdef DVPROFILE
+#include "profiler.h"
+#endif
+
+#include "siftfast.h"
+
+typedef unsigned long long u64;
+
+// [frames,descr]=sift_mex(I,...)
+// takes an image and outputs frames and a descriptor
+void mexFunction(int nout, mxArray *out[], int nin, const mxArray *in[])
+{
+    if(nin == 0 || !mxIsDouble(in[0]) )
+        mexErrMsgTxt("sift_fast takes 1 argument: a grayscale image in real format ");
+
+    const int* dimensions = mxGetDimensions(in[0]);
+    Image image = CreateImageFromMatlabData(mxGetPr(in[0]), dimensions[0], dimensions[1]);
+    Keypoint keypts = GetKeypoints(image);
+
+    // write the keys to the output
+    int numkeys = 0;
+    Keypoint key = keypts;
+    while(key) {
+        numkeys++;
+        key = key->next;
+    }
+
+    double* frames = NULL, *descr = NULL;
+    if( nout > 0 ) {
+        out[0] = mxCreateDoubleMatrix(4,numkeys,mxREAL);
+        frames = mxGetPr(out[0]);
+    }
+    if( nout > 1 ) {
+        out[1] = mxCreateDoubleMatrix(128,numkeys,mxREAL);
+        descr = mxGetPr(out[1]);
+    }
+
+    key = keypts;
+    while(key) {
+
+        if( frames != NULL ) {
+            frames[0] = key->col; frames[1] = key->row; frames[2] = key->scale; frames[3] = key->ori;
+            frames += 4;
+        }
+
+        if( descr != NULL ) {
+            for(int i = 0; i < 128; ++i)
+                descr[i] = (double)key->descrip[i];
+            descr += 128;
+        }
+
+        key = key->next;
+    }
+    
+    FreeKeypoints(keypts);
+    
+#ifdef DVPROFILE
+    DVProfWrite("prof.txt");
+    DVProfClear();
+#endif
+
+    DestroyAllImages();
+}
diff --git a/otbIncludeDirectories.cmake b/otbIncludeDirectories.cmake
index 2474fae52f4efda7bf393159d32307eb3a956800..45a6206e63597170f7130faff511bf7aa365a319 100644
--- a/otbIncludeDirectories.cmake
+++ b/otbIncludeDirectories.cmake
@@ -49,6 +49,7 @@ SET(OTB_INCLUDE_DIRS_BUILD_TREE ${OTB_INCLUDE_DIRS_BUILD_TREE}
   ${OTB_SOURCE_DIR}/Utilities/otbedison/segm
   ${OTB_SOURCE_DIR}/Utilities/otbedison/prompt
   ${OTB_SOURCE_DIR}/Utilities/otbedison/edge
+  ${OTB_SOURCE_DIR}/Utilities/otbsiftfast		
 
 )
 IF(OTB_COMPILE_JPEG2000)
@@ -219,6 +220,7 @@ SET(OTB_INCLUDE_DIRS_INSTALL_TREE ${OTB_INCLUDE_DIRS_INSTALL_TREE}
 # ${OTB_INSTALL_INCLUDE_DIR}/Utilities/otbkml/third_party/zlib-1.2.3/contrib
   ${OTB_INSTALL_INCLUDE_DIR}/Utilities/otbliblas/include
   ${OTB_INSTALL_INCLUDE_DIR}/Utilities/otbedison
+  ${OTB_INSTALL_INCLUDE_DIR}/Utilities/otbsiftfast
 )
 
 IF(OTB_COMPILE_JPEG2000)