/*
 * Copyright (C) 2005-2017 Centre National d'Etudes Spatiales (CNES)
 *
 * This file is part of Orfeo Toolbox
 *
 *     https://www.orfeo-toolbox.org/
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#ifndef otbTrainImagesBase_h
#define otbTrainImagesBase_h

#include "otbVectorDataFileWriter.h"
#include "otbWrapperCompositeApplication.h"
#include "otbWrapperApplicationFactory.h"

#include "otbStatisticsXMLFileWriter.h"
#include "otbImageToEnvelopeVectorDataFilter.h"
#include "otbSamplingRateCalculator.h"
#include "otbOGRDataToSamplePositionFilter.h"

#include <sstream>
#include <string>
#include <vector>

namespace otb
{
namespace Wrapper
{

/** \class TrainImagesBase
 * \brief Base class for the TrainImagesClassifier
 *
 * This class holds the common input/output parameters and the
 * composite application connections shared by both supervised and
 * unsupervised model training.
 *
 * \ingroup OTBAppClassification
 */
class TrainImagesBase : public CompositeApplication
{
public:
  /** Standard class typedefs. */
  typedef TrainImagesBase Self;
  typedef CompositeApplication Superclass;
  typedef itk::SmartPointer<Self> Pointer;
  typedef itk::SmartPointer<const Self> ConstPointer;

  /** Standard macro */
  itkTypeMacro( TrainImagesBase, Superclass )

  /** filters typedefs*/
  typedef otb::OGRDataToSamplePositionFilter<FloatVectorImageType, UInt8ImageType, otb::PeriodicSampler> PeriodicSamplerType;

  typedef otb::SamplingRateCalculator::MapRateType MapRateType;

protected:

66
  typedef enum
67 68
  {
    CLASS, GEOMETRIC
69
  } SamplingStrategy;
70
  struct SamplingRates;
71 72
  class TrainFileNamesHandler;

73 74 75 76
  /**
   * Initialize all the input and output parameter used for the train images
   */
  void InitIO();
77

78 79 80 81
  /**
   * Initialize sampling related application and parameters
   */
  void InitSampling();
82

83 84 85 86 87
  void ShareSamplingParameters();
  void ConnectSamplingParameters();
  void InitClassification();
  void ShareClassificationParams();
  void ConnectClassificationParams();
88 89

  /**
90 91 92 93
   * Compute polygon statistics given provided strategy with PolygonClassStatistics class
   * \param imageList list of input images
   * \param vectorFileNames list of input vector file names
   * \param statisticsFileNames list of out
94
   */
95
  void ComputePolygonStatistics(FloatVectorImageListType *imageList, const std::vector<std::string> &vectorFileNames,
96
                                const std::vector<std::string> &statisticsFileNames);
97 98

  /**
99
   * Compute final maximum training and validation
100 101 102
   * \param dedicatedValidation
   * \return SamplingRates final maximum training and final maximum validation
   */
103
  SamplingRates ComputeFinalMaximumSamplingRates(bool dedicatedValidation);
104

105

106
  /**
107 108 109 110 111
   * Compute rates using MultiImageSamplingRate application
   * \param statisticsFileNames
   * \param ratesFileName
   * \param maximum final maximum value computed by ComputeFinalMaximumSamplingRates
   * \sa ComputeFinalMaximumSamplingRates
112
   */
113 114 115
  void ComputeSamplingRate(const std::vector<std::string> &statisticsFileNames,
                           const std::string &ratesFileName,
                           long maximum);
116 117 118 119 120 121 122
  /**
   * Train the model with training and optional validation data samples
   * \param imageList list of input images
   * \param sampleTrainFileNames files names of the training samples
   * \param sampleValidationFileNames file names of the validation sample
   */
  void TrainModel(FloatVectorImageListType *imageList, const std::vector<std::string> &sampleTrainFileNames,
123
                  const std::vector<std::string> &sampleValidationFileNames);
124 125

  /**
126 127 128 129 130 131 132
   * Select samples by class or by geographic strategy
   * \param image
   * \param vectorFileName
   * \param sampleFileName
   * \param statisticsFileName
   * \param ratesFileName
   * \param strategy
133
   */
134
  void SelectAndExtractSamples(FloatVectorImageType *image, std::string vectorFileName, std::string sampleFileName,
135
                               std::string statisticsFileName, std::string ratesFileName, SamplingStrategy strategy,
136
                               std::string selectedField = "");
137 138
  /**
   * Select and extract samples with the SampleSelection and SampleExtraction application.
139 140 141 142 143
   * \param fileNames
   * \param imageList
   * \param vectorFileNames
   * \param strategy the strategy used for selection (by class or with geometry)
   * \param selectedFieldName
144 145
   */
  void SelectAndExtractTrainSamples(const TrainFileNamesHandler &fileNames, FloatVectorImageListType *imageList,
146
                                    std::vector<std::string> vectorFileNames, SamplingStrategy strategy,
147
                                    std::string selectedFieldName = "");
148

149

150 151 152 153 154 155 156 157 158
  /**
   * Function used to select validation samples based on a defined strategy (geometric in unsupervised mode)
   * and extract them. With dedicated validation the 'by class' sampling strategy and statistics are used.
   * Otherwise this function split training to validation samples corresponding to sample.vtr percentage.
   * or do nothing if this percentage is == 0
   * \param fileNames
   * \param imageList
   * \param validationVectorFileList optional validation vector file for each images
   */
159
  void SelectAndExtractValidationSamples(const TrainFileNamesHandler &fileNames, FloatVectorImageListType *imageList,
160 161 162 163 164 165 166 167 168
                                         const std::vector<std::string> &validationVectorFileList = std::vector<std::string>());

  /**
   * Function used to split all training samples from all images in a set of training and validation.
   * \param fileNames
   * \param imageList
   * \sa SplitTrainingAndValidationSamples
   */
  void SplitTrainingToValidationSamples(const TrainFileNamesHandler &fileNames, FloatVectorImageListType *imageList);
169

170
private:
171 172 173 174 175 176 177 178 179

  /**
   * Function used to split training samples in set of training and validation.
   * \param image input image
   * \param sampleFileName the input sample file name
   * \param sampleTrainFileName the input training file name
   * \param sampleValidFileName the input validation file name
   * \param ratesTrainFileName the rates file name
   */
180 181
  void SplitTrainingAndValidationSamples(FloatVectorImageType *image, std::string sampleFileName,
                                         std::string sampleTrainFileName, std::string sampleValidFileName,
182
                                         std::string ratesTrainFileName);
183 184


185
protected:
186 187 188 189 190 191 192 193 194 195 196

  struct SamplingRates
  {
    long int fmt;
    long int fmv;
  };

  /**
   * \class TrainFileNamesHandler
   * This class is used to store file names requires for the application's input and output.
   * And to clear temporary files generated by the applications
197
   * \ingroup OTBAppClassification
198 199 200 201
   */
  class TrainFileNamesHandler
  {
  public :
202
    void CreateTemporaryFileNames(std::string outModel, size_t nbInputs, bool dedicatedValidation)
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254
    {

      if( dedicatedValidation )
        {
        rateTrainOut = outModel + "_ratesTrain.csv";
        }
      else
        {
        rateTrainOut = outModel + "_rates.csv";
        }

      rateValidOut = outModel + "_ratesValid.csv";
      for( unsigned int i = 0; i < nbInputs; i++ )
        {
        std::ostringstream oss;
        oss << i + 1;
        std::string strIndex( oss.str() );
        if( dedicatedValidation )
          {
          polyStatTrainOutputs.push_back( outModel + "_statsTrain_" + strIndex + ".xml" );
          polyStatValidOutputs.push_back( outModel + "_statsValid_" + strIndex + ".xml" );
          ratesTrainOutputs.push_back( outModel + "_ratesTrain_" + strIndex + ".csv" );
          ratesValidOutputs.push_back( outModel + "_ratesValid_" + strIndex + ".csv" );
          sampleOutputs.push_back( outModel + "_samplesTrain_" + strIndex + ".shp" );
          }
        else
          {
          polyStatTrainOutputs.push_back( outModel + "_stats_" + strIndex + ".xml" );
          ratesTrainOutputs.push_back( outModel + "_rates_" + strIndex + ".csv" );
          sampleOutputs.push_back( outModel + "_samples_" + strIndex + ".shp" );
          }
        sampleTrainOutputs.push_back( outModel + "_samplesTrain_" + strIndex + ".shp" );
        sampleValidOutputs.push_back( outModel + "_samplesValid_" + strIndex + ".shp" );
        }
    }

    void clear()
    {
      for( unsigned int i = 0; i < polyStatTrainOutputs.size(); i++ )
        RemoveFile( polyStatTrainOutputs[i] );
      for( unsigned int i = 0; i < polyStatValidOutputs.size(); i++ )
        RemoveFile( polyStatValidOutputs[i] );
      for( unsigned int i = 0; i < ratesTrainOutputs.size(); i++ )
        RemoveFile( ratesTrainOutputs[i] );
      for( unsigned int i = 0; i < ratesValidOutputs.size(); i++ )
        RemoveFile( ratesValidOutputs[i] );
      for( unsigned int i = 0; i < sampleOutputs.size(); i++ )
        RemoveFile( sampleOutputs[i] );
      for( unsigned int i = 0; i < sampleTrainOutputs.size(); i++ )
        RemoveFile( sampleTrainOutputs[i] );
      for( unsigned int i = 0; i < sampleValidOutputs.size(); i++ )
        RemoveFile( sampleValidOutputs[i] );
255 256
      for( unsigned int i = 0; i < tmpVectorFileList.size(); i++ )
        RemoveFile( tmpVectorFileList[i] );
257 258 259 260 261 262 263 264 265 266
    }

  public:
    std::vector<std::string> polyStatTrainOutputs;
    std::vector<std::string> polyStatValidOutputs;
    std::vector<std::string> ratesTrainOutputs;
    std::vector<std::string> ratesValidOutputs;
    std::vector<std::string> sampleOutputs;
    std::vector<std::string> sampleTrainOutputs;
    std::vector<std::string> sampleValidOutputs;
267
    std::vector<std::string> tmpVectorFileList;
268 269 270 271 272 273 274
    std::string rateValidOut;
    std::string rateTrainOut;

  private:
    bool RemoveFile(std::string &filePath)
    {
      bool res = true;
275
      if( itksys::SystemTools::FileExists( filePath ) )
276 277 278 279 280 281 282 283 284 285 286
        {
        size_t posExt = filePath.rfind( '.' );
        if( posExt != std::string::npos && filePath.compare( posExt, std::string::npos, ".shp" ) == 0 )
          {
          std::string shxPath = filePath.substr( 0, posExt ) + std::string( ".shx" );
          std::string dbfPath = filePath.substr( 0, posExt ) + std::string( ".dbf" );
          std::string prjPath = filePath.substr( 0, posExt ) + std::string( ".prj" );
          RemoveFile( shxPath );
          RemoveFile( dbfPath );
          RemoveFile( prjPath );
          }
287
        res = itksys::SystemTools::RemoveFile( filePath );
288 289 290 291 292 293 294 295 296 297 298 299 300 301
        if( !res )
          {
          //otbAppLogINFO( <<"Unable to remove file  "<<filePath );
          }
        }
      return res;
    }
  };

};

} // end namespace Wrapper
} // end namespace otb

#ifndef OTB_MANUAL_INSTANTIATION
#include "otbTrainImagesBase.hxx"
#endif

#endif //otbTrainImagesBase_h