// otbVectorClassifier.cxx
/*
 * Copyright (C) 2005-2017 Centre National d'Etudes Spatiales (CNES)
 *
 * This file is part of Orfeo Toolbox
 *
 *     https://www.orfeo-toolbox.org/
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "otbWrapperApplication.h"
#include "otbWrapperApplicationFactory.h"

#include "otbOGRDataSourceWrapper.h"
#include "otbOGRFeatureWrapper.h"

#include "itkVariableLengthVector.h"
#include "otbStatisticsXMLFileReader.h"

#include "itkListSample.h"
#include "otbShiftScaleSampleListFilter.h"

#include "otbMachineLearningModelFactory.h"

#include "otbMachineLearningModel.h"

#include <time.h>

#include <algorithm>
#include <cctype>
#include <string>
#include <vector>

namespace otb
{
namespace Wrapper
{

/**
 * Predicate that negates std::isalnum.
 *
 * The character is converted to unsigned char before the call because
 * std::isalnum has undefined behaviour for negative values other than EOF,
 * which is what a plain (signed) char holds for non-ASCII bytes.
 *
 * \param c character to test
 * \return true when \a c is NOT alphanumeric, false otherwise
 */
bool IsNotAlphaNum(char c)
{
  return !std::isalnum(static_cast<unsigned char>(c));
}

class VectorClassifier : public Application
{
public:
  /** Standard class typedefs. */
  typedef VectorClassifier              Self;
  typedef Application                   Superclass;
  typedef itk::SmartPointer<Self>       Pointer;
  typedef itk::SmartPointer<const Self> ConstPointer;

  /** Standard macro */
  itkNewMacro(Self);

  itkTypeMacro(Self, Application)

  /** Filters typedef */
65
  typedef float                                         ValueType;
66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82
  typedef unsigned int                                  LabelType;
  typedef itk::FixedArray<LabelType,1>                  LabelSampleType;
  typedef itk::Statistics::ListSample<LabelSampleType>  LabelListSampleType;

  typedef otb::MachineLearningModel<ValueType,LabelType>          MachineLearningModelType;
  typedef otb::MachineLearningModelFactory<ValueType, LabelType>  MachineLearningModelFactoryType;
  typedef MachineLearningModelType::Pointer                       ModelPointerType;
  typedef MachineLearningModelType::ConfidenceListSampleType      ConfidenceListSampleType;

  /** Statistics Filters typedef */
  typedef itk::VariableLengthVector<ValueType>                    MeasurementType;
  typedef otb::StatisticsXMLFileReader<MeasurementType>           StatisticsReader;

  typedef itk::VariableLengthVector<ValueType>                    InputSampleType;
  typedef itk::Statistics::ListSample<InputSampleType>            ListSampleType;
  typedef otb::Statistics::ShiftScaleSampleListFilter<ListSampleType, ListSampleType> ShiftScaleFilterType;

83
  ~VectorClassifier() override
84 85 86 87 88
    {
    MachineLearningModelFactoryType::CleanFactories();
    }

private:
89
  void DoInit() override
90 91
  {
    SetName("VectorClassifier");
92
    SetDescription("Performs a classification of the input vector data according to a model file.");
93 94 95

    SetDocName("Vector Classification");
    SetDocAuthors("OTB-Team");
96 97 98 99 100 101 102 103 104 105 106
    SetDocLongDescription("This application performs a vector data classification "
      "based on a model file produced by the TrainVectorClassifier application."
      "Features of the vector data output will contain the class labels decided by the classifier "
      "(maximal class label = 65535). \n"
      "There are two modes: \n"
        "1) Update mode: add of the 'cfield' field containing the predicted class in the input file. \n"
        "2) Write mode: copies the existing fields of the input file in the output file "
           " and add the 'cfield' field containing the predicted class. \n"
      "If you have declared the output file, the write mode applies. "
      "Otherwise, the input file update mode will be applied.");

107
    SetDocLimitations("Shapefiles are supported. But the SQLite format is only supported in update mode.");
108
    SetDocSeeAlso("TrainVectorClassifier");
109
    AddDocTag(Tags::Learning);
110 111

    AddParameter(ParameterType_InputVectorData, "in", "Name of the input vector data");
112
    SetParameterDescription("in","The input vector data file to classify.");
113 114 115

    AddParameter(ParameterType_InputFilename, "instat", "Statistics file");
    SetParameterDescription("instat", "A XML file containing mean and standard deviation to center"
116
      "and reduce samples before classification, produced by ComputeImagesStatistics application.");
117 118 119
    MandatoryOff("instat");

    AddParameter(ParameterType_InputFilename, "model", "Model file");
120
    SetParameterDescription("model", "Model file produced by TrainVectorClassifier application.");
121

122
    AddParameter(ParameterType_String,"cfield","Field class");
123
    SetParameterDescription("cfield","Field containing the predicted class."
124 125 126
      "Only geometries with this field available will be taken into account.\n"
      "The field is added either in the input file (if 'out' off) or in the output file.\n"
      "Caution, the 'cfield' must not exist in the input file if you are updating the file.");
127
    SetParameterString("cfield","predicted");
128

129 130 131
    AddParameter(ParameterType_ListView, "feat", "Field names to be calculated.");
    SetParameterDescription("feat","List of field names in the input vector data used as features for training. "
      "Put the same field names as the TrainVectorClassifier application.");
132

133
    AddParameter(ParameterType_Bool, "confmap",  "Confidence map");
134 135 136 137 138 139 140 141 142 143
    SetParameterDescription( "confmap", "Confidence map of the produced classification. The confidence index depends on the model: \n\n"
      "* LibSVM: difference between the two highest probabilities (needs a model with probability estimates, so that classes probabilities can be computed for each sample)\n"
      "* Boost: sum of votes\n"
      "* DecisionTree: (not supported)\n"
      "* GradientBoostedTree: (not supported)\n"
      "* KNearestNeighbors: number of neighbors with the same label\n"
      "* NeuralNetwork: difference between the two highest responses\n"
      "* NormalBayes: (not supported)\n"
      "* RandomForest: Confidence (proportion of votes for the majority class). Margin (normalized difference of the votes of the 2 majority classes) is not available for now.\n"
      "* SVM: distance to margin (only works for 2-class models)\n");
144 145 146 147 148 149 150 151 152 153

    AddParameter(ParameterType_OutputFilename, "out", "Output vector data file containing class labels");
    SetParameterDescription("out","Output vector data file storing sample values (OGR format)."
      "If not given, the input vector data file is updated.");
    MandatoryOff("out");

    // Doc example parameter settings
    SetDocExampleParameterValue("in", "vectorData.shp");
    SetDocExampleParameterValue("instat", "meanVar.xml");
    SetDocExampleParameterValue("model", "svmModel.svm");
154
    SetDocExampleParameterValue("out", "vectorDataLabeledVector.shp");
155 156
    SetDocExampleParameterValue("feat", "perimeter  area  width");
    SetDocExampleParameterValue("cfield", "predicted");
157

158
    SetOfficialDocLink();
159 160
  }

161
  void DoUpdateParameters() override
162 163 164 165 166 167 168 169 170 171 172
  {
    if ( HasValue("in") )
    {
      std::string shapefile = GetParameterString("in");

      otb::ogr::DataSource::Pointer ogrDS;

      OGRSpatialReference oSRS("");
      std::vector<std::string> options;

      ogrDS = otb::ogr::DataSource::New(shapefile, otb::ogr::DataSource::Modes::Read);
173 174
      otb::ogr::Layer layer = ogrDS->GetLayer(0);
      OGRFeatureDefn &layerDefn = layer.GetLayerDefn();
175 176 177

      ClearChoices("feat");

178
      for(int iField=0; iField< layerDefn.GetFieldCount(); iField++)
179
      {
180 181 182 183
        std::string item = layerDefn.GetFieldDefn(iField)->GetNameRef();
        std::string key(item);
        key.erase( std::remove_if(key.begin(),key.end(),IsNotAlphaNum), key.end());
        std::transform(key.begin(), key.end(), key.begin(), tolower);
184

185
        OGRFieldType fieldType = layerDefn.GetFieldDefn(iField)->GetType();
186
        if(fieldType == OFTInteger ||  fieldType == OFTInteger64 || fieldType == OFTReal)
187
          {
188
          std::string tmpKey="feat."+key;
189 190 191 192 193 194
          AddChoice(tmpKey,item);
          }
      }
    }
  }

195
  void DoExecute() override
196 197 198
  {
    clock_t tic = clock();

199
    std::string shapefile = GetParameterString("in");
200 201 202 203 204 205 206 207

    otb::ogr::DataSource::Pointer source = otb::ogr::DataSource::New(shapefile, otb::ogr::DataSource::Modes::Read);
    otb::ogr::Layer layer = source->GetLayer(0);

    ListSampleType::Pointer input = ListSampleType::New();

    const int nbFeatures = GetSelectedItems("feat").size();
    input->SetMeasurementVectorSize(nbFeatures);
Julien Michel's avatar
Julien Michel committed
208
  
209 210 211
    otb::ogr::Layer::const_iterator it = layer.cbegin();
    otb::ogr::Layer::const_iterator itEnd = layer.cend();
    for( ; it!=itEnd ; ++it)
212
      {
213 214 215
      MeasurementType mv;
      mv.SetSize(nbFeatures);
      for(int idx=0; idx < nbFeatures; ++idx)
216
        {
Julien Michel's avatar
Julien Michel committed
217 218 219 220
        // Beware that itemIndex differs from ogr layer field index
        unsigned int itemIndex = GetSelectedItems("feat")[idx];
        std::string fieldName = GetChoiceNames( "feat" )[itemIndex];
        
221
        mv[idx] = static_cast<ValueType>((*it)[fieldName].GetValue<double>());
222
        }
223
      input->PushBack(mv);
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
      }

    // Statistics for shift/scale
    MeasurementType meanMeasurementVector;
    MeasurementType stddevMeasurementVector;
    if (HasValue("instat") && IsParameterEnabled("instat"))
      {
      StatisticsReader::Pointer statisticsReader = StatisticsReader::New();
      std::string XMLfile = GetParameterString("instat");
      statisticsReader->SetFileName(XMLfile);
      meanMeasurementVector = statisticsReader->GetStatisticVectorByName("mean");
      stddevMeasurementVector = statisticsReader->GetStatisticVectorByName("stddev");
      }
    else
      {
      meanMeasurementVector.SetSize(nbFeatures);
      meanMeasurementVector.Fill(0.);
      stddevMeasurementVector.SetSize(nbFeatures);
      stddevMeasurementVector.Fill(1.);
      }

    ShiftScaleFilterType::Pointer trainingShiftScaleFilter = ShiftScaleFilterType::New();
    trainingShiftScaleFilter->SetInput(input);
    trainingShiftScaleFilter->SetShifts(meanMeasurementVector);
    trainingShiftScaleFilter->SetScales(stddevMeasurementVector);
    trainingShiftScaleFilter->Update();
    otbAppLogINFO("mean used: " << meanMeasurementVector);
    otbAppLogINFO("standard deviation used: " << stddevMeasurementVector);

    otbAppLogINFO("Loading model");
    m_Model = MachineLearningModelFactoryType::CreateMachineLearningModel(GetParameterString("model"),
                                                MachineLearningModelFactoryType::ReadMode);

    if (m_Model.IsNull())
      {
      otbAppLogFATAL(<< "Error when loading model " << GetParameterString("model") << " : unsupported model type");
      }

    m_Model->Load(GetParameterString("model"));
    otbAppLogINFO("Model loaded");

265
    ListSampleType::Pointer listSample = trainingShiftScaleFilter->GetOutput();
266

267
    ConfidenceListSampleType::Pointer quality;
268

269
    bool computeConfidenceMap(GetParameterInt("confmap") && m_Model->HasConfidenceIndex() 
270 271
                              && !m_Model->GetRegressionMode());

272
    if (!m_Model->HasConfidenceIndex() && GetParameterInt("confmap"))
273 274 275 276
      {
      otbAppLogWARNING("Confidence map requested but the classifier doesn't support it!");
      }

277
    LabelListSampleType::Pointer target;
278 279 280 281 282 283 284 285 286 287 288
    if (computeConfidenceMap)
      {
      quality = ConfidenceListSampleType::New();
      target = m_Model->PredictBatch(listSample, quality);
      }
      else
      {
      target = m_Model->PredictBatch(listSample);
      }

    ogr::DataSource::Pointer output;
289 290
    ogr::DataSource::Pointer buffer = ogr::DataSource::New();
    bool updateMode = false;
291 292
    if (IsParameterEnabled("out") && HasValue("out"))
      {
293
      // Create new OGRDataSource
294
      output = ogr::DataSource::New(GetParameterString("out"), ogr::DataSource::Modes::Overwrite);
295 296 297 298 299 300 301 302 303 304 305
      otb::ogr::Layer newLayer = output->CreateLayer(
        GetParameterString("out"),
        const_cast<OGRSpatialReference*>(layer.GetSpatialRef()),
        layer.GetGeomType());
      // Copy existing fields
      OGRFeatureDefn &inLayerDefn = layer.GetLayerDefn();
      for (int k=0 ; k<inLayerDefn.GetFieldCount() ; k++)
        {
        OGRFieldDefn fieldDefn(inLayerDefn.GetFieldDefn(k));
        newLayer.CreateField(fieldDefn);
        }
306 307 308 309
      }
    else
      {
      // Update mode
310
      updateMode = true;
311
      otbAppLogINFO("Update input vector data.");
312 313 314 315 316 317
      // fill temporary buffer for the transfer
      otb::ogr::Layer inputLayer = layer;
      layer = buffer->CopyLayer(inputLayer, std::string("Buffer"));
      // close input data source
      source->Clear();
      // Re-open input data source in update mode
318 319 320
      output = otb::ogr::DataSource::New(shapefile, otb::ogr::DataSource::Modes::Update_LayerUpdate);
      }

321 322
    otb::ogr::Layer outLayer = output->GetLayer(0);

323 324 325 326 327 328
    OGRErr errStart = outLayer.ogr().StartTransaction();
    if (errStart != OGRERR_NONE)
      {
      itkExceptionMacro(<< "Unable to start transaction for OGR layer " << outLayer.ogr().GetName() << ".");
      }

329 330 331 332
    // Add the field of prediction in the output layer if field not exist
    OGRFeatureDefn &layerDefn = layer.GetLayerDefn();
    int idx = layerDefn.GetFieldIndex(GetParameterString("cfield").c_str());
    if (idx >= 0)
333
      {
334 335
      if (layerDefn.GetFieldDefn(idx)->GetType() != OFTInteger)
        itkExceptionMacro("Field name "<< GetParameterString("cfield") << " already exists with a different type!");
336 337
      }
    else
338 339 340
      {
      OGRFieldDefn predictedField(GetParameterString("cfield").c_str(), OFTInteger);
      ogr::FieldDefn predictedFieldDef(predictedField);
341
      outLayer.CreateField(predictedFieldDef);
342
      }
343 344

    // Add confidence field in the output layer
345
    std::string confFieldName("confidence");
346 347
    if (computeConfidenceMap)
      {
348 349 350 351 352 353
      idx = layerDefn.GetFieldIndex(confFieldName.c_str());
      if (idx >= 0)
        {
        if (layerDefn.GetFieldDefn(idx)->GetType() != OFTReal)
          itkExceptionMacro("Field name "<< confFieldName << " already exists with a different type!");
        }
354
      else
355 356 357 358 359
        {
        OGRFieldDefn confidenceField(confFieldName.c_str(), OFTReal);
        confidenceField.SetWidth(confidenceField.GetWidth());
        confidenceField.SetPrecision(confidenceField.GetPrecision());
        ogr::FieldDefn confFieldDefn(confidenceField);
360
        outLayer.CreateField(confFieldDefn);
361
        }
362 363
      }

364
    // Fill output layer
365 366
    unsigned int count=0;
    std::string classfieldname = GetParameterString("cfield");
367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385
    it = layer.cbegin();
    itEnd = layer.cend();
    for( ; it!=itEnd ; ++it, ++count)
      {
      ogr::Feature dstFeature(outLayer.GetLayerDefn());
      dstFeature.SetFrom( *it , TRUE);
      dstFeature.SetFID(it->GetFID());
      dstFeature[classfieldname].SetValue<int>(target->GetMeasurementVector(count)[0]);
      if (computeConfidenceMap)
        dstFeature[confFieldName].SetValue<double>(quality->GetMeasurementVector(count)[0]);
      if (updateMode)
        {
        outLayer.SetFeature(dstFeature);
        }
      else
        {
        outLayer.CreateFeature(dstFeature);
        }
      }
386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409

    if(outLayer.ogr().TestCapability("Transactions"))
      {
      const OGRErr errCommitX = outLayer.ogr().CommitTransaction();
      if (errCommitX != OGRERR_NONE)
        {
        itkExceptionMacro(<< "Unable to commit transaction for OGR layer " << outLayer.ogr().GetName() << ".");
        }
      }

    output->SyncToDisk();

    clock_t toc = clock();
    otbAppLogINFO( "Elapsed: "<< ((double)(toc - tic) / CLOCKS_PER_SEC)<<" seconds.");

  }

  ModelPointerType m_Model;
};

}
}

OTB_APPLICATION_EXPORT(otb::Wrapper::VectorClassifier)