Commit eadecf33 authored by Yannick TANGUY's avatar Yannick TANGUY

Add a notebook to make a simple land cover classification

parent 3b122953
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"<img style=\"float: left; margin:0px 15px 15px 0px; width:120px\" src=\"https://www.orfeo-toolbox.org/wp-content/uploads/2016/03/logo-orfeo-toolbox.png\">\n",
"\n",
"# OTB Guided Tour - FOSS4G 2019 Bucharest \n",
"## Yannick TANGUY and David YOUSSEFI (CNES, French Space Agency)\n",
"\n",
"<br>\n",
"\n",
"<b> Press <span style=\"color:black;background:yellow\">SHIFT+ENTER</span> to execute the notebook interactively cell by cell </b></div>\n",
"\n",
"## Let's play with OTB classification framework to produce a land cover classification"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"3 images will be used\n"
]
}
],
"source": [
"import otbApplication\n",
"import os\n",
"import utils\n",
"import numpy as np\n",
"\n",
"# Data directory\n",
"DATA_DIR = \"data\"\n",
"\n",
"# Our dataset contains polygons, of 4 different land cover classes : urban, cultures, forest, sea/water\n",
"DB = \"GroundTruth_4_classes.sqlite\"\n",
"\n",
"# Associates a RGB color to each class\n",
"CMAP = \"4_classes_colormap.txt\"\n",
"\n",
"# Output directory\n",
"OUTPUT_DIR = \"output\"\n",
"\n",
"# Input / Output filenames\n",
"images_list = utils.list_images(DATA_DIR,\"xt_SENTINEL2B\",\".tif\")\n",
"training_set = DATA_DIR+\"/\"+DB\n",
"colormap = DATA_DIR+\"/\"+CMAP\n",
"image_stack = OUTPUT_DIR+\"/\"+\"image_stack.tif\"\n",
"stats = OUTPUT_DIR+\"/\"+\"stats.xml\"\n",
"samples = OUTPUT_DIR+\"/\"+\"samples.sqlite\"\n",
"rf_model = OUTPUT_DIR+\"/\"+\"rf_model.txt\"\n",
"classif = OUTPUT_DIR+\"/\"+\"classif_4_classes.tif\"\n",
"confmatrix = OUTPUT_DIR+\"/\"+\"conf_matrix.tif\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### First, make an image stack with all our images\n",
"We will work on a (small..) time serie of Sentinel2 images"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2019-07-30 09:40:22 (INFO) ConcatenateImages: Default RAM limit for OTB is 256 MB\n",
"2019-07-30 09:40:22 (INFO) ConcatenateImages: GDAL maximum cache size is 12847 MB\n",
"2019-07-30 09:40:22 (INFO) ConcatenateImages: OTB will use at most 48 threads\n",
"2019-07-30 09:40:22 (INFO): Estimated memory for full processing: 917.413MB (avail.: 256 MB), optimal image partitioning: 4 blocks\n",
"2019-07-30 09:40:22 (INFO): File output/image_stack.tif will be written in 5 blocks of 2410x417 pixels\n",
"Writing output/image_stack.tif...: 100% [**************************************************] (3s)\n"
]
},
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"app = otbApplication.Registry.CreateApplication(\"ConcatenateImages\")\n",
"app.SetParameterStringList(\"il\",images_list)\n",
"app.SetParameterString(\"out\",image_stack)\n",
"app.ExecuteAndWriteOutput()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Then, we compute stats on our ground-truth polygons\n",
"The aim is to observe how the different classes are represented over the input region..."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2019-07-30 09:40:25 (INFO) PolygonClassStatistics: Default RAM limit for OTB is 256 MB\n",
"2019-07-30 09:40:25 (INFO) PolygonClassStatistics: GDAL maximum cache size is 12847 MB\n",
"2019-07-30 09:40:25 (INFO) PolygonClassStatistics: OTB will use at most 48 threads\n",
"2019-07-30 09:40:25 (INFO) PolygonClassStatistics: Elevation management: setting default height above ellipsoid to 0 meters\n",
"2019-07-30 09:40:25 (INFO): Estimated memory for full processing: 458.478MB (avail.: 256 MB), optimal image partitioning: 2 blocks\n",
"2019-07-30 09:40:25 (INFO): Estimation will be performed in 3 blocks of 2410x694 pixels\n",
"Analyze polygons...: 100% [**************************************************] (8s)\n"
]
},
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"app = otbApplication.Registry.CreateApplication(\"PolygonClassStatistics\")\n",
"app.SetParameterString(\"in\",image_stack)\n",
"app.SetParameterString(\"vec\",training_set)\n",
"# OTB Python API trick : we need to call \"UpdateParameters\" so OTB opens the training set\n",
"# and list the possible field names...\n",
"app.UpdateParameters()\n",
"app.SetParameterString(\"field\",\"code\")\n",
"app.SetParameterString(\"out\",stats)\n",
"app.ExecuteAndWriteOutput()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Display stats file"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<?xml version=\"1.0\" ?>\n",
"\n",
"<GeneralStatistics>\n",
"\n",
" <Statistic name=\"samplesPerClass\">\n",
"\n",
" <StatisticMap key=\"1\" value=\"437540\" />\n",
"\n",
" <StatisticMap key=\"2\" value=\"1650038\" />\n",
"\n",
" <StatisticMap key=\"3\" value=\"147831\" />\n",
"\n",
" <StatisticMap key=\"4\" value=\"468701\" />\n",
"\n",
" </Statistic>\n",
"\n"
]
}
],
"source": [
"stats_file = open(stats,\"r\") \n",
"for cpt in range(8):\n",
" print(stats_file.readline())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### We observe that the class \"3\" (forest) is less represented\n",
"* we will limit the number of samples per class to this value (or lower)\n",
"* a lot of other strategies exist !!\n",
"\n",
"Let's choose 80 000 samples by class"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2019-07-30 09:40:33 (INFO) SampleSelection: Default RAM limit for OTB is 256 MB\n",
"2019-07-30 09:40:33 (INFO) SampleSelection: GDAL maximum cache size is 12847 MB\n",
"2019-07-30 09:40:33 (INFO) SampleSelection: OTB will use at most 48 threads\n",
"2019-07-30 09:40:33 (INFO) SampleSelection: Elevation management: setting default height above ellipsoid to 0 meters\n",
"2019-07-30 09:40:33 (INFO) SampleSelection: Sampling strategy : set a constant number of samples for all classes\n",
"2019-07-30 09:40:33 (INFO) SampleSelection: Sampling rates : className requiredSamples totalSamples rate\n",
"1\t80000\t437540\t0.18284\n",
"2\t80000\t1650038\t0.0484837\n",
"3\t80000\t147831\t0.541158\n",
"4\t80000\t468701\t0.170685\n",
"\n",
"2019-07-30 09:40:33 (INFO): Estimated memory for full processing: 458.478MB (avail.: 256 MB), optimal image partitioning: 2 blocks\n",
"2019-07-30 09:40:33 (INFO): Estimation will be performed in 4 blocks of 1584x1584 pixels\n",
"Selecting positions with periodic sampler...: 100% [**************************************************] (15s)\n"
]
},
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"app = otbApplication.Registry.CreateApplication(\"SampleSelection\")\n",
"app.SetParameterString(\"in\",image_stack)\n",
"app.SetParameterString(\"vec\",training_set)\n",
"# OTB Python API trick : we need to call \"UpdateParameters\" so OTB opens the training set\n",
"# and list the possible field names...\n",
"app.UpdateParameters()\n",
"app.SetParameterString(\"field\",\"code\")\n",
"app.SetParameterString(\"instats\",stats)\n",
"app.SetParameterString(\"strategy\",\"constant\")\n",
"app.SetParameterInt(\"strategy.constant.nb\",80000)\n",
"app.SetParameterString(\"out\",samples)\n",
"app.ExecuteAndWriteOutput()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Now we have to extract values for each sample, from our image stack\n",
"\n",
"Each sample (a pixel of 10m by 10m) will be updated with the time serie values (Blue band for image 1, Green band for image 1, ..., Near Infrared band for image 3)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"2019-07-30 09:40:49 (INFO) SampleExtraction: Default RAM limit for OTB is 256 MB\n",
"2019-07-30 09:40:49 (INFO) SampleExtraction: GDAL maximum cache size is 12847 MB\n",
"2019-07-30 09:40:49 (INFO) SampleExtraction: OTB will use at most 48 threads\n",
"2019-07-30 09:40:49 (INFO): Estimated memory for full processing: 687.946MB (avail.: 256 MB), optimal image partitioning: 3 blocks\n",
"2019-07-30 09:40:49 (INFO): Estimation will be performed in 4 blocks of 2410x521 pixels\n",
"Extracting sample values...: 100% [**************************************************] (25s)\n"
]
},
{
"data": {
"text/plain": [
"0"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"app = otbApplication.Registry.CreateApplication(\"SampleExtraction\")\n",
"app.SetParameterString(\"in\",image_stack)\n",
"app.SetParameterString(\"vec\",samples)\n",
"# OTB Python API trick : we need to call \"UpdateParameters\" so OTB opens the training set\n",
"# and list the possible field names...\n",
"app.UpdateParameters()\n",
"app.SetParameterString(\"field\",\"code\")\n",
"app.SetParameterString(\"outfield\",\"prefix\")\n",
"app.SetParameterString(\"outfield.prefix.name\",\"band_\")\n",
"app.ExecuteAndWriteOutput()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### We can now train our classifier !\n",
"* Let's choose a Random Forest classifier\n",
"* it will learn from our sample set \n",
"* it needs to know :\n",
" * the name of the field representing the classes\n",
" * the names of the parameters to learn from"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# workaround because the UpdateParameters() did not seem to work fine for this app !\n",
"import subprocess\n",
"args = ['otbcli_TrainVectorClassifier', '-io.vd', samples, \n",
" '-cfield', 'code', '-io.out', rf_model, '-classifier', 'rf', \n",
" '-classifier.rf.cat', '4', '-feat', 'band_0', 'band_1', 'band_2', \n",
" 'band_3', 'band_4', 'band_5', 'band_6', 'band_7', 'band_8', \n",
" 'band_9', 'band_10', 'band_11','-io.confmatout',confmatrix]\n",
"subprocess.call(args)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### The confusion matrix give us some stats on the learning step"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"stats_learn = open(confmatrix,\"r\") \n",
"#for line in stats_learn:\n",
"# print(line)\n",
" \n",
"tab_val = []\n",
"for i in range(6):\n",
" line = stats_learn.readline()\n",
" if (i>1):\n",
" datas = np.array(line.split(',')).astype(int)\n",
" print(\"Recall class\",i-2,\" \\t:\",datas[i-2]/datas.sum())\n",
" tab_val.append(datas)\n",
"print(\"---------------------------------------------\")\n",
"for i in range(4):\n",
" precision = tab_val[i][i]/(tab_val[0][i]+tab_val[1][i]+tab_val[2][i]+tab_val[3][i])\n",
" print(\"Precision class\",i, \" \\t:\",precision)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"app = otbApplication.Registry.CreateApplication(\"ImageClassifier\")\n",
"app.SetParameterString(\"in\",image_stack)\n",
"app.SetParameterString(\"model\",rf_model)\n",
"app.SetParameterString(\"out\",classif)\n",
"app.Execute()\n",
"\n",
"app2 = otbApplication.Registry.CreateApplication(\"ColorMapping\")\n",
"app2.SetParameterInputImage(\"in\",app.GetParameterOutputImage(\"out\"))\n",
"app2.SetParameterString(\"method\",\"custom\")\n",
"app2.SetParameterString(\"method.custom.lut\",colormap)\n",
"app2.SetParameterString(\"out\",classif)\n",
"app2.ExecuteAndWriteOutput()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import rasterio\n",
"import display_api\n",
"\n",
"raster = rasterio.open(classif)\n",
"\n",
"m, dc = display_api.rasters_on_map([raster, rasterio.open(images_list[0])], OUTPUT_DIR, [\"Classification 4 classes\",\"Image S2B\"])\n",
"m"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
......@@ -18,3 +18,19 @@ def list_images(data_dir, suffix):
print(len(list_im)," images will be used")
return list_im
def list_images(data_dir, prefix, suffix):
"""
returns a list of file names, contained in a directory, with a name that matches a suffix
:param data_dir: directory to browse for files
:param prefix: prefix of the files to search for
:param suffix: suffix of the files to search for
"""
list_im = []
for root, dirs, files in os.walk(data_dir):
for name in files:
if name.endswith(suffix) and name.startswith(prefix):
list_im.append(str(root)+"/"+str(name))
print(len(list_im)," images will be used")
return list_im
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment