Analisis y clasificación de imágenes satelitales

Maestría en Ecohidrología
Universidad de Cuenca
http://www.ucuenca.edu.ec/maestria-ecohidrologia/

Daniela Ballari (PhD)
daniela.ballari@ucuenca.edu.ec
http://www.researchgate.net/profile/Daniela_Ballari/publications

Curso completo en: http://rpubs.com/daniballari/analisis_espacial_indice

Objetivo de la práctica. Utilizar imágenes satelitales de Landsat 8 para comprender los conceptos de combinación de bandas, cálculo de índices de vegetación como es NDVI y clasificación supervisada y no supervisada. La sección de clasificación está basada en https://geoscripting-wur.github.io/AdvancedRasterAnalysis/#classifying-raster-data.

Una presentación teórica sobre teledetección y clasificación puede accederse aqui

Temario

A- Cargar librerias y datos
B- Combinación de bandas
C- NDVI - Indice de vegetación normalizada
D- Clasificación no supervisada k-means
E- Clasificación supervisada RandomForest

A- Cargar librerias y datos

library(raster)

## Loading required package: sp

library(rgdal)

## rgdal: version: 1.0-7, (SVN revision 559)
##  Geospatial Data Abstraction Library extensions to R successfully loaded
##  Loaded GDAL runtime: GDAL 1.11.2, released 2015/02/10
##  Path to GDAL shared files: C:/Users/dani/Documents/R/win-library/3.2/rgdal/gdal
##  GDAL does not use iconv for recoding strings.
##  Loaded PROJ.4 runtime: Rel. 4.9.1, 04 March 2015, [PJ_VERSION: 491]
##  Path to PROJ.4 shared files: C:/Users/dani/Documents/R/win-library/3.2/rgdal/proj
##  Linking to sp version: 1.2-0

library(randomForest)

## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.

library(RColorBrewer)

setwd("C:/R_ecohidrologia/clasificacion-imagenes")

files <- list.files(pattern='.tif') #extraer listado de archivos
files

## [1] "b2.tif" "b3.tif" "b4.tif" "b5.tif" "b6.tif" "b7.tif"

imageraster <- raster(files[1])#cargar imagen
spplot(imageraster)

names(imageraster)

## [1] "b2"

rbrick <- imageraster#asignar a la variable rbrick

for (i in 2:length(files)) #leer el resto de imagenes
{ imageraster <- raster(files[i])
  rbrick <- addLayer(rbrick,imageraster)
}
rbrick

## class       : RasterStack 
## dimensions  : 413, 682, 281666, 6  (nrow, ncol, ncell, nlayers)
## resolution  : 29.98118, 29.96721  (x, y)
## extent      : 714390.8, 734838, -324721, -312344.6  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=17 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## names       :    b2,    b3,    b4,    b5,    b6,    b7 
## min values  :     0,     0,     0,     0,     0,     0 
## max values  : 65535, 65535, 65535, 65535, 65535, 65535

names(rbrick)

## [1] "b2" "b3" "b4" "b5" "b6" "b7"

summary(rbrick)

## Warning in .local(object, ...): summary is an estimate based on a sample of 1e+05 cells (35.5% of all cells)

##            b2    b3    b4    b5       b6    b7
## Min.        0     0     0     0     0.00     0
## 1st Qu.     0     0     0     0     0.00     0
## Median      0     0     0     0     0.00     0
## 3rd Qu.  9049  8974  8478 14144 12705.25  9041
## Max.    37243 37478 57077 59888 63155.00 37243
## NA's        0     0     0     0     0.00     0

rbrick <- projectRaster(rbrick,crs="+init=epsg:32717")
rbrick

## class       : RasterBrick 
## dimensions  : 423, 692, 292716, 6  (nrow, ncol, ncell, nlayers)
## resolution  : 30, 30  (x, y)
## extent      : 714240.8, 735000.8, 9675115, 9687805  (xmin, xmax, ymin, ymax)
## coord. ref. : +init=epsg:32717 +proj=utm +zone=17 +south +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## data source : in memory
## names       :        b2,        b3,        b4,        b5,        b6,        b7 
## min values  : -4121.761, -4108.614, -3987.156, -6075.951, -4911.979, -4080.662 
## max values  :  44964.99,  46990.53,  43041.27,  52095.74,  58746.49,  44964.99

#reemplazar valores 0 por NA
rbrick[rbrick[] == 0 ] <- NA

spplot(rbrick)

#Guardar archivo multibanda
 writeRaster(rbrick, filename="rbrick.tif", format="GTiff", overwrite=TRUE)

B- Combinación de bandas

# color natural
plotRGB(rbrick, r=3, g=2, b=1,  axes=TRUE, colNA="white", stretch='lin')

# falso color vegetacion
plotRGB(rbrick, r=4, g=3, b=2,  axes=TRUE, colNA="white", stretch='hist')

# falso color urbano
plotRGB(rbrick, r=6, g=5, b=3,  axes=TRUE, colNA="white", stretch='hist')

# agricultura
plotRGB(rbrick, r=5, g=4, b=1,  axes=TRUE, colNA="white", stretch='hist')

#Guardar archivo multibanda
 writeRaster(rbrick[[c(3,2,1)]], filename="321.tif", format="GTiff", overwrite=TRUE)

 writeRaster(rbrick[[c(4,3,2)]], filename="432.tif", format="GTiff", overwrite=TRUE)

 writeRaster(rbrick[[c(6,5,3)]], filename="653.tif", format="GTiff", overwrite=TRUE)

 writeRaster(rbrick[[c(1,2,3)]], filename="123.tif", format="GTiff", overwrite=TRUE)

C- NDVI - Indice de vegetación normalizada

# ndvi <- (NIR - R)/(NIR + R)
ndvi <- (rbrick[[4]] - rbrick[[3]])/(rbrick[[4]] + rbrick[[3]])
summary(ndvi)

##                 layer
## Min.    -1.143248e-01
## 1st Qu.  1.295918e-01
## Median   2.112367e-01
## 3rd Qu.  2.888631e-01
## Max.     6.234485e-01
## NA's     2.062220e+05

cuts <-seq(minValue(ndvi),maxValue(ndvi),length.out=8)
cuts = round(cuts,digits=2)
col.regions = brewer.pal(length(cuts)+2, "RdYlGn")
spplot(as(ndvi, 'SpatialGridDataFrame'),at=cuts,col.regions=col.regions,colorkey=list(labels=list(at=cuts),at=cuts),pretty=TRUE,scales=list(draw=T))

#NDVI por partes
ndvi<- function(nir,red, filename) {
  out <- raster(nir)
  out <- writeStart(out, filename, overwrite=TRUE)
  bs <- blockSize(nir)
  for (i in 1:bs$n) { 
    vnir <- getValues(nir, row=bs$row[i], nrows=bs$nrows[i] )
    vred <- getValues(red, row=bs$row[i], nrows=bs$nrows[i] )
    ndvi <- (vnir - vred)/(vnir + vred)
    out <- writeValues(out, ndvi, bs$row[i])
  }
  out <- writeStop(out)
  return(out)
}

ndvi(rbrick[[4]],rbrick[[3]], "ndvi.tif")

## class       : RasterLayer 
## dimensions  : 423, 692, 292716  (nrow, ncol, ncell)
## resolution  : 30, 30  (x, y)
## extent      : 714240.8, 735000.8, 9675115, 9687805  (xmin, xmax, ymin, ymax)
## coord. ref. : +proj=utm +zone=17 +south +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0 
## data source : C:\R_ecohidrologia\clasificacion-imagenes\ndvi.tif 
## names       : ndvi 
## values      : -0.1143248, 0.6234485  (min, max)

ndvi2<- raster("ndvi.tif") 

cuts <-seq(minValue(ndvi2),maxValue(ndvi2),length.out=8)
cuts = round(cuts,digits=2)
col.regions = brewer.pal(length(cuts)+2, "RdYlGn")
spplot(as(ndvi2, 'SpatialGridDataFrame'),at=cuts,col.regions=col.regions,colorkey=list(labels=list(at=cuts),at=cuts), pretty=TRUE,scales=list(draw=T))

D- Clasificación no supervisada

# Método:k-means - 3 clases
tablavalores <- getValues(rbrick)
km <-kmeans(na.omit(tablavalores), 3)
names(km)

## [1] "cluster"      "centers"      "totss"        "withinss"    
## [5] "tot.withinss" "betweenss"    "size"         "iter"        
## [9] "ifault"

rNA <- setValues(raster(rbrick), 0)#crear un raster en blanco con valores por defecto 0
for(i in 1:nlayers(rbrick)){ rNA[is.na(rbrick[[i]])] <- 1}# asignar valor de 1 a valores NA del raster rbrick
rNA <- getValues(rNA)#convertir rNA en un vector de valores
tablavalores <- as.data.frame(tablavalores)#convertir tablavalores en un data frame
tablavalores$clase[rNA==0] <- km$cluster #Si rNA es 0, asignar el valor del cluster k-mean 
tablavalores$clase[rNA==1] <- NA#Si rNA es 1, asignar NA
clases <- raster(rbrick)
clases <- setValues(clases, tablavalores$clase)#asignar los valores del raster clases a partir de  tablavalores$clase
plot(clases, legend=FALSE, col=c("pink", "blue", "green"))

writeRaster(clases, filename="kmeans_clase3.tif", format="GTiff", overwrite=TRUE)

# Método:k-means - 5 clases
tablavalores <- getValues(rbrick)
km <-kmeans(na.omit(tablavalores), 5)
rNA <- setValues(raster(rbrick), 0)#crear un raster en blanco con valores por defecto 0
for(i in 1:nlayers(rbrick)){ rNA[is.na(rbrick[[i]])] <- 1}# asgnar valor de 1 a valores NA del raster rbrick
rNA <- getValues(rNA)#convertir rNA en un vector de valores
tablavalores <- as.data.frame(tablavalores)#convertir tablavalores en un data frame
tablavalores$clase[rNA==0] <- km$cluster #Si rNA es 0, asignar el valor del cluster k-mean 
tablavalores$clase[rNA==1] <- NA#Si rNA es 1, asignar NA
clases <- raster(rbrick)
clases <- setValues(clases, tablavalores$clase)#asignar los valores del raster clases a partir de  tablavalores$clase
plot(clases, legend=FALSE, col=c("pink", "blue", "green", "red", "yellow"))

writeRaster(clases, filename="kmeans_clase5.tif", format="GTiff", overwrite=TRUE)

E- Clasificación supervisada

#Método: Random Forest 
#La clasificación aqui realizada es solo a efectos demostrativos. Para mejorar la clasificación deben incluirse mas poligonos que representen adecuadamente las clases seleccionadas.
#Leer archivo poligonos de entremanento
entrenamiento<-readOGR(".","entrenamiento_32717")#La clasificación aqui realizada es solo a efectos demostrativos. Para mejorar la clasificación deben incluirse mas poligonos que representen adecuadamente las clases seleccionadas.

## OGR data source with driver: ESRI Shapefile 
## Source: ".", layer: "entrenamiento_32717"
## with 19 features
## It has 4 fields

head(entrenamiento@data)

##   MC_ID    MC_info C_ID     C_info
## 0     1       agua    1       agua
## 1     1       agua    2       agua
## 2     2 vegetacion    3 vegetacion
## 3     2 vegetacion    4 vegetacion
## 4     2 vegetacion    5 vegetacion
## 5     2 vegetacion    6 vegetacion

entrenamiento@data$MC_ID

##  [1] 1 1 2 2 2 2 2 2 2 2 3 3 3 3 3 1 4 4 3

#rasterizar poligonos
clase <- rasterize(entrenamiento, rbrick, 'MC_ID')
spplot(clase, scales=list(draw=T))

#aplicar mascara sobre rbrick
mascara <- mask(rbrick, clase)

entrenamiento_r <- addLayer(mascara, clase)
plot(entrenamiento_r)

names(entrenamiento_r)<- c("b2", "b3", "b4", "b5", "b6","b7","clase")

tablavalores <- getValues(entrenamiento_r)
tablavalores <- na.omit(tablavalores)
tablavalores <- as.data.frame(tablavalores)
tablavalores$clase <- factor(tablavalores$clase, levels = c(1:4))

modelRF <- randomForest(x=tablavalores[ ,c(1:6)], y=tablavalores$clase,ntree=500, importance = TRUE, do.trace=FALSE)
colnames(modelRF$confusion) <- c("agua", "vegetacion", "urbano", "suelo desnudo","error de clase")
rownames(modelRF$confusion) <- c("agua", "vegetacion", "urbano", "suelo desnudo")

modelRF#para mejorar la clasificación deben incluirse mas poligonos que representen adecuadamente las clases seleccionadas.

## 
## Call:
##  randomForest(x = tablavalores[, c(1:6)], y = tablavalores$clase,      ntree = 500, importance = TRUE, do.trace = FALSE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 1.47%
## Confusion matrix:
##               agua vegetacion urbano suelo desnudo error de clase
## agua           131          0      3             0    0.022388060
## vegetacion       0        278      0             1    0.003584229
## urbano           1          0    610             5    0.009740260
## suelo desnudo    0          0      6            52    0.103448276

varImpPlot(modelRF)

predLC <- predict(rbrick, model=modelRF, filename="predicccion.tif", na.rm=TRUE, overwrite=TRUE)
save(modelRF, file="prediction_modelRF.RData")

spplot(as(predLC, "SpatialGridDataFrame"), cuts=4, at=0:4, col.regions=c("blue", "green", "red", "grey"))

Has llegado al final del material!