R Notebook

library(raster)

## Loading required package: sp

nlcd <- brick("c://Users//david//Desktop//rsdata//rs//nlcd-L1.tif")
names(nlcd) <- c("nlcd2001", "nlcd2011")
nlcd

## class      : RasterBrick 
## dimensions : 1230, 1877, 2308710, 2  (nrow, ncol, ncell, nlayers)
## resolution : 0.0002694946, 0.0002694946  (x, y)
## extent     : -121.9258, -121.42, 37.85402, 38.1855  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0 
## source     : c:/Users/david/Desktop/rsdata/rs/nlcd-L1.tif 
## names      : nlcd2001, nlcd2011 
## min values :        1,        1 
## max values :        9,        9

# The class names and colors for plotting
nlcdclass <- c("Water", "Developed", "Barren", "Forest", "Shrubland", "Herbaceous", "Planted/Cultivated", "Wetlands")
classdf <- data.frame(classvalue1 = c(1,2,3,4,5,7,8,9), classnames1 = nlcdclass)
# Hex codes of colors
classcolor <- c("#5475A8", "#B50000", "#D2CDC0", "#38814E", "#AF963C", "#D1D182", "#FBF65D", "#C8E6F8")
# Now we ratify (RAT = "Raster Attribute Table") the ncld2011 (define RasterLayer as a categorical variable). This is helpful for plotting.
nlcd2011 <- nlcd[[2]]
nlcd2011 <- ratify(nlcd2011)
rat <- levels(nlcd2011)[[1]]
#
rat$landcover <- nlcdclass
levels(nlcd2011) <- rat

set.seed(99)
samp2011 <- sampleStratified(nlcd2011, size = 200, na.rm = TRUE, sp = TRUE)
samp2011

## class       : SpatialPointsDataFrame 
## features    : 1600 
## extent      : -121.9257, -121.4207, 37.85415, 38.18536  (xmin, xmax, ymin, ymax)
## crs         : +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0 
## variables   : 2
## names       :    cell, nlcd2011 
## min values  :     829,        1 
## max values  : 2308569,        9

table(samp2011$nlcd2011)

## 
##   1   2   3   4   5   7   8   9 
## 200 200 200 200 200 200 200 200

library(rasterVis)

## Loading required package: lattice

## Loading required package: latticeExtra

## Loading required package: RColorBrewer

plt <- levelplot(nlcd2011, col.regions = classcolor, main = 'Distribution of Training Sites')
print(plt + layer(sp.points(samp2011, pch = 3, cex = 0.5, col = 1)))

landsat5 <- stack("c://Users//david//Desktop//rsdata//rs//centralvalley-2011LT5.tif")
names(landsat5) <- c('blue', 'green', 'red', 'NIR', 'SWIR1', 'SWIR2')

sampvals <- extract(landsat5, samp2011, df = TRUE)
sampvals <- sampvals[, -1]
sampdata <- data.frame(classvalue = samp2011@data$nlcd2011, sampvals)

library(rpart)
cart <- rpart(as.factor(classvalue)~., data=sampdata, method = 'class', minsplit = 5)
plot(cart, uniform=TRUE, main="Classification Tree")
text(cart, cex = 0.8)

pr2011 <- predict(landsat5, cart, type='class')
pr2011

## class      : RasterLayer 
## dimensions : 1230, 1877, 2308710  (nrow, ncol, ncell)
## resolution : 0.0002694946, 0.0002694946  (x, y)
## extent     : -121.9258, -121.42, 37.85402, 38.1855  (xmin, xmax, ymin, ymax)
## crs        : +proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0 
## source     : memory
## names      : layer 
## values     : 1, 9  (min, max)
## attributes :
##        ID value
##  from:  1     1
##   to :  8     9

pr2011 <- ratify(pr2011)
rat <- levels(pr2011)[[1]]
rat$legend <- classdf$classnames
levels(pr2011) <- rat
levelplot(pr2011, maxpixels = 1e6,
          col.regions = classcolor,
          scales=list(draw=FALSE),
          main = "Decision Tree classification of Landsat 5")

library(dismo)
set.seed(99)
j <- kfold(sampdata, k = 5, by=sampdata$classvalue)
table(j)

## j
##   1   2   3   4   5 
## 320 320 320 320 320

x <- list()
for (k in 1:5) {
    train <- sampdata[j!= k, ]
    test <- sampdata[j == k, ]
    cart <- rpart(as.factor(classvalue)~., data=train, method = 'class', minsplit = 5)
    pclass <- predict(cart, test, type='class')
    # create a data.frame using the reference and prediction
    x[[k]] <- cbind(test$classvalue, as.integer(pclass))
}

y <- do.call(rbind, x)
y <- data.frame(y)
colnames(y) <- c('observed', 'predicted')
conmat <- table(y)
# change the name of the classes
colnames(conmat) <- classdf$classnames
rownames(conmat) <- classdf$classnames
conmat

##                     predicted
## observed             Water Developed Barren Forest Shrubland Herbaceous
##   Water                172         6      1      3         3          1
##   Developed              8        93     22     10         8         37
##   Barren                 6        54     52      6         8         62
##   Forest                 0         3      2    102        49          8
##   Shrubland              0         8      1     52       105         12
##   Herbaceous             1        16     24      4        16        121
##   Planted/Cultivated     2        20      5     24        37         24
##   Wetlands              14        12      1     32        25         16
##                     predicted
## observed             Planted/Cultivated Wetlands
##   Water                               1       13
##   Developed                          20        2
##   Barren                              5        7
##   Forest                             13       23
##   Shrubland                          17        5
##   Herbaceous                         17        1
##   Planted/Cultivated                 81        7
##   Wetlands                           32       68

n <- sum(conmat)
diag <- diag(conmat)
OA <- sum(diag) / n
OA

## [1] 0.49625

# observed (true) cases per class
rowsums <- apply(conmat, 1, sum)
p <- rowsums / n
# predicted cases per class
colsums <- apply(conmat, 2, sum)
q <- colsums / n
expAccuracy <- sum(p*q)
kappa <- (OA - expAccuracy) / (1 - expAccuracy)
kappa

## [1] 0.4242857

## [1] 0.4257143

# Producer accuracy
PA <- diag / colsums
# User accuracy
UA <- diag / rowsums
outAcc <- data.frame(producerAccuracy = PA, userAccuracy = UA)
outAcc