Get data downloaded and in correct working directory.

rm(list= ls())
if(!require(raster)) {
  install.packages("raster")
}
## Loading required package: raster
## Loading required package: sp
library(raster)

if(!require(sp)) {
  install.packages("sp")
}
library(sp)


if (dir.exists("data") == FALSE) {dir.create("data")}

download.file(url="https://github.com/GeoScripting-WUR/AdvancedRasterAnalysis/raw/gh-pages/data/GewataBands.zip", destfile = "data/gewata.zip")

unzip("./data/gewata.zip", exdir = './data')


load("data/GewataB1.rda")
load("data/GewataB2.rda")
load("data/GewataB3.rda")
load("data/GewataB4.rda")
load("data/GewataB5.rda")
load("data/GewataB7.rda")
load("data/vcfGewata.rda")

Produce one or more plots that demonstrate the relationship between the Landsat bands and the VCF tree cover. also eliminating values higher than 100 (Measuring Errors). VCF is a percentage, higher than 100% is not possible.

vcfGewata[vcfGewata > 100] <- NA
opar <- par(mfrow=c(1, 3))
plot(vcfGewata, GewataB1)
## Warning in .local(x, y, ...): plot used a sample of 5.5% of the cells. You
## can use "maxpixels" to increase the sample)
plot(vcfGewata, GewataB2)
## Warning in .local(x, y, ...): plot used a sample of 5.5% of the cells. You
## can use "maxpixels" to increase the sample)
plot(vcfGewata, GewataB3)
## Warning in .local(x, y, ...): plot used a sample of 5.5% of the cells. You
## can use "maxpixels" to increase the sample)

plot(vcfGewata, GewataB4)
## Warning in .local(x, y, ...): plot used a sample of 5.5% of the cells. You
## can use "maxpixels" to increase the sample)
plot(vcfGewata, GewataB5)
## Warning in .local(x, y, ...): plot used a sample of 5.5% of the cells. You
## can use "maxpixels" to increase the sample)
plot(vcfGewata, GewataB7)
## Warning in .local(x, y, ...): plot used a sample of 5.5% of the cells. You
## can use "maxpixels" to increase the sample)

A linear relationship can be seen between the vcf and all bands except Band 4 (NIR). Now we can build the linear model (lm). Band 7 is not used in the linear model because the summary showed it was not significant.

alldata <- brick(GewataB1, GewataB2, GewataB3, GewataB4, GewataB5, GewataB7, vcfGewata)
names(alldata) <- c("band1", "band2", "band3", "band4", "band5", "band7", "VCF")
df <- as.data.frame(getValues(alldata))
linearmodel <- lm(VCF ~ band1 + band2 + band3 + band4 + band5, data=df)
summary(linearmodel)
## 
## Call:
## lm(formula = VCF ~ band1 + band2 + band3 + band4 + band5, data = df)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -58.204  -4.636   0.715   5.211 258.586 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  8.550e+01  5.601e-02  1526.5   <2e-16 ***
## band1        9.291e-02  1.876e-04   495.3   <2e-16 ***
## band2       -1.505e-01  2.314e-04  -650.5   <2e-16 ***
## band3       -2.504e-03  1.473e-04   -17.0   <2e-16 ***
## band4        1.661e-02  2.443e-05   679.9   <2e-16 ***
## band5       -2.004e-02  4.158e-05  -482.0   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.606 on 1808278 degrees of freedom
##   (13712 observations deleted due to missingness)
## Multiple R-squared:  0.8582, Adjusted R-squared:  0.8582 
## F-statistic: 2.189e+06 on 5 and 1808278 DF,  p-value: < 2.2e-16

squared root of the linear model.

summary(linearmodel)$r.squared
## [1] 0.8582281

Predict VCF values and visualize the result. Values of vcf prediction smaller than 0 are eliminated

lmMap <- predict(alldata, model=linearmodel)
lmMap[lmMap<0] <- NA
opar <- par(mfrow=c(1, 2))
plot(lmMap, main= 'Predicted VCF')
plot(vcfGewata, main= 'Real VCF')

computing the RMSE between our predicted and the actual tree cover values

names(lmMap) <- 'PredictedVCF'
dfmodel <- as.data.frame(lmMap)
RMSE_Predicted<-sqrt( mean( (dfmodel$PredictedVCF-df$VCF)^2 , na.rm = TRUE ) )
RMSE_Predicted
## [1] 8.35677

Are the differences between the predicted and actual tree cover the same for all of the 3 classes we used for the Random Forest classfication?

load("data/trainingPoly.rda")
trainingPoly@data$Code <- as.numeric(trainingPoly@data$Class)
trainingPoly@data
##    OBJECTID    Class Code
## 0         1  wetland    3
## 1         2  wetland    3
## 2         3  wetland    3
## 3         4  wetland    3
## 4         5  wetland    3
## 5         6   forest    2
## 6         7   forest    2
## 7         8   forest    2
## 8         9   forest    2
## 9        10   forest    2
## 10       11 cropland    1
## 11       12 cropland    1
## 12       13 cropland    1
## 13       14 cropland    1
## 14       15 cropland    1
## 15       16 cropland    1
Classes<-rasterize(trainingPoly,vcfGewata, field='Code')


mClasses_real<-zonal(vcfGewata, Classes, fun='mean')
mClasses_pred<-zonal(lmMap, Classes, fun='mean')

RMSEforest<- sqrt(mean( (mClasses_pred[1,2] - mClasses_real[1,2])^2 ))

RMSEwetland <-sqrt(mean( (mClasses_pred[2,2] - mClasses_real[2,2])^2) )

RMSEcrop <- sqrt(mean( (mClasses_pred[3,2] - mClasses_real[3,2])^2 ))

print(paste("RMSE of forest is",round(RMSEforest, digits = 2),", RMSE of wetland is",round(RMSEwetland, digits = 2),", and RMSE of croplands is",round(RMSEcrop, digits = 2)))
## [1] "RMSE of forest is 2.51 , RMSE of wetland is 2.32 , and RMSE of croplands is 6.91"