Import libraries and download the data

library(raster)
library(rasterVis)

ifolder <- './data/'
ofolder <- './output/'
dir.create(ifolder, showWarnings = FALSE)
dir.create(ofolder, showWarnings = FALSE)
dataURL <- "https://github.com/GeoScripting-WUR/AdvancedRasterAnalysis/archive/gh-pages.zip"
inputZip <- list.files(path=ifolder, pattern= '^.*\\.zip$')

if (length(inputZip) == 0){ ##only download when not alrady downloaded
  download.file(url = dataURL, destfile = 'data/landsatData.zip', method = 'wget')
} 

unzip('data/landsatData.zip', exdir=ifolder)

Load the data

load("data/AdvancedRasterAnalysis-gh-pages/data/GewataB2.rda")
load("data/AdvancedRasterAnalysis-gh-pages/data/GewataB3.rda")
load("data/AdvancedRasterAnalysis-gh-pages/data/GewataB4.rda")
load("data/AdvancedRasterAnalysis-gh-pages/data/GewataB1.rda")
load("data/AdvancedRasterAnalysis-gh-pages/data/GewataB5.rda")
load("data/AdvancedRasterAnalysis-gh-pages/data/GewataB7.rda")
load("data/AdvancedRasterAnalysis-gh-pages/data/vcfGewata.rda")
load("data/AdvancedRasterAnalysis-gh-pages/data/trainingPoly.rda")

Brick the layers

gewata <- brick(GewataB1, GewataB2, GewataB3, GewataB4, GewataB5, GewataB7)

Remove all values above 100 for

vcfGewata[vcfGewata > 100] <- NA

Add VCF to brick

covs <- addLayer(gewata, vcfGewata)

Rename the columns

names(covs) <- c('band1', 'band2', 'band3', 'band4', 'band5', 'band7', 'vcf')

Pair the bands to the vcf

pairs(covs)

Make covs a data frame and omit the NA’s

valuetable <- getValues(covs) 
valuetable <- na.omit(valuetable)
valuetable <- as.data.frame(valuetable)

Make the linear model and show the summary

regression <- lm(vcf ~ band1 + band2 + band3 + band4 + band5 + band7, data = valuetable)
summary(regression)
## 
## Call:
## lm(formula = vcf ~ band1 + band2 + band3 + band4 + band5 + band7, 
##     data = valuetable)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -58.205  -4.636   0.715   5.211 258.436 
## 
## Coefficients:
##               Estimate Std. Error  t value Pr(>|t|)    
## (Intercept)  8.549e+01  5.723e-02 1493.805   <2e-16 ***
## band1        9.291e-02  1.889e-04  491.817   <2e-16 ***
## band2       -1.505e-01  2.353e-04 -639.622   <2e-16 ***
## band3       -2.528e-03  1.698e-04  -14.889   <2e-16 ***
## band4        1.661e-02  2.790e-05  595.378   <2e-16 ***
## band5       -2.006e-02  7.036e-05 -285.119   <2e-16 ***
## band7        2.549e-05  8.969e-05    0.284    0.776    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 8.606 on 1808277 degrees of freedom
## Multiple R-squared:  0.8582, Adjusted R-squared:  0.8582 
## F-statistic: 1.824e+06 on 6 and 1808277 DF,  p-value: < 2.2e-16

Predict the VCF

predVCF <- predict(covs, model=regression, na.rm=TRUE)

Remove the negative values

predVCF[predVCF<0] <- NA

Plot the results

mycolorkey <- list(at=seq(0, 100, 5), labels=list(labels=seq(0, 100, 10), at=seq(0, 100, 10)), space = 'bottom')
levelplot(stack(vcfGewata, predVCF), col.regions = colorRampPalette(c('brown','yellow','darkgreen'))(255), colorkey = mycolorkey,
          names.attr = c('Original VCF','Predicted VCF'), main = list('Tree cover in Gewata',cex=1.8,vjust=-1),scales=list(draw=FALSE),xlab=list('Tree cover (%)',vjust=7))

Calculate the difference between the tree cover and the predicted tree cover

difference <- (predVCF - vcfGewata)

Calculate the root mean square error

RMSE <- sqrt((cellStats((difference)^2, stat = mean, na.rm = TRUE)))
names(RMSE) <- 'RMSE Total Raster'

Add column ‘Code’ to the trainingPoly

trainingPoly@data$Code <- as.numeric(trainingPoly@data$Class)

Rasterize the trainingPoly

zones <- rasterize(trainingPoly, difference, field='Code')

Mask the classes

differenceMasked <- mask(difference, zones)

Compute the mean for the difference^2 for the zones

zones <- zonal((differenceMasked)^2, zones)

Calculate the RSME for the zones

RMSEzones <- sqrt(zones)

Plot the result, remove the column ‘zone’, name the rows

rownames(RMSEzones) <- c('Croplands','Forest','Wetlands')
RMSEzones <- RMSEzones[,-1]
barplot(c(RMSEzones, RMSE), main = 'RMSE per zone', ylim = c(0, 12), col = c('lightgreen','darkgreen','lightblue','pink') )