Introduction

Summary of 2019 S1 and S2 data compared to Indonesian and Malaysian Palm data from https://essd.copernicus.org/articles/13/1211/2021/.

Data summary

Summary of the number of polygons of each class with 2019 Palm mask as training

library(dplyr)
library(ggplot2)
library(knitr)
library(plotly)
library(caret)


# Switch to the folder containing PalmSamplesExplore.csv.
# NOTE(review): absolute, machine-specific path; the script only runs where
# this folder exists.
setwd("C:/Users/evanr/Desktop/EOSD/PalmMasks/Explore")

# Read the sample table into a plain data frame.
d <- as.data.frame(read.csv("PalmSamplesExplore.csv"))

# Lookup table: numeric class code -> class label and display colour.
WC <- c("Palm", "Other landcover", "Water", "Urban")
joinC <- data.frame(
  classification = 0:3,
  classTxt = WC,
  WCcolor = c("#CBE39B", "#009474", "blue", "black")
)
head(joinC)
##   classification        classTxt WCcolor
## 1              0            Palm #CBE39B
## 2              1 Other landcover #009474
## 3              2           Water    blue
## 4              3           Urban   black
# Attach the class label and colour to each sample through the class code.
d <- merge(d, joinC, by = "classification")

# Drop the system.index, .geo and PointClass columns, then keep only rows
# without missing values.
d <- d %>%
  dplyr::select(-system.index, -.geo, -PointClass) %>%
  na.omit()

# Count the samples in each class label and render the counts as a table.
summary <- d %>%
  group_by(classTxt) %>%
  summarize(n())
kable(summary)
classTxt n()
Other landcover 1200
Palm 850
Urban 279
Water 432

Violin plots

With 2019 palm prediction as training

# Names of columns 2-44 of `d`, used as the variables to plot.
# NOTE(review): positional selection depends on the column order of the input
# table — confirm these are exactly the remote-sensing variables.
RSvars <- colnames(d)[2:44]

# Fill colours keyed by class label. Unnamed colours are handed out to the
# class labels in alphabetical order, which drew Urban in blue and Water in
# black; naming them keeps Water blue and Urban black, as in the class lookup
# table, while leaving the Palm / Other landcover colours as they were drawn.
class_fill <- c(
  "Other landcover" = "#ff0000",
  "Palm" = "#ef00ff",
  "Urban" = "black",
  "Water" = "blue"
)

# One violin plot per variable, split by class label.
for (rs_var in RSvars) {
  print(
    # `.data[[rs_var]]` looks the column up by its string name (replaces the
    # deprecated aes_string()); ylab() keeps the column name as the axis label.
    ggplot(d, aes(x = classTxt, y = .data[[rs_var]], fill = classTxt)) +
      geom_violin() +
      scale_fill_manual(name = "", values = class_fill) +
      xlab("Class") +
      ylab(rs_var)
  )
}

Violin plots - Palm and Other landcover only

With 2019 palm prediction as training

# Keep only class codes 0 and 1 (Palm and Other landcover in the class lookup
# table); dplyr::filter is spelled out so stats::filter can never be picked up.
d2 <- dplyr::filter(d, classification < 2)

# One violin plot per variable for the two remaining classes.
for (rs_var in RSvars) {
  print(
    # `.data[[rs_var]]` looks the column up by its string name (replaces the
    # deprecated aes_string()); ylab() keeps the column name as the axis label.
    ggplot(d2, aes(x = classTxt, y = .data[[rs_var]], fill = classTxt)) +
      geom_violin() +
      # Named colours make the class-to-colour mapping explicit; the pairing
      # is the one the unnamed vector produced (alphabetical label order).
      scale_fill_manual(
        name = "",
        values = c("Other landcover" = "#ff0000", "Palm" = "#ef00ff")
      ) +
      xlab("Class") +
      ylab(rs_var)
  )
}

Random Forest model and variable importance

With 2019 palm prediction as training

# Modelling table: everything in `d` except the class label and colour
# columns, so the numeric class code is the response.
dmodel <- d %>% dplyr::select(-classTxt, -WCcolor)
#dmodel <- dmodel %>% sample_n(300)

# Fit a random forest classifier through caret, treating the class code as a
# factor and every other column as a predictor.
# NOTE(review): no seed is set in this chunk, so resampling and the fitted
# forest will differ between runs.
RFModel <- train(as.factor(classification) ~ .,
                 data = dmodel,
                 method = "rf",
                 importance = TRUE, # spelled out: `T` can be reassigned
                 ntree = 500)
print(RFModel$finalModel)
## 
## Call:
##  randomForest(x = x, y = y, ntree = 500, mtry = min(param$mtry,      ncol(x)), importance = ..1) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 22
## 
##         OOB estimate of  error rate: 9.74%
## Confusion matrix:
##     0    1   2   3 class.error
## 0 790   57   0   3  0.07058824
## 1  69 1077  22  32  0.10250000
## 2   0   11 421   0  0.02546296
## 3  10   65   0 204  0.26881720
# Variable importance plot for the fitted model, using unscaled importance
# values (`FALSE` spelled out because `F` can be reassigned).
plot(varImp(RFModel, scale = FALSE))