2,000 wetland/open water polygons were randomly selected from the classified spring wetland polygons. An additional 926 polygons were then selected to balance out the swamp, open water, and upland classes. These 2,926 polygons were then assigned a wetland class according to the Alberta Wetland Classification System. For each of these polygons, 72 different remote sensing metrics were extracted from Sentinel-1, Sentinel-2, and topographic data. These include static, multi-temporal, variability, and topographic measures.
Four landcover classes were assigned to the polygons. The distribution of the labels by class is seen below.
library(dplyr)
library(ggplot2)
library(xgboost)
library(knitr)
library(plotly)
library(caret)
library(kableExtra)
# ---- Load and assemble the labelled training table ----
# NOTE: the working directory is machine-specific; the three CSV files below
# are read relative to it.
setwd("C:/Users/Evan/Desktop/ALPHA/SouthernWetlandInventory/Explore")
d1 <- read.csv("trainingPolys_wClass.csv")
d2 <- read.csv("trainingPolys_wValues1.csv")
d3 <- read.csv("trainingPolys_wValues2.csv")

# Stack the two value tables row-wise, then join them to d1 on the shared ID.
d2 <- rbind(d2, d3)
d <- merge(d1, d2, by = "ID")

# area is set aside, dropped together with the other listed columns, and then
# re-attached, which moves it to the last column of d.
area <- d$area
d <- select(d, -ID, -OBJECTID, -Shape_Length, -Shape_Area, -notes, -area)
d <- rename(d, WetlandClassAbrev = WetlandClass)
d <- cbind(d, area)

# Keep only rows without any missing value.
d <- na.omit(d)

# Lookup table keyed on the class abbreviation: full class name, coarser
# wetland grouping, two colour columns and a numeric class code (lcClass).
jt <- data.frame(
  WetlandClassAbrev = c("M", "S", "OW", "U"),
  WetlandClass = c("Marsh", "Swamp", "Open Water", "Upland"),
  Wetland = c("Wetland", "Wetland", "Open water", "Upland"),
  WCcolor = c('#8c510a', '#858f30', '#08306B', '#006D2C'),
  Wcolor = c('#80cdc1', '#80cdc1', '#08306B', '#006D2C'),
  lcClass = c(0, 1, 2, 3)
)

# merge() places the key column first and appends the five lookup columns
# after area; later chunks select columns by these positions.
d <- merge(d, jt, by = "WetlandClassAbrev")
# Summary of interpretation: number of labelled polygons in each wetland class.
WC <- c("Marsh", "Swamp", "Open Water", "Upland")
summary <- summarize(group_by(d, WetlandClass), n())
kable_styling(
  kable(summary),
  bootstrap_options = "striped",
  font_size = 14
)
| WetlandClass | n() |
|---|---|
| Marsh | 1548 |
| Open Water | 305 |
| Swamp | 701 |
| Upland | 372 |
This explores the class separation for each of the input variables.
# Draw one violin plot per remote-sensing variable, split by wetland class,
# to inspect how well each variable separates the classes.
# The variables are picked by position: column 1 and the last six columns of
# d are excluded.
RSvars <- colnames(d)[2:(ncol(d) - 6)]
#RSvars <- colnames(d[,2:3])
# Iterating over the names directly is safe when RSvars is empty, unlike
# 1:length(RSvars), which would yield c(1, 0).
for (var in RSvars) {
  # 1st and 99th percentiles of the current variable.
  p99 <- unname(quantile(d[[var]], probs = 0.99, na.rm = TRUE))
  p01 <- unname(quantile(d[[var]], probs = 0.01, na.rm = TRUE))

  # Keep only rows strictly between the two percentiles so that extreme
  # values do not stretch the y axis. which() drops NA comparisons, matching
  # what dplyr::filter() does.
  values <- d[[var]]
  dClamp <- d[which(values < p99 & values > p01), , drop = FALSE]

  print(
    # .data[[var]] replaces the deprecated aes_string(); the y label is set
    # explicitly so the axis still shows the variable name.
    ggplot(dClamp, aes(x = WetlandClass, y = .data[[var]], fill = WetlandClass)) +
      geom_violin() +
      labs(y = var) +
      scale_fill_manual(name = "", values = c('#8c510a', '#08306B', '#858f30', '#006D2C'))
  )
}
Below we: 1) divide the label data into test and train, 2) test variable importance in a XGB model, 3) select variables to be included in the predictive model, 4) Test the cross correlation of the chosen variables, 5) predict the landcover class of the test data with the train model, 6) assess the accuracy of the prediction
The label data was divided into train and test data. Below is the number of samples per class in both the train and test data.
# ---- Train/test split ----
# Thread count passed as nthread to the xgboost() calls further down.
numThread <- 20

# Drop the descriptive label/colour columns; the numeric class code lcClass
# stays as the last column.
dML <- select(d, -WetlandClassAbrev, -WetlandClass, -Wetland, -WCcolor, -Wcolor)

# Seeded random split: 70 % of the rows go to training, the rest to test.
sampleS <- floor(nrow(dML) * 0.7)
set.seed(777)
picked <- sample(seq_len(nrow(dML)), size = sampleS)
dMLtrain <- dML[picked, ]
dMLtest <- dML[-picked, ]

# Number of training samples per class code.
trainSummary <- summarize(group_by(dMLtrain, lcClass), n())
kable_styling(
  kable(trainSummary, caption = "number of samples in train data"),
  bootstrap_options = "striped",
  font_size = 13
)
| lcClass | n() |
|---|---|
| 0 | 1087 |
| 1 | 478 |
| 2 | 218 |
| 3 | 265 |
# Number of test samples per class code.
testSummary <- summarize(group_by(dMLtest, lcClass), n())
kable_styling(
  kable(testSummary, caption = "number of samples in test data"),
  bootstrap_options = "striped",
  font_size = 13
)
| lcClass | n() |
|---|---|
| 0 | 461 |
| 1 | 223 |
| 2 | 87 |
| 3 | 107 |
The variable importance when all variables are put into a XGB model
# Fit a multi-class XGBoost model on every predictor in the training data and
# rank the predictors by importance.
# Predictors are selected by name (everything except lcClass) so the chunk no
# longer depends on lcClass being the last column.
trainD <- data.matrix(dMLtrain[, setdiff(colnames(dMLtrain), "lcClass"), drop = FALSE])
classes <- dMLtrain[["lcClass"]]

# FIX: the hyperparameters were previously spelled `min_child_weigth` and
# `sumbample`. XGBoost does not recognise those names, so the intended
# row subsampling (subsample = 0.5) never took effect.
# NOTE(review): any importance table rendered with the old spelling must be
# regenerated, because the fitted model changes with this fix.
fit <- xgboost(trainD, classes,
               objective = "multi:softmax",
               num_class = length(unique(classes)),
               nthread = numThread, nrounds = 500, max_depth = 4,
               eta = 0.03, gamma = 1, min_child_weight = 1,
               subsample = 0.5, colsample_bytree = 0.8, verbose = 0)

# Per-feature importance of the fitted model, then plotted.
varImp <- xgb.importance(feature_names = colnames(trainD), model = fit)
xgb.plot.importance(importance_matrix = varImp, cex = 1.2)
| Feature | Gain | Cover | Frequency | Importance |
|---|---|---|---|---|
| B4fall_mean | 0.13935 | 0.03453 | 0.01338 | 0.13935 |
| VVfall_mean | 0.08974 | 0.06547 | 0.03571 | 0.08974 |
| VHfall_mean | 0.08747 | 0.03588 | 0.01246 | 0.08747 |
| B8summer_mean | 0.08123 | 0.03666 | 0.01898 | 0.08123 |
| TPI_stdDev | 0.05465 | 0.04063 | 0.03663 | 0.05465 |
| SWI_mean | 0.05119 | 0.03214 | 0.02358 | 0.05119 |
| B8spring_mean | 0.02902 | 0.04738 | 0.03521 | 0.02902 |
| dB12_mean | 0.02707 | 0.03796 | 0.02542 | 0.02707 |
| dB11_mean | 0.02347 | 0.01804 | 0.01313 | 0.02347 |
| dVH_mean | 0.02136 | 0.02617 | 0.01924 | 0.02136 |
| TPI_mean | 0.01900 | 0.03458 | 0.03128 | 0.01900 |
| NDWIfall_stdDev | 0.01832 | 0.02401 | 0.01438 | 0.01832 |
| logArea | 0.01623 | 0.02227 | 0.01890 | 0.01623 |
| NDWIfall_mean | 0.01500 | 0.01032 | 0.01020 | 0.01500 |
| B3fall_mean | 0.01496 | 0.00543 | 0.00360 | 0.01496 |
| NDVIfall_mean | 0.01347 | 0.02046 | 0.01188 | 0.01347 |
| NDVIsummer_mean | 0.01170 | 0.02111 | 0.01999 | 0.01170 |
| dVV_mean | 0.01152 | 0.02137 | 0.02108 | 0.01152 |
| B2spring_mean | 0.01029 | 0.01248 | 0.01246 | 0.01029 |
| ALratio | 0.00985 | 0.01237 | 0.02333 | 0.00985 |
| B11summer_mean | 0.00975 | 0.00531 | 0.00920 | 0.00975 |
| DPOLfall_mean | 0.00874 | 0.00993 | 0.01288 | 0.00874 |
| dSummerNDVI_mean | 0.00865 | 0.01349 | 0.01890 | 0.00865 |
| ARIfall_mean | 0.00763 | 0.01032 | 0.01254 | 0.00763 |
| VVspring_mean | 0.00730 | 0.01152 | 0.01589 | 0.00730 |
| B11spring_stdDev | 0.00632 | 0.00954 | 0.01129 | 0.00632 |
| B12summer_mean | 0.00629 | 0.00931 | 0.00920 | 0.00629 |
| B8fall_mean | 0.00618 | 0.00477 | 0.00820 | 0.00618 |
| B3spring_mean | 0.00602 | 0.00385 | 0.00544 | 0.00602 |
| dARI_mean | 0.00582 | 0.00824 | 0.01472 | 0.00582 |
| B11fall_mean | 0.00573 | 0.00734 | 0.01330 | 0.00573 |
| VHspring_mean | 0.00573 | 0.01098 | 0.01539 | 0.00573 |
| DPOLspring_mean | 0.00568 | 0.00989 | 0.01271 | 0.00568 |
| VHfall_stdDev | 0.00547 | 0.01567 | 0.01405 | 0.00547 |
| NDVIspring_mean | 0.00544 | 0.01062 | 0.00845 | 0.00544 |
| B2fall_mean | 0.00475 | 0.01299 | 0.01296 | 0.00475 |
| dNDVI_stdDev | 0.00475 | 0.00929 | 0.00920 | 0.00475 |
| B12spring_mean | 0.00465 | 0.01316 | 0.01196 | 0.00465 |
| VVspring_stdDev | 0.00454 | 0.00672 | 0.00552 | 0.00454 |
| dB11_stdDev | 0.00454 | 0.00852 | 0.01213 | 0.00454 |
| B4fall_stdDev | 0.00453 | 0.00689 | 0.01188 | 0.00453 |
| B12fall_mean | 0.00445 | 0.00783 | 0.01380 | 0.00445 |
| NDWIspring_mean | 0.00428 | 0.00873 | 0.00753 | 0.00428 |
| dVV_stdDev | 0.00426 | 0.01328 | 0.01430 | 0.00426 |
| B8fall_stdDev | 0.00426 | 0.01254 | 0.01204 | 0.00426 |
| area | 0.00420 | 0.00554 | 0.00460 | 0.00420 |
| SWI_stdDev | 0.00389 | 0.01378 | 0.01296 | 0.00389 |
| B12fall_stdDev | 0.00383 | 0.00551 | 0.01154 | 0.00383 |
| ARIfall_stdDev | 0.00374 | 0.01054 | 0.01288 | 0.00374 |
| ARIspring_mean | 0.00372 | 0.00546 | 0.00953 | 0.00372 |
| B12summer_stdDev | 0.00372 | 0.01121 | 0.01213 | 0.00372 |
| B11spring_mean | 0.00370 | 0.00532 | 0.00928 | 0.00370 |
| NDVIfall_stdDev | 0.00363 | 0.00812 | 0.01179 | 0.00363 |
| B8spring_stdDev | 0.00352 | 0.00380 | 0.00853 | 0.00352 |
| B4spring_stdDev | 0.00335 | 0.00728 | 0.00945 | 0.00335 |
| dNDWI_mean | 0.00325 | 0.00982 | 0.00644 | 0.00325 |
| dVH_stdDev | 0.00320 | 0.00465 | 0.00878 | 0.00320 |
| B2summer_mean | 0.00320 | 0.00216 | 0.00477 | 0.00320 |
| dDPOL_stdDev | 0.00310 | 0.00443 | 0.00769 | 0.00310 |
| ARIsummer_stdDev | 0.00308 | 0.00392 | 0.00627 | 0.00308 |
| VVfall_stdDev | 0.00298 | 0.00830 | 0.00928 | 0.00298 |
| ARIsummer_mean | 0.00295 | 0.00637 | 0.00811 | 0.00295 |
| B4spring_mean | 0.00285 | 0.00226 | 0.00293 | 0.00285 |
| B11fall_stdDev | 0.00285 | 0.00497 | 0.00652 | 0.00285 |
| B8summer_stdDev | 0.00256 | 0.00532 | 0.00887 | 0.00256 |
| B4summer_stdDev | 0.00256 | 0.00314 | 0.00602 | 0.00256 |
| ARIspring_stdDev | 0.00240 | 0.00580 | 0.00786 | 0.00240 |
| B3fall_stdDev | 0.00232 | 0.00355 | 0.00728 | 0.00232 |
| dB12_stdDev | 0.00225 | 0.00419 | 0.00820 | 0.00225 |
| dDPOL_mean | 0.00215 | 0.00380 | 0.00652 | 0.00215 |
| B4summer_mean | 0.00208 | 0.00152 | 0.00427 | 0.00208 |
| B3spring_stdDev | 0.00204 | 0.00577 | 0.00594 | 0.00204 |
| dSummerNDVI_stdDev | 0.00202 | 0.00312 | 0.00703 | 0.00202 |
| dNDVI_mean | 0.00190 | 0.00247 | 0.00552 | 0.00190 |
| B2fall_stdDev | 0.00186 | 0.00528 | 0.00644 | 0.00186 |
| B3summer_mean | 0.00174 | 0.00133 | 0.00485 | 0.00174 |
| DPOLfall_stdDev | 0.00173 | 0.00368 | 0.00577 | 0.00173 |
| DPOLspring_stdDev | 0.00169 | 0.00326 | 0.00585 | 0.00169 |
| NDWIspring_stdDev | 0.00165 | 0.00271 | 0.00560 | 0.00165 |
| NDWIsummer_mean | 0.00159 | 0.00243 | 0.00468 | 0.00159 |
| dNDWI_stdDev | 0.00153 | 0.00261 | 0.00544 | 0.00153 |
| B12spring_stdDev | 0.00134 | 0.00122 | 0.00351 | 0.00134 |
| NDVIspring_stdDev | 0.00133 | 0.00377 | 0.00544 | 0.00133 |
| NDWIsummer_stdDev | 0.00126 | 0.00188 | 0.00460 | 0.00126 |
| dARI_stdDev | 0.00115 | 0.00134 | 0.00360 | 0.00115 |
| B3summer_stdDev | 0.00112 | 0.00069 | 0.00360 | 0.00112 |
| VHspring_stdDev | 0.00103 | 0.00121 | 0.00343 | 0.00103 |
| B2spring_stdDev | 0.00093 | 0.00267 | 0.00284 | 0.00093 |
| NDVIsummer_stdDev | 0.00086 | 0.00136 | 0.00360 | 0.00086 |
| B2summer_stdDev | 0.00082 | 0.00079 | 0.00301 | 0.00082 |
| B12summer_stdDev.1 | 0.00069 | 0.00162 | 0.00243 | 0.00069 |
Cross correlation of chosen variables and results of the XGB prediction
# Training subset restricted to the predictors chosen for the final model,
# with the class code lcClass kept as the last column.
dXGB <- dMLtrain[, c(
  "B4fall_mean", "VVfall_mean", "VHfall_mean", "B8summer_mean",
  "SWI_mean", "B8spring_mean", "TPI_stdDev", "dB12_mean",
  "dVH_mean", "TPI_mean", "NDVIsummer_mean", "dSummerNDVI_mean",
  "NDWIfall_stdDev", "NDWIfall_mean", "B3fall_mean", "lcClass"
), drop = FALSE]

# Pairwise correlation between the chosen predictors (and the class code).
kable_styling(
  kable(cor(dXGB), caption = "cross correlation of slecected variables", digits = 2),
  bootstrap_options = "striped",
  font_size = 12
)
| B4fall_mean | VVfall_mean | VHfall_mean | B8summer_mean | SWI_mean | B8spring_mean | TPI_stdDev | dB12_mean | dVH_mean | TPI_mean | NDVIsummer_mean | dSummerNDVI_mean | NDWIfall_stdDev | NDWIfall_mean | B3fall_mean | lcClass | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| B4fall_mean | 1.00 | -0.19 | -0.51 | 0.43 | 0.39 | 0.32 | -0.38 | -0.56 | 0.54 | 0.09 | -0.05 | -0.05 | -0.52 | -0.51 | 0.98 | -0.53 |
| VVfall_mean | -0.19 | 1.00 | 0.78 | 0.38 | -0.50 | 0.24 | 0.25 | 0.33 | -0.61 | -0.02 | 0.67 | -0.29 | -0.23 | -0.36 | -0.24 | 0.14 |
| VHfall_mean | -0.51 | 0.78 | 1.00 | 0.12 | -0.51 | 0.03 | 0.36 | 0.47 | -0.91 | -0.05 | 0.49 | -0.23 | 0.00 | -0.01 | -0.52 | 0.39 |
| B8summer_mean | 0.43 | 0.38 | 0.12 | 1.00 | -0.04 | 0.35 | -0.08 | -0.14 | 0.02 | 0.09 | 0.63 | -0.47 | -0.57 | -0.59 | 0.37 | -0.33 |
| SWI_mean | 0.39 | -0.50 | -0.51 | -0.04 | 1.00 | -0.23 | -0.66 | -0.54 | 0.49 | 0.11 | -0.24 | 0.00 | -0.05 | -0.10 | 0.40 | -0.48 |
| B8spring_mean | 0.32 | 0.24 | 0.03 | 0.35 | -0.23 | 1.00 | 0.20 | 0.30 | 0.04 | 0.03 | 0.11 | -0.06 | -0.37 | -0.33 | 0.32 | 0.01 |
| TPI_stdDev | -0.38 | 0.25 | 0.36 | -0.08 | -0.66 | 0.20 | 1.00 | 0.46 | -0.36 | -0.23 | 0.06 | 0.10 | 0.18 | 0.29 | -0.36 | 0.51 |
| dB12_mean | -0.56 | 0.33 | 0.47 | -0.14 | -0.54 | 0.30 | 0.46 | 1.00 | -0.48 | -0.03 | 0.08 | -0.09 | 0.31 | 0.37 | -0.52 | 0.56 |
| dVH_mean | 0.54 | -0.61 | -0.91 | 0.02 | 0.49 | 0.04 | -0.36 | -0.48 | 1.00 | 0.07 | -0.30 | 0.14 | -0.14 | -0.13 | 0.52 | -0.45 |
| TPI_mean | 0.09 | -0.02 | -0.05 | 0.09 | 0.11 | 0.03 | -0.23 | -0.03 | 0.07 | 1.00 | 0.08 | -0.07 | -0.13 | -0.09 | 0.07 | -0.11 |
| NDVIsummer_mean | -0.05 | 0.67 | 0.49 | 0.63 | -0.24 | 0.11 | 0.06 | 0.08 | -0.30 | 0.08 | 1.00 | -0.70 | -0.32 | -0.49 | -0.14 | -0.13 |
| dSummerNDVI_mean | -0.05 | -0.29 | -0.23 | -0.47 | 0.00 | -0.06 | 0.10 | -0.09 | 0.14 | -0.07 | -0.70 | 1.00 | 0.08 | 0.15 | 0.00 | 0.12 |
| NDWIfall_stdDev | -0.52 | -0.23 | 0.00 | -0.57 | -0.05 | -0.37 | 0.18 | 0.31 | -0.14 | -0.13 | -0.32 | 0.08 | 1.00 | 0.68 | -0.45 | 0.42 |
| NDWIfall_mean | -0.51 | -0.36 | -0.01 | -0.59 | -0.10 | -0.33 | 0.29 | 0.37 | -0.13 | -0.09 | -0.49 | 0.15 | 0.68 | 1.00 | -0.42 | 0.50 |
| B3fall_mean | 0.98 | -0.24 | -0.52 | 0.37 | 0.40 | 0.32 | -0.36 | -0.52 | 0.52 | 0.07 | -0.14 | 0.00 | -0.45 | -0.42 | 1.00 | -0.47 |
| lcClass | -0.53 | 0.14 | 0.39 | -0.33 | -0.48 | 0.01 | 0.51 | 0.56 | -0.45 | -0.11 | -0.13 | 0.12 | 0.42 | 0.50 | -0.47 | 1.00 |
# Fit the final XGBoost model on the selected predictors, predict the held-out
# test polygons and summarise the result as a confusion matrix with user's,
# producer's and overall accuracy.

# Predictors are everything in dXGB except the class code.
predictorCols <- setdiff(colnames(dXGB), "lcClass")
trainD <- data.matrix(dXGB[, predictorCols, drop = FALSE])
classes <- dXGB[["lcClass"]]

# FIX: the hyperparameters were previously spelled `min_child_weigth` and
# `sumbample`. XGBoost does not recognise those names, so the intended
# row subsampling (subsample = 0.5) never took effect.
# NOTE(review): any accuracy figures or confusion matrix rendered with the old
# spelling must be regenerated, because the fitted model changes with this fix.
fit <- xgboost(trainD, classes,
               objective = "multi:softmax",
               num_class = length(unique(classes)),
               nthread = numThread, nrounds = 500, max_depth = 4,
               eta = 0.03, gamma = 1, min_child_weight = 1,
               subsample = 0.5, colsample_bytree = 0.8, verbose = 0)

# Take the test columns from dXGB itself so that the train and test matrices
# cannot drift apart (the list used to be typed out a second time).
dXGBtest <- dMLtest[, colnames(dXGB), drop = FALSE]
testD <- data.matrix(dXGBtest[, predictorCols, drop = FALSE])

# multi:softmax returns the predicted class code for each row.
pred <- predict(fit, testD)
results <- dXGBtest$lcClass
df <- na.omit(cbind(results, pred))

# Overall accuracy: share of rows where prediction equals reference.
accuracy <- mean(df[, "results"] == df[, "pred"])

# Class names in lcClass order (0 = Marsh, 1 = Swamp, 2 = Open Water,
# 3 = Upland). Giving both factors the same explicit levels keeps
# confusionMatrix() working even if a class is never predicted.
classNames <- c("Marsh", "Swamp", "Open Water", "Upland")
classLevels <- seq_along(classNames) - 1
cm <- confusionMatrix(
  factor(df[, "pred"], levels = classLevels),
  factor(df[, "results"], levels = classLevels)
)
Kappa <- cm$overall["Kappa"]

# Rows of the table are predictions, columns are references.
cm <- cm$table
colnames(cm) <- classNames
row.names(cm) <- classNames

# Correctly classified counts sit on the diagonal.
trues <- diag(cm)
user.accuracy <- t(trues / rowSums(cm) * 100)   # per predicted class (rows)
producer.accuracy <- trues / colSums(cm) * 100  # per reference class (columns)

# Append user's accuracy as an extra column, and producer's accuracy plus the
# overall accuracy (bottom-right cell) as an extra row.
cm <- cbind(cm, user.accuracy[1, ])
cm <- rbind(cm, c(producer.accuracy, accuracy * 100))
kable(cm, caption = "confusion matrix", digits = 2) %>%
  kable_styling(bootstrap_options = "striped", font_size = 14)
| Marsh | Swamp | Open Water | Upland | ||
|---|---|---|---|---|---|
| Marsh | 426.00 | 33.00 | 19.00 | 7.00 | 87.84 |
| Swamp | 24.00 | 178.00 | 1.00 | 6.00 | 85.17 |
| Open Water | 8.00 | 1.00 | 67.00 | 2.00 | 85.90 |
| Upland | 3.00 | 11.00 | 0.00 | 92.00 | 86.79 |
| 92.41 | 79.82 | 77.01 | 85.98 | 86.90 |