packages <- c('tmap','sf','spdep','dplyr','car','GWmodel','ggplot2','gridExtra')
for(p in packages){
  if(!require(p, character.only = T)){
    install.packages(p)
  }
  library(p, character.only = T)
}
# clear the workspace
rm(list = ls())
# load the data 
load("soils.RData")
load("df_sf.RData")
load("df_zone.RData")
load("boundary.RData")
# convert to sp format
df_sp = as(df_sf, "Spatial")
zone_sp = as(df_zone, "Spatial")
df_sf$Longitude <- data$Longitude
df_sp@data$Longitude <- data$Longitude
df_sf$Latitude <- data$Latitude
df_sp@data$Latitude <- data$Latitude
df_sf$TNPC <- log(df_sf$TNPC+0.0001)
df_sf$TPPC <- (df_sf$TPPC)^0.5
df_sf$SOCgkg <- log(df_sf$SOCgkg)
df_sf$NO3Ngkg <- log(abs(df_sf$NO3Ngkg))
df_sf$NH4Ngkg <- log(df_sf$NH4Ngkg)

df_sp@data$TNPC <- log(df_sp@data$TNPC+0.0001)
df_sp@data$TPPC <- (df_sp@data$TPPC)^0.5
df_sp@data$SOCgkg <- log(df_sp@data$SOCgkg)
df_sp@data$NO3Ngkg <- log(abs(df_sp@data$NO3Ngkg))
df_sp@data$NH4Ngkg <- log(df_sp@data$NH4Ngkg)
reg.mod = as.formula(TNPC ~SOCgkg+SiltPC+NO3Ngkg+NH4Ngkg)
mod.OLS = lm(reg.mod, data = df_sf)
summary(mod.OLS)
## 
## Call:
## lm(formula = reg.mod, data = df_sf)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.2115 -0.1594  0.0381  0.1987  3.1023 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.220137   0.222938  -9.959  < 2e-16 ***
## SOCgkg       0.689018   0.039624  17.389  < 2e-16 ***
## SiltPC       0.014503   0.001487   9.753  < 2e-16 ***
## NO3Ngkg      0.126049   0.029304   4.301 1.94e-05 ***
## NH4Ngkg     -0.146518   0.073502  -1.993   0.0466 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5434 on 684 degrees of freedom
## Multiple R-squared:  0.6091, Adjusted R-squared:  0.6069 
## F-statistic: 266.5 on 4 and 684 DF,  p-value: < 2.2e-16
vif(mod.OLS)
##   SOCgkg   SiltPC  NO3Ngkg  NH4Ngkg 
## 1.781388 1.329538 1.539140 1.248517
EUDM <- gw.dist(coordinates(df_sp))
gwr.bwd.f <- bw.gwr(reg.mod, df_sp, approach="AIC", kernel="bisquare", adaptive=FALSE, dMat=EUDM)
## Fixed bandwidth: 2313.006 AICc value: 1099.89 
## Fixed bandwidth: 1429.802 AICc value: 1087.707 
## Fixed bandwidth: 883.9523 AICc value: 1069.068 
## Fixed bandwidth: 546.5984 AICc value: 1090.815 
## Fixed bandwidth: 1092.448 AICc value: 1075.501 
## Fixed bandwidth: 755.0946 AICc value: 1067.221 
## Fixed bandwidth: 675.4562 AICc value: 1070.29 
## Fixed bandwidth: 804.3139 AICc value: 1067.427 
## Fixed bandwidth: 724.6754 AICc value: 1067.842 
## Fixed bandwidth: 773.8947 AICc value: 1067.156 
## Fixed bandwidth: 785.5138 AICc value: 1067.208 
## Fixed bandwidth: 766.7137 AICc value: 1067.156 
## Fixed bandwidth: 762.2756 AICc value: 1067.17 
## Fixed bandwidth: 769.4566 AICc value: 1067.153 
## Fixed bandwidth: 771.1518 AICc value: 1067.153 
## Fixed bandwidth: 768.4089 AICc value: 1067.153 
## Fixed bandwidth: 770.1041 AICc value: 1067.153
n.min <- 200 # user-specified minimum bandwidth
n.max <- max(EUDM)+100 # user-specified maximum bandwidth
interval.size <- 100
fixed <- seq(n.min,n.max,by=interval.size)
b.func.gwr <- matrix(nrow=length(fixed),ncol=1)
for(i in 1:length(fixed)) {
    g.gwr <- gwr.aic(fixed[i], Y = as.matrix(df_sp@data$TNPC), 
                              X = as.matrix(df_sp@data[, c("SOCgkg", "SiltPC", "NO3Ngkg", "NH4Ngkg")]), 
                              kernel="bisquare", adaptive=F, 
                              dp.locat = coordinates(df_sp), dMat=EUDM)
    b.func.gwr[i] <- g.gwr[1]
    if(i%%10 ==0) cat(i, "\t")
}
## Fixed bandwidth: 200 AICc value: 2974.398 
## Fixed bandwidth: 300 AICc value: 1344.895 
## Fixed bandwidth: 400 AICc value: 1142.449 
## Fixed bandwidth: 500 AICc value: 1105.506 
## Fixed bandwidth: 600 AICc value: 1095.959 
## Fixed bandwidth: 700 AICc value: 1095.849 
## Fixed bandwidth: 800 AICc value: 1102.622 
## Fixed bandwidth: 900 AICc value: 1111.476 
## Fixed bandwidth: 1000 AICc value: 1119.57 
## Fixed bandwidth: 1100 AICc value: 1127.882 
## 10   Fixed bandwidth: 1200 AICc value: 1136.154 
## Fixed bandwidth: 1300 AICc value: 1143.456 
## Fixed bandwidth: 1400 AICc value: 1149.623 
## Fixed bandwidth: 1500 AICc value: 1155.094 
## Fixed bandwidth: 1600 AICc value: 1159.788 
## Fixed bandwidth: 1700 AICc value: 1163.775 
## Fixed bandwidth: 1800 AICc value: 1167.147 
## Fixed bandwidth: 1900 AICc value: 1170.036 
## Fixed bandwidth: 2000 AICc value: 1172.541 
## Fixed bandwidth: 2100 AICc value: 1174.697 
## 20   Fixed bandwidth: 2200 AICc value: 1176.583 
## Fixed bandwidth: 2300 AICc value: 1178.307 
## Fixed bandwidth: 2400 AICc value: 1179.89 
## Fixed bandwidth: 2500 AICc value: 1181.407 
## Fixed bandwidth: 2600 AICc value: 1182.904 
## Fixed bandwidth: 2700 AICc value: 1184.378 
## Fixed bandwidth: 2800 AICc value: 1185.831 
## Fixed bandwidth: 2900 AICc value: 1187.232 
## Fixed bandwidth: 3000 AICc value: 1188.581 
## Fixed bandwidth: 3100 AICc value: 1189.877 
## 30   Fixed bandwidth: 3200 AICc value: 1191.116 
## Fixed bandwidth: 3300 AICc value: 1192.29 
## Fixed bandwidth: 3400 AICc value: 1193.396 
## Fixed bandwidth: 3500 AICc value: 1194.436 
## Fixed bandwidth: 3600 AICc value: 1195.411 
## Fixed bandwidth: 3700 AICc value: 1196.322 
## Fixed bandwidth: 3800 AICc value: 1197.173
fixed[which.min(b.func.gwr)]
## [1] 700
xy <- data.frame(x = fixed,y = b.func.gwr)
ggplot() + 
    geom_point(data = xy, aes(x=x, y=y), size = 0.7, alpha = 0.5) +
    geom_line(data = xy, aes(x=x, y=y)) +
    geom_vline(xintercept = gwr.bwd.f, colour = "red") +
    scale_x_continuous(breaks = seq(100, max(EUDM), 200))+
    labs(
        subtitle = "GWR bandwidth function", 
        x = "Bandwidth size (m)", 
        y = "AIC")

mod.GWR.f <- gwr.basic(reg.mod, data = df_sp, bw = gwr.bwd.f, kernel="bisquare", adaptive=FALSE, dMat=EUDM)
print(mod.GWR.f)
##    ***********************************************************************
##    *                       Package   GWmodel                             *
##    ***********************************************************************
##    Program starts at: 2020-04-05 10:59:20 
##    Call:
##    gwr.basic(formula = reg.mod, data = df_sp, bw = gwr.bwd.f, kernel = "bisquare", 
##     adaptive = FALSE, dMat = EUDM)
## 
##    Dependent (y) variable:  TNPC
##    Independent variables:  SOCgkg SiltPC NO3Ngkg NH4Ngkg
##    Number of data points: 689
##    ***********************************************************************
##    *                    Results of Global Regression                     *
##    ***********************************************************************
## 
##    Call:
##     lm(formula = formula, data = data)
## 
##    Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.2115 -0.1594  0.0381  0.1987  3.1023 
## 
##    Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
##    (Intercept) -2.220137   0.222938  -9.959  < 2e-16 ***
##    SOCgkg       0.689018   0.039624  17.389  < 2e-16 ***
##    SiltPC       0.014503   0.001487   9.753  < 2e-16 ***
##    NO3Ngkg      0.126049   0.029304   4.301 1.94e-05 ***
##    NH4Ngkg     -0.146518   0.073502  -1.993   0.0466 *  
## 
##    ---Significance stars
##    Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1 
##    Residual standard error: 0.5434 on 684 degrees of freedom
##    Multiple R-squared: 0.6091
##    Adjusted R-squared: 0.6069 
##    F-statistic: 266.5 on 4 and 684 DF,  p-value: < 2.2e-16 
##    ***Extra Diagnostic information
##    Residual sum of squares: 201.9788
##    Sigma(hat): 0.5422187
##    AIC:  1121.84
##    AICc:  1121.963
##    ***********************************************************************
##    *          Results of Geographically Weighted Regression              *
##    ***********************************************************************
## 
##    *********************Model calibration information*********************
##    Kernel function: bisquare 
##    Fixed bandwidth: 770.1041 
##    Regression points: the same locations as observations are used.
##    Distance metric: A distance matrix is specified for this model calibration.
## 
##    ****************Summary of GWR coefficient estimates:******************
##                    Min.    1st Qu.     Median    3rd Qu.    Max.
##    Intercept -3.6711378 -2.3659029 -1.9583848 -1.5091932 -0.1868
##    SOCgkg     0.3019910  0.5994925  0.6771323  0.7650495  1.0074
##    SiltPC    -0.0034119  0.0065802  0.0112325  0.0161693  0.0289
##    NO3Ngkg   -0.2189294  0.0132858  0.0895466  0.1996894  0.7412
##    NH4Ngkg   -1.0984084 -0.3088857 -0.1267562 -0.0085670  0.3957
##    ************************Diagnostic information*************************
##    Number of data points: 689 
##    Effective number of parameters (2trace(S) - trace(S'S)): 84.16203 
##    Effective degrees of freedom (n-2trace(S) + trace(S'S)): 604.838 
##    AICc (GWR book, Fotheringham, et al. 2002, p. 61, eq 2.33): 1067.153 
##    AIC (GWR book, Fotheringham, et al. 2002,GWR p. 96, eq. 4.22): 988.6646 
##    Residual sum of squares: 154.5778 
##    R-square value:  0.7008707 
##    Adjusted R-square value:  0.6591785 
## 
##    ***********************************************************************
##    Program stops at: 2020-04-05 10:59:20
plot.gwr.coefs = function(gwr.model.SDF, variable.name, tvalues) {
  # determine which observations are significant from via the t-values
  tval = tvalues
  signif = tval < -1.96 | tval > 1.96
  # create the background
  p = tm_shape(boundary)+tm_polygons("lightgrey")+
  # create the map 
  tm_shape(gwr.model.SDF) +
    tm_dots(variable.name, size = 0.2, midpoint = 0) +
  tm_layout(legend.position = c("right","bottom"))+
  # now add the t-values layer
  tm_shape(gwr.model.SDF[signif,]) +
        tm_dots(shape = 1, size = 0.2) 
  return(p)
}
p1 <- plot.gwr.coefs(mod.GWR.f$SDF, "Intercept", mod.GWR.f$SDF$Intercept_TV)
p2 <- plot.gwr.coefs(mod.GWR.f$SDF, "SOCgkg", mod.GWR.f$SDF$SOCgkg_TV)
p3 <- plot.gwr.coefs(mod.GWR.f$SDF, "SiltPC", mod.GWR.f$SDF$SiltPC_TV)
p4 <- plot.gwr.coefs(mod.GWR.f$SDF, "NO3Ngkg",mod.GWR.f$SDF$NO3Ngkg_TV)
p5 <- plot.gwr.coefs(mod.GWR.f$SDF, "NH4Ngkg",mod.GWR.f$SDF$NH4Ngkg_TV)
tmap_arrange(p2, p3, ncol=2)
## Warning: The shape boundary is invalid. See sf::st_is_valid

## Warning: The shape boundary is invalid. See sf::st_is_valid

tmap_arrange(p4, p5, ncol=2)
## Warning: The shape boundary is invalid. See sf::st_is_valid

## Warning: The shape boundary is invalid. See sf::st_is_valid

p1
## Warning: The shape boundary is invalid. See sf::st_is_valid

mod.MGWR.f1 <- gwr.multiscale(reg.mod, data = df_sp, max.iterations = 30,
             criterion="CVR", kernel = "bisquare", adaptive=FALSE,  
               bws0=c(gwr.bwd.f,gwr.bwd.f,gwr.bwd.f,gwr.bwd.f,gwr.bwd.f),
               dMats=list(EUDM,EUDM,EUDM,EUDM,EUDM),
               verbose = F, hatmatrix = F, predictor.centered=rep(T, 4))
# extract the bandwidths (see below)
mgwr.bwd.f  <- round(mod.MGWR.f1[[2]]$bws,1) # the estimated bandwidths
mod.MGWR.f2 <- gwr.multiscale(reg.mod, data = df_sp, max.iterations = 30,
             criterion="CVR", kernel = "bisquare", adaptive=FALSE,  
               bws0=c(mgwr.bwd.f),bw.seled=rep(T, 5),
               dMats=list(EUDM,EUDM,EUDM,EUDM,EUDM),
               verbose = F, hatmatrix = T, predictor.centered=rep(F, 4))