Summary

Co kriging assumptions

The target variable must be

  • Normally distributed
  • Stationary - local variation doesn’t change in different areas of the map
  • No trends

The co variables must have

  • a feature-space correlation with the target variable
  • a spatial structure (i.e. be modelled as a regional variable)
  • a spatial co-variance with the target variable

Not 100% sure how to check most of those assumptions, but let’s check normality.

par(mfrow=c(1,3))
hist(my_grid$Mg, main = "", xlab = "Mg")
hist(log(my_grid$Mg), main = "", xlab = "log Mg")
hist(sqrt(my_grid$Mg), main = "", xlab = "sqrt Mg")

I’m going to be working with the sqrt transformed data, as they seem most normal.

Let’s look at the observed values

my_soil_df %>% as.data.frame() %>% 
  ggplot(aes(lon, lat, colour = MgSqrt)) +
  geom_point(size = 4) +
  my_colour

Package automap automatically fits variograms and krige

First I’m going to do ordinary kriging without the covariates, then cokriging with covariates, then repeat for each site individually.

Kriging

All sites

coordinates(my_soil_df) <- c("lon", "lat")
coordinates(my_grid) <- c("lon", "lat")
test_dat <- my_soil_df %>% as.data.frame() %>% filter(type == "random")

#Auto Kriging

#fit variogram

Mgsqrt_vgm <- autofitVariogram(MgSqrt ~ 1,  my_grid)
plot(Mgsqrt_vgm)

Mgsqrt_krige <- autoKrige(MgSqrt ~ 1, my_grid, my_soil_df)
## [using ordinary kriging]
## Warning in sqrt(krige_result$var1.var): NaNs produced
pred <- Mgsqrt_krige[1] %>% as.data.frame() %>%  left_join(test_dat, Mgsqrt_krige, by = c("krige_output.lon" = "lon")) %>% filter(type == "random")
mod <- lm(pred$krige_output.var1.pred ~ pred$MgSqrt)
summary(mod) #R squared = 0.6383
## 
## Call:
## lm(formula = pred$krige_output.var1.pred ~ pred$MgSqrt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.19454 -0.34609  0.03161  0.31411  1.24371 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.88682    0.20540   4.318 9.74e-05 ***
## pred$MgSqrt  0.65411    0.07689   8.507 1.35e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4991 on 41 degrees of freedom
## Multiple R-squared:  0.6383, Adjusted R-squared:  0.6295 
## F-statistic: 72.37 on 1 and 41 DF,  p-value: 1.348e-10
plot(pred$krige_output.var1.pred ~ pred$MgSqrt, xlab = "Observed", ylab = "Predicted")
abline(0,1)
abline(mod, col = "blue")

Mgsqrt_krige[1] %>% as.data.frame() %>% ggplot(aes(krige_output.lon, krige_output.lat, colour = krige_output.var1.pred)) +
  geom_point(size = 4) +
  my_colour

The r squared was 0.6383, but the ratio between predicted and observed wasn’t 1:1.

Does co-kriging improve this?

Co-kriging all sites

Mgsqrt_vgm <- autofitVariogram(MgSqrt ~ my_hue + value + chroma,  my_grid)
plot(Mgsqrt_vgm)

Mgsqrt_krige <- autoKrige(MgSqrt ~ my_hue + value + chroma, my_grid, my_soil_df)
## [using universal kriging]
pred <- Mgsqrt_krige[1] %>% as.data.frame() %>%  left_join(test_dat, Mgsqrt_krige, by = c("krige_output.lon" = "lon")) %>% filter(type == "random")
mod <- lm(pred$krige_output.var1.pred ~ pred$MgSqrt)
summary(mod)
## 
## Call:
## lm(formula = pred$krige_output.var1.pred ~ pred$MgSqrt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.08460 -0.35113 -0.09205  0.27283  1.20807 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.09968    0.20427   5.383 3.24e-06 ***
## pred$MgSqrt  0.57158    0.07647   7.475 3.55e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4963 on 41 degrees of freedom
## Multiple R-squared:  0.5768, Adjusted R-squared:  0.5664 
## F-statistic: 55.87 on 1 and 41 DF,  p-value: 3.551e-09
plot(pred$krige_output.var1.pred ~ pred$MgSqrt, xlab = "Observed", ylab = "Predicted")
abline(0,1)
abline(mod, col = "blue")

Mgsqrt_krige[1] %>% as.data.frame() %>% ggplot(aes(krige_output.lon, krige_output.lat, colour = krige_output.var1.pred)) +
  geom_point(size = 4) +
  my_colour

Cokriging actually reduced the r squared (0.5664)

I repeated the analysis with only one covariate at a time, but there wasn’t much effect on the r squared compared to ordinary kriging

hue only = 0.6377
value only = 0.5939
chroma only = 0.6442

Site 1

site1<- my_soil_df %>% as.data.frame() %>% filter(site == "site1")
site1_grid <- my_grid %>% as.data.frame() %>% filter(site == "site1")
test_dat <- site1 %>% as.data.frame() %>% filter(type == "random")

coordinates(site1) <- c("lon", "lat")
coordinates(site1_grid) <- c("lon", "lat")

#Ordinary kriging 


Mgsqrt_vgm <- autofitVariogram(MgSqrt ~ 1,  site1_grid)
plot(Mgsqrt_vgm)

Mgsqrt_krige <- autoKrige(MgSqrt ~ 1, site1_grid, site1)
## [using ordinary kriging]
pred <- Mgsqrt_krige[1] %>% as.data.frame() %>%  left_join(test_dat, Mgsqrt_krige, by = c("krige_output.lon" = "lon")) %>% filter(type == "random")

mod <- lm(pred$krige_output.var1.pred ~ pred$MgSqrt)
summary(mod)
## 
## Call:
## lm(formula = pred$krige_output.var1.pred ~ pred$MgSqrt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.54139 -0.27745 -0.05362  0.39217  0.74813 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   1.0667     0.3135   3.402  0.00590 **
## pred$MgSqrt   0.5624     0.1330   4.230  0.00141 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4529 on 11 degrees of freedom
## Multiple R-squared:  0.6192, Adjusted R-squared:  0.5846 
## F-statistic: 17.89 on 1 and 11 DF,  p-value: 0.001413
plot(pred$krige_output.var1.pred ~ pred$MgSqrt, xlab = "Observed", ylab = "Predicted")
abline(0,1)
abline(mod, col = "blue")

Mgsqrt_krige[1] %>% as.data.frame() %>% ggplot(aes(krige_output.lon, krige_output.lat, colour = krige_output.var1.pred)) +
  geom_point(size = 4) +
  my_colour

R squared is 0.6192, which is similar to doing all three plots at once

Co-kriging site 1

Mgsqrt_vgm <- autofitVariogram(MgSqrt ~ my_hue + value + chroma,  site1_grid)
plot(Mgsqrt_vgm)

Mgsqrt_krige <- autoKrige(MgSqrt ~ my_hue + value + chroma, site1_grid, site1)
## [using universal kriging]
pred <- Mgsqrt_krige[1] %>% as.data.frame() %>%  left_join(test_dat, Mgsqrt_krige, by = c("krige_output.lon" = "lon")) %>% filter(type == "random")

mod <- lm(pred$krige_output.var1.pred ~ pred$MgSqrt)
summary(mod)
## 
## Call:
## lm(formula = pred$krige_output.var1.pred ~ pred$MgSqrt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.58422 -0.21003 -0.06766  0.07946  1.19763 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.8276     0.3176   5.755 0.000127 ***
## pred$MgSqrt   0.2459     0.1347   1.825 0.095222 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4588 on 11 degrees of freedom
## Multiple R-squared:  0.2324, Adjusted R-squared:  0.1627 
## F-statistic: 3.331 on 1 and 11 DF,  p-value: 0.09522
plot(pred$krige_output.var1.pred ~ pred$MgSqrt, xlab = "Observed", ylab = "Predicted")
abline(0,1)
abline(mod, col = "blue")

Mgsqrt_krige[1] %>% as.data.frame() %>% ggplot(aes(krige_output.lon, krige_output.lat, colour = krige_output.var1.pred)) +
  geom_point(size = 4) +
  my_colour

R squared is bad = 0.1627

hue only = 0.6526
value only = 0.1801
chroma only = 0.6166

Site 2

site2<- my_soil_df %>% as.data.frame() %>% filter(site == "site2")
site2_grid <- my_grid %>% as.data.frame() %>% filter(site == "site2")
test_dat <- site2 %>% as.data.frame() %>% filter(type == "random")

coordinates(site2) <- c("lon", "lat")
coordinates(site2_grid) <- c("lon", "lat")

#Ordinary kriging 


Mgsqrt_vgm <- autofitVariogram(MgSqrt ~ 1,  site2_grid)
plot(Mgsqrt_vgm)

Mgsqrt_krige <- autoKrige(MgSqrt ~ 1, site2_grid, site2)
## [using ordinary kriging]
pred <- Mgsqrt_krige[1] %>% as.data.frame() %>%  left_join(test_dat, Mgsqrt_krige, by = c("krige_output.lon" = "lon")) %>% filter(type == "random")

mod <- lm(pred$krige_output.var1.pred ~ pred$MgSqrt)
summary(mod)
## 
## Call:
## lm(formula = pred$krige_output.var1.pred ~ pred$MgSqrt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77469 -0.34841 -0.05937  0.35239  0.76224 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.3015     0.3189  -0.945    0.359    
## pred$MgSqrt   1.0578     0.1455   7.271 2.74e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4399 on 15 degrees of freedom
## Multiple R-squared:  0.779,  Adjusted R-squared:  0.7643 
## F-statistic: 52.87 on 1 and 15 DF,  p-value: 2.735e-06
plot(pred$krige_output.var1.pred ~ pred$MgSqrt, xlab = "Observed", ylab = "Predicted")
abline(0,1)
abline(mod, col = "blue")

Mgsqrt_krige[1] %>% as.data.frame() %>% ggplot(aes(krige_output.lon, krige_output.lat, colour = krige_output.var1.pred)) +
  geom_point(size = 4) +
  my_colour

R squared is 0.779

Co-kriging site 2

Mgsqrt_vgm <- autofitVariogram(MgSqrt ~ my_hue + value + chroma,  site2_grid)
plot(Mgsqrt_vgm)

Mgsqrt_krige <- autoKrige(MgSqrt ~ my_hue + value + chroma, site2_grid, site2)
## [using universal kriging]
pred <- Mgsqrt_krige[1] %>% as.data.frame() %>%  left_join(test_dat, Mgsqrt_krige, by = c("krige_output.lon" = "lon")) %>% filter(type == "random")

mod <- lm(pred$krige_output.var1.pred ~ pred$MgSqrt)
summary(mod)
## 
## Call:
## lm(formula = pred$krige_output.var1.pred ~ pred$MgSqrt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.04195 -0.24275  0.00743  0.32932  1.13423 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -0.03032    0.37048  -0.082    0.936    
## pred$MgSqrt  0.95426    0.16903   5.646 4.65e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.5111 on 15 degrees of freedom
## Multiple R-squared:   0.68,  Adjusted R-squared:  0.6587 
## F-statistic: 31.87 on 1 and 15 DF,  p-value: 4.653e-05
plot(pred$krige_output.var1.pred ~ pred$MgSqrt, xlab = "Observed", ylab = "Predicted")
abline(0,1)
abline(mod, col = "blue")

Mgsqrt_krige[1] %>% as.data.frame() %>% ggplot(aes(krige_output.lon, krige_output.lat, colour = krige_output.var1.pred)) +
  geom_point(size = 4) +
  my_colour

r squared = 0.6587

hue only = 0.7771
value only = 0.7168
chroma only = 0.6869

Site 3

site3<- my_soil_df %>% as.data.frame() %>% filter(site == "site3")
site3_grid <- my_grid %>% as.data.frame() %>% filter(site == "site3")
test_dat <- site3 %>% as.data.frame() %>% filter(type == "random")

coordinates(site3) <- c("lon", "lat")
coordinates(site3_grid) <- c("lon", "lat")

#Ordinary kriging 


Mgsqrt_vgm <- autofitVariogram(MgSqrt ~ 1,  site3_grid)
plot(Mgsqrt_vgm)

Mgsqrt_krige <- autoKrige(MgSqrt ~ 1, site3_grid, site3)
## [using ordinary kriging]
pred <- Mgsqrt_krige[1] %>% as.data.frame() %>%  left_join(test_dat, Mgsqrt_krige, by = c("krige_output.lon" = "lon")) %>% filter(type == "random")

mod <- lm(pred$krige_output.var1.pred ~ pred$MgSqrt)
summary(mod)
## 
## Call:
## lm(formula = pred$krige_output.var1.pred ~ pred$MgSqrt)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8008 -0.2184  0.1063  0.3090  0.5636 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   2.7304     0.5377   5.078 0.000356 ***
## pred$MgSqrt   0.1715     0.1568   1.094 0.297341    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.4262 on 11 degrees of freedom
## Multiple R-squared:  0.09812,    Adjusted R-squared:  0.01613 
## F-statistic: 1.197 on 1 and 11 DF,  p-value: 0.2973
plot(pred$krige_output.var1.pred ~ pred$MgSqrt, xlab = "Observed", ylab = "Predicted")
abline(0,1)
abline(mod, col = "blue")

Mgsqrt_krige[1] %>% as.data.frame() %>% ggplot(aes(krige_output.lon, krige_output.lat, colour = krige_output.var1.pred)) +
  geom_point(size = 4) +
  my_colour

R squared = 0.09812, which is pretty terrible

Co-kriging site 3

Mgsqrt_vgm <- autofitVariogram(MgSqrt ~ my_hue + value + chroma,  site3_grid)
plot(Mgsqrt_vgm)

Mgsqrt_krige <- autoKrige(MgSqrt ~ my_hue + value + chroma, site3_grid, site3)
## [using universal kriging]
pred <- Mgsqrt_krige[1] %>% as.data.frame() %>%  left_join(test_dat, Mgsqrt_krige, by = c("krige_output.lon" = "lon")) %>% filter(type == "random")

mod <- lm(pred$krige_output.var1.pred ~ pred$MgSqrt)
summary(mod)
## 
## Call:
## lm(formula = pred$krige_output.var1.pred ~ pred$MgSqrt)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29625 -0.19738  0.08494  0.13519  0.24824 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.280492   0.253432  12.944 5.32e-08 ***
## pred$MgSqrt -0.001321   0.073895  -0.018    0.986    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2009 on 11 degrees of freedom
## Multiple R-squared:  2.907e-05,  Adjusted R-squared:  -0.09088 
## F-statistic: 0.0003197 on 1 and 11 DF,  p-value: 0.9861
plot(pred$krige_output.var1.pred ~ pred$MgSqrt, xlab = "Observed", ylab = "Predicted")
abline(0,1)
abline(mod, col = "blue")

Mgsqrt_krige[1] %>% as.data.frame() %>% ggplot(aes(krige_output.lon, krige_output.lat, colour = krige_output.var1.pred)) +
  geom_point(size = 4) +
  my_colour

This fits truly badly, but then the variogram does look weird. R squared = -0.09088

hue only = -0.08056
value only = 0.09475
chroma only = 0.09621