#Environmental Data Stats
Set-up workspace
library(sp)
library(gstat)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(rcompanion)
#Read in environmental data
data <- read.csv("./soils_data_w2_parse.csv")
#Save the summary data as a csv file in output
sum <- summary(data)
write.csv(sum, "./output/enviro_summary_data.csv")
#Remove all data we dont need (like cluster, name, elements below detection levels)
vars <- c("lon","lat","pH","EC","GWC","per_clay","per_sand","per_silt",
"Al","Ca","Co","Cr","Cu","Fe","K","Mg","Mn","Na","Ni","P","S",
"Ti","Zn","per_som","PerN","PerC","CNRatio","Ele")
sd.full <- data[,vars]
#Create a SPDF class object so that the data is clearly distinguished from the coordinates
coordinates(sd.full) <- ~ lon + lat
#Check the distribution of each variable - looking for normality
There are several variables that have non-normal distributions. The first step to resolve this before applying a variogram is to normalize our data. We can start by log transforming the ones that appear non-normal and re-running our normality checks.
#For each soil property, run this program, changing the names accordingly
check.normality <- function(var.test, var.name)
{ plot(var.test,
main = var.name,
xlab = "Sample Number",
ylab = var.name)
hist(var.test,
main = paste("Histogram of", var.name),
breaks = 50)
c <- ggdensity(var.test,
main = paste("Density of", var.name),
xlab = paste(var.name))
d <- ggqqplot(var.test,
main = paste("QQ plot for", var.name),
xlab = paste(var.name))
plot(c)
plot(d)
print(shapiro.test(var.test))}
#For those that need to be transformed, run this code
trans.data <- function(var.test, var.name)
{##Check untransformed variable
#check.normality(var.test, var.name)
#Log transformation:
var.log <- log(var.test)
check.normality(var.log, paste("Log of", var.name))
#Square root transformation:
var.sq <- sqrt(sd.full$Cr)
check.normality(var.sq, paste("Square Root of", var.name))
#Cube root transformation
var.cub <- sign(var.test) * abs(var.test)^(1/3)
check.normality(var.cub, paste("Cube Root of", var.name))
#Tukey's Ladder of Powers transformation
var.tuk <- transformTukey(var.test, plotit = FALSE)
check.normality(var.tuk, paste("Tukey Transformed", var.name))
#Still looks a little suspect, but it passed the normality check.
}
rr ##Check pH check.normality(sd.full$pH, )
Shapiro-Wilk normality test
data: var.test
W = 0.99152, p-value = 0.5124
rr #pH has a normal distribution
rr ##Check EC check.normality(sd.full$EC, )
Shapiro-Wilk normality test
data: var.test
W = 0.83261, p-value = 8.399e-12
rr #EC has a non-normal distribution trans.data(sd.full$EC, )
Shapiro-Wilk normality test
data: var.test
W = 0.91545, p-value = 1.091e-07
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.94079, p-value = 6.061e-06
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.94207, p-value = 7.592e-06
rr #Everything failed, need to look into outlier issues!!!
rr ##Check GWC check.normality(sd.full$GWC, )
Shapiro-Wilk normality test
data: var.test
W = 0.9821, p-value = 0.04813
rr #Faild normality check with a p = 0.04813 trans.data(sd.full$GWC, )
Shapiro-Wilk normality test
data: var.test
W = 0.96059, p-value = 0.0002779
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.97662, p-value = 0.01165
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.98347, p-value = 0.06912
rr #Tukey Ladder of powers worked, but looks a little suspect
rr ##Check per_clay check.normality(sd.full$per_clay, % Clay)
Shapiro-Wilk normality test
data: var.test
W = 0.93231, p-value = 1.443e-06
rr #Clay has a non-normal distribution trans.data(sd.full$per_clay, % Clay)
Shapiro-Wilk normality test
data: var.test
W = 0.97108, p-value = 0.002974
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.99235, p-value = 0.6042
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.99242, p-value = 0.6126
rr #Tukey Ladder of Powers worked, need to figure out how to un-transform data for later
rr ##Check per_sand check.normality(sd.full$per_sand, % Sand)
Shapiro-Wilk normality test
data: var.test
W = 0.96327, p-value = 0.0004959
rr #Sand has a non-normal distribution trans.data(sd.full$per_sand, % Sand)
Shapiro-Wilk normality test
data: var.test
W = 0.98237, p-value = 0.05167
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.9792, p-value = 0.02257
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.98265, p-value = 0.05566
rr #log transformation results IN NORMALITY
rr ##Check per_silt check.normality(sd.full$per_silt, % Silt)
Shapiro-Wilk normality test
data: var.test
W = 0.97573, p-value = 0.009288
rr #Silt has a non-normal distribution trans.data(sd.full$per_silt, % Silt)
Shapiro-Wilk normality test
data: var.test
W = 0.94649, p-value = 1.694e-05
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.95801, p-value = 0.0001615
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.99406, p-value = 0.7988
rr #Tukey’s ladder resulted in NORMALITY, but I need to work out how to untransform data later
rr ##Check Al check.normality(sd.full$Al, )
Shapiro-Wilk normality test
data: var.test
W = 0.98898, p-value = 0.2863
rr #Al has a normal distribution
rr ##Check Ca check.normality(sd.full$Ca, )
Shapiro-Wilk normality test
data: var.test
W = 0.98503, p-value = 0.1041
rr #Ca has a normal distribution
rr ##Check Co check.normality(sd.full$Co, )
Shapiro-Wilk normality test
data: var.test
W = 0.95129, p-value = 4.198e-05
rr #Co has a non-normal distribution trans.data(sd.full$Co, )
Shapiro-Wilk normality test
data: var.test
W = 0.98408, p-value = 0.08117
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.99471, p-value = 0.8639
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.99467, p-value = 0.8605
rr #log transformation results IN NORMALITY (but the QQ plot looks a little odd)
rr ##Check Cr check.normality(sd.full$Cr, )
Shapiro-Wilk normality test
data: var.test
W = 0.83472, p-value = 1.023e-11
rr #Cr has a non-normal distribution trans.data(sd.full$Cr, )
Shapiro-Wilk normality test
data: var.test
W = 0.97805, p-value = 0.01675
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.95149, p-value = 4.364e-05
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.9841, p-value = 0.08154
rr #Tukey transformation works - Still looks a little suspect, but it passed the normality check.
rr ##Check Cu check.normality(sd.full$Cu, )
Shapiro-Wilk normality test
data: var.test
W = 0.90465, p-value = 2.442e-08
rr #Cu has a non-normal distribution trans.data(sd.full$Cu, )
Shapiro-Wilk normality test
data: var.test
W = 0.90823, p-value = 3.964e-08
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.94613, p-value = 1.583e-05
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.94998, p-value = 3.261e-05
rr #Still non-normal after Tukey Transformation… there is also a suspicous cluster and some outliers to check out
rr ##Check Fe check.normality(sd.full$Fe, )
Shapiro-Wilk normality test
data: var.test
W = 0.97387, p-value = 0.005856
rr #Fe has a non-normal distribution trans.data(sd.full$Fe, )
Shapiro-Wilk normality test
data: var.test
W = 0.98979, p-value = 0.3484
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.98858, p-value = 0.2598
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.98986, p-value = 0.3535
rr #log transformation results IN NORMALITY
rr ##Check K check.normality(sd.full$K, )
Shapiro-Wilk normality test
data: var.test
W = 0.62872, p-value < 2.2e-16
rr #K has a non-normal distribution and one possible outlier trans.data(sd.full$K, )
Shapiro-Wilk normality test
data: var.test
W = 0.93145, p-value = 1.254e-06
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.86667, p-value = 2.557e-10
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.9619, p-value = 0.0003677
rr #Still non-normal after Tukey Transformation… there is also a suspicous cluster and some outliers to check out
rr ##Check Mg check.normality(sd.full$Mg, )
Shapiro-Wilk normality test
data: var.test
W = 0.97901, p-value = 0.02147
rr #Mg has a non-normal distribution trans.data(sd.full$Mg, )
Shapiro-Wilk normality test
data: var.test
W = 0.9548, p-value = 8.387e-05
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.97968, p-value = 0.02555
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.98689, p-value = 0.1693
rr ##Check Mn check.normality(sd.full$Mn, )
Shapiro-Wilk normality test
data: var.test
W = 0.90674, p-value = 3.236e-08
rr #Mn has a non-normal distribution trans.data(sd.full$Mn, )
Shapiro-Wilk normality test
data: var.test
W = 0.99082, p-value = 0.4402
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.97561, p-value = 0.009016
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.993, p-value = 0.6786
rr #log transformation results IN NORMALITY
rr ##Check Na check.normality(sd.full$Na, )
Shapiro-Wilk normality test
data: var.test
W = 0.96158, p-value = 0.0003434
rr #Na has a non-normal distribution trans.data(sd.full$Na, )
Shapiro-Wilk normality test
data: var.test
W = 0.97797, p-value = 0.01642
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.9905, p-value = 0.4102
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.99076, p-value = 0.4351
rr ##Check Ni check.normality(sd.full$Ni, )
Shapiro-Wilk normality test
data: var.test
W = 0.87582, p-value = 7.047e-10
rr #Ni has a non-normal distribution trans.data(sd.full$Ni, )
Shapiro-Wilk normality test
data: var.test
W = 0.96649, p-value = 0.001017
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.94271, p-value = 8.516e-06
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.99185, p-value = 0.5484
rr ##Check P check.normality(sd.full$P, )
Shapiro-Wilk normality test
data: var.test
W = 0.77093, p-value = 4.937e-14
rr #P has a non-normal distribution trans.data(sd.full$P, )
Shapiro-Wilk normality test
data: var.test
W = 0.95551, p-value = 9.669e-05
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.91057, p-value = 5.477e-08
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.97156, p-value = 0.003332
rr ##Check S check.normality(sd.full$S, )
Shapiro-Wilk normality test
data: var.test
W = 0.64874, p-value < 2.2e-16
rr #S has a non-normal distribution trans.data(sd.full$S, )
Shapiro-Wilk normality test
data: var.test
W = 0.86506, p-value = 2.148e-10
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.80334, p-value = 6.39e-13
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.95928, p-value = 0.0002104
rr #log transformation does not result in normality, very right skewed/almost bimodal
rr ##Check Ti check.normality(sd.full$Ti, )
Shapiro-Wilk normality test
data: var.test
W = 0.76838, p-value = 4.082e-14
rr #Ti has a non-normal distribution, possible outlier trans.data(sd.full$Ti, )
Shapiro-Wilk normality test
data: var.test
W = 0.92648, p-value = 5.694e-07
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.88947, p-value = 3.51e-09
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.98058, p-value = 0.0323
rr ##Check Zn check.normality(sd.full$Zn, )
Shapiro-Wilk normality test
data: var.test
W = 0.97875, p-value = 0.02007
rr #Zn has a non-normal distribution trans.data(sd.full$Zn, )
Shapiro-Wilk normality test
data: var.test
W = 0.96273, p-value = 0.0004405
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.98088, p-value = 0.03494
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.98486, p-value = 0.09957
rr #log transformation does not result in normality, ONE EXTREMELY SUSPECT OUTLIER
rr ##Check per_som check.normality(sd.full$per_som, % SOM)
Shapiro-Wilk normality test
data: var.test
W = 0.9842, p-value = 0.08371
rr #per_som has a normal distribution
rr ##Check PerN check.normality(sd.full$PerN, % N)
Shapiro-Wilk normality test
data: var.test
W = 0.95913, p-value = 0.0002041
rr #PerN has a non-normal distribution, one data point looks slightly suspect (outlier) trans.data(sd.full$PerN, % N)
Shapiro-Wilk normality test
data: var.test
W = 0.89562, p-value = 7.536e-09
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.92663, p-value = 5.824e-07
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.96338, p-value = 0.0005083
rr ##Check PerC check.normality(sd.full$PerC, % C)
Shapiro-Wilk normality test
data: var.test
W = 0.95882, p-value = 0.000191
rr #PerC has a non-normal distribution, one data point looks slightly suspect (outlier) trans.data(sd.full$PerC, % C)
Shapiro-Wilk normality test
data: var.test
W = 0.8817, p-value = 1.387e-09
Shapiro-Wilk normality test
data: var.test
W = 0.92996, p-value = 9.86e-07
Shapiro-Wilk normality test
data: var.test
W = 0.9195, p-value = 1.971e-07
if (lambda > 0){TRANS = x ^ lambda}
if (lambda == 0){TRANS = log(x)}
if (lambda < 0){TRANS = -1 * x ^ lambda}
Shapiro-Wilk normality test
data: var.test
W = 0.96472, p-value = 0.0006828
rr ##Check CNRatio check.normality(sd.full$CNRatio, /N)
Shapiro-Wilk normality test
data: var.test
W = 0.98989, p-value = 0.3564
rr #CNRatio has a normal distribution
#Create variogram models for each variable
rr #There are several variogram models to choose from #vgm(c(,, , , , , , , , , , , , , , , )))
fit.vario <- function(var, dataframe, var.name) { lzn.vgm.var <- variogram((var)~lon+lat, sd.full) lzn.fit.var <- fit.variogram(lzn.vgm.var, model= vgm(c(,, , , , ))) a <- plot(main=var.name, lzn.vgm.var, lzn.fit.var) print(a) assign(paste(, .var, sep = .), lzn.fit.var, .GlobalEnv) }
fit.vario(sd.full$pH, sd.full, )
No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?singular model in variogram fit
rr lzn.vgm.var <- variogram((sd.full$pH)~lon+lat, sd.full) lzn.fit.var <- fit.variogram(lzn.vgm.var, model= vgm(c(,, , , , )))
No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?singular model in variogram fit
rr # a <- plot(main=, lzn.vgm.var, lzn.fit.var) # print(lzn.fit.var) # print(a)
rr #Create grids from the variograms to perfrom ordinary kriging - this will help us map the variables in QGIS ##Determine range of lat and lon bbox(sd.full)
min max
lon -94.23546 -94.23402
lat 36.06563 36.06665
rr ##Create a grid to esimate values over x_range <- as.numeric(c(-94.23546, -94.23402)) y_range <- as.numeric(c(36.06563, 36.06665)) # create an empty grid of values ranging from the xmin-xmax, ymin-ymax sample.grid <- expand.grid(x = seq(from = x_range[1], to = x_range[2], length.out=30), y = seq(from = y_range[1], to = y_range[2], length.out=30)) # expand points to grid class(sample.grid)
[1] \data.frame\
rr sample.grid$number=seq(from=1, to=900, length.out=900) plot1 <- sd.full %>% as.data.frame %>% ggplot(aes(lat, lon)) + geom_point(size=.25) + coord_equal() + ggtitle(with measurements) # this is clearly gridded over the region of interest plot2 <- sample.grid %>% as.data.frame %>% ggplot(aes(y, x)) + geom_point(size=.25) + coord_equal() + ggtitle(at which to estimate) library(gridExtra) grid.arrange(plot1, plot2, ncol=2)
rr
coordinates(sample.grid) <- ~ x + y
lzn.kriged.ph <- krige(pH~1, sd.full, sample.grid, model=lzn.fit.var)
[using ordinary kriging]
rr lzn.kriged.ph %>% as.data.frame %>% ggplot(aes(x=x, y=y)) + geom_tile(aes(fill=var1.pred)) + coord_equal() + scale_fill_gradient(low = , high=) + scale_x_continuous() + scale_y_continuous() + theme_bw()
rr
kriged.ph <- as.data.frame(lzn.kriged.ph) write.csv(kriged.ph, ./output/kriged_ph.csv, row.names=FALSE)