#Environmental Data Stats

Set-up workspace

library(sp)
library(gstat)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(rcompanion)

#Read in environmental data
data <- read.csv("./soils_data_w2_parse.csv")

#Save the summary data as a csv file in output
sum <- summary(data)
write.csv(sum, "./output/enviro_summary_data.csv")

#Remove all data we dont need (like cluster, name, elements below detection levels)
vars <- c("lon","lat","pH","EC","GWC","per_clay","per_sand","per_silt",
          "Al","Ca","Co","Cr","Cu","Fe","K","Mg","Mn","Na","Ni","P","S",
          "Ti","Zn","per_som","PerN","PerC","CNRatio","Ele")
sd.full <- data[,vars]

#Create a SPDF class object so that the data is clearly distinguished from the coordinates
coordinates(sd.full) <- ~ lon + lat

#Check the distribution of each variable - looking for normality

There are several variables that have non-normal distributions. The first step to resolve this before applying a variogram is to normalize our data. We can start by log transforming the ones that appear non-normal and re-running our normality checks.

#For each soil property, run this program, changing the names accordingly
check.normality <- function(var.test, var.name)
{  plot(var.test,
       main = var.name,
       xlab = "Sample Number",
       ylab = var.name)
  hist(var.test,
       main = paste("Histogram of", var.name),
       breaks = 50)
  c <- ggdensity(var.test,
            main = paste("Density of", var.name),
            xlab = paste(var.name))
  d <- ggqqplot(var.test,
            main = paste("QQ plot for", var.name),
            xlab = paste(var.name))
  plot(c)
  plot(d)
  print(shapiro.test(var.test))}

#For those that need to be transformed, run this code
trans.data <- function(var.test, var.name)
{##Check untransformed variable
#check.normality(var.test, var.name)
#Log transformation:
var.log <- log(var.test)
check.normality(var.log, paste("Log of", var.name))
#Square root transformation:
var.sq <- sqrt(sd.full$Cr)
check.normality(var.sq, paste("Square Root of", var.name))
#Cube root transformation
var.cub <- sign(var.test) * abs(var.test)^(1/3)
check.normality(var.cub, paste("Cube Root of", var.name))
#Tukey's Ladder of Powers transformation
var.tuk <- transformTukey(var.test, plotit = FALSE)
check.normality(var.tuk, paste("Tukey Transformed", var.name))
#Still looks a little suspect, but it passed the normality check.
}

rr ##Check pH check.normality(sd.full$pH, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.99152, p-value = 0.5124

rr #pH has a normal distribution

rr ##Check EC check.normality(sd.full$EC, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.83261, p-value = 8.399e-12

rr #EC has a non-normal distribution trans.data(sd.full$EC, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.91545, p-value = 1.091e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.94079, p-value = 6.061e-06

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.94207, p-value = 7.592e-06

rr #Everything failed, need to look into outlier issues!!!

rr ##Check GWC check.normality(sd.full$GWC, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.9821, p-value = 0.04813

rr #Faild normality check with a p = 0.04813 trans.data(sd.full$GWC, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.96059, p-value = 0.0002779


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.97662, p-value = 0.01165

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.98347, p-value = 0.06912

rr #Tukey Ladder of powers worked, but looks a little suspect

rr ##Check per_clay check.normality(sd.full$per_clay, % Clay)


    Shapiro-Wilk normality test

data:  var.test
W = 0.93231, p-value = 1.443e-06

rr #Clay has a non-normal distribution trans.data(sd.full$per_clay, % Clay)


    Shapiro-Wilk normality test

data:  var.test
W = 0.97108, p-value = 0.002974


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.99235, p-value = 0.6042

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.99242, p-value = 0.6126

rr #Tukey Ladder of Powers worked, need to figure out how to un-transform data for later

rr ##Check per_sand check.normality(sd.full$per_sand, % Sand)


    Shapiro-Wilk normality test

data:  var.test
W = 0.96327, p-value = 0.0004959

rr #Sand has a non-normal distribution trans.data(sd.full$per_sand, % Sand)


    Shapiro-Wilk normality test

data:  var.test
W = 0.98237, p-value = 0.05167


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.9792, p-value = 0.02257

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.98265, p-value = 0.05566

rr #log transformation results IN NORMALITY

rr ##Check per_silt check.normality(sd.full$per_silt, % Silt)


    Shapiro-Wilk normality test

data:  var.test
W = 0.97573, p-value = 0.009288

rr #Silt has a non-normal distribution trans.data(sd.full$per_silt, % Silt)


    Shapiro-Wilk normality test

data:  var.test
W = 0.94649, p-value = 1.694e-05


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.95801, p-value = 0.0001615

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.99406, p-value = 0.7988

rr #Tukey’s ladder resulted in NORMALITY, but I need to work out how to untransform data later

rr ##Check Al check.normality(sd.full$Al, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.98898, p-value = 0.2863

rr #Al has a normal distribution

rr ##Check Ca check.normality(sd.full$Ca, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.98503, p-value = 0.1041

rr #Ca has a normal distribution

rr ##Check Co check.normality(sd.full$Co, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.95129, p-value = 4.198e-05

rr #Co has a non-normal distribution trans.data(sd.full$Co, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.98408, p-value = 0.08117


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.99471, p-value = 0.8639

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.99467, p-value = 0.8605

rr #log transformation results IN NORMALITY (but the QQ plot looks a little odd)

rr ##Check Cr check.normality(sd.full$Cr, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.83472, p-value = 1.023e-11

rr #Cr has a non-normal distribution trans.data(sd.full$Cr, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.97805, p-value = 0.01675


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.95149, p-value = 4.364e-05

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.9841, p-value = 0.08154

rr #Tukey transformation works - Still looks a little suspect, but it passed the normality check.

rr ##Check Cu check.normality(sd.full$Cu, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.90465, p-value = 2.442e-08

rr #Cu has a non-normal distribution trans.data(sd.full$Cu, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.90823, p-value = 3.964e-08


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.94613, p-value = 1.583e-05

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.94998, p-value = 3.261e-05

rr #Still non-normal after Tukey Transformation… there is also a suspicous cluster and some outliers to check out

rr ##Check Fe check.normality(sd.full$Fe, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.97387, p-value = 0.005856

rr #Fe has a non-normal distribution trans.data(sd.full$Fe, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.98979, p-value = 0.3484


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.98858, p-value = 0.2598

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.98986, p-value = 0.3535

rr #log transformation results IN NORMALITY

rr ##Check K check.normality(sd.full$K, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.62872, p-value < 2.2e-16

rr #K has a non-normal distribution and one possible outlier trans.data(sd.full$K, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.93145, p-value = 1.254e-06


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.86667, p-value = 2.557e-10

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.9619, p-value = 0.0003677

rr #Still non-normal after Tukey Transformation… there is also a suspicous cluster and some outliers to check out

rr ##Check Mg check.normality(sd.full$Mg, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.97901, p-value = 0.02147

rr #Mg has a non-normal distribution trans.data(sd.full$Mg, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.9548, p-value = 8.387e-05


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.97968, p-value = 0.02555

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.98689, p-value = 0.1693

rr ##Check Mn check.normality(sd.full$Mn, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.90674, p-value = 3.236e-08

rr #Mn has a non-normal distribution trans.data(sd.full$Mn, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.99082, p-value = 0.4402


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.97561, p-value = 0.009016

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.993, p-value = 0.6786

rr #log transformation results IN NORMALITY

rr ##Check Na check.normality(sd.full$Na, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.96158, p-value = 0.0003434

rr #Na has a non-normal distribution trans.data(sd.full$Na, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.97797, p-value = 0.01642


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.9905, p-value = 0.4102

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.99076, p-value = 0.4351

rr ##Check Ni check.normality(sd.full$Ni, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.87582, p-value = 7.047e-10

rr #Ni has a non-normal distribution trans.data(sd.full$Ni, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.96649, p-value = 0.001017


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.94271, p-value = 8.516e-06

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.99185, p-value = 0.5484

rr ##Check P check.normality(sd.full$P, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.77093, p-value = 4.937e-14

rr #P has a non-normal distribution trans.data(sd.full$P, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.95551, p-value = 9.669e-05


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.91057, p-value = 5.477e-08

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.97156, p-value = 0.003332

rr ##Check S check.normality(sd.full$S, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.64874, p-value < 2.2e-16

rr #S has a non-normal distribution trans.data(sd.full$S, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.86506, p-value = 2.148e-10


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.80334, p-value = 6.39e-13

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.95928, p-value = 0.0002104

rr #log transformation does not result in normality, very right skewed/almost bimodal

rr ##Check Ti check.normality(sd.full$Ti, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.76838, p-value = 4.082e-14

rr #Ti has a non-normal distribution, possible outlier trans.data(sd.full$Ti, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.92648, p-value = 5.694e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.88947, p-value = 3.51e-09

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.98058, p-value = 0.0323

rr ##Check Zn check.normality(sd.full$Zn, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.97875, p-value = 0.02007

rr #Zn has a non-normal distribution trans.data(sd.full$Zn, )


    Shapiro-Wilk normality test

data:  var.test
W = 0.96273, p-value = 0.0004405


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.98088, p-value = 0.03494

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.98486, p-value = 0.09957

rr #log transformation does not result in normality, ONE EXTREMELY SUSPECT OUTLIER

rr ##Check per_som check.normality(sd.full$per_som, % SOM)


    Shapiro-Wilk normality test

data:  var.test
W = 0.9842, p-value = 0.08371

rr #per_som has a normal distribution

rr ##Check PerN check.normality(sd.full$PerN, % N)


    Shapiro-Wilk normality test

data:  var.test
W = 0.95913, p-value = 0.0002041

rr #PerN has a non-normal distribution, one data point looks slightly suspect (outlier) trans.data(sd.full$PerN, % N)


    Shapiro-Wilk normality test

data:  var.test
W = 0.89562, p-value = 7.536e-09


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.92663, p-value = 5.824e-07

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.96338, p-value = 0.0005083

rr ##Check PerC check.normality(sd.full$PerC, % C)


    Shapiro-Wilk normality test

data:  var.test
W = 0.95882, p-value = 0.000191

rr #PerC has a non-normal distribution, one data point looks slightly suspect (outlier) trans.data(sd.full$PerC, % C)


    Shapiro-Wilk normality test

data:  var.test
W = 0.8817, p-value = 1.387e-09


    Shapiro-Wilk normality test

data:  var.test
W = 0.92996, p-value = 9.86e-07


    Shapiro-Wilk normality test

data:  var.test
W = 0.9195, p-value = 1.971e-07

if (lambda >  0){TRANS = x ^ lambda} 
if (lambda == 0){TRANS = log(x)} 
if (lambda <  0){TRANS = -1 * x ^ lambda} 


    Shapiro-Wilk normality test

data:  var.test
W = 0.96472, p-value = 0.0006828

rr ##Check CNRatio check.normality(sd.full$CNRatio, /N)


    Shapiro-Wilk normality test

data:  var.test
W = 0.98989, p-value = 0.3564

rr #CNRatio has a normal distribution

#Create variogram models for each variable

rr #There are several variogram models to choose from #vgm(c(,, , , , , , , , , , , , , , , )))

fit.vario <- function(var, dataframe, var.name) { lzn.vgm.var <- variogram((var)~lon+lat, sd.full) lzn.fit.var <- fit.variogram(lzn.vgm.var, model= vgm(c(,, , , , ))) a <- plot(main=var.name, lzn.vgm.var, lzn.fit.var) print(a) assign(paste(, .var, sep = .), lzn.fit.var, .GlobalEnv) }

fit.vario(sd.full$pH, sd.full, )

No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?singular model in variogram fit

rr lzn.vgm.var <- variogram((sd.full$pH)~lon+lat, sd.full) lzn.fit.var <- fit.variogram(lzn.vgm.var, model= vgm(c(,, , , , )))

No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?No convergence after 200 iterations: try different initial values?singular model in variogram fit

rr # a <- plot(main=, lzn.vgm.var, lzn.fit.var) # print(lzn.fit.var) # print(a)

rr #Create grids from the variograms to perfrom ordinary kriging - this will help us map the variables in QGIS ##Determine range of lat and lon bbox(sd.full)

          min       max
lon -94.23546 -94.23402
lat  36.06563  36.06665

rr ##Create a grid to esimate values over x_range <- as.numeric(c(-94.23546, -94.23402)) y_range <- as.numeric(c(36.06563, 36.06665)) # create an empty grid of values ranging from the xmin-xmax, ymin-ymax sample.grid <- expand.grid(x = seq(from = x_range[1], to = x_range[2], length.out=30), y = seq(from = y_range[1], to = y_range[2], length.out=30)) # expand points to grid class(sample.grid)

[1] \data.frame\

rr sample.grid$number=seq(from=1, to=900, length.out=900) plot1 <- sd.full %>% as.data.frame %>% ggplot(aes(lat, lon)) + geom_point(size=.25) + coord_equal() + ggtitle(with measurements) # this is clearly gridded over the region of interest plot2 <- sample.grid %>% as.data.frame %>% ggplot(aes(y, x)) + geom_point(size=.25) + coord_equal() + ggtitle(at which to estimate) library(gridExtra) grid.arrange(plot1, plot2, ncol=2)

rr

coordinates(sample.grid) <- ~ x + y

lzn.kriged.ph <- krige(pH~1, sd.full, sample.grid, model=lzn.fit.var)

[using ordinary kriging]

rr lzn.kriged.ph %>% as.data.frame %>% ggplot(aes(x=x, y=y)) + geom_tile(aes(fill=var1.pred)) + coord_equal() + scale_fill_gradient(low = , high=) + scale_x_continuous() + scale_y_continuous() + theme_bw()

rr

kriged.ph <- as.data.frame(lzn.kriged.ph) write.csv(kriged.ph, ./output/kriged_ph.csv, row.names=FALSE)

---
title: "03_01_Explore_Enviro"
output: html_notebook
---

#Environmental Data Stats

Set-up workspace
```{r}
library(sp)
library(gstat)
library(dplyr)
library(ggplot2)
library(ggpubr)
library(rcompanion)

#Read in environmental data
data <- read.csv("./soils_data_w2_parse.csv")

#Save the summary data as a csv file in output
sum <- summary(data)
write.csv(sum, "./output/enviro_summary_data.csv")

#Remove all data we dont need (like cluster, name, elements below detection levels)
vars <- c("lon","lat","pH","EC","GWC","per_clay","per_sand","per_silt",
          "Al","Ca","Co","Cr","Cu","Fe","K","Mg","Mn","Na","Ni","P","S",
          "Ti","Zn","per_som","PerN","PerC","CNRatio","Ele")
sd.full <- data[,vars]

#Create a SPDF class object so that the data is clearly distinguished from the coordinates
coordinates(sd.full) <- ~ lon + lat
```


#Check the distribution of each variable - looking for normality

There are several variables that have non-normal distributions. The first step to resolve this before applying a variogram is to normalize our data. We can start by log transforming the ones that appear non-normal and re-running our normality checks. 


```{r}
#For each soil property, run this program, changing the names accordingly
check.normality <- function(var.test, var.name)
{  plot(var.test,
       main = var.name,
       xlab = "Sample Number",
       ylab = var.name)
  hist(var.test,
       main = paste("Histogram of", var.name),
       breaks = 50)
  c <- ggdensity(var.test,
            main = paste("Density of", var.name),
            xlab = paste(var.name))
  d <- ggqqplot(var.test,
            main = paste("QQ plot for", var.name),
            xlab = paste(var.name))
  plot(c)
  plot(d)
  print(shapiro.test(var.test))}

#For those that need to be transformed, run this code
trans.data <- function(var.test, var.name)
{##Check untransformed variable
#check.normality(var.test, var.name)
#Log transformation:
var.log <- log(var.test)
check.normality(var.log, paste("Log of", var.name))
#Square root transformation:
var.sq <- sqrt(sd.full$Cr)
check.normality(var.sq, paste("Square Root of", var.name))
#Cube root transformation
var.cub <- sign(var.test) * abs(var.test)^(1/3)
check.normality(var.cub, paste("Cube Root of", var.name))
#Tukey's Ladder of Powers transformation
var.tuk <- transformTukey(var.test, plotit = FALSE)
check.normality(var.tuk, paste("Tukey Transformed", var.name))
#Still looks a little suspect, but it passed the normality check.
}
```

```{r}
##Check pH
check.normality(sd.full$pH, "pH")
#pH has a normal distribution
```

```{r}
##Check EC
check.normality(sd.full$EC, "EC")
#EC has a non-normal distribution
trans.data(sd.full$EC, "EC")
#Everything failed, need to look into outlier issues!!!
```

```{r}
##Check GWC
check.normality(sd.full$GWC, "GWC")
#Faild normality check with a p = 0.04813
trans.data(sd.full$GWC, "GWC")
#Tukey Ladder of powers worked, but looks a little suspect
```

```{r}
##Check per_clay
check.normality(sd.full$per_clay, "% Clay")
#Clay has a non-normal distribution
trans.data(sd.full$per_clay, "% Clay")
#Tukey Ladder of Powers worked, need to figure out how to un-transform data for later
```


```{r}
##Check per_sand
check.normality(sd.full$per_sand, "% Sand")
#Sand has a non-normal distribution
trans.data(sd.full$per_sand, "% Sand")
#log transformation results IN NORMALITY
```

```{r}
##Check per_silt
check.normality(sd.full$per_silt, "% Silt")
#Silt has a non-normal distribution
trans.data(sd.full$per_silt, "% Silt")
#Tukey's ladder resulted in NORMALITY, but I need to work out how to untransform data later
```

```{r}
##Check Al
check.normality(sd.full$Al, "Al")
#Al has a normal distribution
```

```{r}
##Check Ca
check.normality(sd.full$Ca, "Ca")
#Ca has a normal distribution
```

```{r}
##Check Co
check.normality(sd.full$Co, "Co")
#Co has a non-normal distribution
trans.data(sd.full$Co, "Co")
#log transformation results IN NORMALITY (but the QQ plot looks a little odd)
```

```{r}
##Check Cr
check.normality(sd.full$Cr, "Cr")
#Cr has a non-normal distribution
trans.data(sd.full$Cr, "Cr")
#Tukey transformation works - Still looks a little suspect, but it passed the normality check.
```

```{r}
##Check Cu
check.normality(sd.full$Cu, "Cu")
#Cu has a non-normal distribution
trans.data(sd.full$Cu, "Cu")
#Still non-normal after Tukey Transformation... there is also a suspicous cluster and some outliers to check out
```

```{r}
##Check Fe
check.normality(sd.full$Fe, "Fe")
#Fe has a non-normal distribution
trans.data(sd.full$Fe, "Fe")
#log transformation results IN NORMALITY
```

```{r}
##Check K
check.normality(sd.full$K, "K")
#K has a non-normal distribution and one possible outlier
trans.data(sd.full$K, "K")
#Still non-normal after Tukey Transformation... there is also a suspicous cluster and some outliers to check out
```

```{r}
##Check Mg
check.normality(sd.full$Mg, "Mg")
#Mg has a non-normal distribution
trans.data(sd.full$Mg, "Mg")
#Tukey Transformation results in NORMALITY
```

```{r}
##Check Mn
check.normality(sd.full$Mn, "Mn")
#Mn has a non-normal distribution
trans.data(sd.full$Mn, "Mn")
#log transformation results IN NORMALITY
```

```{r}
##Check Na
check.normality(sd.full$Na, "Na")
#Na has a non-normal distribution
trans.data(sd.full$Na, "Na")
#Cube root transformation works to acheive NORMALITY
```

```{r}
##Check Ni
check.normality(sd.full$Ni, "Ni")
#Ni has a non-normal distribution
trans.data(sd.full$Ni, "Ni")
#Tukey Transformation results in NORMALITY
```

```{r}
##Check P
check.normality(sd.full$P, "P")
#P has a non-normal distribution
trans.data(sd.full$P, "P")
#Nothing results in normality, some very suspicous outliers
```

```{r}
##Check S
check.normality(sd.full$S, "S")
#S has a non-normal distribution
trans.data(sd.full$S, "S")
#Nothing works to acheive normality, some suspicous outliers
```

```{r}
##Check Ti
check.normality(sd.full$Ti, "Ti")
#Ti has a non-normal distribution, possible outlier
trans.data(sd.full$Ti, "Ti")
#Nothing results in normality... There is one EXTREMELY suspect outlier
```

```{r}
##Check Zn
check.normality(sd.full$Zn, "Zn")
#Zn has a non-normal distribution
trans.data(sd.full$Zn, "Zn")
#No transformation results in normality, ONE SUSPECT OUTLIER
```

```{r}
##Check per_som
check.normality(sd.full$per_som, "% SOM")
#per_som has a normal distribution
```

```{r}
##Check PerN
check.normality(sd.full$PerN, "% N")
#PerN has a non-normal distribution, one data point looks slightly suspect (outlier)
trans.data(sd.full$PerN, "% N")
#None of the transformations worked, really need to do an outlier check
```

```{r}
##Check PerC
check.normality(sd.full$PerC, "% C")
#PerC has a non-normal distribution, one data point looks slightly suspect (outlier)
trans.data(sd.full$PerC, "% C")
#None of the transformations resulted in normality. Need to check for outliers. 
```

```{r}
##Check CNRatio
check.normality(sd.full$CNRatio, "C/N")
#CNRatio has a normal distribution
```




#Create variogram models for each variable

```{r}
#There are several variogram models to choose from
#vgm(c("Exp","Sph", "Gau", "Mat", "Nug", "Exc", "Ste", "Cir", "Lin", "Bes", "Pen", "Per", "Wav", "Hol", "Log", "Pow", "Spl")))

fit.vario <- function(var, dataframe, var.name)
{
lzn.vgm.var <- variogram((var)~lon+lat, sd.full)
lzn.fit.var <- fit.variogram(lzn.vgm.var, model=
                              vgm(c("Exp","Sph", "Gau", "Mat", "Nug", "Exc")))
a <- plot(main=var.name, lzn.vgm.var, lzn.fit.var)
print(a)
assign(paste("pH", "fit.var", sep = "."), lzn.fit.var, .GlobalEnv)
}

fit.vario(sd.full$pH, sd.full, "pH")

 lzn.vgm.var <- variogram((sd.full$pH)~lon+lat, sd.full)
 lzn.fit.var <- fit.variogram(lzn.vgm.var, model=
                               vgm(c("Exp","Sph", "Gau", "Mat", "Nug", "Exc")))
# a <- plot(main="pH", lzn.vgm.var, lzn.fit.var)
# print(lzn.fit.var)
# print(a)

```

```{r}
#Create grids from the variograms to perfrom ordinary kriging - this will help us map the variables in QGIS
##Determine range of lat and lon
bbox(sd.full)

##Create a grid to esimate values over
x_range <- as.numeric(c(-94.23546, -94.23402))
y_range <- as.numeric(c(36.06563, 36.06665))
# create an empty grid of values ranging from the xmin-xmax, ymin-ymax
sample.grid <- expand.grid(x = seq(from = x_range[1],
                                   to = x_range[2], 
                                   length.out=30),
                           y = seq(from = y_range[1],                                           to = y_range[2], 
                                   length.out=30))  # expand points to grid
class(sample.grid)

sample.grid$number=seq(from=1, to=900, length.out=900)
plot1 <- sd.full %>% as.data.frame %>%
  ggplot(aes(lat, lon)) + geom_point(size=.25) + coord_equal() + 
  ggtitle("Points with measurements")
# this is clearly gridded over the region of interest
plot2 <- sample.grid %>% as.data.frame %>%
  ggplot(aes(y, x)) + geom_point(size=.25) + coord_equal() + 
  ggtitle("Points at which to estimate")
library(gridExtra)
grid.arrange(plot1, plot2, ncol=2)

coordinates(sample.grid) <- ~ x + y

lzn.kriged.ph <- krige(pH~1, sd.full, sample.grid, model=lzn.fit.var)

lzn.kriged.ph %>% as.data.frame %>%
  ggplot(aes(x=x, y=y)) + geom_tile(aes(fill=var1.pred)) + coord_equal() +
  scale_fill_gradient(low = "yellow", high="red") +
  scale_x_continuous() + scale_y_continuous() +
  theme_bw()

kriged.ph <- as.data.frame(lzn.kriged.ph)
write.csv(kriged.ph, "./output/kriged_ph.csv", row.names=FALSE)
```

