##data analysis
setwd("C:/Users/ywang/Dropbox/R/Deptford/RIR")
fig<-read.csv("Soil_Site.csv")
#only select interested elements, aluminum,arsenic,beryllium,iron,lead,vanadium
fig.bg<-fig[c(2,4,9,28,11,24, 8, 17)]
    
#obtain summary statistics 
#install.packages("psych")
library("psych")
## Warning: package 'psych' was built under R version 3.2.4
#summary statistics for all data
summary<-describe(fig.bg) 
summary
##            vars   n     mean       sd   median  trimmed      mad    min
## FieldID*      1 120    60.50    34.79    60.50    60.50    44.48    1.0
## StartDepth    2 120     2.55     3.89     1.00     1.60     1.48    0.0
## Arsenic       3 112    15.36    16.83    11.40    12.68     9.79    2.5
## Vanadium      4 112    44.22    21.97    41.80    42.03    26.61   14.7
## Beryllium     5 114     0.58     0.28     0.57     0.57     0.38    0.2
## Lead          6 115    95.03   193.48    12.60    47.96    13.05    2.0
## Aluminum      7 112  8721.88  4532.85  7875.00  8358.56  4959.30 2080.0
## Iron          8 112 22545.71 12511.06 21400.00 21342.11 13714.05 5660.0
##                max   range skew kurtosis      se
## FieldID*     120.0   119.0 0.00    -1.23    3.18
## StartDepth    21.0    21.0 2.51     6.32    0.35
## Arsenic      148.0   145.5 4.74    32.52    1.59
## Vanadium     118.0   103.3 0.80     0.28    2.08
## Beryllium      1.2     1.0 0.28    -1.17    0.03
## Lead        1240.0  1238.0 3.49    14.07   18.04
## Aluminum   22800.0 20720.0 0.62    -0.36  428.31
## Iron       72000.0 66340.0 1.05     1.57 1182.18
write.csv(summary,"summary.csv")

#outlier test
#using the boxplot function in car library to make the plot showing the outliers 
library(car)
## Warning: package 'car' was built under R version 3.2.4
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
par(mfrow=c(2,3),oma=c(1,1,1,1),mar=c(2,4,2,1))
for (i in 3:ncol(fig.bg))
{
Boxplot(fig.bg[,i], id.method=c("y"),ylab=colnames(fig.bg[i]))
}

# using rosnerTest in EnvStats package.
library(EnvStats)
## Warning: package 'EnvStats' was built under R version 3.2.4
## 
## Attaching package: 'EnvStats'
## The following object is masked from 'package:car':
## 
##     qqPlot
## The following objects are masked from 'package:stats':
## 
##     predict, predict.lm
## The following object is masked from 'package:base':
## 
##     print.default
i=3 # arsenic
  rosnerTest(fig.bg[,i], k = 6, alpha = 0.05, warn = TRUE)
## Warning in rosnerTest(fig.bg[, i], k = 6, alpha = 0.05, warn = TRUE): 8
## observations with NA/NaN/Inf in 'x' removed.
## 
## Results of Outlier Test
## -------------------------
## 
## Test Method:                     Rosner's Test for Outliers
## 
## Hypothesized Distribution:       Normal
## 
## Data:                            fig.bg[, i]
## 
## Number NA/NaN/Inf's Removed:     8
## 
## Sample Size:                     112
## 
## Test Statistics:                 R.1 = 7.880343
##                                  R.2 = 4.251184
##                                  R.3 = 3.747481
##                                  R.4 = 3.671153
##                                  R.5 = 3.789347
##                                  R.6 = 2.585476
## 
## Test Statistic Parameter:        k = 6
## 
## Alternative Hypothesis:          Up to 6 observations are not
##                                  from the same Distribution.
## 
## Type I Error:                    5%
## 
## Number of Outliers Detected:     5
## 
##   i   Mean.i      SD.i Value Obs.Num    R.i+1 lambda.i+1 Outlier
## 1 0 15.35982 16.831777 148.0      78 7.880343   3.422302    TRUE
## 2 1 14.16486 11.158101  61.6      79 4.251184   3.419309    TRUE
## 3 2 13.73364 10.237907  52.1      14 3.747481   3.416284    TRUE
## 4 3 13.38165  9.593267  48.6      75 3.671153   3.413225    TRUE
## 5 4 13.05556  9.010641  47.2      28 3.789347   3.410133    TRUE
## 6 5 12.73645  8.417618  34.5      45 2.585476   3.407006   FALSE
i=4 # vanadium
  rosnerTest(fig.bg[,i], k = 6, alpha = 0.05, warn = TRUE)
## Warning in rosnerTest(fig.bg[, i], k = 6, alpha = 0.05, warn = TRUE): 8
## observations with NA/NaN/Inf in 'x' removed.
## 
## Results of Outlier Test
## -------------------------
## 
## Test Method:                     Rosner's Test for Outliers
## 
## Hypothesized Distribution:       Normal
## 
## Data:                            fig.bg[, i]
## 
## Number NA/NaN/Inf's Removed:     8
## 
## Sample Size:                     112
## 
## Test Statistics:                 R.1 = 3.357739
##                                  R.2 = 2.842768
##                                  R.3 = 2.656002
##                                  R.4 = 2.479551
##                                  R.5 = 2.492585
##                                  R.6 = 2.446946
## 
## Test Statistic Parameter:        k = 6
## 
## Alternative Hypothesis:          Up to 6 observations are not
##                                  from the same Distribution.
## 
## Type I Error:                    5%
## 
## Number of Outliers Detected:     0
## 
##   i   Mean.i     SD.i Value Obs.Num    R.i+1 lambda.i+1 Outlier
## 1 0 44.21607 21.97429 118.0      62 3.357739   3.422302   FALSE
## 2 1 43.55135 20.91224 103.0      26 2.842768   3.419309   FALSE
## 3 2 43.01091 20.21425  96.7      92 2.656002   3.416284   FALSE
## 4 3 42.51835 19.63325  91.2      28 2.479551   3.413225   FALSE
## 5 4 42.06759 19.14976  89.8      64 2.492585   3.410133   FALSE
## 6 5 41.62150 18.66756  87.3      74 2.446946   3.407006   FALSE
# Arsenic Outlier: as row 78,79, 14,75,28; sample MW-5A, MW-5B, AOC-1-14B,AOC-1-22, MW-3C
# outliers for aluminum and iron were not removed since these are crustal elemetns.
# Outliers for lead was not removed because it is believed from site source.
# remove the outlier data for arsenic only

fig.bg1<-fig.bg [-c(78,79, 14,75,28),]

#obtain summary statistics for arsenic without outliers
library("psych")
mat.as<-describe(fig.bg1[,3]) 
mat.as
##   vars   n  mean   sd median trimmed  mad min  max range skew kurtosis
## 1    1 107 12.74 8.42   10.9   11.92 9.34 2.5 34.5    32 0.68    -0.61
##     se
## 1 0.81
write.csv(mat.as,"mat.as.csv")

#plot to see the inter-relationship among elements
pairs.panels(fig.bg[c(3,4,5,6,7,8)], gap = 0) # all data

pairs.panels(fig.bg1[c(3,4,5,6,7,8)], gap = 0) # after removing outliers

#there are good correlations between aluminum,iron, beryllium, Arsenic and Vanadium


cor.test(fig.bg1$Arsenic,fig.bg1$Aluminum)
## 
## Results of Hypothesis Test
## --------------------------
## 
## Null Hypothesis:                 correlation = 0
## 
## Alternative Hypothesis:          True correlation is not equal to 0
## 
## Test Name:                       Pearson's product-moment correlation
## 
## Estimated Parameter(s):          cor = 0.8822266
## 
## Data:                            fig.bg1$Arsenic and fig.bg1$Aluminum
## 
## Test Statistic:                  t = 19.20062
## 
## Test Statistic Parameter:        df = 105
## 
## P-value:                         0
## 
## 95% Confidence Interval:         LCL = 0.8316716
##                                  UCL = 0.9182756
cor.test(fig.bg1$Arsenic,fig.bg1$Iron)
## 
## Results of Hypothesis Test
## --------------------------
## 
## Null Hypothesis:                 correlation = 0
## 
## Alternative Hypothesis:          True correlation is not equal to 0
## 
## Test Name:                       Pearson's product-moment correlation
## 
## Estimated Parameter(s):          cor = 0.7584484
## 
## Data:                            fig.bg1$Arsenic and fig.bg1$Iron
## 
## Test Statistic:                  t = 11.92479
## 
## Test Statistic Parameter:        df = 105
## 
## P-value:                         0
## 
## 95% Confidence Interval:         LCL = 0.6642389
##                                  UCL = 0.8289411
cor.test(fig.bg1$Vanadium,fig.bg1$Aluminum)
## 
## Results of Hypothesis Test
## --------------------------
## 
## Null Hypothesis:                 correlation = 0
## 
## Alternative Hypothesis:          True correlation is not equal to 0
## 
## Test Name:                       Pearson's product-moment correlation
## 
## Estimated Parameter(s):          cor = 0.838206
## 
## Data:                            fig.bg1$Vanadium and fig.bg1$Aluminum
## 
## Test Statistic:                  t = 15.74951
## 
## Test Statistic Parameter:        df = 105
## 
## P-value:                         0
## 
## 95% Confidence Interval:         LCL = 0.7710534
##                                  UCL = 0.8869196
cor.test(fig.bg1$Vanadium,fig.bg1$Iron)
## 
## Results of Hypothesis Test
## --------------------------
## 
## Null Hypothesis:                 correlation = 0
## 
## Alternative Hypothesis:          True correlation is not equal to 0
## 
## Test Name:                       Pearson's product-moment correlation
## 
## Estimated Parameter(s):          cor = 0.7770273
## 
## Data:                            fig.bg1$Vanadium and fig.bg1$Iron
## 
## Test Statistic:                  t = 12.64905
## 
## Test Statistic Parameter:        df = 105
## 
## P-value:                         0
## 
## 95% Confidence Interval:         LCL = 0.6887815
##                                  UCL = 0.8425834
cor.test(fig.bg1$Beryllium,fig.bg1$Aluminum)
## 
## Results of Hypothesis Test
## --------------------------
## 
## Null Hypothesis:                 correlation = 0
## 
## Alternative Hypothesis:          True correlation is not equal to 0
## 
## Test Name:                       Pearson's product-moment correlation
## 
## Estimated Parameter(s):          cor = 0.8332856
## 
## Data:                            fig.bg1$Beryllium and fig.bg1$Aluminum
## 
## Test Statistic:                  t = 15.44496
## 
## Test Statistic Parameter:        df = 105
## 
## P-value:                         0
## 
## 95% Confidence Interval:         LCL = 0.7643525
##                                  UCL = 0.8833867
cor.test(fig.bg1$Beryllium,fig.bg1$Iron)
## 
## Results of Hypothesis Test
## --------------------------
## 
## Null Hypothesis:                 correlation = 0
## 
## Alternative Hypothesis:          True correlation is not equal to 0
## 
## Test Name:                       Pearson's product-moment correlation
## 
## Estimated Parameter(s):          cor = 0.7631734
## 
## Data:                            fig.bg1$Beryllium and fig.bg1$Iron
## 
## Test Statistic:                  t = 12.10194
## 
## Test Statistic Parameter:        df = 105
## 
## P-value:                         0
## 
## 95% Confidence Interval:         LCL = 0.6704614
##                                  UCL = 0.8324186
#perform a linear fit and plot
#------------Arsenic vs. Aluminum---------------------
fit.as<-lm(Arsenic~Aluminum,fig.bg1)
anova(fit.as)
## Analysis of Variance Table
## 
## Response: Arsenic
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Aluminum    1 5845.8  5845.8  368.66 < 2.2e-16 ***
## Residuals 105 1665.0    15.9                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.as = summary(fit.as)
modsum.as
## 
## Call:
## lm(formula = Arsenic ~ Aluminum, data = fig.bg1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.6626  -2.2157  -0.3195   2.1092  11.8036 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -1.353e+00  8.286e-01  -1.633    0.106    
## Aluminum     1.629e-03  8.485e-05  19.201   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.982 on 105 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.7783, Adjusted R-squared:  0.7762 
## F-statistic: 368.7 on 1 and 105 DF,  p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.as, las = 1)

par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1))  
plot(Arsenic~Aluminum,fig.bg1,main="Linear Relationship",xlab="Aluminum (mg/Kg)",ylab="Arsenic (mg/Kg)",xlim=c(1500,25000),ylim=c(0,50))
abline(fit.as,col="red",lwd=1.5)

#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.as), 4) # extract coefficients
text(10000,45,bquote(As == .(lm_coef[2]) * Al + .(lm_coef[1])))
r2 = modsum.as$adj.r.squared
my.p = modsum.as$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(10000,40, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(10000,35, plabel)

#------------Vanadium vs. Aluminum----------------------
fit.v<-lm(Vanadium~Aluminum,fig.bg1)
anova(fit.v)
## Analysis of Variance Table
## 
## Response: Vanadium
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Aluminum    1  34270   34270  248.05 < 2.2e-16 ***
## Residuals 105  14507     138                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.v = summary(fit.v)
modsum.v
## 
## Call:
## lm(formula = Vanadium ~ Aluminum, data = fig.bg1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -26.022  -7.846  -1.064   6.532  33.201 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 9.2656897  2.4459865   3.788 0.000253 ***
## Aluminum    0.0039445  0.0002505  15.750  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.75 on 105 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.7026, Adjusted R-squared:  0.6998 
## F-statistic:   248 on 1 and 105 DF,  p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.v, las = 1)

par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1)) 
plot(Vanadium~Aluminum,fig.bg1,main="Linear Relationship",xlab="Aluminum (mg/Kg)",ylab="Vanadium (mg/Kg)",xlim=c(1500,25000),ylim=c(0,150))
abline(fit.v,col="red",lwd=1.5)
#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.v), 4) # extract coefficients
text(10000,140,bquote(V == .(lm_coef[2]) * Al + .(lm_coef[1])))
r2 = modsum.v$adj.r.squared
my.p = modsum.v$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(10000,128, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(10000,113, plabel)

#------------Beryllium vs. Aluminum----------------------
fit.b<-lm(Beryllium~Aluminum,fig.bg1)
anova(fit.b)
## Analysis of Variance Table
## 
## Response: Beryllium
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Aluminum    1 5.6790  5.6790  238.55 < 2.2e-16 ***
## Residuals 105 2.4997  0.0238                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.b = summary(fit.b)
modsum.b
## 
## Call:
## lm(formula = Beryllium ~ Aluminum, data = fig.bg1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.29341 -0.09764 -0.03076  0.07693  0.57235 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.321e-01  3.211e-02   4.113 7.77e-05 ***
## Aluminum    5.078e-05  3.288e-06  15.445  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1543 on 105 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.6944, Adjusted R-squared:  0.6915 
## F-statistic: 238.5 on 1 and 105 DF,  p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.b, las = 1)

par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1)) 
plot(Beryllium~Aluminum,fig.bg1,main="Linear Relationship",xlab="Aluminum (mg/Kg)",ylab="Beryllium (mg/Kg)",xlim=c(1500,25000),ylim=c(0,1.5))
abline(fit.b,col="red",lwd=1.5)
#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.b), 4) # extract coefficients
text(17500,0.4,bquote(Be == .(lm_coef[2]) * Al + .(lm_coef[1])))
r2 = modsum.b$adj.r.squared
my.p = modsum.b$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(17500,0.25, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(17500,0.1, plabel)

#perform a linear fit and plot
#------------Arsenic vs. Iron----------------------
fit.as<-lm(Arsenic~Iron,fig.bg1)
anova(fit.as)
## Analysis of Variance Table
## 
## Response: Arsenic
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Iron        1 4320.5  4320.5   142.2 < 2.2e-16 ***
## Residuals 105 3190.2    30.4                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.as = summary(fit.as)
modsum.as
## 
## Call:
## lm(formula = Arsenic ~ Iron, data = fig.bg1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -23.3066  -3.0997  -0.3697   2.5342  14.5111 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 8.131e-01  1.133e+00   0.718    0.475    
## Iron        5.448e-04  4.568e-05  11.925   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.512 on 105 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.5752, Adjusted R-squared:  0.5712 
## F-statistic: 142.2 on 1 and 105 DF,  p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.as, las = 1)

par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1)) 
plot(Arsenic~Iron,fig.bg1,main="Linear Relationship",xlab="Iron (mg/Kg)",ylab="Arsenic (mg/Kg)",xlim=c(4000,72000),ylim=c(0,50))
abline(fit.as,col="red",lwd=1.5)
#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.as), 4) # extract coefficients
text(27000,49,bquote(As == .(lm_coef[2]) * Fe + .(lm_coef[1])))
r2 = modsum.as$adj.r.squared
my.p = modsum.as$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(27000,44, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(27000,39, plabel)

#------------Vanadium vs. Iron----------------------
fit.v<-lm(Vanadium~Iron,fig.bg1)
anova(fit.v)
## Analysis of Variance Table
## 
## Response: Vanadium
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Iron        1  29450 29450.0     160 < 2.2e-16 ***
## Residuals 105  19327   184.1                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.v = summary(fit.v)
modsum.v
## 
## Call:
## lm(formula = Vanadium ~ Iron, data = fig.bg1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -42.381  -6.862  -0.502   7.755  38.653 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.225e+01  2.789e+00   4.393 2.68e-05 ***
## Iron        1.422e-03  1.124e-04  12.649  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 13.57 on 105 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.6038, Adjusted R-squared:    0.6 
## F-statistic:   160 on 1 and 105 DF,  p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.v, las = 1)

par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1)) 
plot(Vanadium~Iron,fig.bg1,main="Linear Relationship",xlab="Iron (mg/Kg)",ylab="Vanadium (mg/Kg)",xlim=c(4000,72000),ylim=c(0,150))
abline(fit.v,col="red",lwd=1.5)
#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.v), 4) # extract coefficients
text(28000,140,bquote(V == .(lm_coef[2]) * Fe + .(lm_coef[1])))
r2 = modsum.v$adj.r.squared
my.p = modsum.v$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(28000,125, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(28000,110, plabel)

#------------Beryllium vs. Iron----------------------
fit.b<-lm(Beryllium~Iron,fig.bg1)
anova(fit.b)
## Analysis of Variance Table
## 
## Response: Beryllium
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## Iron        1 4.7636  4.7636  146.46 < 2.2e-16 ***
## Residuals 105 3.4152  0.0325                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.b = summary(fit.b)
modsum.b
## 
## Call:
## lm(formula = Beryllium ~ Iron, data = fig.bg1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.80404 -0.09217 -0.01237  0.09053  0.61770 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 1.753e-01  3.707e-02   4.729 7.05e-06 ***
## Iron        1.809e-05  1.495e-06  12.102  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1803 on 105 degrees of freedom
##   (8 observations deleted due to missingness)
## Multiple R-squared:  0.5824, Adjusted R-squared:  0.5785 
## F-statistic: 146.5 on 1 and 105 DF,  p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.b, las = 1)

par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1)) 
plot(Beryllium~Iron,fig.bg1,main="Linear Relationship",xlab="Iron (mg/Kg)",ylab="Beryllium (mg/Kg)",xlim=c(4000,72000),ylim=c(-0.1,1.5))
abline(fit.b,col="red",lwd=1.5)
#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.b), 5) # extract coefficients
text(50000,0.25,bquote(Be == .(lm_coef[2]) * Fe + .(lm_coef[1])))
r2 = modsum.b$adj.r.squared
my.p = modsum.b$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(50000,0.10, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(50000,-0.05, plabel)

Evaluate the dependence of conc. vs. depth

par(mfrow=c(1,1),oma=c(0.2,0.2,0.2,0.2),mar=c(4,4,4,1),mgp=c(2.5,1,0)) 
plot(Arsenic~StartDepth,data=fig.bg1,ylab="Arsenic (mg/Kg)", xlab="Sample Depth (ft bgs)",main="Variation of Concentration \n with Sampling Depth")
abline(h=19,col="red",lwd=1.5)
text(18, 21, "19 mg/Kg", cex=0.7, col="red") 

plot(Vanadium~StartDepth,data=fig.bg1,ylab="Vanadium (mg/Kg)",xlab="Sample Depth (ft bgs)",main="Variation of Concentration \n with Sampling Depth")
abline(h=78,col="red",lwd=1.5)
text(18, 83, "78 mg/Kg", cex=0.7, col="red") 

plot(Beryllium~StartDepth,data=fig.bg1,ylab="Beryllium (mg/Kg)",xlab="Sample Depth (ft bgs)",main="Variation of Concentration \n with Sampling Depth")
abline(h=0.7,col="red",lwd=1.5)
text(18, 0.75, "0.7 mg/Kg", cex=0.7, col="red") 

plot(Lead~StartDepth,data=fig.bg1,ylab="Lead (mg/Kg)",xlab="Sample Depth (ft bgs)",main="Variation of Concentration \n with Sampling Depth")
abline(h=800,col="red",lwd=1.5)
abline(h=400,col="purple",lwd=1.5)
abline(h=90,col="blue",lwd=1.5)

text(13, 850, "Non-Residential: 800 mg/Kg", cex=0.7, col="red") 
text(13, 450, "Residential: 400 mg/Kg", cex=0.7, col="purple") 
text(13, 140, "Default IGWSSL: 90 mg/Kg", cex=0.7, col="blue") 

Compare results from the freehold soil sammples reported by NJ Geological Survey Investigation Report on the Baseline Concentrations of Arsenic, Beryllium and Associated Elements in Glauconite and Glauconitic Soils in the New Jersey Coastal Plain (NJDEP 2001). Data were provided in Table 23 of the above report.

fr<-read.csv("freehold.csv")

#plot to see the inter-relationship among elements for background freehold soil
pairs.panels(fr[c(2,4,6,12,13,23)], gap = 0) # all data # no correlation

#summary statistics for As, V, Be
fr.sta<-describe(fr[,c(4,6,23)])
fr.sta
##    vars  n  mean    sd median trimmed  mad  min   max  range skew kurtosis
## As    1 15 39.54 94.11   8.90   16.88 5.04 2.70 371.0 368.30 2.88     7.27
## Be    2 15  0.81  0.77   0.55    0.70 0.33 0.21   2.8   2.59 1.51     0.94
## V     3 15 24.52 12.37  20.20   23.97 8.75 8.10  48.1  40.00 0.57    -1.16
##       se
## As 24.30
## Be  0.20
## V   3.19
write.csv(fr.sta,"fr.sta.csv")
#outlier test for arsenic in freehold soil samples
#using rosnerTest in EnvStats package.
library(EnvStats)
i=4 # arsenic
  rosnerTest(fr[,i], k = 6, alpha = 0.05, warn = TRUE)
## Warning in rosnerTest(fr[, i], k = 6, alpha = 0.05, warn = TRUE): The true
## Type I error may be larger than assumed. See the help file for 'rosnerTest'
## for a table with information on the estimated Type I error level.
## 
## Results of Outlier Test
## -------------------------
## 
## Test Method:                     Rosner's Test for Outliers
## 
## Hypothesized Distribution:       Normal
## 
## Data:                            fr[, i]
## 
## Sample Size:                     15
## 
## Test Statistics:                 R.1 = 3.522002
##                                  R.2 = 2.585153
##                                  R.3 = 3.264377
##                                  R.4 = 1.788635
##                                  R.5 = 1.530281
##                                  R.6 = 1.485969
## 
## Test Statistic Parameter:        k = 6
## 
## Alternative Hypothesis:          Up to 6 observations are not
##                                  from the same Distribution.
## 
## Type I Error:                    5%
## 
## Number of Outliers Detected:     3
## 
##   i    Mean.i      SD.i Value Obs.Num    R.i+1 lambda.i+1 Outlier
## 1 0 39.540000 94.111240 371.0      13 3.522002   2.548308    TRUE
## 2 1 15.864286 21.985436  72.7      12 2.585153   2.507321    TRUE
## 3 2 11.492308 15.288583  61.4      15 3.264377   2.462033    TRUE
## 4 3  7.333333  3.112244  12.9       2 1.788635   2.411560   FALSE
## 5 4  6.827273  2.697069   2.7      11 1.530281   2.354730   FALSE
## 6 5  7.240000  2.449580   3.6       3 1.485969   2.289954   FALSE
#remove outlier rows 12, 13, 15 
fr.out<-fr[-c(12,13,15),]
fr.out.sta<-describe(fr.out[,4])
fr.out.sta
##   vars  n mean   sd median trimmed  mad min  max range skew kurtosis  se
## 1    1 12 7.33 3.11   7.65    7.24 3.11 2.7 12.9  10.2 0.08     -1.3 0.9
# independent two-sample t-test to compare concentrations of As, V, Be between site soil and freehold soil samples
combine<-read.csv("site.freehold.csv")
t.test(combine$Arsenic ~ combine$Type)
## 
## Results of Hypothesis Test
## --------------------------
## 
## Null Hypothesis:                 difference in means = 0
## 
## Alternative Hypothesis:          True difference in means is not equal to 0
## 
## Test Name:                       Welch Two Sample t-test
## 
## Estimated Parameter(s):          mean in group Freehold = 39.54000
##                                  mean in group Site     = 15.35982
## 
## Data:                            combine$Arsenic by combine$Type
## 
## Test Statistic:                  t = 0.9929683
## 
## Test Statistic Parameter:        df = 14.12018
## 
## P-value:                         0.3374377
## 
## 95% Confidence Interval:         LCL = -28.00673
##                                  UCL =  76.36709
t.test(combine$Beryllium ~ combine$Type)
## 
## Results of Hypothesis Test
## --------------------------
## 
## Null Hypothesis:                 difference in means = 0
## 
## Alternative Hypothesis:          True difference in means is not equal to 0
## 
## Test Name:                       Welch Two Sample t-test
## 
## Estimated Parameter(s):          mean in group Freehold = 0.8093333
##                                  mean in group Site     = 0.5847368
## 
## Data:                            combine$Beryllium by combine$Type
## 
## Test Statistic:                  t = 1.114119
## 
## Test Statistic Parameter:        df = 14.4948
## 
## P-value:                         0.2833618
## 
## 95% Confidence Interval:         LCL = -0.2063928
##                                  UCL =  0.6555857
t.test(combine$Vanadium ~ combine$Type)
## 
## Results of Hypothesis Test
## --------------------------
## 
## Null Hypothesis:                 difference in means = 0
## 
## Alternative Hypothesis:          True difference in means is not equal to 0
## 
## Test Name:                       Welch Two Sample t-test
## 
## Estimated Parameter(s):          mean in group Freehold = 24.52000
##                                  mean in group Site     = 44.21607
## 
## Data:                            combine$Vanadium by combine$Type
## 
## Test Statistic:                  t = -5.16901
## 
## Test Statistic Parameter:        df = 27.69998
## 
## P-value:                         1.80051e-05
## 
## 95% Confidence Interval:         LCL = -27.50516
##                                  UCL = -11.88698