##data analysis
setwd("C:/Users/ywang/Dropbox/R/Deptford/RIR")
fig<-read.csv("Soil_Site.csv")
#only select interested elements, aluminum,arsenic,beryllium,iron,lead,vanadium
fig.bg<-fig[c(2,4,9,28,11,24, 8, 17)]
#obtain summary statistics
#install.packages("psych")
library("psych")
## Warning: package 'psych' was built under R version 3.2.4
#summary statistics for all data
summary<-describe(fig.bg)
summary
## vars n mean sd median trimmed mad min
## FieldID* 1 120 60.50 34.79 60.50 60.50 44.48 1.0
## StartDepth 2 120 2.55 3.89 1.00 1.60 1.48 0.0
## Arsenic 3 112 15.36 16.83 11.40 12.68 9.79 2.5
## Vanadium 4 112 44.22 21.97 41.80 42.03 26.61 14.7
## Beryllium 5 114 0.58 0.28 0.57 0.57 0.38 0.2
## Lead 6 115 95.03 193.48 12.60 47.96 13.05 2.0
## Aluminum 7 112 8721.88 4532.85 7875.00 8358.56 4959.30 2080.0
## Iron 8 112 22545.71 12511.06 21400.00 21342.11 13714.05 5660.0
## max range skew kurtosis se
## FieldID* 120.0 119.0 0.00 -1.23 3.18
## StartDepth 21.0 21.0 2.51 6.32 0.35
## Arsenic 148.0 145.5 4.74 32.52 1.59
## Vanadium 118.0 103.3 0.80 0.28 2.08
## Beryllium 1.2 1.0 0.28 -1.17 0.03
## Lead 1240.0 1238.0 3.49 14.07 18.04
## Aluminum 22800.0 20720.0 0.62 -0.36 428.31
## Iron 72000.0 66340.0 1.05 1.57 1182.18
write.csv(summary,"summary.csv")
#outlier test
#using the boxplot function in car library to make the plot showing the outliers
library(car)
## Warning: package 'car' was built under R version 3.2.4
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
par(mfrow=c(2,3),oma=c(1,1,1,1),mar=c(2,4,2,1))
for (i in 3:ncol(fig.bg))
{
Boxplot(fig.bg[,i], id.method=c("y"),ylab=colnames(fig.bg[i]))
}
# using rosnerTest in EnvStats package.
library(EnvStats)
## Warning: package 'EnvStats' was built under R version 3.2.4
##
## Attaching package: 'EnvStats'
## The following object is masked from 'package:car':
##
## qqPlot
## The following objects are masked from 'package:stats':
##
## predict, predict.lm
## The following object is masked from 'package:base':
##
## print.default
i=3 # arsenic
rosnerTest(fig.bg[,i], k = 6, alpha = 0.05, warn = TRUE)
## Warning in rosnerTest(fig.bg[, i], k = 6, alpha = 0.05, warn = TRUE): 8
## observations with NA/NaN/Inf in 'x' removed.
##
## Results of Outlier Test
## -------------------------
##
## Test Method: Rosner's Test for Outliers
##
## Hypothesized Distribution: Normal
##
## Data: fig.bg[, i]
##
## Number NA/NaN/Inf's Removed: 8
##
## Sample Size: 112
##
## Test Statistics: R.1 = 7.880343
## R.2 = 4.251184
## R.3 = 3.747481
## R.4 = 3.671153
## R.5 = 3.789347
## R.6 = 2.585476
##
## Test Statistic Parameter: k = 6
##
## Alternative Hypothesis: Up to 6 observations are not
## from the same Distribution.
##
## Type I Error: 5%
##
## Number of Outliers Detected: 5
##
## i Mean.i SD.i Value Obs.Num R.i+1 lambda.i+1 Outlier
## 1 0 15.35982 16.831777 148.0 78 7.880343 3.422302 TRUE
## 2 1 14.16486 11.158101 61.6 79 4.251184 3.419309 TRUE
## 3 2 13.73364 10.237907 52.1 14 3.747481 3.416284 TRUE
## 4 3 13.38165 9.593267 48.6 75 3.671153 3.413225 TRUE
## 5 4 13.05556 9.010641 47.2 28 3.789347 3.410133 TRUE
## 6 5 12.73645 8.417618 34.5 45 2.585476 3.407006 FALSE
i=4 # vanadium
rosnerTest(fig.bg[,i], k = 6, alpha = 0.05, warn = TRUE)
## Warning in rosnerTest(fig.bg[, i], k = 6, alpha = 0.05, warn = TRUE): 8
## observations with NA/NaN/Inf in 'x' removed.
##
## Results of Outlier Test
## -------------------------
##
## Test Method: Rosner's Test for Outliers
##
## Hypothesized Distribution: Normal
##
## Data: fig.bg[, i]
##
## Number NA/NaN/Inf's Removed: 8
##
## Sample Size: 112
##
## Test Statistics: R.1 = 3.357739
## R.2 = 2.842768
## R.3 = 2.656002
## R.4 = 2.479551
## R.5 = 2.492585
## R.6 = 2.446946
##
## Test Statistic Parameter: k = 6
##
## Alternative Hypothesis: Up to 6 observations are not
## from the same Distribution.
##
## Type I Error: 5%
##
## Number of Outliers Detected: 0
##
## i Mean.i SD.i Value Obs.Num R.i+1 lambda.i+1 Outlier
## 1 0 44.21607 21.97429 118.0 62 3.357739 3.422302 FALSE
## 2 1 43.55135 20.91224 103.0 26 2.842768 3.419309 FALSE
## 3 2 43.01091 20.21425 96.7 92 2.656002 3.416284 FALSE
## 4 3 42.51835 19.63325 91.2 28 2.479551 3.413225 FALSE
## 5 4 42.06759 19.14976 89.8 64 2.492585 3.410133 FALSE
## 6 5 41.62150 18.66756 87.3 74 2.446946 3.407006 FALSE
# Arsenic Outlier: as row 78,79, 14,75,28; sample MW-5A, MW-5B, AOC-1-14B,AOC-1-22, MW-3C
# outliers for aluminum and iron were not removed since these are crustal elemetns.
# Outliers for lead was not removed because it is believed from site source.
# remove the outlier data for arsenic only
fig.bg1<-fig.bg [-c(78,79, 14,75,28),]
#obtain summary statistics for arsenic without outliers
library("psych")
mat.as<-describe(fig.bg1[,3])
mat.as
## vars n mean sd median trimmed mad min max range skew kurtosis
## 1 1 107 12.74 8.42 10.9 11.92 9.34 2.5 34.5 32 0.68 -0.61
## se
## 1 0.81
write.csv(mat.as,"mat.as.csv")
#plot to see the inter-relationship among elements
pairs.panels(fig.bg[c(3,4,5,6,7,8)], gap = 0) # all data
pairs.panels(fig.bg1[c(3,4,5,6,7,8)], gap = 0) # after removing outliers
#there are good correlations between aluminum,iron, beryllium, Arsenic and Vanadium
cor.test(fig.bg1$Arsenic,fig.bg1$Aluminum)
##
## Results of Hypothesis Test
## --------------------------
##
## Null Hypothesis: correlation = 0
##
## Alternative Hypothesis: True correlation is not equal to 0
##
## Test Name: Pearson's product-moment correlation
##
## Estimated Parameter(s): cor = 0.8822266
##
## Data: fig.bg1$Arsenic and fig.bg1$Aluminum
##
## Test Statistic: t = 19.20062
##
## Test Statistic Parameter: df = 105
##
## P-value: 0
##
## 95% Confidence Interval: LCL = 0.8316716
## UCL = 0.9182756
cor.test(fig.bg1$Arsenic,fig.bg1$Iron)
##
## Results of Hypothesis Test
## --------------------------
##
## Null Hypothesis: correlation = 0
##
## Alternative Hypothesis: True correlation is not equal to 0
##
## Test Name: Pearson's product-moment correlation
##
## Estimated Parameter(s): cor = 0.7584484
##
## Data: fig.bg1$Arsenic and fig.bg1$Iron
##
## Test Statistic: t = 11.92479
##
## Test Statistic Parameter: df = 105
##
## P-value: 0
##
## 95% Confidence Interval: LCL = 0.6642389
## UCL = 0.8289411
cor.test(fig.bg1$Vanadium,fig.bg1$Aluminum)
##
## Results of Hypothesis Test
## --------------------------
##
## Null Hypothesis: correlation = 0
##
## Alternative Hypothesis: True correlation is not equal to 0
##
## Test Name: Pearson's product-moment correlation
##
## Estimated Parameter(s): cor = 0.838206
##
## Data: fig.bg1$Vanadium and fig.bg1$Aluminum
##
## Test Statistic: t = 15.74951
##
## Test Statistic Parameter: df = 105
##
## P-value: 0
##
## 95% Confidence Interval: LCL = 0.7710534
## UCL = 0.8869196
cor.test(fig.bg1$Vanadium,fig.bg1$Iron)
##
## Results of Hypothesis Test
## --------------------------
##
## Null Hypothesis: correlation = 0
##
## Alternative Hypothesis: True correlation is not equal to 0
##
## Test Name: Pearson's product-moment correlation
##
## Estimated Parameter(s): cor = 0.7770273
##
## Data: fig.bg1$Vanadium and fig.bg1$Iron
##
## Test Statistic: t = 12.64905
##
## Test Statistic Parameter: df = 105
##
## P-value: 0
##
## 95% Confidence Interval: LCL = 0.6887815
## UCL = 0.8425834
cor.test(fig.bg1$Beryllium,fig.bg1$Aluminum)
##
## Results of Hypothesis Test
## --------------------------
##
## Null Hypothesis: correlation = 0
##
## Alternative Hypothesis: True correlation is not equal to 0
##
## Test Name: Pearson's product-moment correlation
##
## Estimated Parameter(s): cor = 0.8332856
##
## Data: fig.bg1$Beryllium and fig.bg1$Aluminum
##
## Test Statistic: t = 15.44496
##
## Test Statistic Parameter: df = 105
##
## P-value: 0
##
## 95% Confidence Interval: LCL = 0.7643525
## UCL = 0.8833867
cor.test(fig.bg1$Beryllium,fig.bg1$Iron)
##
## Results of Hypothesis Test
## --------------------------
##
## Null Hypothesis: correlation = 0
##
## Alternative Hypothesis: True correlation is not equal to 0
##
## Test Name: Pearson's product-moment correlation
##
## Estimated Parameter(s): cor = 0.7631734
##
## Data: fig.bg1$Beryllium and fig.bg1$Iron
##
## Test Statistic: t = 12.10194
##
## Test Statistic Parameter: df = 105
##
## P-value: 0
##
## 95% Confidence Interval: LCL = 0.6704614
## UCL = 0.8324186
#perform a linear fit and plot
#------------Arsenic vs. Aluminum---------------------
fit.as<-lm(Arsenic~Aluminum,fig.bg1)
anova(fit.as)
## Analysis of Variance Table
##
## Response: Arsenic
## Df Sum Sq Mean Sq F value Pr(>F)
## Aluminum 1 5845.8 5845.8 368.66 < 2.2e-16 ***
## Residuals 105 1665.0 15.9
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.as = summary(fit.as)
modsum.as
##
## Call:
## lm(formula = Arsenic ~ Aluminum, data = fig.bg1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.6626 -2.2157 -0.3195 2.1092 11.8036
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.353e+00 8.286e-01 -1.633 0.106
## Aluminum 1.629e-03 8.485e-05 19.201 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 3.982 on 105 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.7783, Adjusted R-squared: 0.7762
## F-statistic: 368.7 on 1 and 105 DF, p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.as, las = 1)
par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1))
plot(Arsenic~Aluminum,fig.bg1,main="Linear Relationship",xlab="Aluminum (mg/Kg)",ylab="Arsenic (mg/Kg)",xlim=c(1500,25000),ylim=c(0,50))
abline(fit.as,col="red",lwd=1.5)
#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.as), 4) # extract coefficients
text(10000,45,bquote(As == .(lm_coef[2]) * Al + .(lm_coef[1])))
r2 = modsum.as$adj.r.squared
my.p = modsum.as$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(10000,40, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(10000,35, plabel)
#------------Vanadium vs. Aluminum----------------------
fit.v<-lm(Vanadium~Aluminum,fig.bg1)
anova(fit.v)
## Analysis of Variance Table
##
## Response: Vanadium
## Df Sum Sq Mean Sq F value Pr(>F)
## Aluminum 1 34270 34270 248.05 < 2.2e-16 ***
## Residuals 105 14507 138
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.v = summary(fit.v)
modsum.v
##
## Call:
## lm(formula = Vanadium ~ Aluminum, data = fig.bg1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -26.022 -7.846 -1.064 6.532 33.201
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.2656897 2.4459865 3.788 0.000253 ***
## Aluminum 0.0039445 0.0002505 15.750 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.75 on 105 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.7026, Adjusted R-squared: 0.6998
## F-statistic: 248 on 1 and 105 DF, p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.v, las = 1)
par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1))
plot(Vanadium~Aluminum,fig.bg1,main="Linear Relationship",xlab="Aluminum (mg/Kg)",ylab="Vanadium (mg/Kg)",xlim=c(1500,25000),ylim=c(0,150))
abline(fit.v,col="red",lwd=1.5)
#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.v), 4) # extract coefficients
text(10000,140,bquote(V == .(lm_coef[2]) * Al + .(lm_coef[1])))
r2 = modsum.v$adj.r.squared
my.p = modsum.v$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(10000,128, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(10000,113, plabel)
#------------Beryllium vs. Aluminum----------------------
fit.b<-lm(Beryllium~Aluminum,fig.bg1)
anova(fit.b)
## Analysis of Variance Table
##
## Response: Beryllium
## Df Sum Sq Mean Sq F value Pr(>F)
## Aluminum 1 5.6790 5.6790 238.55 < 2.2e-16 ***
## Residuals 105 2.4997 0.0238
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.b = summary(fit.b)
modsum.b
##
## Call:
## lm(formula = Beryllium ~ Aluminum, data = fig.bg1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.29341 -0.09764 -0.03076 0.07693 0.57235
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.321e-01 3.211e-02 4.113 7.77e-05 ***
## Aluminum 5.078e-05 3.288e-06 15.445 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1543 on 105 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.6944, Adjusted R-squared: 0.6915
## F-statistic: 238.5 on 1 and 105 DF, p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.b, las = 1)
par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1))
plot(Beryllium~Aluminum,fig.bg1,main="Linear Relationship",xlab="Aluminum (mg/Kg)",ylab="Beryllium (mg/Kg)",xlim=c(1500,25000),ylim=c(0,1.5))
abline(fit.b,col="red",lwd=1.5)
#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.b), 4) # extract coefficients
text(17500,0.4,bquote(Be == .(lm_coef[2]) * Al + .(lm_coef[1])))
r2 = modsum.b$adj.r.squared
my.p = modsum.b$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(17500,0.25, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(17500,0.1, plabel)
#perform a linear fit and plot
#------------Arsenic vs. Iron----------------------
fit.as<-lm(Arsenic~Iron,fig.bg1)
anova(fit.as)
## Analysis of Variance Table
##
## Response: Arsenic
## Df Sum Sq Mean Sq F value Pr(>F)
## Iron 1 4320.5 4320.5 142.2 < 2.2e-16 ***
## Residuals 105 3190.2 30.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.as = summary(fit.as)
modsum.as
##
## Call:
## lm(formula = Arsenic ~ Iron, data = fig.bg1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.3066 -3.0997 -0.3697 2.5342 14.5111
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.131e-01 1.133e+00 0.718 0.475
## Iron 5.448e-04 4.568e-05 11.925 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.512 on 105 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.5752, Adjusted R-squared: 0.5712
## F-statistic: 142.2 on 1 and 105 DF, p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.as, las = 1)
par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1))
plot(Arsenic~Iron,fig.bg1,main="Linear Relationship",xlab="Iron (mg/Kg)",ylab="Arsenic (mg/Kg)",xlim=c(4000,72000),ylim=c(0,50))
abline(fit.as,col="red",lwd=1.5)
#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.as), 4) # extract coefficients
text(27000,49,bquote(As == .(lm_coef[2]) * Fe + .(lm_coef[1])))
r2 = modsum.as$adj.r.squared
my.p = modsum.as$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(27000,44, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(27000,39, plabel)
#------------Vanadium vs. Iron----------------------
fit.v<-lm(Vanadium~Iron,fig.bg1)
anova(fit.v)
## Analysis of Variance Table
##
## Response: Vanadium
## Df Sum Sq Mean Sq F value Pr(>F)
## Iron 1 29450 29450.0 160 < 2.2e-16 ***
## Residuals 105 19327 184.1
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.v = summary(fit.v)
modsum.v
##
## Call:
## lm(formula = Vanadium ~ Iron, data = fig.bg1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -42.381 -6.862 -0.502 7.755 38.653
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.225e+01 2.789e+00 4.393 2.68e-05 ***
## Iron 1.422e-03 1.124e-04 12.649 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 13.57 on 105 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.6038, Adjusted R-squared: 0.6
## F-statistic: 160 on 1 and 105 DF, p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.v, las = 1)
par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1))
plot(Vanadium~Iron,fig.bg1,main="Linear Relationship",xlab="Iron (mg/Kg)",ylab="Vanadium (mg/Kg)",xlim=c(4000,72000),ylim=c(0,150))
abline(fit.v,col="red",lwd=1.5)
#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.v), 4) # extract coefficients
text(28000,140,bquote(V == .(lm_coef[2]) * Fe + .(lm_coef[1])))
r2 = modsum.v$adj.r.squared
my.p = modsum.v$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(28000,125, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(28000,110, plabel)
#------------Beryllium vs. Iron----------------------
fit.b<-lm(Beryllium~Iron,fig.bg1)
anova(fit.b)
## Analysis of Variance Table
##
## Response: Beryllium
## Df Sum Sq Mean Sq F value Pr(>F)
## Iron 1 4.7636 4.7636 146.46 < 2.2e-16 ***
## Residuals 105 3.4152 0.0325
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
modsum.b = summary(fit.b)
modsum.b
##
## Call:
## lm(formula = Beryllium ~ Iron, data = fig.bg1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.80404 -0.09217 -0.01237 0.09053 0.61770
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.753e-01 3.707e-02 4.729 7.05e-06 ***
## Iron 1.809e-05 1.495e-06 12.102 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1803 on 105 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.5824, Adjusted R-squared: 0.5785
## F-statistic: 146.5 on 1 and 105 DF, p-value: < 2.2e-16
opar <- par(mfrow = c(2,2), oma = c(0, 0, 1.1, 0))
plot(fit.b, las = 1)
par(mfrow=c(1,1),oma=c(0.5,0.5,0.5,0.5),mar=c(4,4,2,1))
plot(Beryllium~Iron,fig.bg1,main="Linear Relationship",xlab="Iron (mg/Kg)",ylab="Beryllium (mg/Kg)",xlim=c(4000,72000),ylim=c(-0.1,1.5))
abline(fit.b,col="red",lwd=1.5)
#putting linear equation, adjusted r2 and p-value on the plot
lm_coef <- round(coef(fit.b), 5) # extract coefficients
text(50000,0.25,bquote(Be == .(lm_coef[2]) * Fe + .(lm_coef[1])))
r2 = modsum.b$adj.r.squared
my.p = modsum.b$coefficients[2,4]
mylabel = bquote(italic(R)^2 == .(format(r2, digits = 3)))
text(50000,0.10, labels = mylabel)
plabel = bquote(italic(p) == .(format(my.p, digits = 3)))
text(50000,-0.05, plabel)
Evaluate the dependence of conc. vs. depth
par(mfrow=c(1,1),oma=c(0.2,0.2,0.2,0.2),mar=c(4,4,4,1),mgp=c(2.5,1,0))
plot(Arsenic~StartDepth,data=fig.bg1,ylab="Arsenic (mg/Kg)", xlab="Sample Depth (ft bgs)",main="Variation of Concentration \n with Sampling Depth")
abline(h=19,col="red",lwd=1.5)
text(18, 21, "19 mg/Kg", cex=0.7, col="red")
plot(Vanadium~StartDepth,data=fig.bg1,ylab="Vanadium (mg/Kg)",xlab="Sample Depth (ft bgs)",main="Variation of Concentration \n with Sampling Depth")
abline(h=78,col="red",lwd=1.5)
text(18, 83, "78 mg/Kg", cex=0.7, col="red")
plot(Beryllium~StartDepth,data=fig.bg1,ylab="Beryllium (mg/Kg)",xlab="Sample Depth (ft bgs)",main="Variation of Concentration \n with Sampling Depth")
abline(h=0.7,col="red",lwd=1.5)
text(18, 0.75, "0.7 mg/Kg", cex=0.7, col="red")
plot(Lead~StartDepth,data=fig.bg1,ylab="Lead (mg/Kg)",xlab="Sample Depth (ft bgs)",main="Variation of Concentration \n with Sampling Depth")
abline(h=800,col="red",lwd=1.5)
abline(h=400,col="purple",lwd=1.5)
abline(h=90,col="blue",lwd=1.5)
text(13, 850, "Non-Residential: 800 mg/Kg", cex=0.7, col="red")
text(13, 450, "Residential: 400 mg/Kg", cex=0.7, col="purple")
text(13, 140, "Default IGWSSL: 90 mg/Kg", cex=0.7, col="blue")
Compare results from the freehold soil sammples reported by NJ Geological Survey Investigation Report on the Baseline Concentrations of Arsenic, Beryllium and Associated Elements in Glauconite and Glauconitic Soils in the New Jersey Coastal Plain (NJDEP 2001). Data were provided in Table 23 of the above report.
fr<-read.csv("freehold.csv")
#plot to see the inter-relationship among elements for background freehold soil
pairs.panels(fr[c(2,4,6,12,13,23)], gap = 0) # all data # no correlation
#summary statistics for As, V, Be
fr.sta<-describe(fr[,c(4,6,23)])
fr.sta
## vars n mean sd median trimmed mad min max range skew kurtosis
## As 1 15 39.54 94.11 8.90 16.88 5.04 2.70 371.0 368.30 2.88 7.27
## Be 2 15 0.81 0.77 0.55 0.70 0.33 0.21 2.8 2.59 1.51 0.94
## V 3 15 24.52 12.37 20.20 23.97 8.75 8.10 48.1 40.00 0.57 -1.16
## se
## As 24.30
## Be 0.20
## V 3.19
write.csv(fr.sta,"fr.sta.csv")
#outlier test for arsenic in freehold soil samples
#using rosnerTest in EnvStats package.
library(EnvStats)
i=4 # arsenic
rosnerTest(fr[,i], k = 6, alpha = 0.05, warn = TRUE)
## Warning in rosnerTest(fr[, i], k = 6, alpha = 0.05, warn = TRUE): The true
## Type I error may be larger than assumed. See the help file for 'rosnerTest'
## for a table with information on the estimated Type I error level.
##
## Results of Outlier Test
## -------------------------
##
## Test Method: Rosner's Test for Outliers
##
## Hypothesized Distribution: Normal
##
## Data: fr[, i]
##
## Sample Size: 15
##
## Test Statistics: R.1 = 3.522002
## R.2 = 2.585153
## R.3 = 3.264377
## R.4 = 1.788635
## R.5 = 1.530281
## R.6 = 1.485969
##
## Test Statistic Parameter: k = 6
##
## Alternative Hypothesis: Up to 6 observations are not
## from the same Distribution.
##
## Type I Error: 5%
##
## Number of Outliers Detected: 3
##
## i Mean.i SD.i Value Obs.Num R.i+1 lambda.i+1 Outlier
## 1 0 39.540000 94.111240 371.0 13 3.522002 2.548308 TRUE
## 2 1 15.864286 21.985436 72.7 12 2.585153 2.507321 TRUE
## 3 2 11.492308 15.288583 61.4 15 3.264377 2.462033 TRUE
## 4 3 7.333333 3.112244 12.9 2 1.788635 2.411560 FALSE
## 5 4 6.827273 2.697069 2.7 11 1.530281 2.354730 FALSE
## 6 5 7.240000 2.449580 3.6 3 1.485969 2.289954 FALSE
#remove outlier rows 12, 13, 15
fr.out<-fr[-c(12,13,15),]
fr.out.sta<-describe(fr.out[,4])
fr.out.sta
## vars n mean sd median trimmed mad min max range skew kurtosis se
## 1 1 12 7.33 3.11 7.65 7.24 3.11 2.7 12.9 10.2 0.08 -1.3 0.9
# independent two-sample t-test to compare concentrations of As, V, Be between site soil and freehold soil samples
combine<-read.csv("site.freehold.csv")
t.test(combine$Arsenic ~ combine$Type)
##
## Results of Hypothesis Test
## --------------------------
##
## Null Hypothesis: difference in means = 0
##
## Alternative Hypothesis: True difference in means is not equal to 0
##
## Test Name: Welch Two Sample t-test
##
## Estimated Parameter(s): mean in group Freehold = 39.54000
## mean in group Site = 15.35982
##
## Data: combine$Arsenic by combine$Type
##
## Test Statistic: t = 0.9929683
##
## Test Statistic Parameter: df = 14.12018
##
## P-value: 0.3374377
##
## 95% Confidence Interval: LCL = -28.00673
## UCL = 76.36709
t.test(combine$Beryllium ~ combine$Type)
##
## Results of Hypothesis Test
## --------------------------
##
## Null Hypothesis: difference in means = 0
##
## Alternative Hypothesis: True difference in means is not equal to 0
##
## Test Name: Welch Two Sample t-test
##
## Estimated Parameter(s): mean in group Freehold = 0.8093333
## mean in group Site = 0.5847368
##
## Data: combine$Beryllium by combine$Type
##
## Test Statistic: t = 1.114119
##
## Test Statistic Parameter: df = 14.4948
##
## P-value: 0.2833618
##
## 95% Confidence Interval: LCL = -0.2063928
## UCL = 0.6555857
t.test(combine$Vanadium ~ combine$Type)
##
## Results of Hypothesis Test
## --------------------------
##
## Null Hypothesis: difference in means = 0
##
## Alternative Hypothesis: True difference in means is not equal to 0
##
## Test Name: Welch Two Sample t-test
##
## Estimated Parameter(s): mean in group Freehold = 24.52000
## mean in group Site = 44.21607
##
## Data: combine$Vanadium by combine$Type
##
## Test Statistic: t = -5.16901
##
## Test Statistic Parameter: df = 27.69998
##
## P-value: 1.80051e-05
##
## 95% Confidence Interval: LCL = -27.50516
## UCL = -11.88698