# Enter the redbeet/barley data by hand in wide format:
# one column per crop, one row per observation (15 rows).
Cd.BeetBarley <- as.data.frame(list(
  redbeet = c(
    18, 5, 10, 8, 16,
    12, 8, 8, 11, 5,
    6, 8, 9, 21, 9
  ),
  barley = c(
    8, 5, 10, 19, 15,
    18, 11, 8, 9, 4,
    5, 13, 7, 5, 7
  )
))
# Inspect the data three ways, starting with the compact structure listing
# (dimensions, column types and the leading values of each column).
str(object = Cd.BeetBarley)
## 'data.frame': 15 obs. of 2 variables:
## $ redbeet: num 18 5 10 8 16 12 8 8 11 5 ...
## $ barley : num 8 5 10 19 15 18 11 8 9 4 ...
#> 'data.frame': 15 obs. of 2 variables:
#> $ redbeet: num 18 5 10 8 16 12 8 8 11 5 ...
#> $ barley : num 8 5 10 19 15 18 11 8 9 4 ...
# Per-column minimum, quartiles, median, mean and maximum.
summary(object = Cd.BeetBarley)
## redbeet barley
## Min. : 5.00 Min. : 4.0
## 1st Qu.: 8.00 1st Qu.: 6.0
## Median : 9.00 Median : 8.0
## Mean :10.27 Mean : 9.6
## 3rd Qu.:11.50 3rd Qu.:12.0
## Max. :21.00 Max. :19.0
#> redbeet barley
#> Min. : 5.0 Min. : 4.0
#> 1st Qu.: 8.0 1st Qu.: 6.0
#> Median : 9.0 Median : 8.0
#> Mean :10.3 Mean : 9.6
#> 3rd Qu.:11.5 3rd Qu.:12.0
#> Max. :21.0 Max. :19.0
# Print the first six rows.
head(x = Cd.BeetBarley, n = 6L)
## redbeet barley
## 1 18 8
## 2 5 5
## 3 10 10
## 4 8 19
## 5 16 15
## 6 12 18
#> redbeet barley
#> 1 18 8
#> 2 5 5
#> 3 10 10
#> 4 8 19
#> 5 16 15
#> 6 12 18
# Side-by-side box plots of the two crop columns. Wide-format data is passed
# as separate vectors, so the box labels have to be supplied through `names`.
boxplot(
  Cd.BeetBarley$redbeet,
  Cd.BeetBarley$barley,
  names = c("Redbeet", "Barley"),
  col = "red",
  boxwex = 0.6,
  main = "Phytoremediation Efficiency of Crop Plants",
  xlab = "Crop type",
  ylab = "Cadmium reduction (%)",
  ylim = c(0, 25),
  las = 1 # axis tick labels drawn horizontally
)
# Figure 12.1: Box plot comparing the phytoremediation efficiency of
# redbeet and barley crop plants for removing cadmium from contaminated
# soil at depths of 0 to 20 cm.
# Two-sided F test of the ratio of the two crop variances against 1
# (wide format: the two columns are passed as separate vectors).
with(
  Cd.BeetBarley,
  var.test(
    x = redbeet,
    y = barley,
    ratio = 1,
    alternative = "two.sided",
    conf.level = 0.95
  )
)
##
## F test to compare two variances
##
## data: redbeet and barley
## F = 0.97888, num df = 14, denom df = 14, p-value = 0.9687
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3286377 2.9156685
## sample estimates:
## ratio of variances
## 0.9788762
#>
#> F test to compare two variances
#>
#> data: redbeet and barley
#> F = 1, num df = 14, denom df = 14, p-value = 1
#> alternative hypothesis: true ratio of variances is not equal to 1
#> 95 percent confidence interval:
#> 0.329 2.916
#> sample estimates:
#> ratio of variances
#> 0.979
# Two-sided, two-sample t-test on the crop means using the pooled
# (equal-variance) form rather than the Welch default.
with(
  Cd.BeetBarley,
  t.test(
    x = redbeet,
    y = barley,
    alternative = "two.sided",
    var.equal = TRUE,
    conf.level = 0.95
  )
)
##
## Two Sample t-test
##
## data: redbeet and barley
## t = 0.38658, df = 28, p-value = 0.702
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.865852 4.199185
## sample estimates:
## mean of x mean of y
## 10.26667 9.60000
#>
#> Two Sample t-test
#>
#> data: redbeet and barley
#> t = 0.4, df = 28, p-value = 0.7
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#> -2.87 4.20
#> sample estimates:
#> mean of x mean of y
#> 10.3 9.6
# Enter the cabbage/maize data by hand in long format: every row is one
# observation, with the value in remed.pcnt and its crop label in plt.typ.
Cd.CabbageMaize <- data.frame(
  remed.pcnt = c(
    46, 50, 44, 44, 43, 52, 48, 24, 51, 29, 53, 32, 61, 59, 35, # cabbage
    34, 26, 44, 17, 34, 19, 34, 34, 43, 18, 34, 27, 27, 53, 30 # maize
  ),
  plt.typ = rep(c("cabbage", "maize"), each = 15)
)
# Check the structure first: dimensions, column types and leading values.
str(object = Cd.CabbageMaize)
## 'data.frame': 30 obs. of 2 variables:
## $ remed.pcnt: num 46 50 44 44 43 52 48 24 51 29 ...
## $ plt.typ : chr "cabbage" "cabbage" "cabbage" "cabbage" ...
#> 'data.frame': 30 obs. of 2 variables:
#> $ remed.pcnt: num 46 50 44 44 43 52 48 24 51 29 ...
#> $ plt.typ : chr "cabbage" "cabbage" "cabbage" "cabbage" ...
# Summary statistics for the numeric column; the label column is only
# described by length, class and mode.
summary(object = Cd.CabbageMaize)
## remed.pcnt plt.typ
## Min. :17.00 Length:30
## 1st Qu.:29.25 Class :character
## Median :34.50 Mode :character
## Mean :38.17
## 3rd Qu.:47.50
## Max. :61.00
#> remed.pcnt plt.typ
#> Min. :17.0 Length:30
#> 1st Qu.:29.2 Class :character
#> Median :34.5 Mode :character
#> Mean :38.2
#> 3rd Qu.:47.5
#> Max. :61.0
# Print the first six rows.
head(x = Cd.CabbageMaize, n = 6L)
## remed.pcnt plt.typ
## 1 46 cabbage
## 2 50 cabbage
## 3 44 cabbage
## 4 44 cabbage
## 5 43 cabbage
## 6 52 cabbage
#> remed.pcnt plt.typ
#> 1 46 cabbage
#> 2 50 cabbage
#> 3 44 cabbage
#> 4 44 cabbage
#> 5 43 cabbage
#> 6 52 cabbage
# An advantage of long format: the formula interface labels each box from
# the grouping column, so no `names` argument has to be supplied.
boxplot(
  remed.pcnt ~ plt.typ,
  data = Cd.CabbageMaize,
  col = "pink",
  boxwex = 0.6,
  main = "Phytoremediation Efficiency of Crop Plants",
  xlab = "Crop type",
  ylab = "Cadmium reduction (%)",
  ylim = c(10, 70),
  las = 1 # axis tick labels drawn horizontally
)
# Figure 12.2: Box plot comparing the phytoremediation efficiency of
# cabbage and maize crop plants for removing cadmium from contaminated
# soil at depths of 20 to 40 cm.
# F test of the ratio of the two group variances. Long format, so the groups
# are given as a formula (response ~ group), not as two separate vectors.
var.test(
  remed.pcnt ~ plt.typ,
  data = Cd.CabbageMaize
)
##
## F test to compare two variances
##
## data: remed.pcnt by plt.typ
## F = 1.1449, num df = 14, denom df = 14, p-value = 0.8037
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3843653 3.4100823
## sample estimates:
## ratio of variances
## 1.144866
#>
#> F test to compare two variances
#>
#> data: remed.pcnt by plt.typ
#> F = 1, num df = 14, denom df = 14, p-value = 0.8
#> alternative hypothesis: true ratio of variances is not equal to 1
#> 95 percent confidence interval:
#> 0.384 3.410
#> sample estimates:
#> ratio of variances
#> 1.14
# Two-sample t-test of cabbage vs maize through the formula interface,
# using the pooled (equal-variance) form rather than the Welch default.
t.test(
  remed.pcnt ~ plt.typ,
  data = Cd.CabbageMaize,
  var.equal = TRUE
)
##
## Two Sample t-test
##
## data: remed.pcnt by plt.typ
## t = 3.4687, df = 28, p-value = 0.00171
## alternative hypothesis: true difference in means between group cabbage and group maize is not equal to 0
## 95 percent confidence interval:
## 5.377502 20.889165
## sample estimates:
## mean in group cabbage mean in group maize
## 44.73333 31.60000
#>
#> Two Sample t-test
#>
#> data: remed.pcnt by plt.typ
#> t = 3, df = 28, p-value = 0.002
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#> 5.38 20.89
#> sample estimates:
#> mean in group cabbage mean in group maize
#> 44.7 31.6