Chapter 12 - Two Sample t-Test

12.1 Example with Wide Format Data

# Build the wide-format data set: each crop gets its own column
Cd.BeetBarley <- data.frame(
  redbeet = c(18, 5, 10, 8, 16, 12, 8, 8, 11, 5, 6, 8, 9, 21, 9),
  barley = c(8, 5, 10, 19, 15, 18, 11, 8, 9, 4, 5, 13, 7, 5, 7)
)
# Three views of the data follow; the first shows its internal structure
str(object = Cd.BeetBarley)
## 'data.frame':    15 obs. of  2 variables:
##  $ redbeet: num  18 5 10 8 16 12 8 8 11 5 ...
##  $ barley : num  8 5 10 19 15 18 11 8 9 4 ...
#> 'data.frame':    15 obs. of  2 variables:
#>  $ redbeet: num  18 5 10 8 16 12 8 8 11 5 ...
#>  $ barley : num  8 5 10 19 15 18 11 8 9 4 ...
# Second view: summary statistics for each column
summary(object = Cd.BeetBarley)
##     redbeet          barley    
##  Min.   : 5.00   Min.   : 4.0  
##  1st Qu.: 8.00   1st Qu.: 6.0  
##  Median : 9.00   Median : 8.0  
##  Mean   :10.27   Mean   : 9.6  
##  3rd Qu.:11.50   3rd Qu.:12.0  
##  Max.   :21.00   Max.   :19.0
#>     redbeet         barley    
#>  Min.   : 5.0   Min.   : 4.0  
#>  1st Qu.: 8.0   1st Qu.: 6.0  
#>  Median : 9.0   Median : 8.0  
#>  Mean   :10.3   Mean   : 9.6  
#>  3rd Qu.:11.5   3rd Qu.:12.0  
#>  Max.   :21.0   Max.   :19.0
# Third view: the leading rows (six is the default for `n`)
head(Cd.BeetBarley, n = 6L)
##   redbeet barley
## 1      18      8
## 2       5      5
## 3      10     10
## 4       8     19
## 5      16     15
## 6      12     18
#>   redbeet barley
#> 1      18      8
#> 2       5      5
#> 3      10     10
#> 4       8     19
#> 5      16     15
#> 6      12     18
# Side-by-side box plots of the two columns; the box labels come from
# `names` because the two vectors are passed as separate arguments
with(
  Cd.BeetBarley,
  boxplot(
    redbeet, barley,
    names = c("Redbeet", "Barley"),
    col = "red",
    boxwex = 0.6,
    ylim = c(0, 25),
    las = 1,
    main = "Phytoremediation Efficiency of Crop Plants",
    xlab = "Crop type",
    ylab = "Cadmium reduction (%)"
  )
)

Figure 12.1: Box plot comparing the phytoremediation efficiency of redbeet and barley crop plants for removing cadmium from contaminated soil at depths of 0 to 20 cm.

# F test comparing the variances of the two columns
with(
  Cd.BeetBarley,
  var.test(x = redbeet, y = barley)
)
## 
##  F test to compare two variances
## 
## data:  redbeet and barley
## F = 0.97888, num df = 14, denom df = 14, p-value = 0.9687
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3286377 2.9156685
## sample estimates:
## ratio of variances 
##          0.9788762
#> 
#>  F test to compare two variances
#> 
#> data:  redbeet and barley
#> F = 1, num df = 14, denom df = 14, p-value = 1
#> alternative hypothesis: true ratio of variances is not equal to 1
#> 95 percent confidence interval:
#>  0.329 2.916
#> sample estimates:
#> ratio of variances 
#>              0.979
# Two-sample t-test on the two columns, pooling the variances
with(
  Cd.BeetBarley,
  t.test(x = redbeet, y = barley, var.equal = TRUE)
)
## 
##  Two Sample t-test
## 
## data:  redbeet and barley
## t = 0.38658, df = 28, p-value = 0.702
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -2.865852  4.199185
## sample estimates:
## mean of x mean of y 
##  10.26667   9.60000
#> 
#>    Two Sample t-test
#> 
#> data:  redbeet and barley
#> t = 0.4, df = 28, p-value = 0.7
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>  -2.87  4.20
#> sample estimates:
#> mean of x mean of y 
#>      10.3       9.6
# Build the long-format data set: one column of measurements plus a
# grouping column naming the crop each measurement belongs to
Cd.CabbageMaize <- data.frame(
  remed.pcnt = c(
    46, 50, 44, 44, 43, 52, 48, 24, 51, 29, 53, 32, 61, 59, 35,
    34, 26, 44, 17, 34, 19, 34, 34, 43, 18, 34, 27, 27, 53, 30
  ),
  plt.typ = rep(c("cabbage", "maize"), each = 15)
)
# Check the structure of the data before summarising it
str(object = Cd.CabbageMaize)
## 'data.frame':    30 obs. of  2 variables:
##  $ remed.pcnt: num  46 50 44 44 43 52 48 24 51 29 ...
##  $ plt.typ   : chr  "cabbage" "cabbage" "cabbage" "cabbage" ...
#> 'data.frame':    30 obs. of  2 variables:
#>  $ remed.pcnt: num  46 50 44 44 43 52 48 24 51 29 ...
#>  $ plt.typ   : chr  "cabbage" "cabbage" "cabbage" "cabbage" ...
# Summary of each column
summary(object = Cd.CabbageMaize)
##    remed.pcnt      plt.typ         
##  Min.   :17.00   Length:30         
##  1st Qu.:29.25   Class :character  
##  Median :34.50   Mode  :character  
##  Mean   :38.17                     
##  3rd Qu.:47.50                     
##  Max.   :61.00
#>    remed.pcnt     plt.typ         
#>  Min.   :17.0   Length:30         
#>  1st Qu.:29.2   Class :character  
#>  Median :34.5   Mode  :character  
#>  Mean   :38.2                     
#>  3rd Qu.:47.5                     
#>  Max.   :61.0
# Leading rows (six is the default for `n`)
head(Cd.CabbageMaize, n = 6L)
##   remed.pcnt plt.typ
## 1         46 cabbage
## 2         50 cabbage
## 3         44 cabbage
## 4         44 cabbage
## 5         43 cabbage
## 6         52 cabbage
#>   remed.pcnt plt.typ
#> 1         46 cabbage
#> 2         50 cabbage
#> 3         44 cabbage
#> 4         44 cabbage
#> 5         43 cabbage
#> 6         52 cabbage
# With long-format data the formula interface labels each box from the
# grouping column, so there is no need to supply `names` -- one of the
# advantages of the long-format approach
with(
  Cd.CabbageMaize,
  boxplot(
    remed.pcnt ~ plt.typ,
    col = "pink",
    boxwex = .6,
    ylim = c(10, 70),
    las = 1,
    main = "Phytoremediation Efficiency of Crop Plants",
    xlab = "Crop type",
    ylab = "Cadmium reduction (%)"
  )
)

Figure 12.2: Box plot comparing the phytoremediation efficiency of cabbage and maize crop plants for removing cadmium from contaminated soil at depths of 20 to 40 cm.

# Long format: separate response and grouping column with ~ rather than
# passing two vectors separated by a comma
with(
  Cd.CabbageMaize,
  var.test(remed.pcnt ~ plt.typ)
)
## 
##  F test to compare two variances
## 
## data:  remed.pcnt by plt.typ
## F = 1.1449, num df = 14, denom df = 14, p-value = 0.8037
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.3843653 3.4100823
## sample estimates:
## ratio of variances 
##           1.144866
#> 
#>  F test to compare two variances
#> 
#> data:  remed.pcnt by plt.typ
#> F = 1, num df = 14, denom df = 14, p-value = 0.8
#> alternative hypothesis: true ratio of variances is not equal to 1
#> 95 percent confidence interval:
#>  0.384 3.410
#> sample estimates:
#> ratio of variances 
#>               1.14
# Two-sample t-test of the response by group, pooling the variances
with(
  Cd.CabbageMaize,
  t.test(remed.pcnt ~ plt.typ, var.equal = TRUE)
)
## 
##  Two Sample t-test
## 
## data:  remed.pcnt by plt.typ
## t = 3.4687, df = 28, p-value = 0.00171
## alternative hypothesis: true difference in means between group cabbage and group maize is not equal to 0
## 95 percent confidence interval:
##   5.377502 20.889165
## sample estimates:
## mean in group cabbage   mean in group maize 
##              44.73333              31.60000
#> 
#>  Two Sample t-test
#> 
#> data:  remed.pcnt by plt.typ
#> t = 3, df = 28, p-value = 0.002
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#>   5.38 20.89
#> sample estimates:
#> mean in group cabbage   mean in group maize 
#>                  44.7                  31.6