library(dplyr)
library(knitr)
library(kableExtra)
library(ggplot2)

Problem 3.1

x1 <- c(5.60, 6.80, 8.32, 8.70, 7.64, 7.44, 7.48, 7.80, 7.72, 8.40, 6.98, 8.00)
x2 <- c(5.04, 7.38, 5.56, 6.96, 8.30, 6.86, 5.62, 7.22, 5.72, 6.40, 7.54, 7.50)
x3 <- c(8.36, 7.04, 6.92, 8.18, 6.20, 6.10, 2.75, 8.14, 9.00, 8.64, 6.60, 8.18)
x4 <- c(8.30, 8.54, 7.68, 8.92, 8.46, 7.38, 8.08, 8.12, 8.68, 8.24, 8.09, 8.06)
x <- c(x1, x2, x3, x4)
operators <- as.factor(rep(c("Johnson", "Gina", "Charles", "Eva"), each = 12))
y <- data.frame(ww = x, op = operators)
ggplot(y) + geom_boxplot(aes(x = op,y = ww), fill = c("#FF8888","#9999FF","#FFDD55","#99FF33"))+ 
  xlab("Operators")+
  ylab("shear strengths")

lm(x ~ operators)
## 
## Call:
## lm(formula = x ~ operators)
## 
## Coefficients:
##      (Intercept)      operatorsEva     operatorsGina  operatorsJohnson  
##           7.1758            1.0367           -0.5008            0.3975
summary(lm(x ~ operators)) 
## 
## Call:
## lm(formula = x ~ operators)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4258 -0.5433  0.0771  0.7173  1.8242 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        7.1758     0.3153  22.755   <2e-16 ***
## operatorsEva       1.0367     0.4460   2.325   0.0248 *  
## operatorsGina     -0.5008     0.4460  -1.123   0.2675    
## operatorsJohnson   0.3975     0.4460   0.891   0.3776    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.092 on 44 degrees of freedom
## Multiple R-squared:  0.2244, Adjusted R-squared:  0.1715 
## F-statistic: 4.243 on 3 and 44 DF,  p-value: 0.0102
p31 <- anova(lm(x ~ operators))
p31 %>% kable(., "html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
Df Sum Sq Mean Sq F value Pr(>F)
operators 3 15.18888 5.062961 4.24272 0.0101966
Residuals 44 52.50648 1.193329 NA NA

Problem 3.2

N <- c(35,37,49,46,63,39,46,56,63,65,56,65,70,63,65,70,77,81,86,70,70,77,77,81,77)
p1 <- c(40,37,44,47,47,47,68,47,54,61,71,75,89,58,59,62,79,96,58,62,70,72,75,96,75)
v1 <- c(46,42,65,46,58,42,48,58,50,80,63,65,70,70,72,97,46,56,70,70,72,76,90,76,92)
p8 <- c(21,40,44,54,36,40,56,60,48,53,60,60,65,68,60,81,81,48,48,56,68,75,81,48,68)
v8 <- c(16,19,19,32,33,33,30,42,42,33,26,30,40,54,34,34,47,47,42,47,54,54,56,60,44)
x <- c(N,p1,v1,p8,v8)
flys <- as.factor(rep(c("None", "1 pregnant", "1 virgin", "8 pregnant", "8 virgin"), each = 25))
y <- data.frame(ww = x, op = flys)
ggplot(y) + geom_boxplot(aes(x = op,y = ww), fill = c("#FF8888","#9999FF","#FFDD55","#99FF33","#D28EFF"))+ 
  xlab("Companions")+
  ylab("Longevity")

lm(x ~ flys)
## 
## Call:
## lm(formula = x ~ flys)
## 
## Coefficients:
##    (Intercept)    flys1 virgin  flys8 pregnant    flys8 virgin  
##          63.56            1.24           -6.80          -24.84  
##       flysNone  
##          -0.20
summary(lm(x ~ flys))
## 
## Call:
## lm(formula = x ~ flys)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -35.76  -8.76   0.20  11.20  32.44 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      63.560      2.962  21.461  < 2e-16 ***
## flys1 virgin      1.240      4.188   0.296    0.768    
## flys8 pregnant   -6.800      4.188  -1.624    0.107    
## flys8 virgin    -24.840      4.188  -5.931 2.98e-08 ***
## flysNone         -0.200      4.188  -0.048    0.962    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.81 on 120 degrees of freedom
## Multiple R-squared:  0.3121, Adjusted R-squared:  0.2892 
## F-statistic: 13.61 on 4 and 120 DF,  p-value: 3.516e-09
anova(lm(x ~ flys)) %>% kable(., "html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
Df Sum Sq Mean Sq F value Pr(>F)
flys 4 11939.28 2984.8200 13.61195 0
Residuals 120 26313.52 219.2793 NA NA

Exercise 4.3

Refer to the data in Problem 3.1. Workers 1 and 2 were experienced, whereas workers 3 and 4 were novices. Find a contrast to compare the experienced and novice workers and test the null hypothesis that experienced and novice works produce the same average shear strength.

x1 <- c(5.60, 6.80, 8.32, 8.70, 7.64, 7.44, 7.48, 7.80, 7.72, 8.40, 6.98, 8.00)
x2 <- c(5.04, 7.38, 5.56, 6.96, 8.30, 6.86, 5.62, 7.22, 5.72, 6.40, 7.54, 7.50)
x3 <- c(8.36, 7.04, 6.92, 8.18, 6.20, 6.10, 2.75, 8.14, 9.00, 8.64, 6.60, 8.18)
x4 <- c(8.30, 8.54, 7.68, 8.92, 8.46, 7.38, 8.08, 8.12, 8.68, 8.24, 8.09, 8.06)
x <- c(x1, x2, x3, x4)
operators <- as.factor(rep(c("Johnson", "Gina", "Charles", "Eva"), each = 12))
boxplot(x ~ operators, ylab = "Strength of the pins", xlab = "Operators")

lm(x ~ operators)
## 
## Call:
## lm(formula = x ~ operators)
## 
## Coefficients:
##      (Intercept)      operatorsEva     operatorsGina  operatorsJohnson  
##           7.1758            1.0367           -0.5008            0.3975
summary(lm(x ~ operators)) 
## 
## Call:
## lm(formula = x ~ operators)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -4.4258 -0.5433  0.0771  0.7173  1.8242 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        7.1758     0.3153  22.755   <2e-16 ***
## operatorsEva       1.0367     0.4460   2.325   0.0248 *  
## operatorsGina     -0.5008     0.4460  -1.123   0.2675    
## operatorsJohnson   0.3975     0.4460   0.891   0.3776    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.092 on 44 degrees of freedom
## Multiple R-squared:  0.2244, Adjusted R-squared:  0.1715 
## F-statistic: 4.243 on 3 and 44 DF,  p-value: 0.0102
p31 <- anova(lm(x ~ operators))
w <- c(1, 1, -1, -1)
mse <- p31$`Mean Sq`[2]
x <- matrix(x,ncol=12,byrow = T) %>% apply(.,1,mean)
t0 <- (w %*% x)/sqrt(mse*sum(w^2/12))
t0
##           [,1]
## [1,] -1.807529
quantt <- qt(0.975,48-4)
quantt
## [1] 2.015368
# test 
abs(t0) >= quantt
##       [,1]
## [1,] FALSE
# do not reject
ifelse(abs(t0) >= quantt,print(paste0('t = ',t0, ">=",quantt,'. Reject H0')),print(paste0('t = ',t0, "<",quantt,'. Do not reject H0')))
## [1] "t = -1.807529320835<2.01536757444376. Do not reject H0"
##      [,1]                                                    
## [1,] "t = -1.807529320835<2.01536757444376. Do not reject H0"

Problem 4.2 Fly away~~

(a) 1 vs 8 female fly

p1 <- c(40,37,44,47,47,47,68,47,54,61,71,75,89,58,59,62,79,96,58,62,70,72,75,96,75)
v1 <- c(46,42,65,46,58,42,48,58,50,80,63,65,70,70,72,97,46,56,70,70,72,76,90,76,92)
p8 <- c(21,40,44,54,36,40,56,60,48,53,60,60,65,68,60,81,81,48,48,56,68,75,81,48,68)
v8 <- c(16,19,19,32,33,33,30,42,42,33,26,30,40,54,34,34,47,47,42,47,54,54,56,60,44)
x <- c(p1,v1,p8,v8)
flys <- as.factor(rep(c("1 pregnant", "1 virgin", "8 pregnant", "8 virgin"), each = 25))
boxplot(x ~ flys, ylab = "Longevity (days)", xlab = "Companions")

lm(x ~ flys)
## 
## Call:
## lm(formula = x ~ flys)
## 
## Coefficients:
##    (Intercept)    flys1 virgin  flys8 pregnant    flys8 virgin  
##          63.56            1.24           -6.80          -24.84
summary(lm(x ~ flys))
## 
## Call:
## lm(formula = x ~ flys)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -35.76  -8.77  -0.28   9.13  32.44 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      63.560      2.975  21.366  < 2e-16 ***
## flys1 virgin      1.240      4.207   0.295    0.769    
## flys8 pregnant   -6.800      4.207  -1.616    0.109    
## flys8 virgin    -24.840      4.207  -5.904 5.34e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.87 on 96 degrees of freedom
## Multiple R-squared:  0.338,  Adjusted R-squared:  0.3173 
## F-statistic: 16.34 on 3 and 96 DF,  p-value: 1.177e-08
pfly <- anova(lm(x ~ flys)) 
pfly%>% kable(., "html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
Df Sum Sq Mean Sq F value Pr(>F)
flys 3 10844.08 3614.6933 16.33778 0
Residuals 96 21239.76 221.2475 NA NA
w <- c(1, 1, -1, -1)
mse <- pfly$`Mean Sq`[2]
x <- matrix(x,ncol=25,byrow = T) %>% apply(.,1,mean)
t0 <- (w %*% x)/sqrt(mse*sum(w^2/25))
t0
##          [,1]
## [1,] 5.526277
quantt <- qt(0.975,100-4)
quantt
## [1] 1.984984
# test 
abs(t0) >= quantt
##      [,1]
## [1,] TRUE
ifelse(abs(t0) >= quantt,print(paste0('t = ',t0, ">=",quantt,'. Reject H0')),print(paste0('t = ',t0, "<",quantt,'. Do not reject H0')))
## [1] "t = 5.52627676761298>=1.98498431152246. Reject H0"
##      [,1]                                               
## [1,] "t = 5.52627676761298>=1.98498431152246. Reject H0"

(b) virgin vs pregant

p1 <- c(40,37,44,47,47,47,68,47,54,61,71,75,89,58,59,62,79,96,58,62,70,72,75,96,75)
v1 <- c(46,42,65,46,58,42,48,58,50,80,63,65,70,70,72,97,46,56,70,70,72,76,90,76,92)
p8 <- c(21,40,44,54,36,40,56,60,48,53,60,60,65,68,60,81,81,48,48,56,68,75,81,48,68)
v8 <- c(16,19,19,32,33,33,30,42,42,33,26,30,40,54,34,34,47,47,42,47,54,54,56,60,44)
x <- c(p1,v1,p8,v8)
flys <- as.factor(rep(c("1 pregnant", "1 virgin", "8 pregnant", "8 virgin"), each = 25))
boxplot(x ~ flys, ylab = "Longevity (days)", xlab = "Companions")

lm(x ~ flys)
## 
## Call:
## lm(formula = x ~ flys)
## 
## Coefficients:
##    (Intercept)    flys1 virgin  flys8 pregnant    flys8 virgin  
##          63.56            1.24           -6.80          -24.84
summary(lm(x ~ flys))
## 
## Call:
## lm(formula = x ~ flys)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -35.76  -8.77  -0.28   9.13  32.44 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      63.560      2.975  21.366  < 2e-16 ***
## flys1 virgin      1.240      4.207   0.295    0.769    
## flys8 pregnant   -6.800      4.207  -1.616    0.109    
## flys8 virgin    -24.840      4.207  -5.904 5.34e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.87 on 96 degrees of freedom
## Multiple R-squared:  0.338,  Adjusted R-squared:  0.3173 
## F-statistic: 16.34 on 3 and 96 DF,  p-value: 1.177e-08
pfly <- anova(lm(x ~ flys)) 
pfly%>% kable(., "html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
Df Sum Sq Mean Sq F value Pr(>F)
flys 3 10844.08 3614.6933 16.33778 0
Residuals 96 21239.76 221.2475 NA NA
w <- c(1, -1, 1, -1)
mse <- pfly$`Mean Sq`[2]
x <- matrix(x,ncol=25,byrow = T) %>% apply(.,1,mean)
t0 <- (w %*% x)/sqrt(mse*sum(w^2/25))
t0
##          [,1]
## [1,] 2.823645
quantt <- qt(0.975,100-4)
quantt
## [1] 1.984984
# test 
abs(t0) >= quantt
##      [,1]
## [1,] TRUE
# reject
ifelse(abs(t0) >= quantt,print(paste0('t = ',t0, ">=",quantt,'. Reject H0')),print(paste0('t = ',t0, "<",quantt,'. Do not reject H0')))
## [1] "t = 2.82364506374386>=1.98498431152246. Reject H0"
##      [,1]                                               
## [1,] "t = 2.82364506374386>=1.98498431152246. Reject H0"

(c) non vs more

N <- c(35,37,49,46,63,39,46,56,63,65,56,65,70,63,65,70,77,81,86,70,70,77,77,81,77)
p1 <- c(40,37,44,47,47,47,68,47,54,61,71,75,89,58,59,62,79,96,58,62,70,72,75,96,75)
v1 <- c(46,42,65,46,58,42,48,58,50,80,63,65,70,70,72,97,46,56,70,70,72,76,90,76,92)
p8 <- c(21,40,44,54,36,40,56,60,48,53,60,60,65,68,60,81,81,48,48,56,68,75,81,48,68)
v8 <- c(16,19,19,32,33,33,30,42,42,33,26,30,40,54,34,34,47,47,42,47,54,54,56,60,44)
x <- c(N,p1,v1,p8,v8)
flys <- as.factor(rep(c("None", "1 pregnant", "1 virgin", "8 pregnant", "8 virgin"), each = 25))
boxplot(x ~ flys, ylab = "Longevity (days)", xlab = "Companions")

lm(x ~ flys)
## 
## Call:
## lm(formula = x ~ flys)
## 
## Coefficients:
##    (Intercept)    flys1 virgin  flys8 pregnant    flys8 virgin  
##          63.56            1.24           -6.80          -24.84  
##       flysNone  
##          -0.20
summary(lm(x ~ flys))
## 
## Call:
## lm(formula = x ~ flys)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -35.76  -8.76   0.20  11.20  32.44 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      63.560      2.962  21.461  < 2e-16 ***
## flys1 virgin      1.240      4.188   0.296    0.768    
## flys8 pregnant   -6.800      4.188  -1.624    0.107    
## flys8 virgin    -24.840      4.188  -5.931 2.98e-08 ***
## flysNone         -0.200      4.188  -0.048    0.962    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 14.81 on 120 degrees of freedom
## Multiple R-squared:  0.3121, Adjusted R-squared:  0.2892 
## F-statistic: 13.61 on 4 and 120 DF,  p-value: 3.516e-09
pfly <- anova(lm(x ~ flys)) 
pfly%>% kable(., "html") %>%
  kable_styling(bootstrap_options = "striped", full_width = F)
Df Sum Sq Mean Sq F value Pr(>F)
flys 4 11939.28 2984.8200 13.61195 0
Residuals 120 26313.52 219.2793 NA NA
w <- c(1,-1/4,-1/4,-1/4,-1/4)
mse <- pfly$`Mean Sq`[2]
x <- matrix(x,ncol=25,byrow = T) %>% apply(.,1,mean)
t0 <- (w %*% x)/sqrt(mse*sum(w^2/25))
t0
##          [,1]
## [1,] 2.234847
quantt <- qt(0.975,125-5)
quantt
## [1] 1.97993
# test 
abs(t0) >= quantt
##      [,1]
## [1,] TRUE
# reject
ifelse(abs(t0) >= quantt,print(paste0('t = ',t0, ">=",quantt,'. Reject H0')),print(paste0('t = ',t0, "<",quantt,'. Do not reject H0')))
## [1] "t = 2.23484736153612>=1.97993040508242. Reject H0"
##      [,1]                                               
## [1,] "t = 2.23484736153612>=1.97993040508242. Reject H0"