Design Homework 12

library(car)

## Loading required package: carData

Dat1 <- read.table("https://raw.githubusercontent.com/athienit/STA4211material/main/KutnerData/Chapter%2026%20Data%20Sets/CH26PR04.txt",
    header = T)
colnames(Dat1) = c("Cases", "Machine", "Operator", "Day")

Dat2 <- data.frame(Machine = factor(Dat1$Machine, labels = c("M1",
    "M2", "M3")), Operator = factor(Dat1$Operator, labels = c("Op1",
    "Op2", "Op3", "Op4")), Day = factor(Dat1$Day, levels = c("Day 1",
    "Day 2", "Day 3", "Day 4", "Day 5")), Cases = Dat1$Cases)
attach(Dat2)

Anova1 <- aov(Cases ~ Machine + Machine:Operator)
Anova1$residuals

##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## -3.0  2.0 -4.0  5.0  0.2 -5.8  7.2 -3.8  2.2 -6.6  2.4 -4.6  7.4  1.4 -7.6  3.4 
##   17   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32 
##  1.4 -4.6  7.4 -1.8  5.2  0.2  4.2 -7.8 -6.2  0.8  4.8  2.8 -2.2 -3.8  0.2  6.2 
##   33   34   35   36   37   38   39   40   41   42   43   44   45   46   47   48 
##  2.2 -4.8 -4.0  1.0  6.0 -2.0 -1.0 -7.8  6.2 -2.8  1.2  3.2 -6.6  0.4  2.4 -1.6 
##   49   50   51   52   53   54   55   56   57   58   59 
##  5.4  6.6 -2.4 -1.4  1.6 -4.4 -6.4  5.6 -0.4  3.6 -2.4

Standard1 <- rstandard(Anova1)
qqnorm(Standard1, datax = TRUE)
qqline(Standard1, datax = TRUE)

shapiro.test(Standard1)

## 
##  Shapiro-Wilk normality test
## 
## data:  Standard1
## W = 0.96285, p-value = 0.06886

### The P-value for the normality test of .06866 is larger
### than .05 but smaller than .1 so we have some evidence
### for and against normality. If we proceed conservatively
### we would reject the assumption of normality and
### conclude that this this model is not well suited. We
### could refit using a different model and method. ###

plot(Standard1[1:19])  ### Machine 1 ###

plot(Standard1[20:39])  ### Machine 2 ###

plot(Standard1[40:59])  ### Machine 3 ###

### No they can not since the model is nested. You have the same group of operators working on the same machine on the same shift so the effect from shift and operator should be roughly the same ###

### Plot data
plot(Machine, Cases, ylab = "Caser per Hour", xlab = "Machines")
abline(h = mean(Cases), col = 2)

summary(Anova1)

##                  Df Sum Sq Mean Sq F value   Pr(>F)    
## Machine           2   1698   848.9   35.63 3.83e-10 ***
## Machine:Operator  9   2270   252.2   10.58 9.76e-09 ***
## Residuals        47   1120    23.8                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

### The interaction between Machine and operator is
### significant. ###

$H_0: \forall i \in [1,2,3] \alpha_i = 0$ vs. $H_a: \exists i \in [1,2,3] : \alpha_i \ne 0$.

$TS = \frac {MSA}{MSE}$ $H_0 \sim F(df_a, df_e)$. $F(1-\alpha, df_a, df_e)$.

$TS = \frac {847.8}{23.6} = 35.92$. $F^* = qf(0.99, 2, 48) = 5.076664$. TS > F*. We reject the null hypothesis with a p-value of [1 - pf(35.92, 2, 48) =] $2.906 * 10^{-10}$ Conclude that $\exists i \in [1,2,3] : \alpha_i \ne 0$.

$H_0:$ all $\beta_{j(i)} = 0$ vs. $H_a:$ not all $\beta_{j(i)} = 0$.

$TS = \frac {MSB(A)}{MSE}$ $H_0 \sim F(df_{B(A)}, df_e)$. We reject the null if TS > F* = $F(1-\alpha, df_{B(A)}, df_e)$.

$TS = \frac {252.5}{23.6} = 10.699$. $F^* = qf(0.99, 9, 48) = 2.801816$. TS > F*. We reject the null hypothesis with a p-value of [1 - pf(10.699, 9, 48) =] $6.985 * 10^{-9}$ and conclude that not all $\beta_{j(i)} = 0$.

Machine three is statistically different.

Operator.As.Factor <- factor(Dat1$Operator, levels = 1:4)
Machine.As.Factor <- factor(Dat1$Machine, levels = 1:3)
Day.As.Factor <- factor(Dat1$Day, levels = 1:5)
Cases2 <- Dat1$Cases
Dat3 <- data.frame(Cases2, Operator.As.Factor, Machine.As.Factor,
    Day.As.Factor)

Anova2 <- anova(aov(Cases2 ~ Operator.As.Factor, data = subset(Dat3,
    sub = Machine.As.Factor == 1)))
Anova2

## Analysis of Variance Table
## 
## Response: Cases2
##                    Df Sum Sq Mean Sq F value   Pr(>F)   
## Operator.As.Factor  3  596.8  198.93  6.8883 0.003879 **
## Residuals          15  433.2   28.88                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(Anova2)

##        Df         Sum Sq         Mean Sq          F value     
##  Min.   : 3   Min.   :433.2   Min.   : 28.88   Min.   :6.888  
##  1st Qu.: 6   1st Qu.:474.1   1st Qu.: 71.39   1st Qu.:6.888  
##  Median : 9   Median :515.0   Median :113.91   Median :6.888  
##  Mean   : 9   Mean   :515.0   Mean   :113.91   Mean   :6.888  
##  3rd Qu.:12   3rd Qu.:555.9   3rd Qu.:156.42   3rd Qu.:6.888  
##  Max.   :15   Max.   :596.8   Max.   :198.93   Max.   :6.888  
##                                                NA's   :1      
##      Pr(>F)        
##  Min.   :0.003879  
##  1st Qu.:0.003879  
##  Median :0.003879  
##  Mean   :0.003879  
##  3rd Qu.:0.003879  
##  Max.   :0.003879  
##  NA's   :1

Anova3 <- anova(aov(Cases2 ~ Operator.As.Factor, data = subset(Dat3,
    sub = Machine.As.Factor == 2)))
Anova3

## Analysis of Variance Table
## 
## Response: Cases2
##                    Df Sum Sq Mean Sq F value    Pr(>F)    
## Operator.As.Factor  3 1538.5  512.85  25.452 2.492e-06 ***
## Residuals          16  322.4   20.15                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

summary(Anova3)

##        Df            Sum Sq          Mean Sq          F value     
##  Min.   : 3.00   Min.   : 322.4   Min.   : 20.15   Min.   :25.45  
##  1st Qu.: 6.25   1st Qu.: 626.4   1st Qu.:143.32   1st Qu.:25.45  
##  Median : 9.50   Median : 930.5   Median :266.50   Median :25.45  
##  Mean   : 9.50   Mean   : 930.5   Mean   :266.50   Mean   :25.45  
##  3rd Qu.:12.75   3rd Qu.:1234.5   3rd Qu.:389.68   3rd Qu.:25.45  
##  Max.   :16.00   Max.   :1538.5   Max.   :512.85   Max.   :25.45  
##                                                    NA's   :1      
##      Pr(>F)       
##  Min.   :2.5e-06  
##  1st Qu.:2.5e-06  
##  Median :2.5e-06  
##  Mean   :2.5e-06  
##  3rd Qu.:2.5e-06  
##  Max.   :2.5e-06  
##  NA's   :1

Anova4 <- anova(aov(Cases2 ~ Operator.As.Factor, data = subset(Dat3,
    sub = Machine.As.Factor == 3)))
Anova4

## Analysis of Variance Table
## 
## Response: Cases2
##                    Df Sum Sq Mean Sq F value Pr(>F)
## Operator.As.Factor  3 134.55  44.850  1.9693 0.1593
## Residuals          16 364.40  22.775

summary(Anova4)

##        Df            Sum Sq         Mean Sq         F value     
##  Min.   : 3.00   Min.   :134.6   Min.   :22.77   Min.   :1.969  
##  1st Qu.: 6.25   1st Qu.:192.0   1st Qu.:28.29   1st Qu.:1.969  
##  Median : 9.50   Median :249.5   Median :33.81   Median :1.969  
##  Mean   : 9.50   Mean   :249.5   Mean   :33.81   Mean   :1.969  
##  3rd Qu.:12.75   3rd Qu.:306.9   3rd Qu.:39.33   3rd Qu.:1.969  
##  Max.   :16.00   Max.   :364.4   Max.   :44.85   Max.   :1.969  
##                                                  NA's   :1      
##      Pr(>F)      
##  Min.   :0.1593  
##  1st Qu.:0.1593  
##  Median :0.1593  
##  Mean   :0.1593  
##  3rd Qu.:0.1593  
##  Max.   :0.1593  
##  NA's   :1

$H_0: \forall i \in [1,2,3,4] \Beta_j(1) = 0$ vs. $H_a: \exists i \in [1,2,3,4] : \Beta_j(1) \ne 0$.

$TS = \frac {MSB(1)}{MSE}$ $H_0 \sim F(df_b(1), df_e)$. $F(1-\alpha, df_b(1), df_e)$.

$TS = \frac {199.733}{23.6} = 8.46$. $F^* = qf(0.99, 3, 48) = 4.218$. TS > F*. We reject the null hypothesis with a p-value of [1 - pf(4.218, 3, 48) =] $.01$ Conclude that $\exists i \in [1,2,3,4] : \Beta_j(1) \ne 0$.

$H_0: \forall i \in [1,2,3,4] \Beta_j(2) = 0$ vs. $H_a: \exists i \in [1,2,3,4] : \Beta_j(2) \ne 0$.

$TS = \frac {MSB(2)}{MSE}$ $H_0 \sim F(df_b(2), df_e)$. $F(1-\alpha, df_b(2), df_e)$.

$TS = \frac {512.85}{23.6} = 21.73$. $F^* = qf(0.99, 3, 48) = 4.218$. TS > F. We reject the null hypothesis with a p-value of [1 - pf(21.73, 3, 48) =] $ 4.94 10^{-9}$ Conclude that $\exists i \in [1,2,3,4] : \Beta_j(2) \ne 0$.

$H_0: \forall i \in [1,2,3,4] \Beta_j(3) = 0$ vs. $H_a: \exists i \in [1,2,3,4] : \Beta_j(3) \ne 0$.

$TS = \frac {MSB(3)}{MSE}$ $H_0 \sim F(df_b(3), df_e)$. $F(1-\alpha, df_b(3), df_e)$.

$TS = \frac {44.85}{23.6} = 1.9$. $F^* = qf(0.99, 3, 48) = 4.218$. TS > F*. We fail to reject the null hypothesis in this case since the F-value is lower than our decision rule.

$\alpha_E \le g\alpha_1$. 5 comparisons at $\alpha_1 = 0.01$. So $\alpha_E \le 0.05$.

Ybar_1 = (Anova1$fitted.values[1] + Anova1$fitted.values[6] +
    Anova1$fitted.values[11] + Anova1$fitted.values[16])/4
Ybar_2 = (Anova1$fitted.values[21] + Anova1$fitted.values[26] +
    Anova1$fitted.values[31] + Anova1$fitted.values[36])/4
Ybar_3 = (Anova1$fitted.values[41] + Anova1$fitted.values[46] +
    Anova1$fitted.values[51] + Anova1$fitted.values[56])/4

Criticalvalue = qtukey(0.95, 3, 48)/sqrt(2)
SE = sqrt(23.6/5 * 2/4)

Ybar_1 - Ybar_2 + c(-1, 1) * Criticalvalue * SE

## [1] -13.665351  -6.234649

Ybar_1 - Ybar_3 + c(-1, 1) * Criticalvalue * SE

## [1] -16.265351  -8.834649

Ybar_2 - Ybar_3 + c(-1, 1) * Criticalvalue * SE

## [1] -6.315351  1.115351

Machine 1 Vs. machine 2 ($\hat{L}_1 = \bar{Y}_{1..} - \bar{Y}_{2..}$) (-13.46, -6.034) does not contain 0.

Machine 1 Vs. Machine 3 ($ 2 = {Y}{1..} - {Y}_{3..}$) (-16.06, -8.63) does not contain 0.

Machine 2 Vs. Machine 3 ($ 3 = {Y}{2..} - {Y}_{3..}$) (-6.31, 1.115) does not contain 0.

At 95% family confidence, there is a significant difference between machines 1 and 2 and 1 and 3 but not between machines 1 and 3.

Ybar_4 = Anova1$fitted.values[1]
Ybar_5 = Anova1$fitted.values[6]
Ybar_6 = Anova1$fitted.values[11]
Ybar_7 = Anova1$fitted.values[16]

Critval2 = qt((1 - (0.05/12)), (60 - 12))
SE2 = sqrt(2 * 23.6/5)

Ybar_4 - Ybar_5 + c(-1, 1) * Critval2 * SE2

## [1] -15.255477   1.655477

Ybar_4 - Ybar_6 + c(-1, 1) * Critval2 * SE2

## [1] -10.055477   6.855477

Ybar_4 - Ybar_7 + c(-1, 1) * Critval2 * SE2

## [1] -0.05547672 16.85547672

Ybar_5 - Ybar_6 + c(-1, 1) * Critval2 * SE2

## [1] -3.255477 13.655477

Ybar_5 - Ybar_7 + c(-1, 1) * Critval2 * SE2

## [1]  6.744523 23.655477

Ybar_4 - Ybar_5 + c(-1, 1) * Critval2 * SE2

## [1] -15.255477   1.655477

Operator 1 with Machine 1 vs. Operator 2 with Mahchine 1 (-14.455477, 2.455477) contains 0. ($\hat{L}_1 = \bar{Y}_{11.} - \bar{Y}_{12.}$)

Operator 1 with Machine 1 Vs. Operator 3 with Machine 1 (-9.255477, 7.655477) contains 0. ($\hat{L}_2 = \bar{Y}_{11.} - \bar{Y}_{13.}$)

Operator 1 with Machine 1 Vs. Operator 4 with Machine 1 (0.7445233, 17.6554767) does not contain 0. ($ 3 = {Y}{11.} - {Y}_{14.}$)

Operator 2 with Machine 1 Vs. Operator 3 with Machine 1 (-3.255477, 13.655477) contains 0. ($\hat{L}_4 = \bar{Y}_{12.} - \bar{Y}_{13.}$)

Operator 2 with Machine 1 Vs. Operator 4 with Machine 1 (6.744523, 23.655477) does not contain 0. ($\hat{L}_5 = \bar{Y}_{12.} - \bar{Y}_{14.}$)

Operator 3 with Machine 1 Vs. Operator 4 with Machine 1 (-14.455477, 2.455477) contains 0. ($\hat{L}_6 = \bar{Y}_{13.} - \bar{Y}_{14.}$)

Operator 1 with Machine 1 Vs. Operator 4 with Machine 4, and Operator 2 with Machine 1 Vs. Operator 4 with Machine 1 were the only two signifigant contrasts as their intervals did not contain 0.

Critvalue3 = qt((1 - (0.01/2)), (60 - 12))
SE3 = sqrt(23.6/5 * ((1/3)^2 + (1/3)^2 + (1/3)^2 + (-1)^2))
((Ybar_4 + Ybar_5 + Ybar_6)/3) - Ybar_7 + c(-1, 1) * Critvalue3 *
    SE3

## [1]  4.471284 17.928716

We are 99% confident that the difference between the experience level of the operators is (4.737951, 18.195382).