library(car)
## Loading required package: carData
# Read the raw data (columns: response, machine, operator, day).
# The file appears to have no header row: with header = TRUE the first
# observation was consumed as column names, which is why the recorded run
# shows 59 residuals and 47 residual df instead of the 60 observations and
# 60 - 12 = 48 df used in the hand calculations further down.
# NOTE: recorded `##` outputs in this file were produced from the 59-row read
# and should be regenerated.
Dat1 <- read.table(
  "https://raw.githubusercontent.com/athienit/STA4211material/main/KutnerData/Chapter%2026%20Data%20Sets/CH26PR04.txt",
  header = FALSE
)
colnames(Dat1) <- c("Cases", "Machine", "Operator", "Day")

# Labelled factor version of the design columns.
Dat2 <- data.frame(
  Machine = factor(Dat1$Machine, labels = c("M1", "M2", "M3")),
  Operator = factor(Dat1$Operator, labels = c("Op1", "Op2", "Op3", "Op4")),
  # `labels`, not `levels`: Day is coded 1-5, so asking for the levels
  # "Day 1", ..., "Day 5" matched nothing and turned every value into NA.
  Day = factor(Dat1$Day, labels = c("Day 1", "Day 2", "Day 3", "Day 4",
    "Day 5")),
  Cases = Dat1$Cases
)

# attach() is kept only because later plotting code refers to the bare
# column names `Machine` and `Cases`.
attach(Dat2)

# Nested design: operators within machines (Machine + Machine:Operator).
Anova1 <- aov(Cases ~ Machine + Machine:Operator, data = Dat2)
residuals(Anova1)
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## -3.0 2.0 -4.0 5.0 0.2 -5.8 7.2 -3.8 2.2 -6.6 2.4 -4.6 7.4 1.4 -7.6 3.4
## 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32
## 1.4 -4.6 7.4 -1.8 5.2 0.2 4.2 -7.8 -6.2 0.8 4.8 2.8 -2.2 -3.8 0.2 6.2
## 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
## 2.2 -4.8 -4.0 1.0 6.0 -2.0 -1.0 -7.8 6.2 -2.8 1.2 3.2 -6.6 0.4 2.4 -1.6
## 49 50 51 52 53 54 55 56 57 58 59
## 5.4 6.6 -2.4 -1.4 1.6 -4.4 -6.4 5.6 -0.4 3.6 -2.4
# Standardized residuals of the nested ANOVA fit, used for the normality
# diagnostics below.
Standard1 <- rstandard(model = Anova1)
# Normal Q-Q plot with the data on the x-axis, plus its reference line.
qqnorm(y = Standard1, datax = TRUE)
qqline(y = Standard1, datax = TRUE)
# Shapiro-Wilk normality test on the same residuals.
shapiro.test(x = Standard1)
##
## Shapiro-Wilk normality test
##
## data: Standard1
## W = 0.96285, p-value = 0.06886
### The P-value for the normality test, 0.06886, is larger
### than .05 but smaller than .1, so normality is rejected at
### the 10% level but not at the 5% level. If we proceed
### conservatively we would reject the assumption of normality
### and conclude that this model is not well suited. We
### could refit using a different model and method. ###
# Index plots of the standardized residuals, one per machine. Rows are
# selected by the Machine factor rather than by the hard-coded positions
# 1:19 / 20:39 / 40:59, which are only valid for one particular row count.
plot(Standard1[Dat2$Machine == "M1"], ylab = "Standardized residual") ### Machine 1 ###
plot(Standard1[Dat2$Machine == "M2"], ylab = "Standardized residual") ### Machine 2 ###
plot(Standard1[Dat2$Machine == "M3"], ylab = "Standardized residual") ### Machine 3 ###
### No, they cannot, since the model is nested. The same group of operators works on the same machine on the same shift, so the effects of shift and operator should be roughly the same. ###
### Plot data
# Box plots of cases per hour for each machine (plot() of a factor against a
# numeric vector), with a horizontal line at the overall mean. Columns are
# taken from Dat2 explicitly so the code does not depend on attach().
plot(Dat2$Machine, Dat2$Cases, ylab = "Cases per Hour", xlab = "Machines")
abline(h = mean(Dat2$Cases), col = 2)
# ANOVA table for the nested model.
summary(Anova1)
## Df Sum Sq Mean Sq F value Pr(>F)
## Machine 2 1698 848.9 35.63 3.83e-10 ***
## Machine:Operator 9 2270 252.2 10.58 9.76e-09 ***
## Residuals 47 1120 23.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
### The Machine:Operator term (operators nested within
### machines) is significant. ###
\(H_0: \forall i \in [1,2,3] \alpha_i = 0\) vs. \(H_a: \exists i \in [1,2,3] : \alpha_i \ne 0\).
\(TS = \frac {MSA}{MSE}\); under \(H_0\), \(TS \sim F(df_a, df_e)\). We reject the null if TS > F* = \(F(1-\alpha, df_a, df_e)\).
\(TS = \frac {847.8}{23.6} = 35.92\). \(F^* = qf(0.99, 2, 48) = 5.076664\). TS > F*. We reject the null hypothesis with a p-value of [1 - pf(35.92, 2, 48) =] \(2.906 * 10^{-10}\) Conclude that \(\exists i \in [1,2,3] : \alpha_i \ne 0\).
\(H_0:\) all \(\beta_{j(i)} = 0\) vs. \(H_a:\) not all \(\beta_{j(i)} = 0\).
\(TS = \frac {MSB(A)}{MSE}\); under \(H_0\), \(TS \sim F(df_{B(A)}, df_e)\). We reject the null if TS > F* = \(F(1-\alpha, df_{B(A)}, df_e)\).
\(TS = \frac {252.5}{23.6} = 10.699\). \(F^* = qf(0.99, 9, 48) = 2.801816\). TS > F*. We reject the null hypothesis with a p-value of [1 - pf(10.699, 9, 48) =] \(6.985 * 10^{-9}\) and conclude that not all \(\beta_{j(i)} = 0\).
# Factor versions of the design columns keyed by their numeric codes,
# collected with the response into Dat3 for the per-machine analyses.
Operator.As.Factor <- factor(Dat1$Operator, levels = 1:4)
Machine.As.Factor <- factor(Dat1$Machine, levels = 1:3)
Day.As.Factor <- factor(Dat1$Day, levels = 1:5)
Cases2 <- Dat1$Cases
Dat3 <- data.frame(Cases2, Operator.As.Factor, Machine.As.Factor,
  Day.As.Factor)

# One-way ANOVA of the operator effect using only the rows for machine 1.
# The argument is spelled `subset =` in full; `sub =` only worked through
# partial argument matching.
Anova2 <- anova(aov(Cases2 ~ Operator.As.Factor,
  data = subset(Dat3, subset = Machine.As.Factor == 1)))
Anova2
## Analysis of Variance Table
##
## Response: Cases2
## Df Sum Sq Mean Sq F value Pr(>F)
## Operator.As.Factor 3 596.8 198.93 6.8883 0.003879 **
## Residuals 15 433.2 28.88
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Anova2 is already an anova table, so summary() here only reports
# per-column quantiles of that table (see the output below); the F test
# itself is in the printed table above.
summary(Anova2)
## Df Sum Sq Mean Sq F value
## Min. : 3 Min. :433.2 Min. : 28.88 Min. :6.888
## 1st Qu.: 6 1st Qu.:474.1 1st Qu.: 71.39 1st Qu.:6.888
## Median : 9 Median :515.0 Median :113.91 Median :6.888
## Mean : 9 Mean :515.0 Mean :113.91 Mean :6.888
## 3rd Qu.:12 3rd Qu.:555.9 3rd Qu.:156.42 3rd Qu.:6.888
## Max. :15 Max. :596.8 Max. :198.93 Max. :6.888
## NA's :1
## Pr(>F)
## Min. :0.003879
## 1st Qu.:0.003879
## Median :0.003879
## Mean :0.003879
## 3rd Qu.:0.003879
## Max. :0.003879
## NA's :1
# One-way ANOVA of the operator effect using only the rows for machine 2.
# `subset =` is spelled out instead of relying on partial matching of `sub`.
Anova3 <- anova(aov(Cases2 ~ Operator.As.Factor,
  data = subset(Dat3, subset = Machine.As.Factor == 2)))
Anova3
## Analysis of Variance Table
##
## Response: Cases2
## Df Sum Sq Mean Sq F value Pr(>F)
## Operator.As.Factor 3 1538.5 512.85 25.452 2.492e-06 ***
## Residuals 16 322.4 20.15
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Anova3 is already an anova table, so summary() here only reports
# per-column quantiles of that table (see the output below); the F test
# itself is in the printed table above.
summary(Anova3)
## Df Sum Sq Mean Sq F value
## Min. : 3.00 Min. : 322.4 Min. : 20.15 Min. :25.45
## 1st Qu.: 6.25 1st Qu.: 626.4 1st Qu.:143.32 1st Qu.:25.45
## Median : 9.50 Median : 930.5 Median :266.50 Median :25.45
## Mean : 9.50 Mean : 930.5 Mean :266.50 Mean :25.45
## 3rd Qu.:12.75 3rd Qu.:1234.5 3rd Qu.:389.68 3rd Qu.:25.45
## Max. :16.00 Max. :1538.5 Max. :512.85 Max. :25.45
## NA's :1
## Pr(>F)
## Min. :2.5e-06
## 1st Qu.:2.5e-06
## Median :2.5e-06
## Mean :2.5e-06
## 3rd Qu.:2.5e-06
## Max. :2.5e-06
## NA's :1
# One-way ANOVA of the operator effect using only the rows for machine 3.
# `subset =` is spelled out instead of relying on partial matching of `sub`.
Anova4 <- anova(aov(Cases2 ~ Operator.As.Factor,
  data = subset(Dat3, subset = Machine.As.Factor == 3)))
Anova4
## Analysis of Variance Table
##
## Response: Cases2
## Df Sum Sq Mean Sq F value Pr(>F)
## Operator.As.Factor 3 134.55 44.850 1.9693 0.1593
## Residuals 16 364.40 22.775
# Anova4 is already an anova table, so summary() here only reports
# per-column quantiles of that table (see the output below); the F test
# itself is in the printed table above.
summary(Anova4)
## Df Sum Sq Mean Sq F value
## Min. : 3.00 Min. :134.6 Min. :22.77 Min. :1.969
## 1st Qu.: 6.25 1st Qu.:192.0 1st Qu.:28.29 1st Qu.:1.969
## Median : 9.50 Median :249.5 Median :33.81 Median :1.969
## Mean : 9.50 Mean :249.5 Mean :33.81 Mean :1.969
## 3rd Qu.:12.75 3rd Qu.:306.9 3rd Qu.:39.33 3rd Qu.:1.969
## Max. :16.00 Max. :364.4 Max. :44.85 Max. :1.969
## NA's :1
## Pr(>F)
## Min. :0.1593
## 1st Qu.:0.1593
## Median :0.1593
## Mean :0.1593
## 3rd Qu.:0.1593
## Max. :0.1593
## NA's :1
\(H_0: \forall j \in [1,2,3,4] : \beta_{j(1)} = 0\) vs. \(H_a: \exists j \in [1,2,3,4] : \beta_{j(1)} \ne 0\).
\(TS = \frac {MSB(1)}{MSE}\); under \(H_0\), \(TS \sim F(df_{B(1)}, df_e)\). We reject the null if TS > F* = \(F(1-\alpha, df_{B(1)}, df_e)\).
\(TS = \frac {199.733}{23.6} = 8.46\). \(F^* = qf(0.99, 3, 48) = 4.218\). TS > F*. We reject the null hypothesis with a p-value of [1 - pf(8.46, 3, 48)], which is below \(.001\) (the value \(.01\) is what 1 - pf(4.218, 3, 48) returns for the critical value itself, not for TS), and conclude that \(\exists j \in [1,2,3,4] : \beta_{j(1)} \ne 0\).
\(H_0: \forall j \in [1,2,3,4] : \beta_{j(2)} = 0\) vs. \(H_a: \exists j \in [1,2,3,4] : \beta_{j(2)} \ne 0\).
\(TS = \frac {MSB(2)}{MSE}\); under \(H_0\), \(TS \sim F(df_{B(2)}, df_e)\). We reject the null if TS > F* = \(F(1-\alpha, df_{B(2)}, df_e)\).
\(TS = \frac {512.85}{23.6} = 21.73\). \(F^* = qf(0.99, 3, 48) = 4.218\). TS > F*. We reject the null hypothesis with a p-value of [1 - pf(21.73, 3, 48) =] \(4.94 \times 10^{-9}\) and conclude that \(\exists j \in [1,2,3,4] : \beta_{j(2)} \ne 0\).
\(H_0: \forall j \in [1,2,3,4] : \beta_{j(3)} = 0\) vs. \(H_a: \exists j \in [1,2,3,4] : \beta_{j(3)} \ne 0\).
\(TS = \frac {MSB(3)}{MSE}\); under \(H_0\), \(TS \sim F(df_{B(3)}, df_e)\). We reject the null if TS > F* = \(F(1-\alpha, df_{B(3)}, df_e)\).
\(TS = \frac {44.85}{23.6} = 1.9\). \(F^* = qf(0.99, 3, 48) = 4.218\). TS < F*. We fail to reject the null hypothesis in this case since the test statistic is lower than the critical value in our decision rule.
\(\alpha_E \le g\alpha_1\). 5 comparisons at \(\alpha_1 = 0.01\). So \(\alpha_E \le 0.05\).
# Tukey pairwise comparisons of the three machine means (95% family level).
# Each machine mean is the unweighted average of its four operator cell
# means. For this model the fitted values are the cell means, so they are
# computed here by Machine x Operator label instead of by picking fitted
# values at fixed row positions.
cell_means <- tapply(Dat2$Cases, list(Dat2$Machine, Dat2$Operator), mean)
machine_means <- rowMeans(cell_means)
Ybar_1 <- machine_means[["M1"]]
Ybar_2 <- machine_means[["M2"]]
Ybar_3 <- machine_means[["M3"]]

# Hand-entered constants: MSE = 23.6 on 48 error df, 5 days per cell and
# 4 operators per machine, 3 machine means being compared.
# NOTE(review): summary(Anova1) in the recorded run reports a residual mean
# square of 23.8 on 47 df; confirm which values are intended.
Criticalvalue <- qtukey(0.95, 3, 48) / sqrt(2)
SE <- sqrt(23.6 / 5 * 2 / 4)

Ybar_1 - Ybar_2 + c(-1, 1) * Criticalvalue * SE
## [1] -13.665351 -6.234649
Ybar_1 - Ybar_3 + c(-1, 1) * Criticalvalue * SE
## [1] -16.265351 -8.834649
Ybar_2 - Ybar_3 + c(-1, 1) * Criticalvalue * SE
## [1] -6.315351 1.115351
Machine 1 Vs. machine 2 (\(\hat{L}_1 = \bar{Y}_{1..} - \bar{Y}_{2..}\)) (-13.46, -6.034) does not contain 0.
Machine 1 Vs. Machine 3 (\(\hat{L}_2 = \bar{Y}_{1..} - \bar{Y}_{3..}\)) (-16.06, -8.63) does not contain 0.
Machine 2 Vs. Machine 3 (\(\hat{L}_3 = \bar{Y}_{2..} - \bar{Y}_{3..}\)) (-6.31, 1.115) contains 0.
At 95% family confidence, there is a significant difference between machines 1 and 2 and between machines 1 and 3, but not between machines 2 and 3.
# Bonferroni pairwise comparisons of the four operators on machine 1.
# Fitted values at rows 1, 6, 11 and 16 are taken as the cell means of
# operators 1-4 on machine 1.
Ybar_4 <- Anova1$fitted.values[1]
Ybar_5 <- Anova1$fitted.values[6]
Ybar_6 <- Anova1$fitted.values[11]
Ybar_7 <- Anova1$fitted.values[16]
# Two-sided critical value for g = 6 comparisons at family level 0.05:
# 1 - 0.05 / (2 * 6), on 60 - 12 = 48 error degrees of freedom.
Critval2 <- qt(1 - 0.05 / 12, 60 - 12)
# Standard error of a difference of two cell means (hand-entered MSE = 23.6,
# 5 observations per cell).
SE2 <- sqrt(2 * 23.6 / 5)
Ybar_4 - Ybar_5 + c(-1, 1) * Critval2 * SE2
## [1] -15.255477 1.655477
Ybar_4 - Ybar_6 + c(-1, 1) * Critval2 * SE2
## [1] -10.055477 6.855477
Ybar_4 - Ybar_7 + c(-1, 1) * Critval2 * SE2
## [1] -0.05547672 16.85547672
Ybar_5 - Ybar_6 + c(-1, 1) * Critval2 * SE2
## [1] -3.255477 13.655477
Ybar_5 - Ybar_7 + c(-1, 1) * Critval2 * SE2
## [1] 6.744523 23.655477
# Sixth comparison is operator 3 vs. operator 4; the original repeated
# Ybar_4 - Ybar_5 here.
Ybar_6 - Ybar_7 + c(-1, 1) * Critval2 * SE2
## [1] 1.544523 18.455477
Operator 1 with Machine 1 Vs. Operator 2 with Machine 1 (-14.455477, 2.455477) contains 0. (\(\hat{L}_1 = \bar{Y}_{11.} - \bar{Y}_{12.}\))
Operator 1 with Machine 1 Vs. Operator 3 with Machine 1 (-9.255477, 7.655477) contains 0. (\(\hat{L}_2 = \bar{Y}_{11.} - \bar{Y}_{13.}\))
Operator 1 with Machine 1 Vs. Operator 4 with Machine 1 (0.7445233, 17.6554767) does not contain 0. (\(\hat{L}_3 = \bar{Y}_{11.} - \bar{Y}_{14.}\))
Operator 2 with Machine 1 Vs. Operator 3 with Machine 1 (-3.255477, 13.655477) contains 0. (\(\hat{L}_4 = \bar{Y}_{12.} - \bar{Y}_{13.}\))
Operator 2 with Machine 1 Vs. Operator 4 with Machine 1 (6.744523, 23.655477) does not contain 0. (\(\hat{L}_5 = \bar{Y}_{12.} - \bar{Y}_{14.}\))
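Operator 3 with Machine 1 Vs. Operator 4 with Machine 1 (1.544523, 18.455477) does not contain 0. (\(\hat{L}_6 = \bar{Y}_{13.} - \bar{Y}_{14.}\)) This interval follows from the others: \(\hat{L}_6 = \hat{L}_5 - \hat{L}_4 = 15.2 - 5.2 = 10.0\), with the same half-width of 8.455477.
Operator 1 with Machine 1 Vs. Operator 4 with Machine 1, Operator 2 with Machine 1 Vs. Operator 4 with Machine 1, and Operator 3 with Machine 1 Vs. Operator 4 with Machine 1 were the only significant contrasts, as their intervals did not contain 0.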
# 99% confidence interval for the contrast comparing the average of the
# first three machine-1 operator means with the fourth.
# Two-sided t critical value on 60 - 12 error degrees of freedom.
Critvalue3 <- qt(1 - 0.01 / 2, 60 - 12)
# Standard error of the contrast: hand-entered MSE (23.6) over 5 observations
# per cell, times the sum of squared contrast coefficients (1/3, 1/3, 1/3, -1).
SE3 <- sqrt(23.6 / 5 * ((1 / 3)^2 + (1 / 3)^2 + (1 / 3)^2 + (-1)^2))
(Ybar_4 + Ybar_5 + Ybar_6) / 3 - Ybar_7 + c(-1, 1) * Critvalue3 * SE3
## [1] 4.471284 17.928716
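We are 99% confident that the difference between the experience levels of the operators, measured by the contrast \(\hat{L} = \frac{\bar{Y}_{11.} + \bar{Y}_{12.} + \bar{Y}_{13.}}{3} - \bar{Y}_{14.}\) (operators 1-3 versus operator 4 on machine 1), lies in (4.737951, 18.195382).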