Homework3

3.16, 3.17, 3.18, 3.19, 3.33, 3.34, 3.35, 3.38, 3.45, 3.46, 3.52 4.1 4.2, 4.3, 4.4, 4.5, 4.6, 4.7, 4.8, 4.12, 4.14, 4.15, 4.16, 4.21, 4.22, 4.24,4.25

#3.16

season=as.factor(c(rep("Summer",10),rep("Shoulder",7),rep("Winter",8)))
obs= c(83,85,85,87,90,88,88,84,91,90,91,87,84,87,85,86,83,94,91,87,85,87,91,92,86)

model=aov(obs~season)
summary(model)

##             Df Sum Sq Mean Sq F value Pr(>F)
## season       2  35.61  17.804   2.121  0.144
## Residuals   22 184.63   8.392

#Since the p-value >0.05, we fail to reject the null hypothesis and conclude that the season doesn't affect the golfer in question performance.The golfer's theorem is therefore debunked.


qqnorm(resid(model))
qqline(resid(model))

shapiro.test(resid(model))

## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model)
## W = 0.94121, p-value = 0.1579

plot(model$fitted.values,model$residuals)

#The qq-plot and the fitted vs residual plot confirm normality and equal variance acrosss the season. Normality is confirmed using the shapiro wilk test.

#3.17

approach=as.factor(c(rep("1",8),rep("2",8),rep("3",8)))
contri= c(1000,1500,1200,1800,1600,1100,1000,1250,
       1500,1800,2000,1200,2000,1700,1800,1900,
        900,1000,1200,1500,1200,1550,1000,1100)

model2=aov(contri~approach)
summary(model2)

##             Df  Sum Sq Mean Sq F value  Pr(>F)   
## approach     2 1362708  681354    9.41 0.00121 **
## Residuals   21 1520625   72411                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Since the p-value < 0.05, we reject the null hypothesis and conclude that the approach affects contributions

qqnorm(resid(model2))
qqline(resid(model2))

shapiro.test(resid(model2))

## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model2)
## W = 0.98067, p-value = 0.9078

plot(model2$fitted.values,model2$residuals)

#The qq-plot and the fitted vs residual plot confirm normality and equal variance acrosss the approach groups. Normality is confirmed using the shapiro wilk test.

3.18

temp=as.factor(c(rep("100",5),rep("125",4),rep("150",5),rep("175",4)))
contri= c(21.8,21.9,21.7,21.6,21.7,21.7,21.4,21.5,21.4,21.9,21.8,21.8,21.6,21.5,    21.9,21.7,21.8,21.4)
model3=aov(contri~temp)


summary(model3)

##             Df Sum Sq Mean Sq F value Pr(>F)
## temp         3 0.1561 0.05204   2.024  0.157
## Residuals   14 0.3600 0.02571

#a)
#Since the p-value > 0.05, we fail to reject the null hypothesis and conclude that the temperature doesn't affect density.

#b) No! Since the means are found to equal from the Anova test.

# c)
qqnorm(resid(model3))
qqline(resid(model3))

shapiro.test(resid(model3))

## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model3)
## W = 0.95925, p-value = 0.5873

plot(model3$fitted.values,model3$residuals)

#The qq-plot and the fitted vs residual plot confirm normality and equal variance acrosss the temperature groups. Normality is confirmed using the shapiro wilk test.

#d)
plot(TukeyHSD(model3))

#Yes since the difference of means are all equal.

3.19

TukeyHSD(model3)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = contri ~ temp)
## 
## $temp
##          diff         lwr        upr     p adj
## 125-100 -0.24 -0.55266107 0.07266107 0.1626312
## 150-100 -0.02 -0.31477969 0.27477969 0.9971517
## 175-100 -0.04 -0.35266107 0.27266107 0.9817367
## 150-125  0.22 -0.09266107 0.53266107 0.2185356
## 175-125  0.20 -0.12957371 0.52957371 0.3299767
## 175-150 -0.02 -0.33266107 0.29266107 0.9976083

# There was no need for the Tukey test since Anova test indicated equal means.The disregard for such information could potentially lead to type 1 errors. 
#In other to account for the different sample sizes across each pairs, we find the critical value for each pairs.

3.33

k=4
n = 12
SSto = 330.56
SStr = 250.65
#a
MSe= (SSto - SStr)/(n-k)
MSe

## [1] 9.98875

#b
R = SStr/SSto
R

## [1] 0.7582587

3.34

k=6
n=30
SSto = 900.25
SStr = 750.50
#a
MSe= (SSto - SStr)/(n-k)
MSe

## [1] 6.239583

#b
R = SStr/SSto
R

## [1] 0.8336573

#3.35

k=4
n = 12
SSto = 330.56
SStr = 250.65
MSe= (SSto - SStr)/(n-k)
MSt= SStr/(k-1)



L= 1/3 * ((MSt/MSe) * (1/qf(0.05/2,k-1,n-k)) -1)
U= 1/3 * ((MSt/MSe) * (1/qf(1-0.05/2,k-1,n-k)) -1)
CI = c(U/(U+1),L/(L+1))
CI

## [1] 0.1535943 0.9757316

3.38

#Solution provided in class and book.

3.45

#Solution provided in class.

3.46

# Same problem as 3.46

#3.52

life <- c(17.6,18.9,16.3,17.4,20.1,21.6,16.9,15.3,18.6,17.1,19.5,20.3,
          21.4,23.6,19.4,18.5,20.5,22.3,19.3,21.1,16.9,17.5,18.3,19.8)
type <- factor(c(rep("1",6),rep("2",6),rep("3",6),rep("4",6)))

data = data.frame(life,type)

kruskal.test(life~type)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  life by type
## Kruskal-Wallis chi-squared = 6.2177, df = 3, p-value = 0.1015

model_av= aov(life~type)

summary(model_av)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## type         3  30.17   10.05   3.047 0.0525 .
## Residuals   20  65.99    3.30                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

qqnorm(resid(model_av))
qqline(resid(model_av))

shapiro.test(resid(model_av))

## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model_av)
## W = 0.95671, p-value = 0.376

plot(model_av$fitted.values,model_av$residuals)

#The krustal test confirms the anova results. The means are found to be equal across treatment groups.

#4.1

#4.1
#Complete random design so blocking effect
k=4
n=24 
error_var = 500 + 200 / (24-4)
error_var

## [1] 510

#4.2

# The pure error will be decreased by b-1= 3-1 =2

#4.3

#True

#4.4

#False

#4.5

#c

#4.6

#SSb = 80
#df_factor= 29-25 =4
SS_treatment = 246.93 * 4
dftotal= 29
b=(29+1)/(5)
dfb= b-1
dferror= 29-4-5
sserror= 186.53-80
msb= 80/5
mse= sserror/dferror
f= 246.93/5.33
pvalue = 1- pf(f,4,20)

#Source   DF        SS      MS   F        P
#Factor    4    987.72   246.93  46.4     0
#Block     5        80      16
#Error    20    106.53      5.33
#Total    29   1174.24

4.7

values <- matrix(c( 73, 68, 74, 71, 67, 73, 67, 75, 72, 70, 75, 68, 78, 73, 68,73,  71, 75, 75, 69), ncol=5, byrow = T)

y <- as.numeric(values)

#treatment -- different chemicals
x <- factor(rep(1:4,5))

#blocks
b <- factor(c(rep(1,4),rep(2,4),rep(3,4),rep(4,4),rep(5,4)))
# fit the model
model <- aov(y~x+b)
anova(model)

## Analysis of Variance Table
## 
## Response: y
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## x          3  12.95   4.317  2.3761    0.1211    
## b          4 157.00  39.250 21.6055 2.059e-05 ***
## Residuals 12  21.80   1.817                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Based on the result, we fail to reject the null hypothesis and conclude that none of treatment effect is not equal to zero. This means all 4 chemicals had the same effect.

#4.8

values <- matrix(c(13,22,18,39,16,24,17,44,5,4,1,22), ncol=4, byrow = T)

y <- as.numeric(values)

#treatment -- different solutions
x <- factor(rep(1:3,4))

#blocks
b <- factor(c(rep(1,3),rep(2,3),rep(3,3),rep(4,3)))
# fit the model
model <- aov(y~x+b)
anova(model)

## Analysis of Variance Table
## 
## Response: y
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## x          2  703.50  351.75  40.717 0.0003232 ***
## b          3 1106.92  368.97  42.711 0.0001925 ***
## Residuals  6   51.83    8.64                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Based on the result, we reject the null hypothesis and conclude that at least one of the treatment effect is equal to zero. This means the treatment means are not all the same.

4.12

values <- matrix(c(
250,    350,    219,    375,
400,    525,    390,    580,
275,    340,    200,    310), 
ncol=4, byrow = T)

y <- as.numeric(values)

#treatment -- different designs
x <- factor(rep(1:3,4))

#blocks
b <- factor(c(rep(1,3),rep(2,3),rep(3,3),rep(4,3)))
# fit the model
model <- aov(y~x+b)
anova(model)

## Analysis of Variance Table
## 
## Response: y
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## x          2  90755   45378  50.152 0.0001798 ***
## b          3  49036   16345  18.065 0.0020837 ** 
## Residuals  6   5429     905                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Based on the result, we reject the null hypothesis and conclude that at least one of the treatment effect is equal to zero. This means the treatment means are not all the same.


#Use fisher LSD method:

library(agricolae)

## Warning: package 'agricolae' was built under R version 3.6.2

out <-LSD.test(model,"x",alpha = 0.05)
out

## $statistics
##    MSerror Df     Mean       CV  t.value      LSD
##   904.8056  6 351.1667 8.565729 2.446912 52.04523
## 
## $parameters
##         test p.ajusted name.t ntr alpha
##   Fisher-LSD      none      x   3  0.05
## 
## $means
##        y      std r      LCL      UCL Min Max    Q25   Q50    Q75
## 1 298.50 75.66814 4 261.6985 335.3015 219 375 242.25 300.0 356.25
## 2 473.75 93.75278 4 436.9485 510.5515 390 580 397.50 462.5 538.75
## 3 281.25 60.32896 4 244.4485 318.0515 200 340 256.25 292.5 317.50
## 
## $comparison
## NULL
## 
## $groups
##        y groups
## 2 473.75      a
## 1 298.50      b
## 3 281.25      b
## 
## attr(,"class")
## [1] "group"

out2<-TukeyHSD(model)
out2

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = y ~ x + b)
## 
## $x
##        diff        lwr        upr     p adj
## 2-1  175.25  109.98853  240.51147 0.0004236
## 3-1  -17.25  -82.51147   48.01147 0.7104869
## 3-2 -192.50 -257.76147 -127.23853 0.0002508
## 
## $b
##           diff        lwr       upr     p adj
## 2-1   96.66667   11.64632 181.68701 0.0292948
## 3-1  -38.66667 -123.68701  46.35368 0.4560435
## 4-1  113.33333   28.31299 198.35368 0.0142872
## 3-2 -135.33333 -220.35368 -50.31299 0.0060311
## 4-2   16.66667  -68.35368 101.68701 0.9015669
## 4-3  152.00000   66.97966 237.02034 0.0033310

# Design 2 differs from 1 and 3 as shown in the tukey and LSD test.


qqnorm(resid(model))
qqline(resid(model))

shapiro.test(model$residuals)

## 
##  Shapiro-Wilk normality test
## 
## data:  model$residuals
## W = 0.88867, p-value = 0.1133

# Shapiro test confirms data is normally distributed

4.14

values <- matrix(c(
    1244,   21, 82, 2221,   905,    839,
    281,    129,    396,    1306,   336,    910,
    220,    84, 458,    543,    300,    794,
  225,  83, 425,    552,    291,    826,
  19,   11, −34,    121,    15, 103,
    −20,    35, −53,    170,    104,    199), 
ncol=6, byrow = T)

y <- as.numeric(values)

#treatment -- different designs
x <- factor(rep(1:6,6))

#blocks
b <- factor(c(rep(1,6),rep(2,6),rep(3,6),rep(4,6),rep(5,6),rep(6,6)))
# fit the model
model <- aov(y~x+b)
anova(model)

## Analysis of Variance Table
## 
## Response: y
##           Df  Sum Sq Mean Sq F value   Pr(>F)   
## x          5 2989130  597826  5.3770 0.001720 **
## b          5 2287339  457468  4.1146 0.007295 **
## Residuals 25 2779574  111183                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Based on the result, we reject the null hypothesis and conclude that at least one of the treatment effect is equal to zero. This means the algorithms provide different mean cost estimation accuracy.

## b
qqnorm(model$residuals)
qqline(model$residuals)

#Residuals are normally distributed

## c
ybars <- aggregate(y,by=list(x),mean)
ybars

##   Group.1         x
## 1       1 885.33333
## 2       2 559.66667
## 3       3 399.83333
## 4       4 400.33333
## 5       5  39.16667
## 6       6  72.50000

## Recommend #5 since it has the minimum percent error in estimating the development cost.

4.15

#4.15

obs=c(334.5, 31.6, 701, 41.2, 61.2, 69.6,   67.5,   66.6,   120.7,  881.9,
  919.4, 404.2, 1024.8, 54.1,   62.8,   671.6,  882.1,  354.2,  321.9,  91.1,
108.4,  26.1,   240.8, 191.1,   69.7,   242.8,  62.7,   396.9,  23.6,   290.4)

treatment= as.factor(c(rep(1,10),rep(2,10),rep(3,10)))

model= aov(obs~treatment)
summary(model)

##             Df  Sum Sq Mean Sq F value Pr(>F)  
## treatment    2  538442  269221   3.249 0.0544 .
## Residuals   27 2236974   82851                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#The results indicate that that the treatment means differ but only slightly. Its on the border.


#b 
qqnorm(resid(model))
qqline(resid(model))

shapiro.test(resid(model))

## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model)
## W = 0.91654, p-value = 0.02183

#normality fails

#c

model2= aov(log(obs)~treatment)
summary(model2)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## treatment    2   6.30   3.148   2.571 0.0951 .
## Residuals   27  33.07   1.225                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#The results indicate that that the treatment means differ.


#d

qqnorm(resid(model2))
qqline(resid(model2))

shapiro.test(resid(model2))

## 
##  Shapiro-Wilk normality test
## 
## data:  resid(model2)
## W = 0.95648, p-value = 0.2511

#normality now passes...Adequate model.

4.16

data <- matrix(c(0.05,0.05,0.05,0.06,0.03,0.05,
               0.04,0.02,0.03,0.05,0.03,0.02,
               0.09,0.13,0.11,0.15,0.08,0.12,
               0.03,0.04,0.05,0.05,0.03,0.02),ncol=6,byrow=T)
y <- as.numeric(data)

x <- factor(rep(1:4,6))
#Set the block to null
b <- NULL
for(k in 1:6) b <- c(b,rep(k,4))

## fit the model
model <- aov(y~x+b)
anova(model)

## Analysis of Variance Table
## 
## Response: y
##           Df    Sum Sq   Mean Sq F value    Pr(>F)    
## x          3 0.0258167 0.0086056 31.6667 1.349e-07 ***
## b          1 0.0000700 0.0000700  0.2576    0.6176    
## Residuals 19 0.0051633 0.0002718                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# The anova table indicate the treatment effect of one of the ratio control algorithms is not equal to zero.



ybars <- aggregate(y,by=list(x),mean)
ybars

##   Group.1          x
## 1       1 0.04833333
## 2       2 0.03166667
## 3       3 0.11333333
## 4       4 0.03666667

Ybar <- mean(y)
z <- y-Ybar

# The anova table indicate the treatment effect of one of the ratio control algorithms is not equal to zero.

model2 <- aov(z~-1+x,C(x,contr.sum))
anova(model2)

## Analysis of Variance Table
## 
## Response: z
##           Df    Sum Sq   Mean Sq F value    Pr(>F)    
## x          4 0.0258167 0.0064542  24.666 1.723e-07 ***
## Residuals 20 0.0052333 0.0002617                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#I'll select ratio 2

4.21

data <- matrix(c(73,68,74,71,67,73,67,75.25,72,70,75,68,78,73,68,
                 73,71,75,75,69),ncol=5,byrow=T)

y <- as.numeric(data)

#treatment
x <- factor(rep(1:4,5))

#block
b <- factor(c(rep(1,4),rep(2,4),rep(3,4),rep(4,4),rep(5,4)))


# fit the model
m <- aov(y~x+b)
anova(m)

## Analysis of Variance Table
## 
## Response: y
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## x          3  12.784   4.261  2.3498    0.1239    
## b          4 158.887  39.722 21.9029 1.919e-05 ***
## Residuals 12  21.762   1.814                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## From the anova result, there is no difference between each chemical level.

4.22

coupon <- matrix(c(9.3,9.4,9.6,10.0,
                   9.4,9.3,9.8,9.9,
                   9.2,9.4,9.5,9.7,
                   9.7,9.6,10.0,10.2),ncol=4,byrow=T)

X <- matrix(c(9.3,9.4,9.6,10.0,
              9.4,9.3,0,9.9,
              9.2,9.4,9.5,9.7,
              9.7,9.6,10.0,10.2),ncol=4,byrow=T)
miss <- (4*sum(X[2,])+4*sum(X[,3])-sum(X))/(3*3)
new_data <- matrix(c(9.3,9.4,9.6,10.0,
                 9.4,9.3,miss,9.9,
                 9.2,9.4,9.5,9.7,
                 9.7,9.6,10.0,10.2),ncol=4,byrow=T)

y <- as.numeric(new_data)
x <- factor(rep(1:4,4))
b <- factor(c(rep(1,4),rep(2,4),rep(3,4),rep(4,4)))

m <- aov(y~x+b)
anova(m)

## Analysis of Variance Table
## 
## Response: y
##           Df  Sum Sq  Mean Sq F value    Pr(>F)    
## x          3 0.39981 0.133272  19.277 0.0002945 ***
## b          3 0.79537 0.265123  38.348 1.873e-05 ***
## Residuals  9 0.06222 0.006914                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

##Result:  difference bewteen mean in each tip level.

4.24

X <- matrix(c(8,7,1,7,3,
              11,2,7,3,8,
              4,9,10,1,5,
              6,8,6,6,10,
              4,2,3,8,8),ncol=5,byrow=T)
y <- as.numeric(X)

x <- factor(c("a","c","b","d","e",
              "b","e","a","c","d",
              "d","a","c","e","b",
              "c","d","e","b","c",
              "e","b","d","a","c"))

matrix(x,ncol=5)

##      [,1] [,2] [,3] [,4] [,5]
## [1,] "a"  "b"  "d"  "c"  "e" 
## [2,] "c"  "e"  "a"  "d"  "b" 
## [3,] "b"  "a"  "c"  "e"  "d" 
## [4,] "d"  "c"  "e"  "b"  "a" 
## [5,] "e"  "d"  "b"  "c"  "c"

row <- factor(rep(1:5,5))

col <- NULL
for(i in 1:5) col <- c(col,rep(i,5))
col <- factor(col)

z <- y-mean(y)
model <- aov(z~-1+x+row+col)
anova(model)

## Analysis of Variance Table
## 
## Response: z
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## x          5 141.107 28.2213  9.4923 0.0007453 ***
## row        4  16.105  4.0262  1.3542 0.3065879    
## col        4  13.752  3.4379  1.1563 0.3774412    
## Residuals 12  35.677  2.9731                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(aov(y~x))

## Analysis of Variance Table
## 
## Response: y
##           Df  Sum Sq Mean Sq F value    Pr(>F)    
## x          4 141.107  35.277  10.766 8.057e-05 ***
## Residuals 20  65.533   3.277                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Result:  Day effects is not significant as batch.

4.25

X <- matrix(c(10,14,7,8,
              7,18,11,8,
              5,10,11,9,
              10,10,12,14),ncol=4,byrow=T)
y <- as.numeric(X)

x <- factor(c("c","b","a","d",
              "d","c","b","a",
              "a","d","c","b",
              "b","a","d","c"))
matrix(x,ncol=4)

##      [,1] [,2] [,3] [,4]
## [1,] "c"  "d"  "a"  "b" 
## [2,] "b"  "c"  "d"  "a" 
## [3,] "a"  "b"  "c"  "d" 
## [4,] "d"  "a"  "b"  "c"

row <- factor(rep(1:4,4))
col <- NULL
for(i in 1:4) col <- c(col,rep(i,4))
col <- factor(col)
z <- y-mean(y)
model <- aov(z~-1+x+row+col)
anova(model)

## Analysis of Variance Table
## 
## Response: z
##           Df Sum Sq Mean Sq F value   Pr(>F)   
## x          4   72.5 18.1250 10.3571 0.007330 **
## row        3   18.5  6.1667  3.5238 0.088519 . 
## col        3   51.5 17.1667  9.8095 0.009926 **
## Residuals  6   10.5  1.7500                    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

anova(aov(y~x))

## Analysis of Variance Table
## 
## Response: y
##           Df Sum Sq Mean Sq F value  Pr(>F)  
## x          3   72.5 24.1667  3.6025 0.04602 *
## Residuals 12   80.5  6.7083                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## Result: Assembly line required..

Homework3

Oghenetega Dibie

2/22/2020

4.16