Assignment 8 VICTOR LEON

A) What are the correct hypothesis?

H0 = The mean in all groups are equal

HA = At least there is one difference amongst the means

library('asbio')
## Warning: package 'asbio' was built under R version 3.3.3
## Loading required package: tcltk
data(life.exp)

attach(life.exp)

#B) Calculate SSE and SSa by hand.

nn85mean <- mean(lifespan[ treatment == "N/N85"])
nr50mean <- mean(lifespan[ treatment == 'N/R50'])
rr50mean <- mean(lifespan[ treatment == 'R/R50'])
nr40mean <- mean(lifespan[ treatment == 'N/R40'])

nn85<- life.exp[treatment=='N/N85',1]
nr50<- life.exp[treatment=='N/R50',1]
rr50<- life.exp[treatment=='R/R50',1]
nr40<- life.exp[treatment=='N/R40',1]

allnn85 <- sum((nn85-nn85mean)^(2))
allnr50 <- sum((nr50-nr50mean)^(2))
allrr50 <- sum((rr50-rr50mean)^(2))
allnr40 <- sum((nr40-nr40mean)^(2))

k <- 4
df_between <- k-1
N<- length(nn85)+length(nr50)+length(rr50)+length(nr40)
df_within <- N-k
df_total <- df_between + df_within 
all_sum<- sum(nn85,nr50,rr50,nr40)


grandmean <- all_sum/N
allnn85gm <- sum((nn85-grandmean)^(2))
allnr50gm <- sum((nr50-grandmean)^(2))
allrr50gm <- sum((rr50-grandmean)^(2))
allnr40gm <- sum((nr40-grandmean)^(2))

SST <- (allnn85gm+allnr50+allrr50gm+allnr40gm)

SSW <- (allnn85+allnr50+allrr50+allnr40)

print.default(SSW)
## [1] 10802.94
SSB <- sum( ((nn85mean-grandmean)^2*length(nn85)),
            ((nr50mean-grandmean)^2*length(nr50)),
            ((rr50mean-grandmean)^2*length(rr50)),
            ((nr40mean-grandmean)^2*length(nr40)))

print.default(SSB)
## [1] 5267.03
#Also SSB is SST-SSW

MSB <- SSB/df_between



MSW <- SSW/df_within



F <- MSB/MSW

print.default(F)
## [1] 39.00443
# (C)



Df<- cbind(df_between,df_within)
Mean_SQ <- cbind(MSB,MSW)
F_value<- F
P_value <- df(F,df_between,df_within)
table_a <- cbind(NULL,'treatment','Residuals')

table_big <- rbind(table_a,Df,Mean_SQ,F_value,P_value)
# (D)

summary(life.exp)
##     lifespan     treatment 
##  Min.   :17.90   N/N85:57  
##  1st Qu.:35.27   N/R40:60  
##  Median :42.30   N/R50:71  
##  Mean   :40.88   R/R50:56  
##  3rd Qu.:47.70             
##  Max.   :54.60
summary.lm(lm(lifespan~treatment,data=life.exp))
## 
## Call:
## lm(formula = lifespan ~ treatment, data = life.exp)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.5167  -2.9339   0.6615   5.2364   9.6088 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     32.6912     0.8886  36.788  < 2e-16 ***
## treatmentN/R40  12.4254     1.2409  10.013  < 2e-16 ***
## treatmentN/R50   9.6060     1.1932   8.051 3.82e-14 ***
## treatmentR/R50  10.1945     1.2623   8.076 3.25e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.709 on 240 degrees of freedom
## Multiple R-squared:  0.3278, Adjusted R-squared:  0.3194 
## F-statistic:    39 on 3 and 240 DF,  p-value: < 2.2e-16
anova(lm(lifespan~treatment,data=life.exp))
## Analysis of Variance Table
## 
## Response: lifespan
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## treatment   3   5267 1755.68  39.004 < 2.2e-16 ***
## Residuals 240  10803   45.01                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# (E)
lmq <- lm(lifespan~treatment,data=life.exp)
plot(lmq, which=1:4)

bartlett.test(lifespan~treatment, data=life.exp)
## 
##  Bartlett test of homogeneity of variances
## 
## data:  lifespan by treatment
## Bartlett's K-squared = 10.096, df = 3, p-value = 0.01777
# (F)
tk <- aov(lifespan~treatment, data = life.exp)
TukeyHSD(tk)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = lifespan ~ treatment, data = life.exp)
## 
## $treatment
##                   diff       lwr        upr     p adj
## N/R40-N/N85 12.4254385  9.215018 15.6358589 0.0000000
## N/R50-N/N85  9.6059554  6.519071 12.6928401 0.0000000
## R/R50-N/N85 10.1944862  6.928685 13.4602879 0.0000000
## N/R50-N/R40 -2.8194831 -5.863260  0.2242942 0.0804712
## R/R50-N/R40 -2.2309523 -5.456039  0.9941344 0.2808243
## R/R50-N/R50  0.5885309 -2.513604  3.6906659 0.9610849
plot(TukeyHSD(tk,'treatment'))

#Summarize conclusions ( F )

Most of the values found are close to the value using the anova command

if there are some differences in the numbers is due to the rounding