20190816
Created my own ANOVA function to practice writing R functions.
- packages
library(reshape2) #melt()
- Create data.frame
# Example data: four groups (G1-G4), five observations per group.
random_df <- data.frame(
  G1 = c(52, 43, 40, 47, 45),
  G2 = c(49, 52, 46, 43, 40),
  G3 = c(55, 51, 53, 55, 49),
  G4 = c(49, 52, 55, 54, 48)
)
# Keep each group available as a standalone vector as well.
G1 <- random_df[["G1"]]
G2 <- random_df[["G2"]]
G3 <- random_df[["G3"]]
G4 <- random_df[["G4"]]
# Show the wide-format table (one column per group).
print(random_df)
## G1 G2 G3 G4
## 1 52 49 55 49
## 2 43 52 51 52
## 3 40 46 53 55
## 4 47 43 55 54
## 5 45 40 49 48
- Custom ANOVA function
#' One-way ANOVA for a balanced design stored in wide format
#'
#' Every column of `df` is treated as one group and every row as one
#' observation within each group, so all groups have `nrow(df)` observations.
#' The sums of squares are computed by hand and, for comparison, the same
#' model is fitted with `lm()` and summarised with `anova()`.
#'
#' @param df A data frame whose columns are all numeric (one column per group).
#' @return The result list, returned invisibly after being printed. Elements:
#'   `group_mean` (named numeric vector of column means), `SST`, `SSB`, `SSW`
#'   (total, between-group and within-group sums of squares), `anova` (the
#'   table returned by `anova()`), `table` (hand-built ANOVA table with columns
#'   `source`, `square_sum`, `df`, `Mean`, `F_value`) and `eta_square`
#'   (`SSB / SST`).
My_anova <- function(df) {
  stopifnot(
    is.data.frame(df),
    all(vapply(df, is.numeric, logical(1)))
  )

  obs <- as.matrix(df) # n x k matrix: rows = observations, columns = groups
  k <- ncol(obs)       # the number of groups
  n <- nrow(obs)       # the number of observations in each group
  N <- k * n           # the total number of observations

  grand_mean <- sum(obs) / N
  group_mean <- colMeans(obs)

  ANOVA <- list() # result list, filled step by step and returned at the end

  # Mean by group (for reference), kept numeric so it can be reused.
  ANOVA$group_mean <- group_mean

  # SST: sum of squares of (each observation - grand mean).
  SST <- sum((obs - grand_mean)^2)
  ANOVA$SST <- SST

  # SSB: sum of squares of (group mean - grand mean), counted once per
  # observation, hence the factor n.
  SSB <- n * sum((group_mean - grand_mean)^2)
  ANOVA$SSB <- SSB

  # SSW follows from the decomposition SST = SSB + SSW.
  SSW <- SST - SSB
  ANOVA$SSW <- SSW

  # Reference result from lm()/anova(). The data is reshaped to long form
  # with the column names `variable` (group) and `value` (observation);
  # as.vector() walks the matrix column by column, matching rep(..., each = n).
  long <- data.frame(
    variable = factor(rep(colnames(obs), each = n), levels = colnames(obs)),
    value = as.vector(obs)
  )
  ANOVA$anova <- anova(lm(value ~ variable, data = long))

  # Hand-built ANOVA table.
  dof <- c(k - 1, N - k) # df between groups, df within groups
  MSB <- SSB / dof[1]    # mean square between = SSB / (groups - 1)
  MSW <- SSW / dof[2]    # mean square within = SSW / (total obs - groups)
  ANOVA$table <- data.frame(
    source = c("inter_group", "intra_group"),
    square_sum = c(SSB, SSW),
    df = dof,
    Mean = c(MSB, MSW),
    F_value = c(MSB / MSW, NA_real_) # no F statistic for the residual row
  )

  # Explanatory power: share of total variation explained by the grouping.
  ANOVA$eta_square <- SSB / SST

  print(ANOVA)
  invisible(ANOVA)
}
- Check the result
# Run the custom one-way ANOVA on the example data; the function prints its
# result list itself, which produces the output shown below.
My_anova(random_df)
## No id variables; using all as measure variables
## $group_mean
## G1 G2 G3 G4
## "Mean :45.4 " "Mean :46 " "Mean :52.6 " "Mean :51.6 "
##
## $SST
## [1] 443.8
##
## $SSB
## [1] 208.2
##
## $SSW
## [1] 235.6
##
## $anova
## Analysis of Variance Table
##
## Response: value
## Df Sum Sq Mean Sq F value Pr(>F)
## variable 3 208.2 69.400 4.7131 0.01529 *
## Residuals 16 235.6 14.725
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## $table
## source square_sum df Mean F_value
## 1 inter_group 208.2 3 69.400 4.713073
## 2 intra_group 235.6 16 14.725 0.000000
##
## $eta_square
## [1] 0.4691302