library(readxl)
callcenter <- read_xlsx("H:/Mooc/Datalabs/Assigments/Mod1/callcenter.xlsx", sheet = 4)
Null hypothesis: The means of all the treatment groups are equal.
Alternate hypothesis: At least one of the means of the treatment groups is different.
meansofgroups <- function(groupsname){ # function to calculate means of groups
mog <- mean(groupsname, na.rm = TRUE)
return(mog)
}
total_mean <- sum(callcenter$zerotoone, callcenter$onetotwo, callcenter$twotothree, callcenter$threetofour)/length(c(callcenter$zerotoone, callcenter$onetotwo, callcenter$twotothree, callcenter$threetofour))
means_of_groups <- c(meansofgroups(callcenter$zerotoone), meansofgroups(callcenter$onetotwo), meansofgroups(callcenter$twotothree), meansofgroups(callcenter$threetofour))
ss <- function(ss_data, group_mean){ #function to calculate sum of squares
w <- 0
for(i in seq_along(ss_data)) {
w[i] <- ss_data[i] - group_mean
}
return(w^2)
}
ss_within <- sum(ss(callcenter$zerotoone, means_of_groups[1]), ss(callcenter$onetotwo, means_of_groups[2]), ss(callcenter$twotothree, means_of_groups[3]),ss(callcenter$threetofour, means_of_groups[4]), na.rm = TRUE)
changetomean <- function(changetomean_data, means_groups){ # function to change all obs to means of corresponding groups
changetomean_data[which(changetomean_data != 0)] <- means_groups
return(changetomean_data[which(changetomean_data != 0)])
}
ss_between <- sum(ss(changetomean(callcenter$zerotoone, means_of_groups[1]), total_mean),
ss(changetomean(callcenter$onetotwo, means_of_groups[2]), total_mean),
ss(changetomean(callcenter$twotothree, means_of_groups[3]), total_mean),
ss(changetomean(callcenter$threetofour, means_of_groups[4]), total_mean))
ss_total <- sum(ss(callcenter$zerotoone, total_mean), ss(callcenter$onetotwo, total_mean),
ss(callcenter$twotothree, total_mean), ss(callcenter$threetofour, total_mean))
df <- function(ntotal, constraints, ngroups) { # function to calculate degrees of freedom (DF)
df_total <- (ntotal-1)
df_between <- (ngroups - 1)
df_within <- (ntotal - constraints)
return(c(df_total, df_between, df_within))
}
dof <- df(380, 4, 4)
mse <- function(between, with_in, d_f){ # function to calculate mean square error (MSE)
mse_between <- between/d_f[[2]]
mse_within <- with_in/d_f[[3]]
return(c(mse_between, mse_within))
}
mean_sqaure_error <- mse(ss_between, ss_within, dof)
Null hypothesis: The variance between the groups is equal.
Alternate hypothesis: The variance is not equal between the groups.
fstatistic <- function(ms) { # function to calculate f-statistic
f <- (ms[[1]]/ms[[2]])
return(f)
}
fstat <- fstatistic(mean_sqaure_error)
decision <- function(ftab, mse_between, mse_within){ # function for inference
fcal <- mse_between/mse_within
ifelse(fcal >= ftab, "Reject null hypothesis and further analysis required",
"Failed to reject null hypothesis and no further analysis required")
}
decision(2.628, mean_sqaure_error[1], mean_sqaure_error[2])
## [1] "Reject null hypothesis and further analysis required"
Pair-wise comparision of all the treatment groups.
Ttab <- function(qtab, s, ni, nj){ #function to calculate tukey's criteria
tukey_criteria <- qtab*(sqrt(s/min(ni,nj)))
return(tukey_criteria)
}
Tcal <- function(xi, xj){ #function to calculate Tcal
tcal <- abs(xi-xj)
return(tcal)
}
tukey_ci <- function(xa, xb, qtab, s, ni, nj){ #function to calculate confidence interval for tukey
lower_ci <- xa - xb -(qtab/sqrt(2))*(sqrt(s))*sqrt((1/ni + 1/nj))
upper_ci <- xa - xb +(qtab/sqrt(2))*(sqrt(s))*sqrt((1/ni + 1/nj))
return(c(lower_ci,upper_ci))
}
# For H0: 0-1hr = 1-2hr
# H1: 0-1hr != 1-2hr
Tcal_one <- Tcal(means_of_groups[1], means_of_groups[2])
tukey_ci_one <- tukey_ci(means_of_groups[2], means_of_groups[1], 3.633, 1756, 95, 95)
group1 <- c(Tcal_one, tukey_ci_one)
# For H0: 0-1hr = 2-3hr
# H1: 0-1hr != 2-3hr
Tcal_two <- Tcal(means_of_groups[1], means_of_groups[3])
tukey_ci_two <- tukey_ci(means_of_groups[3], means_of_groups[1], 3.633, 1756, 95, 95)
group2 <- c(Tcal_two, tukey_ci_two)
# For H0: 0-1hr = 3-4hr
# H1: 0-1hr != 3-4hr
Tcal_three <- Tcal(means_of_groups[1], means_of_groups[4])
tukey_ci_three <- tukey_ci(means_of_groups[4], means_of_groups[1], 3.633, 1756, 95, 95)
group3 <- c(Tcal_three, tukey_ci_three)
# For H0: 1-2hr = 2-3hr
# H1: 1-2hr != 2-3hr
Tcal_four <-Tcal(means_of_groups[2], means_of_groups[3])
tukey_ci_four <- tukey_ci(means_of_groups[3], means_of_groups[2], 3.633, 1756, 95, 95)
group4 <- c(Tcal_four, tukey_ci_four)
# For H0: 1-2hr = 3-4hr
# H1: 1-2hr != 3-4hr
Tcal_five <- Tcal(means_of_groups[2], means_of_groups[4])
tukey_ci_five <- tukey_ci(means_of_groups[4], means_of_groups[2], 3.633, 1756, 95, 95)
group5 <- c(Tcal_five, tukey_ci_five)
# For H0: 2-3hr = 3-4hr
# H1: 2-3hr != 3-4hr
Tcal_six <- Tcal(means_of_groups[3], means_of_groups[4])
tukey_ci_six <- tukey_ci(means_of_groups[4], means_of_groups[3], 3.633, 1756, 95, 95)
group6 <- c(Tcal_six, tukey_ci_six)
Control group: 0-1hr.
Treatment groups: 1-2hr, 2-3hr & 3-4hr.
Dcal <- function(xi, xj){ #function to calculate Dcal
dcal <- abs(xi-xj)
return(dcal)
}
Dtab <- function(qtab, s, ni, nj){ #function to calculate Dunnet's criteria
dunnet_criteria <- qtab*(sqrt(s/min(ni,nj)))
return(dunnet_criteria)
}
dunnet_ci <- function(control, treatment, dtab, s, ni, nj){ #function to calculate confidence interval for dunnet
lower_ci <- (control - treatment) - (dtab)*(sqrt(s))*sqrt((1/ni + 1/nj))
upper_ci <- (control - treatment) + (dtab)*(sqrt(s))*sqrt((1/ni + 1/nj))
return(c(lower_ci, upper_ci))
}
# For H0: 0-1hr = 1-2hr
# H1: 0-1hr != 1-2hr
Dcal_one <- Dcal(means_of_groups[1], means_of_groups[2])
dunnet_ci_one <- dunnet_ci(means_of_groups[2], means_of_groups[1], 2.35, 1756, 95, 95)
d_group1 <- c(Dcal_one, dunnet_ci_one)
# For H0: 0-1hr = 2-3hr
# H1: 0-1hr != 2-3hr
Dcal_two <- Dcal(means_of_groups[1], means_of_groups[3])
dunnet_ci_two <- dunnet_ci(means_of_groups[3], means_of_groups[1], 2.35, 1756, 95, 95)
d_group2 <- c(Dcal_two, dunnet_ci_two)
# For H0: 0-1hr = 3-4hr
# H1: 0-1hr != 3-4hr
Dcal_three <- Dcal(means_of_groups[1], means_of_groups[4])
dunnet_ci_three <- dunnet_ci(means_of_groups[4], means_of_groups[1], 2.35, 1756, 95, 95)
d_group3 <- c(Dcal_three, dunnet_ci_three)
cat("SSB = ", ss_between, "\nSSW = ", ss_within, "\nSSt = ", ss_total, "\nMSE between = ", mean_sqaure_error[1], "\nMSE within = ", mean_sqaure_error[2], "\nF-Statistic =", fstat, "\n", decision(2.628, mean_sqaure_error[1], mean_sqaure_error[2]))
## SSB = 22887.17
## SSW = 660337.7
## SSt = 683224.8
## MSE between = 7629.056
## MSE within = 1756.217
## F-Statistic = 4.344028
## Reject null hypothesis and further analysis required
cat(" ","group", " meandiff", " lowerCI", " upperCI", "\n0-1 - 1-2hr", group1, "\n0-1 - 2-3hr", group2, "\n0-1 - 3-4hr", group3, "\n1-2 - 2-3hr", group4, "\n1-2 - 3-4hr", group5, "\n2-3 - 3-4hr", group6)
## group meandiff lowerCI upperCI
## 0-1 - 1-2hr 12.12632 -3.493138 27.74577
## 0-1 - 2-3hr 18.87368 3.25423 34.49314
## 0-1 - 3-4hr 19.12632 3.506862 34.74577
## 1-2 - 2-3hr 6.747368 -8.872086 22.36682
## 1-2 - 3-4hr 7 -8.619454 22.61945
## 2-3 - 3-4hr 0.2526316 -15.36682 15.87209
cat(" ","group", " meandiff", " lowerCI", " upperCI", "\n0-1 - 1-2hr", d_group1, "\n0-1 - 2-3hr", d_group2, "\n0-1 - 3-4hr", d_group3)
## group meandiff lowerCI upperCI
## 0-1 - 1-2hr 12.12632 -2.162075 26.41471
## 0-1 - 2-3hr 18.87368 4.585294 33.16207
## 0-1 - 3-4hr 19.12632 4.837925 33.41471