Import data

library(readxl)

callcenter <- read_xlsx("H:/Mooc/Datalabs/Assigments/Mod1/callcenter.xlsx", sheet = 4)

ANOVA

Null hypothesis: The means of all the treatment groups are equal.

Alternate hypothesis: At least one of the means of the treatment groups is different.

Calculate means

meansofgroups <- function(groupsname){ # function to calculate means of groups
  
  mog <- mean(groupsname, na.rm = TRUE)
  return(mog)
}

total_mean <- sum(callcenter$zerotoone, callcenter$onetotwo, callcenter$twotothree, callcenter$threetofour)/length(c(callcenter$zerotoone, callcenter$onetotwo,  callcenter$twotothree, callcenter$threetofour))

means_of_groups <- c(meansofgroups(callcenter$zerotoone), meansofgroups(callcenter$onetotwo),   meansofgroups(callcenter$twotothree), meansofgroups(callcenter$threetofour))

Sum of squares within the groups (SSW)

ss <- function(ss_data, group_mean){  #function to calculate sum of squares
  w <- 0
  for(i in seq_along(ss_data)) {
    w[i] <- ss_data[i] - group_mean 
  }
  return(w^2) 
}

ss_within <- sum(ss(callcenter$zerotoone, means_of_groups[1]), ss(callcenter$onetotwo,      means_of_groups[2]), ss(callcenter$twotothree, means_of_groups[3]),ss(callcenter$threetofour, means_of_groups[4]), na.rm = TRUE)

Sum of sqaures between the groups (SSB)

changetomean <- function(changetomean_data, means_groups){ # function to change all obs to means of corresponding groups
  changetomean_data[which(changetomean_data != 0)] <- means_groups
  return(changetomean_data[which(changetomean_data != 0)])
}

ss_between <- sum(ss(changetomean(callcenter$zerotoone, means_of_groups[1]), total_mean),
                  ss(changetomean(callcenter$onetotwo, means_of_groups[2]), total_mean),
                  ss(changetomean(callcenter$twotothree, means_of_groups[3]), total_mean),
                  ss(changetomean(callcenter$threetofour, means_of_groups[4]), total_mean))

Total sum of squares (SST)

ss_total <- sum(ss(callcenter$zerotoone, total_mean), ss(callcenter$onetotwo, total_mean), 
            ss(callcenter$twotothree, total_mean), ss(callcenter$threetofour, total_mean))

Degrees of freedom

df <- function(ntotal, constraints, ngroups) { # function to calculate degrees of freedom (DF) 
  df_total <- (ntotal-1)
  df_between <- (ngroups - 1)
  df_within <- (ntotal - constraints)
  return(c(df_total, df_between, df_within))
}

dof <- df(380, 4, 4)

Mean square error (MSE)

mse <- function(between, with_in, d_f){ # function to calculate mean square error (MSE) 
  mse_between <- between/d_f[[2]]
  mse_within <- with_in/d_f[[3]]
  return(c(mse_between, mse_within))
}

mean_sqaure_error <- mse(ss_between, ss_within, dof)

F-statistic

Null hypothesis: The variance between the groups is equal.

Alternate hypothesis: The variance is not equal between the groups.

fstatistic <- function(ms) { # function to calculate f-statistic
  
  f <- (ms[[1]]/ms[[2]])
  return(f)
}

fstat <- fstatistic(mean_sqaure_error)

Decision on hypothesis

decision <- function(ftab, mse_between, mse_within){ # function for inference 
  
  fcal <- mse_between/mse_within
  ifelse(fcal >= ftab, "Reject null hypothesis and further analysis required",
         "Failed to reject null hypothesis and no further analysis required")
}

decision(2.628, mean_sqaure_error[1], mean_sqaure_error[2])

## [1] "Reject null hypothesis and further analysis required"

Tukey test(pair wise comparision)

Pair-wise comparision of all the treatment groups.

Ttab <- function(qtab, s, ni, nj){ #function to calculate tukey's criteria
  tukey_criteria <- qtab*(sqrt(s/min(ni,nj)))
  return(tukey_criteria)
}


Tcal <- function(xi, xj){ #function to calculate Tcal
  tcal <- abs(xi-xj)
  return(tcal)
}


tukey_ci <- function(xa, xb, qtab, s, ni, nj){ #function to calculate confidence interval for tukey
  lower_ci <- xa - xb -(qtab/sqrt(2))*(sqrt(s))*sqrt((1/ni + 1/nj))
  upper_ci <- xa - xb +(qtab/sqrt(2))*(sqrt(s))*sqrt((1/ni + 1/nj))
  return(c(lower_ci,upper_ci))
}


# For   H0: 0-1hr = 1-2hr
#       H1: 0-1hr != 1-2hr
Tcal_one <- Tcal(means_of_groups[1], means_of_groups[2])
tukey_ci_one <- tukey_ci(means_of_groups[2], means_of_groups[1], 3.633, 1756, 95, 95)
group1 <- c(Tcal_one, tukey_ci_one)

# For   H0: 0-1hr = 2-3hr
#       H1: 0-1hr != 2-3hr
Tcal_two <- Tcal(means_of_groups[1], means_of_groups[3])
tukey_ci_two <- tukey_ci(means_of_groups[3], means_of_groups[1], 3.633, 1756, 95, 95)
group2 <- c(Tcal_two, tukey_ci_two)

# For   H0: 0-1hr = 3-4hr
#       H1: 0-1hr != 3-4hr
Tcal_three <- Tcal(means_of_groups[1], means_of_groups[4])
tukey_ci_three <- tukey_ci(means_of_groups[4], means_of_groups[1], 3.633, 1756, 95, 95)
group3 <- c(Tcal_three, tukey_ci_three)

# For   H0: 1-2hr = 2-3hr
#       H1: 1-2hr != 2-3hr
Tcal_four <-Tcal(means_of_groups[2], means_of_groups[3])
tukey_ci_four <- tukey_ci(means_of_groups[3], means_of_groups[2], 3.633, 1756, 95, 95)
group4 <- c(Tcal_four, tukey_ci_four)

# For   H0: 1-2hr = 3-4hr
#       H1: 1-2hr != 3-4hr
Tcal_five <- Tcal(means_of_groups[2], means_of_groups[4])
tukey_ci_five <- tukey_ci(means_of_groups[4], means_of_groups[2], 3.633, 1756, 95, 95)
group5 <- c(Tcal_five, tukey_ci_five)

# For   H0: 2-3hr = 3-4hr
#       H1: 2-3hr != 3-4hr
Tcal_six <- Tcal(means_of_groups[3], means_of_groups[4])
tukey_ci_six <- tukey_ci(means_of_groups[4], means_of_groups[3], 3.633, 1756, 95, 95)
group6 <- c(Tcal_six, tukey_ci_six)

Dunnet Test

Control group: 0-1hr.

Treatment groups: 1-2hr, 2-3hr & 3-4hr.

Dcal <- function(xi, xj){ #function to calculate Dcal
  dcal <- abs(xi-xj)
  return(dcal)
  
}

Dtab <- function(qtab, s, ni, nj){ #function to calculate Dunnet's criteria
  dunnet_criteria <- qtab*(sqrt(s/min(ni,nj)))
  return(dunnet_criteria)
}


dunnet_ci <- function(control, treatment, dtab, s, ni, nj){ #function to calculate confidence interval for dunnet
  lower_ci <- (control - treatment) - (dtab)*(sqrt(s))*sqrt((1/ni + 1/nj))
  upper_ci <- (control - treatment) + (dtab)*(sqrt(s))*sqrt((1/ni + 1/nj))
  return(c(lower_ci, upper_ci))
}

# For   H0: 0-1hr = 1-2hr
#       H1: 0-1hr != 1-2hr
Dcal_one <- Dcal(means_of_groups[1], means_of_groups[2])
dunnet_ci_one <- dunnet_ci(means_of_groups[2], means_of_groups[1], 2.35, 1756, 95, 95)
d_group1 <- c(Dcal_one, dunnet_ci_one)


# For   H0: 0-1hr = 2-3hr
#       H1: 0-1hr != 2-3hr
Dcal_two <- Dcal(means_of_groups[1], means_of_groups[3])
dunnet_ci_two <- dunnet_ci(means_of_groups[3], means_of_groups[1], 2.35, 1756, 95, 95)
d_group2 <- c(Dcal_two, dunnet_ci_two)


# For   H0: 0-1hr = 3-4hr
#       H1: 0-1hr != 3-4hr
Dcal_three <- Dcal(means_of_groups[1], means_of_groups[4])
dunnet_ci_three <- dunnet_ci(means_of_groups[4], means_of_groups[1], 2.35, 1756, 95, 95)
d_group3 <- c(Dcal_three, dunnet_ci_three)

Results

Anova Table

cat("SSB = ", ss_between, "\nSSW = ", ss_within, "\nSSt = ", ss_total, "\nMSE between = ", mean_sqaure_error[1], "\nMSE within = ", mean_sqaure_error[2], "\nF-Statistic =", fstat, "\n", decision(2.628, mean_sqaure_error[1], mean_sqaure_error[2]))

## SSB =  22887.17 
## SSW =  660337.7 
## SSt =  683224.8 
## MSE between =  7629.056 
## MSE within =  1756.217 
## F-Statistic = 4.344028 
##  Reject null hypothesis and further analysis required

Tukey test results

cat(" ","group", "   meandiff", "  lowerCI", "  upperCI", "\n0-1 - 1-2hr", group1, "\n0-1 - 2-3hr", group2, "\n0-1 - 3-4hr", group3, "\n1-2 - 2-3hr", group4, "\n1-2 - 3-4hr", group5, "\n2-3 - 3-4hr", group6)

##   group    meandiff   lowerCI   upperCI 
## 0-1 - 1-2hr 12.12632 -3.493138 27.74577 
## 0-1 - 2-3hr 18.87368 3.25423 34.49314 
## 0-1 - 3-4hr 19.12632 3.506862 34.74577 
## 1-2 - 2-3hr 6.747368 -8.872086 22.36682 
## 1-2 - 3-4hr 7 -8.619454 22.61945 
## 2-3 - 3-4hr 0.2526316 -15.36682 15.87209

Dunnet test results

cat(" ","group", "   meandiff", "  lowerCI", "  upperCI", "\n0-1 - 1-2hr", d_group1, "\n0-1 - 2-3hr", d_group2, "\n0-1 - 3-4hr", d_group3)

##   group    meandiff   lowerCI   upperCI 
## 0-1 - 1-2hr 12.12632 -2.162075 26.41471 
## 0-1 - 2-3hr 18.87368 4.585294 33.16207 
## 0-1 - 3-4hr 19.12632 4.837925 33.41471

One Factor Anova