library(readxl)
## Warning: package 'readxl' was built under R version 4.2.3
# Load the laboratory exercise workbook into LAB2.
LAB2 <- read_excel(path = "D:/stat//Laboratory Exercise 2 Data.xlsx")
## Warning: package 'kableExtra' was built under R version 4.2.3
| Solutions | Day 1 | Day 2 | Day 3 | Day 4 |
|---|---|---|---|---|
| 1 | 13 | 22 | 18 | 39 |
| 2 | 16 | 24 | 17 | 44 |
| 3 | 5 | 4 | 1 | 22 |
# Long-format version of the table: one row per Day x Solution measurement.
# Rows are ordered by day, and within each day by solution 1, 2, 3.
DATA <- data.frame(
  Day = factor(rep(c("1", "2", "3", "4"), each = 3)),
  Effectiveness = c(
    13, 16, 5,   # Day 1
    22, 24, 4,   # Day 2
    18, 17, 1,   # Day 3
    39, 44, 22   # Day 4
  ),
  Solution = factor(rep(c("Solution 1", "Solution 2", "Solution 3"), times = 4))
)
DATA
## Day Effectiveness Solution
## 1 1 13 Solution 1
## 2 1 16 Solution 2
## 3 1 5 Solution 3
## 4 2 22 Solution 1
## 5 2 24 Solution 2
## 6 2 4 Solution 3
## 7 3 18 Solution 1
## 8 3 17 Solution 2
## 9 3 1 Solution 3
## 10 4 39 Solution 1
## 11 4 44 Solution 2
## 12 4 22 Solution 3
# Summary statistics of Effectiveness: one formatted line per solution plus
# one line for the whole data set, collected in RESULTS.
# Row positions in DATA that belong to each solution, in the same order as
# solution_names.
indices <- list(c(1, 4, 7, 10), c(2, 5, 8, 11), c(3, 6, 9, 12))
solution_names <- c("Solution 1", "Solution 2", "Solution 3")
# Calculate mean and standard deviation for each solution
# (seq_along() instead of 1:length() so an empty list yields no iterations).
results <- lapply(seq_along(indices), function(i) {
  values <- DATA$Effectiveness[indices[[i]]]
  n <- solution_names[i]
  sprintf("Mean %s: %.2f, Standard Deviation %s: %.2f", n, mean(values), n, sd(values))
})
# Calculate mean and standard deviation for the overall data.
# Reads from DATA: the previous code referenced `mydata`, which is never
# defined in this script and stopped with "object 'mydata' not found".
overall_ms <- mean(DATA$Effectiveness)
overall_sds <- sd(DATA$Effectiveness)
overall_results <- sprintf("Mean Over-all: %.2f, Standard Deviation Over-all: %.2f", overall_ms, overall_sds)
# Combine the results into one list
RESULTS <- c(results, overall_results)
RESULTS
# Fit an additive ANOVA of Effectiveness on Solution and Day (no interaction
# term) and show the ANOVA table.
anova_model <- aov(
  formula = Effectiveness ~ Solution + Day,
  data = DATA
)
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## Solution 2 703.5 351.8 40.72 0.000323 ***
## Day 3 1106.9 369.0 42.71 0.000192 ***
## Residuals 6 51.8 8.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Tukey HSD pairwise comparisons, restricted to the Solution term of the
# fitted model.
Tukey <- TukeyHSD(x = anova_model, which = "Solution")
Tukey
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Effectiveness ~ Solution + Day, data = DATA)
##
## $Solution
## diff lwr upr p adj
## Solution 2-Solution 1 2.25 -4.126879 8.626879 0.5577862
## Solution 3-Solution 1 -15.00 -21.376879 -8.623121 0.0008758
## Solution 3-Solution 2 -17.25 -23.626879 -10.873121 0.0004067
# Plot the Tukey HSD comparison intervals. The TukeyHSD() result is stored in
# `Tukey`; no object named `tukey_result` exists in this script.
plot(Tukey)
First, I loaded the Excel data and stored it in LAB2 for convenience.
library(readxl)
# Load the laboratory exercise workbook into LAB2.
LAB2 <- read_excel(path = "D:/stat//Laboratory Exercise 2 Data.xlsx")
I used code to customize my data with colors
| Solutions | Day 1 | Day 2 | Day 3 | Day 4 |
|---|---|---|---|---|
| 1 | 13 | 22 | 18 | 39 |
| 2 | 16 | 24 | 17 | 44 |
| 3 | 5 | 4 | 1 | 22 |
For plotting and analysis, I rebuilt the data in long format using data.frame().
# Long-format version of the table: one row per Day x Solution measurement.
# Rows are ordered by day, and within each day by solution 1, 2, 3.
DATA <- data.frame(
  Day = factor(rep(c("1", "2", "3", "4"), each = 3)),
  Effectiveness = c(
    13, 16, 5,   # Day 1
    22, 24, 4,   # Day 2
    18, 17, 1,   # Day 3
    39, 44, 22   # Day 4
  ),
  Solution = factor(rep(c("Solution 1", "Solution 2", "Solution 3"), times = 4))
)
DATA
## Day Effectiveness Solution
## 1 1 13 Solution 1
## 2 1 16 Solution 2
## 3 1 5 Solution 3
## 4 2 22 Solution 1
## 5 2 24 Solution 2
## 6 2 4 Solution 3
## 7 3 18 Solution 1
## 8 3 17 Solution 2
## 9 3 1 Solution 3
## 10 4 39 Solution 1
## 11 4 44 Solution 2
## 12 4 22 Solution 3
Again, I used code to customize my data with colors.
I used ggplot to plot my data for a clearer visual interpretation and as a basis for the analysis.
I used this code to get the mean and standard deviation of each washing solution, as well as the overall mean and standard deviation.
# Summary statistics of Effectiveness: one formatted line per solution plus
# one line for the whole data set, collected in RESULTS.
# Define the indices and names for each solution: row positions in DATA that
# belong to each solution, in the same order as solution_names.
indices <- list(c(1, 4, 7, 10), c(2, 5, 8, 11), c(3, 6, 9, 12))
solution_names <- c("Solution 1", "Solution 2", "Solution 3")
# Calculate mean and standard deviation for each solution.
# Reads from DATA: the previous code referenced `mydata`, which is never
# defined in this script and stopped with "object 'mydata' not found".
# seq_along() replaces 1:length() so an empty list yields no iterations.
results <- lapply(seq_along(indices), function(i) {
  values <- DATA$Effectiveness[indices[[i]]]
  n <- solution_names[i]
  sprintf("Mean %s: %.2f, Standard Deviation %s: %.2f", n, mean(values), n, sd(values))
})
# Calculate mean and standard deviation for the overall data
overall_ms <- mean(DATA$Effectiveness)
overall_sds <- sd(DATA$Effectiveness)
overall_results <- sprintf("Mean Over-all: %.2f, Standard Deviation Over-all: %.2f", overall_ms, overall_sds)
# Combine the results into one list
RESULTS <- c(results, overall_results)
RESULTS
I used ANOVA to evaluate if there are any notable variations in the means of several groups. To create the ANOVA table, we use the function summary(anova_model).
# Fit an additive ANOVA of Effectiveness on Solution and Day (no interaction
# term) and show the ANOVA table.
anova_model <- aov(
  formula = Effectiveness ~ Solution + Day,
  data = DATA
)
summary(anova_model)
## Df Sum Sq Mean Sq F value Pr(>F)
## Solution 2 703.5 351.8 40.72 0.000323 ***
## Day 3 1106.9 369.0 42.71 0.000192 ***
## Residuals 6 51.8 8.6
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
I use TukeyHSD() and store the result in the variable Tukey to run the post-hoc test.
# Tukey HSD pairwise comparisons, restricted to the Solution term of the
# fitted model.
Tukey <- TukeyHSD(x = anova_model, which = "Solution")
Tukey
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = Effectiveness ~ Solution + Day, data = DATA)
##
## $Solution
## diff lwr upr p adj
## Solution 2-Solution 1 2.25 -4.126879 8.626879 0.5577862
## Solution 3-Solution 1 -15.00 -21.376879 -8.623121 0.0008758
## Solution 3-Solution 2 -17.25 -23.626879 -10.873121 0.0004067
And plot it.
# Plot the Tukey HSD comparison intervals. The TukeyHSD() result is stored in
# `Tukey`; no object named `tukey_result` exists in this script.
plot(Tukey)