MUHAMMAD BILAL DS-5TH(Comparing Means in R Tools)

##Comparing one-sample mean to a standard known mean: #One-sample T-test (parametric):

set.seed(1234)
my_data <- data.frame(
  name = paste0(rep("M_", 10), 1:10),
  weight = round(rnorm(10, 20, 2), 1)
)

#Check your data:

# Print the first 10 rows of the data
head(my_data, 10)

##    name weight
## 1   M_1   17.6
## 2   M_2   20.6
## 3   M_3   22.2
## 4   M_4   15.3
## 5   M_5   20.9
## 6   M_6   21.0
## 7   M_7   18.9
## 8   M_8   18.9
## 9   M_9   18.9
## 10 M_10   18.2

# Statistical summaries of weight
summary(my_data$weight)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15.30   18.38   18.90   19.25   20.82   22.20

#Visualize your data using box plots:

library(ggpubr)

## Warning: package 'ggpubr' was built under R version 4.3.2

## Loading required package: ggplot2

ggboxplot(my_data$weight, 
          ylab = "Weight (g)", xlab = FALSE,
          ggtheme = theme_minimal())

#Preleminary test to check one-sample t-test assumptions:

shapiro.test(my_data$weight) # => p-value = 0.6993

## 
##  Shapiro-Wilk normality test
## 
## data:  my_data$weight
## W = 0.9526, p-value = 0.6993

library("ggpubr")
ggqqplot(my_data$weight, ylab = "Men's weight",
         ggtheme = theme_minimal())

#Compute one-sample t-test:

# One-sample t-test
res <- t.test(my_data$weight, mu = 25)
# Printing the results
res

## 
##  One Sample t-test
## 
## data:  my_data$weight
## t = -9.0783, df = 9, p-value = 7.953e-06
## alternative hypothesis: true mean is not equal to 25
## 95 percent confidence interval:
##  17.8172 20.6828
## sample estimates:
## mean of x 
##     19.25

# printing the p-value
res$p.value

## [1] 7.953383e-06

# printing the mean
res$estimate

## mean of x 
##     19.25

# printing the confidence interval
res$conf.int

## [1] 17.8172 20.6828
## attr(,"conf.level")
## [1] 0.95

#One-Sample Wilcoxon Signed Rank Test in R:

set.seed(1234)
my_data <- data.frame(
  name = paste0(rep("M_", 10), 1:10),
  weight = round(rnorm(10, 20, 2), 1)
)

#Check your data

# Print the first 10 rows of the data
head(my_data, 10)

##    name weight
## 1   M_1   17.6
## 2   M_2   20.6
## 3   M_3   22.2
## 4   M_4   15.3
## 5   M_5   20.9
## 6   M_6   21.0
## 7   M_7   18.9
## 8   M_8   18.9
## 9   M_9   18.9
## 10 M_10   18.2

#Visualize your data using box plots

library(ggpubr)
ggboxplot(my_data$weight, 
          ylab = "Weight (g)", xlab = FALSE,
          ggtheme = theme_minimal())

#Compute one-sample Wilcoxon test

# One-sample wilcoxon test
res <- wilcox.test(my_data$weight, mu = 25)

## Warning in wilcox.test.default(my_data$weight, mu = 25): cannot compute exact
## p-value with ties

# Printing the results
res

## 
##  Wilcoxon signed rank test with continuity correction
## 
## data:  my_data$weight
## V = 0, p-value = 0.005793
## alternative hypothesis: true location is not equal to 25

# print only the p-value
res$p.value

## [1] 0.005793045

##Comparing the means of two independent groups #Unpaired two samples t-test (parametric)

# Data in two numeric vectors
women_weight <- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
men_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4) 
# Create a data frame
my_data <- data.frame( 
                group = rep(c("Woman", "Man"), each = 9),
                weight = c(women_weight,  men_weight)
                )

#Check your data

# Print all data
print(my_data)

##    group weight
## 1  Woman   38.9
## 2  Woman   61.2
## 3  Woman   73.3
## 4  Woman   21.8
## 5  Woman   63.4
## 6  Woman   64.6
## 7  Woman   48.4
## 8  Woman   48.8
## 9  Woman   48.5
## 10   Man   67.8
## 11   Man   60.0
## 12   Man   63.4
## 13   Man   76.0
## 14   Man   89.4
## 15   Man   73.3
## 16   Man   67.3
## 17   Man   61.3
## 18   Man   62.4

library(dplyr)

## Warning: package 'dplyr' was built under R version 4.3.2

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

group_by(my_data, group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )

## # A tibble: 2 × 4
##   group count  mean    sd
##   <chr> <int> <dbl> <dbl>
## 1 Man       9  69.0  9.38
## 2 Woman     9  52.1 15.6

#Visualize your data using box plots

# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800"),
        ylab = "Weight", xlab = "Groups")

# Shapiro-Wilk normality test for Men's weights
with(my_data, shapiro.test(weight[group == "Man"]))# p = 0.1

## 
##  Shapiro-Wilk normality test
## 
## data:  weight[group == "Man"]
## W = 0.86425, p-value = 0.1066

# Shapiro-Wilk normality test for Women's weights
with(my_data, shapiro.test(weight[group == "Woman"])) # p = 0.6

## 
##  Shapiro-Wilk normality test
## 
## data:  weight[group == "Woman"]
## W = 0.94266, p-value = 0.6101

res.ftest <- var.test(weight ~ group, data = my_data)
res.ftest

## 
##  F test to compare two variances
## 
## data:  weight by group
## F = 0.36134, num df = 8, denom df = 8, p-value = 0.1714
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.08150656 1.60191315
## sample estimates:
## ratio of variances 
##          0.3613398

# Compute t-test
res <- t.test(women_weight, men_weight, var.equal = TRUE)
res

## 
##  Two Sample t-test
## 
## data:  women_weight and men_weight
## t = -2.7842, df = 16, p-value = 0.01327
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -29.748019  -4.029759
## sample estimates:
## mean of x mean of y 
##  52.10000  68.98889

# Compute t-test
res <- t.test(weight ~ group, data = my_data, var.equal = TRUE)
res

## 
##  Two Sample t-test
## 
## data:  weight by group
## t = 2.7842, df = 16, p-value = 0.01327
## alternative hypothesis: true difference in means between group Man and group Woman is not equal to 0
## 95 percent confidence interval:
##   4.029759 29.748019
## sample estimates:
##   mean in group Man mean in group Woman 
##            68.98889            52.10000

# printing the p-value
res$p.value

## [1] 0.0132656

# printing the mean
res$estimate

##   mean in group Man mean in group Woman 
##            68.98889            52.10000

# printing the confidence interval
res$conf.int

## [1]  4.029759 29.748019
## attr(,"conf.level")
## [1] 0.95

##Unpaired two-samples Wilcoxon test (non-parametric): #Visualize your data and compute Wilcoxon test in R

# Data in two numeric vectors
women_weight <- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
men_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4) 
# Create a data frame
my_data <- data.frame( 
                group = rep(c("Woman", "Man"), each = 9),
                weight = c(women_weight,  men_weight)
                )

#Check your data

print(my_data)

##    group weight
## 1  Woman   38.9
## 2  Woman   61.2
## 3  Woman   73.3
## 4  Woman   21.8
## 5  Woman   63.4
## 6  Woman   64.6
## 7  Woman   48.4
## 8  Woman   48.8
## 9  Woman   48.5
## 10   Man   67.8
## 11   Man   60.0
## 12   Man   63.4
## 13   Man   76.0
## 14   Man   89.4
## 15   Man   73.3
## 16   Man   67.3
## 17   Man   61.3
## 18   Man   62.4

library(dplyr)
group_by(my_data, group) %>%
  summarise(
    count = n(),
    median = median(weight, na.rm = TRUE),
    IQR = IQR(weight, na.rm = TRUE)
  )

## # A tibble: 2 × 4
##   group count median   IQR
##   <chr> <int>  <dbl> <dbl>
## 1 Man       9   67.3  10.9
## 2 Woman     9   48.8  15

# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800"),
          ylab = "Weight", xlab = "Groups")

#Compute unpaired two-samples Wilcoxon test

res <- wilcox.test(women_weight, men_weight)

## Warning in wilcox.test.default(women_weight, men_weight): cannot compute exact
## p-value with ties

res

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  women_weight and men_weight
## W = 15, p-value = 0.02712
## alternative hypothesis: true location shift is not equal to 0

res <- wilcox.test(weight ~ group, data = my_data,
                   exact = FALSE)
res

## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  weight by group
## W = 66, p-value = 0.02712
## alternative hypothesis: true location shift is not equal to 0

# Print the p-value only
res$p.value

## [1] 0.02711657

##Comparing the means of paired samples # Paired samples t-test (parametric) #Visualize your data and compute paired t-test in R

# Data in two numeric vectors
# ++++++++++++++++++++++++++
# Weight of the mice before treatment
before <-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
# Weight of the mice after treatment
after <-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)
# Create a data frame
my_data <- data.frame( 
                group = rep(c("before", "after"), each = 10),
                weight = c(before,  after)
                )

#Check your data

# Print all data
print(my_data)

##     group weight
## 1  before  200.1
## 2  before  190.9
## 3  before  192.7
## 4  before  213.0
## 5  before  241.4
## 6  before  196.9
## 7  before  172.2
## 8  before  185.5
## 9  before  205.2
## 10 before  193.7
## 11  after  392.9
## 12  after  393.2
## 13  after  345.1
## 14  after  393.0
## 15  after  434.0
## 16  after  427.9
## 17  after  422.0
## 18  after  383.9
## 19  after  392.3
## 20  after  352.2

library("dplyr")
group_by(my_data, group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )

## # A tibble: 2 × 4
##   group  count  mean    sd
##   <chr>  <int> <dbl> <dbl>
## 1 after     10  394.  29.4
## 2 before    10  199.  18.5

 # Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800"),
          order = c("before", "after"),
          ylab = "Weight", xlab = "Groups")

# Subset weight data before treatment
before <- subset(my_data,  group == "before", weight,
                 drop = TRUE)
# subset weight data after treatment
after <- subset(my_data,  group == "after", weight,
                 drop = TRUE)
# Plot paired data
library(PairedData)

## Warning: package 'PairedData' was built under R version 4.3.2

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: gld

## Warning: package 'gld' was built under R version 4.3.2

## Loading required package: mvtnorm

## Warning: package 'mvtnorm' was built under R version 4.3.2

## Loading required package: lattice

## 
## Attaching package: 'PairedData'

## The following object is masked from 'package:base':
## 
##     summary

pd <- paired(before, after)
plot(pd, type = "profile") + theme_bw()

# compute the difference
d <- with(my_data, 
        weight[group == "before"] - weight[group == "after"])
# Shapiro-Wilk normality test for the differences
shapiro.test(d) # => p-value = 0.6141

## 
##  Shapiro-Wilk normality test
## 
## data:  d
## W = 0.94536, p-value = 0.6141

#Compute paired samples t-test

# Compute t-test
res <- t.test(before, after, paired = TRUE)
res

## 
##  Paired t-test
## 
## data:  before and after
## t = -20.883, df = 9, p-value = 6.2e-09
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -215.5581 -173.4219
## sample estimates:
## mean difference 
##         -194.49

# Compute t-test
res <- t.test(weight ~ group, data = my_data, paired = TRUE)
res

## 
##  Paired t-test
## 
## data:  weight by group
## t = 20.883, df = 9, p-value = 6.2e-09
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  173.4219 215.5581
## sample estimates:
## mean difference 
##          194.49

##Paired samples Wilcoxon test (non-parametric) #Visualize your data and compute paired samples Wilcoxon test in R

# Data in two numeric vectors
# ++++++++++++++++++++++++++
# Weight of the mice before treatment
before <-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
# Weight of the mice after treatment
after <-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)
# Create a data frame
my_data <- data.frame( 
                group = rep(c("before", "after"), each = 10),
                weight = c(before,  after)
                )

#Check your data

# Print all data
print(my_data)

##     group weight
## 1  before  200.1
## 2  before  190.9
## 3  before  192.7
## 4  before  213.0
## 5  before  241.4
## 6  before  196.9
## 7  before  172.2
## 8  before  185.5
## 9  before  205.2
## 10 before  193.7
## 11  after  392.9
## 12  after  393.2
## 13  after  345.1
## 14  after  393.0
## 15  after  434.0
## 16  after  427.9
## 17  after  422.0
## 18  after  383.9
## 19  after  392.3
## 20  after  352.2

library("dplyr")
group_by(my_data, group) %>%
  summarise(
    count = n(),
    median = median(weight, na.rm = TRUE),
    IQR = IQR(weight, na.rm = TRUE)
  )

## # A tibble: 2 × 4
##   group  count median   IQR
##   <chr>  <int>  <dbl> <dbl>
## 1 after     10   393.  28.8
## 2 before    10   195.  12.6

# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800"),
          order = c("before", "after"),
          ylab = "Weight", xlab = "Groups")

# Subset weight data before treatment
before <- subset(my_data,  group == "before", weight,
                 drop = TRUE)
# subset weight data after treatment
after <- subset(my_data,  group == "after", weight,
                 drop = TRUE)
# Plot paired data
library(PairedData)
pd <- paired(before, after)
plot(pd, type = "profile") + theme_bw()

#Compute paired-sample Wilcoxon test

res <- wilcox.test(before, after, paired = TRUE)
res

## 
##  Wilcoxon signed rank exact test
## 
## data:  before and after
## V = 0, p-value = 0.001953
## alternative hypothesis: true location shift is not equal to 0

# Compute t-test
res <- wilcox.test(weight ~ group, data = my_data, paired = TRUE)
res

## 
##  Wilcoxon signed rank exact test
## 
## data:  weight by group
## V = 55, p-value = 0.001953
## alternative hypothesis: true location shift is not equal to 0

# print only the p-value
res$p.value

## [1] 0.001953125

##Comparing the means of more than two groups # One-way ANOVA test

my_data <- PlantGrowth

# Show a random sample
set.seed(1234)
dplyr::sample_n(my_data, 10)

##    weight group
## 1    6.15  trt2
## 2    3.83  trt1
## 3    5.29  trt2
## 4    5.12  trt2
## 5    4.50  ctrl
## 6    4.17  trt1
## 7    5.87  trt1
## 8    5.33  ctrl
## 9    5.26  trt2
## 10   4.61  ctrl

# Show the levels
levels(my_data$group)

## [1] "ctrl" "trt1" "trt2"

my_data$group <- ordered(my_data$group,
                         levels = c("ctrl", "trt1", "trt2"))

library(dplyr)
group_by(my_data, group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE)
  )

## # A tibble: 3 × 4
##   group count  mean    sd
##   <ord> <int> <dbl> <dbl>
## 1 ctrl     10  5.03 0.583
## 2 trt1     10  4.66 0.794
## 3 trt2     10  5.53 0.443

# Box plots
# ++++++++++++++++++++
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
          order = c("ctrl", "trt1", "trt2"),
          ylab = "Weight", xlab = "Treatment")

# Mean plots
# ++++++++++++++++++++
# Plot weight by group
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "group", y = "weight", 
       add = c("mean_se", "jitter"), 
       order = c("ctrl", "trt1", "trt2"),
       ylab = "Weight", xlab = "Treatment")

# Box plot
boxplot(weight ~ group, data = my_data,
        xlab = "Treatment", ylab = "Weight",
        frame = FALSE, col = c("#00AFBB", "#E7B800", "#FC4E07"))

# plotmeans
library("gplots")

## Warning: package 'gplots' was built under R version 4.3.2

## 
## Attaching package: 'gplots'

## The following object is masked from 'package:stats':
## 
##     lowess

plotmeans(weight ~ group, data = my_data, frame = FALSE,
          xlab = "Treatment", ylab = "Weight",
          main="Mean Plot with 95% CI")

## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter

## Warning in axis(1, at = 1:length(means), labels = legends, ...): "frame" is not
## a graphical parameter

## Warning in plot.xy(xy.coords(x, y), type = type, ...): "frame" is not a
## graphical parameter

# Compute the analysis of variance
res.aov <- aov(weight ~ group, data = my_data)
# Summary of the analysis
summary(res.aov)

##             Df Sum Sq Mean Sq F value Pr(>F)  
## group        2  3.766  1.8832   4.846 0.0159 *
## Residuals   27 10.492  0.3886                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Tukey multiple pairwise-comparisons:

TukeyHSD(res.aov)

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = weight ~ group, data = my_data)
## 
## $group
##             diff        lwr       upr     p adj
## trt1-ctrl -0.371 -1.0622161 0.3202161 0.3908711
## trt2-ctrl  0.494 -0.1972161 1.1852161 0.1979960
## trt2-trt1  0.865  0.1737839 1.5562161 0.0120064

library(multcomp)

## Warning: package 'multcomp' was built under R version 4.3.2

## Loading required package: survival

## Loading required package: TH.data

## Warning: package 'TH.data' was built under R version 4.3.2

## 
## Attaching package: 'TH.data'

## The following object is masked from 'package:MASS':
## 
##     geyser

summary(glht(res.aov, linfct = mcp(group = "Tukey")))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = weight ~ group, data = my_data)
## 
## Linear Hypotheses:
##                  Estimate Std. Error t value Pr(>|t|)  
## trt1 - ctrl == 0  -0.3710     0.2788  -1.331   0.3908  
## trt2 - ctrl == 0   0.4940     0.2788   1.772   0.1979  
## trt2 - trt1 == 0   0.8650     0.2788   3.103   0.0121 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

#Pairewise t-test

pairwise.t.test(my_data$weight, my_data$group,
                 p.adjust.method = "BH")

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  my_data$weight and my_data$group 
## 
##      ctrl  trt1 
## trt1 0.194 -    
## trt2 0.132 0.013
## 
## P value adjustment method: BH

# 1. Homogeneity of variances
plot(res.aov, 1)

library(car)

## Warning: package 'car' was built under R version 4.3.2

## Loading required package: carData

## Warning: package 'carData' was built under R version 4.3.2

## 
## Attaching package: 'car'

## The following object is masked from 'package:dplyr':
## 
##     recode

leveneTest(weight ~ group, data = my_data)

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  2  1.1192 0.3412
##       27

#ANOVA test with no assumption of equal variances

oneway.test(weight ~ group, data = my_data)

## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  weight and group
## F = 5.181, num df = 2.000, denom df = 17.128, p-value = 0.01739

#Pairwise t-tests with no assumption of equal variances

pairwise.t.test(my_data$weight, my_data$group,
                 p.adjust.method = "BH", pool.sd = FALSE)

## 
##  Pairwise comparisons using t tests with non-pooled SD 
## 
## data:  my_data$weight and my_data$group 
## 
##      ctrl  trt1 
## trt1 0.250 -    
## trt2 0.072 0.028
## 
## P value adjustment method: BH

# 2. Normality
plot(res.aov, 2)

# Extract the residuals
aov_residuals <- residuals(object = res.aov )
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals )

## 
##  Shapiro-Wilk normality test
## 
## data:  aov_residuals
## W = 0.96607, p-value = 0.4379

#Non-parametric alternative to one-way ANOVA test

kruskal.test(weight ~ group, data = my_data)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  weight by group
## Kruskal-Wallis chi-squared = 7.9882, df = 2, p-value = 0.01842

##Two-Way ANOVA test:

# Store the data in the variable my_data
my_data <- ToothGrowth

# Show a random sample
set.seed(1234)
dplyr::sample_n(my_data, 10)

##     len supp dose
## 1  21.5   VC  2.0
## 2  17.3   VC  1.0
## 3  27.3   OJ  2.0
## 4  18.5   VC  2.0
## 5   8.2   OJ  0.5
## 6  26.4   OJ  1.0
## 7  25.8   OJ  1.0
## 8   5.2   VC  0.5
## 9   6.4   VC  0.5
## 10  9.4   OJ  0.5

# Check the structure
str(my_data)

## 'data.frame':    60 obs. of  3 variables:
##  $ len : num  4.2 11.5 7.3 5.8 6.4 10 11.2 11.2 5.2 7 ...
##  $ supp: Factor w/ 2 levels "OJ","VC": 2 2 2 2 2 2 2 2 2 2 ...
##  $ dose: num  0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 0.5 ...

# Convert dose as a factor and recode the levels
# as "D0.5", "D1", "D2"
my_data$dose <- factor(my_data$dose, 
                  levels = c(0.5, 1, 2),
                  labels = c("D0.5", "D1", "D2"))
head(my_data)

##    len supp dose
## 1  4.2   VC D0.5
## 2 11.5   VC D0.5
## 3  7.3   VC D0.5
## 4  5.8   VC D0.5
## 5  6.4   VC D0.5
## 6 10.0   VC D0.5

table(my_data$supp, my_data$dose)

##     
##      D0.5 D1 D2
##   OJ   10 10 10
##   VC   10 10 10

# Box plot with multiple groups
# +++++++++++++++++++++
# Plot tooth length ("len") by groups ("dose")
# Color box plot by a second group: "supp"
library("ggpubr")
ggboxplot(my_data, x = "dose", y = "len", color = "supp",
          palette = c("#00AFBB", "#E7B800"))

# Line plots with multiple groups
# +++++++++++++++++++++++
# Plot tooth length ("len") by groups ("dose")
# Color box plot by a second group: "supp"
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "dose", y = "len", color = "supp",
       add = c("mean_se", "dotplot"),
       palette = c("#00AFBB", "#E7B800"))

## Bin width defaults to 1/30 of the range of the data. Pick better value with
## `binwidth`.

# Box plot with two factor variables
boxplot(len ~ supp * dose, data=my_data, frame = FALSE, 
        col = c("#00AFBB", "#E7B800"), ylab="Tooth Length")

# Two-way interaction plot
interaction.plot(x.factor = my_data$dose, trace.factor = my_data$supp, 
                 response = my_data$len, fun = mean, 
                 type = "b", legend = TRUE, 
                 xlab = "Dose", ylab="Tooth Length",
                 pch=c(1,19), col = c("#00AFBB", "#E7B800"))

res.aov2 <- aov(len ~ supp + dose, data = my_data)
summary(res.aov2)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## supp         1  205.4   205.4   14.02 0.000429 ***
## dose         2 2426.4  1213.2   82.81  < 2e-16 ***
## Residuals   56  820.4    14.7                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Two-way ANOVA with interaction effect
# These two calls are equivalent
res.aov3 <- aov(len ~ supp * dose, data = my_data)
res.aov3 <- aov(len ~ supp + dose + supp:dose, data = my_data)
summary(res.aov3)

##             Df Sum Sq Mean Sq F value   Pr(>F)    
## supp         1  205.4   205.4  15.572 0.000231 ***
## dose         2 2426.4  1213.2  92.000  < 2e-16 ***
## supp:dose    2  108.3    54.2   4.107 0.021860 *  
## Residuals   54  712.1    13.2                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

require("dplyr")
group_by(my_data, supp, dose) %>%
  summarise(
    count = n(),
    mean = mean(len, na.rm = TRUE),
    sd = sd(len, na.rm = TRUE)
  )

## `summarise()` has grouped output by 'supp'. You can override using the
## `.groups` argument.

## # A tibble: 6 × 5
## # Groups:   supp [2]
##   supp  dose  count  mean    sd
##   <fct> <fct> <int> <dbl> <dbl>
## 1 OJ    D0.5     10 13.2   4.46
## 2 OJ    D1       10 22.7   3.91
## 3 OJ    D2       10 26.1   2.66
## 4 VC    D0.5     10  7.98  2.75
## 5 VC    D1       10 16.8   2.52
## 6 VC    D2       10 26.1   4.80

model.tables(res.aov3, type="means", se = TRUE)

## Tables of means
## Grand mean
##          
## 18.81333 
## 
##  supp 
## supp
##     OJ     VC 
## 20.663 16.963 
## 
##  dose 
## dose
##   D0.5     D1     D2 
## 10.605 19.735 26.100 
## 
##  supp:dose 
##     dose
## supp D0.5  D1    D2   
##   OJ 13.23 22.70 26.06
##   VC  7.98 16.77 26.14
## 
## Standard errors for differences of means
##           supp   dose supp:dose
##         0.9376 1.1484    1.6240
## replic.     30     20        10

TukeyHSD(res.aov3, which = "dose")

##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = len ~ supp + dose + supp:dose, data = my_data)
## 
## $dose
##           diff       lwr       upr   p adj
## D1-D0.5  9.130  6.362488 11.897512 0.0e+00
## D2-D0.5 15.495 12.727488 18.262512 0.0e+00
## D2-D1    6.365  3.597488  9.132512 2.7e-06

library(multcomp)
summary(glht(res.aov2, linfct = mcp(dose = "Tukey")))

## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: aov(formula = len ~ supp + dose, data = my_data)
## 
## Linear Hypotheses:
##                Estimate Std. Error t value Pr(>|t|)    
## D1 - D0.5 == 0    9.130      1.210   7.543   <1e-05 ***
## D2 - D0.5 == 0   15.495      1.210  12.802   <1e-05 ***
## D2 - D1 == 0      6.365      1.210   5.259   <1e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

#Pairwise t-test

pairwise.t.test(my_data$len, my_data$dose,
                p.adjust.method = "BH")

## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  my_data$len and my_data$dose 
## 
##    D0.5    D1     
## D1 1.0e-08 -      
## D2 4.4e-16 1.4e-05
## 
## P value adjustment method: BH

#Check the homogeneity of variance assumption

# 1. Homogeneity of variances
plot(res.aov3, 1)

library(car)
leveneTest(len ~ supp*dose, data = my_data)

## Levene's Test for Homogeneity of Variance (center = median)
##       Df F value Pr(>F)
## group  5  1.7086 0.1484
##       54

#Check the normality assumpttion

# 2. Normality
plot(res.aov3, 2)

# Extract the residuals
aov_residuals <- residuals(object = res.aov3)
# Run Shapiro-Wilk test
shapiro.test(x = aov_residuals )

## 
##  Shapiro-Wilk normality test
## 
## data:  aov_residuals
## W = 0.98499, p-value = 0.6694

#Compute two-way ANOVA test in R for unbalanced designs:

library(car)
my_anova <- aov(len ~ supp * dose, data = my_data)
Anova(my_anova, type = "III")

## Anova Table (Type III tests)
## 
## Response: len
##              Sum Sq Df F value    Pr(>F)    
## (Intercept) 1750.33  1 132.730 3.603e-16 ***
## supp         137.81  1  10.450  0.002092 ** 
## dose         885.26  2  33.565 3.363e-10 ***
## supp:dose    108.32  2   4.107  0.021860 *  
## Residuals    712.11 54                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

##MANOVA test: Multivariate analysis of variance:

# Store the data in the variable my_data
my_data <- iris

#Check your data:

# Show a random sample
set.seed(1234)
dplyr::sample_n(my_data, 10)

##    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1           5.2         3.5          1.5         0.2     setosa
## 2           5.7         2.6          3.5         1.0 versicolor
## 3           6.3         3.3          6.0         2.5  virginica
## 4           6.5         3.2          5.1         2.0  virginica
## 5           6.3         3.4          5.6         2.4  virginica
## 6           6.4         2.8          5.6         2.2  virginica
## 7           6.8         3.2          5.9         2.3  virginica
## 8           7.9         3.8          6.4         2.0  virginica
## 9           6.2         2.9          4.3         1.3 versicolor
## 10          7.1         3.0          5.9         2.1  virginica

#Compute MANOVA test

sepl <- iris$Sepal.Length
petl <- iris$Petal.Length
# MANOVA test
res.man <- manova(cbind(Sepal.Length, Petal.Length) ~ Species, data = iris)
summary(res.man)

##            Df Pillai approx F num Df den Df    Pr(>F)    
## Species     2 0.9885   71.829      4    294 < 2.2e-16 ***
## Residuals 147                                            
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# Look to see which differ
summary.aov(res.man)

##  Response Sepal.Length :
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## Species       2 63.212  31.606  119.26 < 2.2e-16 ***
## Residuals   147 38.956   0.265                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##  Response Petal.Length :
##              Df Sum Sq Mean Sq F value    Pr(>F)    
## Species       2 437.10 218.551  1180.2 < 2.2e-16 ***
## Residuals   147  27.22   0.185                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

##Kruskal-Wallis test:

my_data <- PlantGrowth

#Check your data

# print the head of the file
head(my_data)

##   weight group
## 1   4.17  ctrl
## 2   5.58  ctrl
## 3   5.18  ctrl
## 4   6.11  ctrl
## 5   4.50  ctrl
## 6   4.61  ctrl

# Show the group levels
levels(my_data$group)

## [1] "ctrl" "trt1" "trt2"

my_data$group <- ordered(my_data$group,
                         levels = c("ctrl", "trt1", "trt2"))

library(dplyr)
group_by(my_data, group) %>%
  summarise(
    count = n(),
    mean = mean(weight, na.rm = TRUE),
    sd = sd(weight, na.rm = TRUE),
    median = median(weight, na.rm = TRUE),
    IQR = IQR(weight, na.rm = TRUE)
  )

## # A tibble: 3 × 6
##   group count  mean    sd median   IQR
##   <ord> <int> <dbl> <dbl>  <dbl> <dbl>
## 1 ctrl     10  5.03 0.583   5.15 0.743
## 2 trt1     10  4.66 0.794   4.55 0.662
## 3 trt2     10  5.53 0.443   5.44 0.467

# Box plots
# ++++++++++++++++++++
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight", 
          color = "group", palette = c("#00AFBB", "#E7B800", "#FC4E07"),
          order = c("ctrl", "trt1", "trt2"),
          ylab = "Weight", xlab = "Treatment")

# Mean plots
# ++++++++++++++++++++
# Plot weight by group
# Add error bars: mean_se
# (other values include: mean_sd, mean_ci, median_iqr, ....)
library("ggpubr")
ggline(my_data, x = "group", y = "weight", 
       add = c("mean_se", "jitter"), 
       order = c("ctrl", "trt1", "trt2"),
       ylab = "Weight", xlab = "Treatment")

#Compute Kruskal-Wallis test

kruskal.test(weight ~ group, data = my_data)

## 
##  Kruskal-Wallis rank sum test
## 
## data:  weight by group
## Kruskal-Wallis chi-squared = 7.9882, df = 2, p-value = 0.01842

pairwise.wilcox.test(PlantGrowth$weight, PlantGrowth$group,
                 p.adjust.method = "BH")

## Warning in wilcox.test.default(xi, xj, paired = paired, ...): cannot compute
## exact p-value with ties

## 
##  Pairwise comparisons using Wilcoxon rank sum test with continuity correction 
## 
## data:  PlantGrowth$weight and PlantGrowth$group 
## 
##      ctrl  trt1 
## trt1 0.199 -    
## trt2 0.095 0.027
## 
## P value adjustment method: BH

MUHAMMAD BILAL DS-5TH(Comparing Means in R Tools)

Muhammad Bilal(5121107)

2023-12-26