##Formula of one-sample t-test The t-statistic can be calculated as follow: ### where,
# t.test(x, mu = 0, alternative = "two.sided")
# If .txt tab file, use this
# my_data <- read.delim(file.choose())
# Or, if .csv file, use this
# my_data <- read.csv(file.choose())
set.seed(1234)
my_data <- data.frame(
name = paste0(rep("M_", 10), 1:10),
weight = round(rnorm(10, 20, 2), 1)
)
# Print the first 10 rows of the data
head(my_data, 10)
## name weight
## 1 M_1 17.6
## 2 M_2 20.6
## 3 M_3 22.2
## 4 M_4 15.3
## 5 M_5 20.9
## 6 M_6 21.0
## 7 M_7 18.9
## 8 M_8 18.9
## 9 M_9 18.9
## 10 M_10 18.2
# Statistical summaries of weight
summary(my_data$weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.30 18.38 18.90 19.25 20.82 22.20
library(ggpubr)
## Loading required package: ggplot2
ggboxplot(my_data$weight,
ylab = "Weight (g)", xlab = FALSE,
ggtheme = theme_minimal())
shapiro.test(my_data$weight) # => p-value = 0.6993
##
## Shapiro-Wilk normality test
##
## data: my_data$weight
## W = 0.9526, p-value = 0.6993
library("ggpubr")
ggqqplot(my_data$weight, ylab = "Men's weight",
ggtheme = theme_minimal())
# One-sample t-test
res <- t.test(my_data$weight, mu = 25)
# Printing the results
res
##
## One Sample t-test
##
## data: my_data$weight
## t = -9.0783, df = 9, p-value = 7.953e-06
## alternative hypothesis: true mean is not equal to 25
## 95 percent confidence interval:
## 17.8172 20.6828
## sample estimates:
## mean of x
## 19.25
# printing the p-value
res$p.value
## [1] 7.953383e-06
# printing the mean
res$estimate
## mean of x
## 19.25
# printing the confidence interval
res$conf.int
## [1] 17.8172 20.6828
## attr(,"conf.level")
## [1] 0.95
# wilcox.test(x, mu = 0, alternative = "two.sided")
# If .txt tab file, use this
# my_data <- read.delim(file.choose())
# Or, if .csv file, use this
# my_data <- read.csv(file.choose())
set.seed(1234)
my_data <- data.frame(
name = paste0(rep("M_", 10), 1:10),
weight = round(rnorm(10, 20, 2), 1)
)
# Print the first 10 rows of the data
head(my_data, 10)
## name weight
## 1 M_1 17.6
## 2 M_2 20.6
## 3 M_3 22.2
## 4 M_4 15.3
## 5 M_5 20.9
## 6 M_6 21.0
## 7 M_7 18.9
## 8 M_8 18.9
## 9 M_9 18.9
## 10 M_10 18.2
# Statistical summaries of weight
summary(my_data$weight)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.30 18.38 18.90 19.25 20.82 22.20
library(ggpubr)
ggboxplot(my_data$weight,
ylab = "Weight (g)", xlab = FALSE,
ggtheme = theme_minimal())
# One-sample wilcoxon test
res <- wilcox.test(my_data$weight, mu = 25)
## Warning in wilcox.test.default(my_data$weight, mu = 25): cannot compute exact
## p-value with ties
# Printing the results
res
##
## Wilcoxon signed rank test with continuity correction
##
## data: my_data$weight
## V = 0, p-value = 0.005793
## alternative hypothesis: true location is not equal to 25
# print only the p-value
res$p.value
## [1] 0.005793045
#t.test(x, y, alternative = "two.sided", var.equal = FALSE)
# If .txt tab file, use this
# my_data <- read.delim(file.choose())
# Or, if .csv file, use this
# my_data <- read.csv(file.choose())
# Data in two numeric vectors
women_weight <- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
men_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4)
# Create a data frame
my_data <- data.frame(
group = rep(c("Woman", "Man"), each = 9),
weight = c(women_weight, men_weight)
)
# Check your data
# Print all data
print(my_data)
## group weight
## 1 Woman 38.9
## 2 Woman 61.2
## 3 Woman 73.3
## 4 Woman 21.8
## 5 Woman 63.4
## 6 Woman 64.6
## 7 Woman 48.4
## 8 Woman 48.8
## 9 Woman 48.5
## 10 Man 67.8
## 11 Man 60.0
## 12 Man 63.4
## 13 Man 76.0
## 14 Man 89.4
## 15 Man 73.3
## 16 Man 67.3
## 17 Man 61.3
## 18 Man 62.4
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
group_by(my_data, group) %>%
summarise(
count = n(),
mean = mean(weight, na.rm = TRUE),
sd = sd(weight, na.rm = TRUE)
)
## # A tibble: 2 × 4
## group count mean sd
## <chr> <int> <dbl> <dbl>
## 1 Man 9 69.0 9.38
## 2 Woman 9 52.1 15.6
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800"),
ylab = "Weight", xlab = "Groups")
## Preleminary test to check independent t-test assumptions
# Shapiro-Wilk normality test for Men's weights
with(my_data, shapiro.test(weight[group == "Man"]))# p = 0.1
##
## Shapiro-Wilk normality test
##
## data: weight[group == "Man"]
## W = 0.86425, p-value = 0.1066
# Shapiro-Wilk normality test for Women's weights
with(my_data, shapiro.test(weight[group == "Woman"])) # p = 0.6
##
## Shapiro-Wilk normality test
##
## data: weight[group == "Woman"]
## W = 0.94266, p-value = 0.6101
res.ftest <- var.test(weight ~ group, data = my_data)
res.ftest
##
## F test to compare two variances
##
## data: weight by group
## F = 0.36134, num df = 8, denom df = 8, p-value = 0.1714
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.08150656 1.60191315
## sample estimates:
## ratio of variances
## 0.3613398
# Compute t-test
res <- t.test(women_weight, men_weight, var.equal = TRUE)
res
##
## Two Sample t-test
##
## data: women_weight and men_weight
## t = -2.7842, df = 16, p-value = 0.01327
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -29.748019 -4.029759
## sample estimates:
## mean of x mean of y
## 52.10000 68.98889
# Compute t-test
res <- t.test(weight ~ group, data = my_data, var.equal = TRUE)
res
##
## Two Sample t-test
##
## data: weight by group
## t = 2.7842, df = 16, p-value = 0.01327
## alternative hypothesis: true difference in means between group Man and group Woman is not equal to 0
## 95 percent confidence interval:
## 4.029759 29.748019
## sample estimates:
## mean in group Man mean in group Woman
## 68.98889 52.10000
# printing the p-value
res$p.value
## [1] 0.0132656
# printing the mean
res$estimate
## mean in group Man mean in group Woman
## 68.98889 52.10000
# printing the confidence interval
res$conf.int
## [1] 4.029759 29.748019
## attr(,"conf.level")
## [1] 0.95
#Tools
# wilcox.test(x, y, alternative = "two.sided")
# If .txt tab file, use this
# my_data <- read.delim(file.choose())
# Or, if .csv file, use this
# my_data <- read.csv(file.choose())
# Data in two numeric vectors
women_weight <- c(38.9, 61.2, 73.3, 21.8, 63.4, 64.6, 48.4, 48.8, 48.5)
men_weight <- c(67.8, 60, 63.4, 76, 89.4, 73.3, 67.3, 61.3, 62.4)
# Create a data frame
my_data <- data.frame(
group = rep(c("Woman", "Man"), each = 9),
weight = c(women_weight, men_weight)
)
print(my_data)
## group weight
## 1 Woman 38.9
## 2 Woman 61.2
## 3 Woman 73.3
## 4 Woman 21.8
## 5 Woman 63.4
## 6 Woman 64.6
## 7 Woman 48.4
## 8 Woman 48.8
## 9 Woman 48.5
## 10 Man 67.8
## 11 Man 60.0
## 12 Man 63.4
## 13 Man 76.0
## 14 Man 89.4
## 15 Man 73.3
## 16 Man 67.3
## 17 Man 61.3
## 18 Man 62.4
library(dplyr)
group_by(my_data, group) %>%
summarise(
count = n(),
median = median(weight, na.rm = TRUE),
IQR = IQR(weight, na.rm = TRUE)
)
## # A tibble: 2 × 4
## group count median IQR
## <chr> <int> <dbl> <dbl>
## 1 Man 9 67.3 10.9
## 2 Woman 9 48.8 15
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800"),
ylab = "Weight", xlab = "Groups")
res <- wilcox.test(women_weight, men_weight)
## Warning in wilcox.test.default(women_weight, men_weight): cannot compute exact
## p-value with ties
res
##
## Wilcoxon rank sum test with continuity correction
##
## data: women_weight and men_weight
## W = 15, p-value = 0.02712
## alternative hypothesis: true location shift is not equal to 0
res <- wilcox.test(weight ~ group, data = my_data,
exact = FALSE)
res
##
## Wilcoxon rank sum test with continuity correction
##
## data: weight by group
## W = 66, p-value = 0.02712
## alternative hypothesis: true location shift is not equal to 0
# Print the p-value only
res$p.value
## [1] 0.02711657
##The corresponding alternative hypotheses (Ha) are as follow:
# t.test(x, y, paired = TRUE, alternative = "two.sided")
# If .txt tab file, use this
# my_data <- read.delim(file.choose())
# Or, if .csv file, use this
# my_data <- read.csv(file.choose())
# Data in two numeric vectors
# ++++++++++++++++++++++++++
# Weight of the mice before treatment
before <-c(200.1, 190.9, 192.7, 213, 241.4, 196.9, 172.2, 185.5, 205.2, 193.7)
# Weight of the mice after treatment
after <-c(392.9, 393.2, 345.1, 393, 434, 427.9, 422, 383.9, 392.3, 352.2)
# Create a data frame
my_data <- data.frame(
group = rep(c("before", "after"), each = 10),
weight = c(before, after)
)
# Print all data
print(my_data)
## group weight
## 1 before 200.1
## 2 before 190.9
## 3 before 192.7
## 4 before 213.0
## 5 before 241.4
## 6 before 196.9
## 7 before 172.2
## 8 before 185.5
## 9 before 205.2
## 10 before 193.7
## 11 after 392.9
## 12 after 393.2
## 13 after 345.1
## 14 after 393.0
## 15 after 434.0
## 16 after 427.9
## 17 after 422.0
## 18 after 383.9
## 19 after 392.3
## 20 after 352.2
library("dplyr")
group_by(my_data, group) %>%
summarise(
count = n(),
mean = mean(weight, na.rm = TRUE),
sd = sd(weight, na.rm = TRUE)
)
## # A tibble: 2 × 4
## group count mean sd
## <chr> <int> <dbl> <dbl>
## 1 after 10 394. 29.4
## 2 before 10 199. 18.5
# Visualize your data:
# Plot weight by group and color by group
library("ggpubr")
ggboxplot(my_data, x = "group", y = "weight",
color = "group", palette = c("#00AFBB", "#E7B800"),
order = c("before", "after"),
ylab = "Weight", xlab = "Groups")
# Subset weight data before treatment
before <- subset(my_data, group == "before", weight,
drop = TRUE)
# subset weight data after treatment
after <- subset(my_data, group == "after", weight,
drop = TRUE)
library(PairedData)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: gld
## Loading required package: mvtnorm
## Loading required package: lattice
##
## Attaching package: 'PairedData'
## The following object is masked from 'package:base':
##
## summary
pd <- paired(before, after)
plot(pd, type = "profile") + theme_bw()
# compute the difference
d <- with(my_data,
weight[group == "before"] - weight[group == "after"])
# Shapiro-Wilk normality test for the differences
shapiro.test(d) # => p-value = 0.6141
##
## Shapiro-Wilk normality test
##
## data: d
## W = 0.94536, p-value = 0.6141
# Compute t-test
res <- t.test(before, after, paired = TRUE)
res
##
## Paired t-test
##
## data: before and after
## t = -20.883, df = 9, p-value = 6.2e-09
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -215.5581 -173.4219
## sample estimates:
## mean difference
## -194.49
# Compute t-test
res <- t.test(weight ~ group, data = my_data, paired = TRUE)
res
##
## Paired t-test
##
## data: weight by group
## t = 20.883, df = 9, p-value = 6.2e-09
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 173.4219 215.5581
## sample estimates:
## mean difference
## 194.49
# printing the p-value
res$p.value
## [1] 6.200298e-09
# printing the mean
res$estimate
## mean difference
## 194.49
# printing the confidence interval
res$conf.int
## [1] 173.4219 215.5581
## attr(,"conf.level")
## [1] 0.95