# Paired t-test using weight_loss.csv data
# read the dataset
# weight = read.csv('weight_loss.csv')
# In this way we can read .csv files
# Let us read the original .xlsx file
library(readxl)
library(car)
## Loading required package: carData
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ dplyr::recode() masks car::recode()
## ✖ purrr::some() masks car::some()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(sjstats)
library(agricolae)
# Load the before/after weights from cells C31:E61 of the 'z_t_test' sheet,
# then inspect the result: first and last rows, dimensions, structure,
# column names and per-column summary statistics.
weight <- read_excel(
  path = "datasets.xlsx",
  sheet = "z_t_test",
  range = "C31:E61"
)
head(weight, n = 6L)
## # A tibble: 6 × 3
## sr. before after
## <dbl> <dbl> <dbl>
## 1 1 76.2 74.9
## 2 2 92.2 91.0
## 3 3 85.9 85.0
## 4 4 76.5 74.2
## 5 5 53.6 51.5
## 6 6 75.9 73.3
tail(weight, n = 6L)
## # A tibble: 6 × 3
## sr. before after
## <dbl> <dbl> <dbl>
## 1 25 58.1 59.2
## 2 26 70.4 70.4
## 3 27 77.1 76.7
## 4 28 68.2 67.7
## 5 29 68.9 69.1
## 6 30 53.9 53.0
dim(x = weight)
## [1] 30 3
str(object = weight)
## tibble [30 × 3] (S3: tbl_df/tbl/data.frame)
## $ sr. : num [1:30] 1 2 3 4 5 6 7 8 9 10 ...
## $ before: num [1:30] 76.2 92.2 85.9 76.5 53.6 ...
## $ after : num [1:30] 74.9 91 85 74.2 51.5 ...
names(x = weight)
## [1] "sr." "before" "after"
summary(object = weight)
## sr. before after
## Min. : 1.00 Min. :53.64 Min. :51.54
## 1st Qu.: 8.25 1st Qu.:64.81 1st Qu.:65.42
## Median :15.50 Median :74.70 Median :73.42
## Mean :15.50 Mean :72.32 Mean :71.58
## 3rd Qu.:22.75 3rd Qu.:77.61 3rd Qu.:76.67
## Max. :30.00 Max. :92.15 Max. :90.98
# Paired t-test
# 2 variables
# Pull the two paired measurements out of the data as plain numeric vectors.
# They keep the names `before` and `after` so the "data:" label printed by
# the test stays the same.
before <- weight[["before"]]
after <- weight[["after"]]
# Two-sided paired t-test of the before/after measurements.
t.test(x = before, y = after, paired = TRUE)
##
## Paired t-test
##
## data: before and after
## t = 3.7797, df = 29, p-value = 0.000725
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 0.3393565 1.1396511
## sample estimates:
## mean difference
## 0.7395038
# p < 0.05: the data provide sufficient evidence against H0
# H0: mean weight before = mean weight after (true mean difference = 0)
# H0 rejected
# Conclusion: On average, weight decreased by 0.74 kg after the diet (95% CI: 0.34 to 1.14 kg); this decrease is statistically significant at the 0.1% level of significance (p-value = 0.0007).
# Assumption
# Normality: boxplot, qqplot of differences
# Per-subject change in weight (before minus after); the normality check
# below is carried out on these paired differences.
difference <- with(weight, before - after)
boxplot(x = difference)

qqPlot(difference)

## [1] 15 16
# The paired differences look approximately normally distributed, so the normality assumption is met
# Equal variance is usually not checked, because the same individuals provide both readings.