# Confidence level = 95%, alpha = 5%
# Critical value: qt(0.05/2, df = 30 - 1, lower.tail = TRUE)
# Acceptance region: (-2.045, 2.045)
## Manual Calculation
set.seed(123)
sample_data <- rnorm(30, mean= 50, sd = 10)
mean(sample_data)
## [1] 49.52896
sd(sample_data)
## [1] 9.810307
Hypothesis:
\(H_0: \mu = \mu_0\)
\(H_1: \mu \neq \mu_0\)
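For reference, the manual calculation below uses the one-sample t statistic \(t = \frac{\bar{x} - \mu_0}{s/\sqrt{n}}\) with \(n - 1\) degrees of freedom, where \(s\) is the sample standard deviation and \(n\) the sample size.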
#mu_0 = 55
SE <- sd(sample_data)/sqrt(length(sample_data))
t_cal <- (mean(sample_data) - 55)/SE
print(t_cal)
## [1] -3.054553
t_crit <- qt(0.05/2, df = length(sample_data) - 1, lower.tail = TRUE)
print(t_crit)
## [1] -2.04523
abs(t_cal) >= abs(t_crit) # decision: reject the null hypothesis
## [1] TRUE
2 * pt(abs(t_cal), df = 29, lower.tail = FALSE) # two-tailed p-value; matches the t.test() p-value of 0.004797 reported below
Calculating the confidence interval:
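The interval below is the t-based confidence interval \(\bar{x} \pm t_{\alpha/2,\,n-1} \cdot SE\), using the absolute value of the critical quantile computed above.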
mean(sample_data) + c(-1 ,1) * abs(t_crit) * SE
## [1] 45.86573 53.19219
mean(sample_data) - abs(t_crit) * SE
## [1] 45.86573
mean(sample_data) + abs(t_crit) * SE
## [1] 53.19219
t.test(sample_data, mu = 55)
##
## One Sample t-test
##
## data: sample_data
## t = -3.0546, df = 29, p-value = 0.004797
## alternative hypothesis: true mean is not equal to 55
## 95 percent confidence interval:
## 45.86573 53.19219
## sample estimates:
## mean of x
## 49.52896
t.test(sample_data, mu = 55, conf.level = 0.99)
##
## One Sample t-test
##
## data: sample_data
## t = -3.0546, df = 29, p-value = 0.004797
## alternative hypothesis: true mean is not equal to 55
## 99 percent confidence interval:
## 44.59198 54.46595
## sample estimates:
## mean of x
## 49.52896
t.test(sample_data, mu = 55, conf.level = 0.90)
##
## One Sample t-test
##
## data: sample_data
## t = -3.0546, df = 29, p-value = 0.004797
## alternative hypothesis: true mean is not equal to 55
## 90 percent confidence interval:
## 46.48564 52.57228
## sample estimates:
## mean of x
## 49.52896
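As a side note (a minimal sketch, not part of the original output), t.test() returns an "htest" object, so the reported values can be extracted programmatically rather than read off the printout; output not shown:
res <- t.test(sample_data, mu = 55, conf.level = 0.95)
res$statistic # t value
res$p.value # two-sided p-value
res$conf.int # 95% confidence interval
res$estimate # sample mean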
# Using the t.test() Function
Generate sample data:
set.seed(123)
sample_data <- rnorm(30, mean= 50, sd = 10)
Test for normality (Shapiro-Wilk):
shapiro.test(sample_data)
##
## Shapiro-Wilk normality test
##
## data: sample_data
## W = 0.97894, p-value = 0.7966
\(H_0:\) The data follow a normal distribution.
\(H_1:\) The data do not follow a normal distribution.
Since the p-value (0.7966) is greater than the significance level (\(\alpha\) = 0.05), we fail to reject the null hypothesis: the data are consistent with a normal distribution.
One-sample t-test Hypothesis:
\(H_0: \mu = \mu_0\)
\(H_1: \mu \neq \mu_0\)
Using the function, perform the two-tailed test:
t.test(sample_data, mu= 50, conf.level=0.95)
##
## One Sample t-test
##
## data: sample_data
## t = -0.26299, df = 29, p-value = 0.7944
## alternative hypothesis: true mean is not equal to 50
## 95 percent confidence interval:
## 45.86573 53.19219
## sample estimates:
## mean of x
## 49.52896
Hypothesis:
\(H_0: \mu \leq 40\)
\(H_1: \mu > 40\)
t.test(sample_data, mu= 40, alternative = "greater")
##
## One Sample t-test
##
## data: sample_data
## t = 5.3201, df = 29, p-value = 5.21e-06
## alternative hypothesis: true mean is greater than 40
## 95 percent confidence interval:
## 46.48564 Inf
## sample estimates:
## mean of x
## 49.52896
Hypothesis:
\(H_0: \mu \geq 58\)
\(H_1: \mu < 58\)
t.test(sample_data, mu= 58, alternative = "less")
##
## One Sample t-test
##
## data: sample_data
## t = -4.7295, df = 29, p-value = 2.689e-05
## alternative hypothesis: true mean is less than 58
## 95 percent confidence interval:
## -Inf 52.57228
## sample estimates:
## mean of x
## 49.52896
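As a quick cross-check (an illustrative sketch, not part of the original analysis), the one-sided p-values reported above can be reproduced directly from the t distribution with pt():
pt(5.3201, df = 29, lower.tail = FALSE) # upper tail for H1: mu > 40, approx. 5.2e-06
pt(-4.7295, df = 29, lower.tail = TRUE) # lower tail for H1: mu < 58, approx. 2.7e-05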
# Visualization
library(ggplot2)
ggplot(data.frame(value = sample_data), aes(x = value)) +
  geom_histogram(aes(y = after_stat(density)), bins = 10, fill = "blue", alpha = 0.5) +
  geom_density(color = "red", linewidth = 1) +
  labs(title = "Sample Data Distribution", x = "Value", y = "Density") +
  theme_minimal()
# Two-sample t-test
Use the built-in dataset mtcars (comparing mpg for automatic vs. manual cars):
data(mtcars)
Split into two groups based on transmission type:
auto_mpg <- mtcars$mpg[mtcars$am == 0] # Automatic
manual_mpg <- mtcars$mpg[mtcars$am == 1] # Manual
\(H_0\): Automatic cars and manual cars have equal average mpg.
\(H_1\): Automatic cars and manual cars have unequal average mpg.
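For reference, the pooled two-sample statistic used further below (with var.equal = TRUE) is \(t = \frac{\bar{x}_1 - \bar{x}_2}{s_p\sqrt{1/n_1 + 1/n_2}}\), where \(s_p\) is the pooled standard deviation, with \(n_1 + n_2 - 2\) degrees of freedom.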
mean(auto_mpg)
## [1] 17.14737
mean(manual_mpg)
## [1] 24.39231
var(auto_mpg)
## [1] 14.6993
var(manual_mpg)
## [1] 38.02577
Normality test for both groups:
shapiro.test(auto_mpg)
##
## Shapiro-Wilk normality test
##
## data: auto_mpg
## W = 0.97677, p-value = 0.8987
shapiro.test(manual_mpg)
##
## Shapiro-Wilk normality test
##
## data: manual_mpg
## W = 0.9458, p-value = 0.5363
Check variance homogeneity (Levene’s test):
library(car)
## Loading required package: carData
leveneTest(mpg ~ factor(am), data = mtcars, center = "mean")
## Levene's Test for Homogeneity of Variance (center = "mean")
## Df F value Pr(>F)
## group 1 5.921 0.02113 *
## 30
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
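As an alternative to Levene's test (a sketch, not part of the original analysis), base R's var.test() performs an F test for equality of the two variances, which is reasonable here since both groups passed the normality check; output not shown:
var.test(auto_mpg, manual_mpg) # F test comparing the two group variances (assumes normality)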
Perform two-sample t-test:
t.test(auto_mpg, manual_mpg, var.equal = TRUE)
##
## Two Sample t-test
##
## data: auto_mpg and manual_mpg
## t = -4.1061, df = 30, p-value = 0.000285
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -10.84837 -3.64151
## sample estimates:
## mean of x mean of y
## 17.14737 24.39231
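Note that Levene's test above rejects equal variances at \(\alpha\) = 0.05 (p ≈ 0.021), so the Welch t-test, which is R's default and does not assume equal variances, is arguably the safer choice here; a minimal sketch, output not shown:
t.test(auto_mpg, manual_mpg, var.equal = FALSE) # Welch two-sample t-test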
Visualize the data:
ggplot(mtcars, aes(x = factor(am), y = mpg, fill = factor(am))) +
  geom_boxplot(alpha = 0.6) +
  geom_jitter(width = 0.2, alpha = 0.7) +
  labs(title = "MPG Comparison: Automatic vs Manual",
       x = "Transmission (0 = Auto, 1 = Manual)",
       y = "Miles Per Gallon") +
  scale_fill_manual(values = c("blue", "red"), labels = c("Automatic", "Manual")) +
  theme_minimal()
# Paired-sample t-test
Generate 30 observations:
set.seed(123)
before <- round(rnorm(30, mean = 300 , sd = 10), 0)
summary(before)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 280.0 293.2 299.5 299.6 305.0 318.0
after <- before + round(rnorm(30, mean = 5, sd = 5), 0) # simulating an increase
summary(after)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 283.0 298.2 305.0 305.5 312.2 325.0
mean(after)- mean(before)
## [1] 5.933333
\(H_0\): The true average GRE score of the students is the same before and after the course.
\(H_1\): The true average GRE score of the students is not the same before and after the course.
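For reference, the paired test below is based on the mean difference: \(t = \frac{\bar{d}}{s_d/\sqrt{n}}\), where \(d_i = \text{after}_i - \text{before}_i\), \(s_d\) is the standard deviation of the differences, and the degrees of freedom are \(n - 1\).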
df <- data.frame(
ID = 1:30,
Before = before,
After = after
)
print(df)
## ID Before After
## 1 1 294 301
## 2 2 298 302
## 3 3 316 325
## 4 4 301 310
## 5 5 301 310
## 6 6 317 325
## 7 7 305 313
## 8 8 287 292
## 9 9 293 296
## 10 10 296 299
## 11 11 312 314
## 12 12 304 308
## 13 13 304 303
## 14 14 301 317
## 15 15 294 305
## 16 16 318 317
## 17 17 305 308
## 18 18 280 283
## 19 19 307 316
## 20 20 295 300
## 21 21 289 295
## 22 22 298 303
## 23 23 290 295
## 24 24 293 305
## 25 25 294 298
## 26 26 283 296
## 27 27 308 305
## 28 28 302 310
## 29 29 289 295
## 30 30 313 319
Perform the paired t-test:
t.test(x = after, y = before, paired = TRUE, alternative = "two.sided")
##
## Paired t-test
##
## data: after and before
## t = 7.7811, df = 29, p-value = 1.398e-08
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## 4.373779 7.492888
## sample estimates:
## mean difference
## 5.933333
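As a cross-check (a sketch, not part of the original analysis), a paired t-test is equivalent to a one-sample t-test on the differences, so the same t, df, p-value, and confidence interval are reproduced by the following; output not shown:
t.test(after - before, mu = 0) # one-sample t-test on the paired differences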
Visualization
library(reshape2)
library(ggpubr)
df <- data.frame(ID = 1:30,
Before = before,
After = after)
df_long <- melt(df, id.vars = "ID")
ggplot(df_long, aes(x = variable, y = value, group = ID)) +
geom_point(aes(color = variable), size = 3) +
geom_line(alpha = 0.5) +
labs(title = "Paired Samples (Before vs After)", y = "Values", x = "Condition") +
theme_minimal()
ggpaired(df_long,
         x = "variable",
         y = "value",
         color = "variable",
         line.color = "gray",
         line.size = 0.4,
         palette = "jco") +
  stat_compare_means(paired = TRUE, method = "t.test")