#confidence level = 95% # alpha = 5% qt( 0.05/2 , df =30- 1 ,lower.tail =TRUE) #critical value

#acceptance region: (-2.045 to 2.045)

##Manual Calculation

# Draw a reproducible sample of 30 values from a normal distribution
# with mean 50 and standard deviation 10.
set.seed(seed = 123)
sample_data <- rnorm(n = 30, mean = 50, sd = 10)

# Sample mean and sample standard deviation.
mean(x = sample_data)
## [1] 49.52896
sd(x = sample_data)
## [1] 9.810307

Hypothesis:
\(H_0: \mu = \mu_0\)
\(H_1: \mu \neq \mu_0\)

# Manual two-sided one-sample t-test of H0: mu = mu_0 against H1: mu != mu_0.
mu_0 <- 55                            # hypothesised population mean
alpha <- 0.05                         # significance level
deg_free <- length(sample_data) - 1   # degrees of freedom, n - 1

# Standard error of the sample mean.
SE <- sd(sample_data) / sqrt(length(sample_data))

# Test statistic: distance of the sample mean from mu_0 in standard errors.
t_cal <- (mean(sample_data) - mu_0) / SE
print(t_cal)
## [1] -3.054553

# Lower-tail critical value; by symmetry the upper-tail one is -t_crit.
t_crit <- qt(alpha / 2, df = deg_free, lower.tail = TRUE)
print(t_crit)
## [1] -2.04523

# Two-sided decision: reject H0 when the statistic is at least as extreme as
# the critical value in EITHER tail, hence the comparison of absolute values.
abs(t_cal) >= abs(t_crit) # decision: reject null
## [1] TRUE

# Two-sided p-value: twice the tail area beyond |t_cal|.
# (Adding the lower-tail areas of t_cal and -t_cal always gives exactly 1.)
# Rounded to 4 significant digits, the precision t.test() prints.
signif(2 * pt(-abs(t_cal), df = deg_free), 4)
## [1] 0.004797

calculating confidence interval

# Two-sided confidence interval: sample mean +/- |t_crit| * SE.
half_width <- abs(t_crit) * SE

# Both limits at once.
mean(sample_data) + c(-1, 1) * half_width
## [1] 45.86573 53.19219
# Lower limit.
mean(sample_data) - half_width
## [1] 45.86573
# Upper limit.
mean(sample_data) + half_width
## [1] 53.19219
# Built-in two-sided one-sample t-test of H0: mu = 55 (default 95% interval).
t.test(x = sample_data, mu = 55, alternative = "two.sided")
## 
##  One Sample t-test
## 
## data:  sample_data
## t = -3.0546, df = 29, p-value = 0.004797
## alternative hypothesis: true mean is not equal to 55
## 95 percent confidence interval:
##  45.86573 53.19219
## sample estimates:
## mean of x 
##  49.52896
# Same test with a 99% confidence interval; only the interval width changes.
t.test(x = sample_data, mu = 55, alternative = "two.sided", conf.level = 0.99)
## 
##  One Sample t-test
## 
## data:  sample_data
## t = -3.0546, df = 29, p-value = 0.004797
## alternative hypothesis: true mean is not equal to 55
## 99 percent confidence interval:
##  44.59198 54.46595
## sample estimates:
## mean of x 
##  49.52896
# Same test with a 90% confidence interval.
t.test(x = sample_data, mu = 55, alternative = "two.sided", conf.level = 0.90)
## 
##  One Sample t-test
## 
## data:  sample_data
## t = -3.0546, df = 29, p-value = 0.004797
## alternative hypothesis: true mean is not equal to 55
## 90 percent confidence interval:
##  46.48564 52.57228
## sample estimates:
## mean of x 
##  49.52896

#Using Function

Generate sample data

# Recreate the same reproducible sample: 30 draws from N(mean = 50, sd = 10).
set.seed(seed = 123)
sample_data <- rnorm(n = 30, mean = 50, sd = 10)

Testing normality (Shapiro-Wilk test)

# Shapiro-Wilk test of the null hypothesis that the sample is normally distributed.
shapiro.test(x = sample_data)
## 
##  Shapiro-Wilk normality test
## 
## data:  sample_data
## W = 0.97894, p-value = 0.7966

\(H_0:\) Data follows normal distribution.
\(H_1:\) Data does not follow normal distribution.

Since the p-value (0.7966) is greater than the level of significance (\(\alpha\) = 0.05), we fail to reject the null hypothesis: there is no evidence that the data depart from a normal distribution.

One-sample t-test Hypothesis:
\(H_0: \mu = \mu_0\)
\(H_1: \mu \neq \mu_0\)

Using the function, perform the two-tailed test:

# Two-sided one-sample t-test of H0: mu = 50 at the 95% confidence level.
t.test(x = sample_data, mu = 50, alternative = "two.sided", conf.level = 0.95)
## 
##  One Sample t-test
## 
## data:  sample_data
## t = -0.26299, df = 29, p-value = 0.7944
## alternative hypothesis: true mean is not equal to 50
## 95 percent confidence interval:
##  45.86573 53.19219
## sample estimates:
## mean of x 
##  49.52896

Hypothesis:
\(H_0: \mu \leq 40\)
\(H_1: \mu > 40\)

# Right-tailed one-sample t-test: the alternative is that the true mean exceeds 40.
t.test(x = sample_data, alternative = "greater", mu = 40)
## 
##  One Sample t-test
## 
## data:  sample_data
## t = 5.3201, df = 29, p-value = 5.21e-06
## alternative hypothesis: true mean is greater than 40
## 95 percent confidence interval:
##  46.48564      Inf
## sample estimates:
## mean of x 
##  49.52896

Hypothesis:
\(H_0: \mu \geq 58\)
\(H_1: \mu < 58\)

# Left-tailed one-sample t-test: the alternative is that the true mean is below 58.
t.test(x = sample_data, alternative = "less", mu = 58)
## 
##  One Sample t-test
## 
## data:  sample_data
## t = -4.7295, df = 29, p-value = 2.689e-05
## alternative hypothesis: true mean is less than 58
## 95 percent confidence interval:
##      -Inf 52.57228
## sample estimates:
## mean of x 
##  49.52896

#Visualization

library(ggplot2)

# Histogram of the sample on the density scale (10 bins) with a kernel
# density curve drawn over it.
ggplot(data.frame(value = sample_data), aes(x = value)) +
  geom_histogram(aes(y = after_stat(density)), bins = 10, fill = "blue", alpha = 0.5) +
  geom_density(color = "red", linewidth = 1) +
  labs(title = "Sample Data Distribution", x = "Value", y = "Density") +
  theme_minimal()

#Two-sample t-test

Use built-in dataset: mtcars (comparing mpg for automatic vs manual cars):

# Load the built-in mtcars dataset into the workspace.
data("mtcars")

split into two groups based on transmission type:

# Split mpg by the transmission flag `am`.
auto_mpg <- with(mtcars, mpg[am == 0])   # Automatic
manual_mpg <- with(mtcars, mpg[am == 1]) # Manual

\(H_0\): Automatic cars and manual cars have equal average mpg.
\(H_1\): Automatic cars and manual cars have unequal average mpg.

# Mean mpg of each transmission group.
mean(x = auto_mpg)
## [1] 17.14737
mean(x = manual_mpg)
## [1] 24.39231

# Sample variance of mpg in each group.
var(x = auto_mpg)
## [1] 14.6993
var(x = manual_mpg)
## [1] 38.02577

Normality test for both groups:

# Shapiro-Wilk normality test for the automatic-transmission group.
shapiro.test(x = auto_mpg)
## 
##  Shapiro-Wilk normality test
## 
## data:  auto_mpg
## W = 0.97677, p-value = 0.8987
# Shapiro-Wilk normality test for the manual-transmission group.
shapiro.test(x = manual_mpg)
## 
##  Shapiro-Wilk normality test
## 
## data:  manual_mpg
## W = 0.9458, p-value = 0.5363

Check variance homogeneity (Levene’s test):

library(car)
## Loading required package: carData
# Levene's test for equal mpg variance across transmission groups,
# with deviations taken from the group means.
leveneTest(mpg ~ factor(am), data = mtcars, center = "mean")
## Levene's Test for Homogeneity of Variance (center = "mean")
##       Df F value  Pr(>F)  
## group  1   5.921 0.02113 *
##       30                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Perform two-sample t-test:

# Levene's test rejects equal variances (p = 0.02113 < 0.05), so the pooled
# (var.equal = TRUE) t-test is not appropriate here. Use Welch's t-test, which
# estimates each group's variance separately and adjusts the degrees of freedom.
t.test(auto_mpg, manual_mpg, var.equal = FALSE)
## 
##  Welch Two Sample t-test
## 
## data:  auto_mpg and manual_mpg
## t = -3.7671, df = 18.332, p-value = 0.001374
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -11.280194  -3.209684
## sample estimates:
## mean of x mean of y 
##  17.14737  24.39231

Visualize the data

# Box plots of mpg by transmission type, with the individual cars jittered
# on top of each box.
ggplot(mtcars, aes(x = factor(am), y = mpg, fill = factor(am))) +
  geom_boxplot(alpha = 0.6) +
  geom_jitter(width = 0.2, alpha = 0.7) +
  labs(
    title = "MPG Comparison: Automatic vs Manual",
    x = "Transmission (0= Auto, 1 = Manual)",
    y = "Miles Per Gallon"
  ) +
  scale_fill_manual(values = c("blue", "red"), labels = c("Automatic", "Manual")) +
  theme_minimal()

#Paired sample test

Generate 30 Observations

# Simulate 30 whole-number "before" values centred on 300 (sd 10).
set.seed(seed = 123)
before <- round(rnorm(n = 30, mean = 300, sd = 10), digits = 0)
summary(before)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   280.0   293.2   299.5   299.6   305.0   318.0

# "After" values: each "before" value plus a rounded N(5, 5) change.
after <- before + round(rnorm(n = 30, mean = 5, sd = 5), digits = 0) # simulating an increase
summary(after)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   283.0   298.2   305.0   305.5   312.2   325.0

# Observed mean change.
mean(after) - mean(before)
## [1] 5.933333

\(H_0\): Before and after the course the true GRE average score of the students stays the same.
\(H_1\): Before and after the course the true GRE average score of the students does not remain the same.

# Collect the paired measurements in one table, one row per subject ID.
df <- data.frame(ID = seq_len(30), Before = before, After = after)
print(df)
##    ID Before After
## 1   1    294   301
## 2   2    298   302
## 3   3    316   325
## 4   4    301   310
## 5   5    301   310
## 6   6    317   325
## 7   7    305   313
## 8   8    287   292
## 9   9    293   296
## 10 10    296   299
## 11 11    312   314
## 12 12    304   308
## 13 13    304   303
## 14 14    301   317
## 15 15    294   305
## 16 16    318   317
## 17 17    305   308
## 18 18    280   283
## 19 19    307   316
## 20 20    295   300
## 21 21    289   295
## 22 22    298   303
## 23 23    290   295
## 24 24    293   305
## 25 25    294   298
## 26 26    283   296
## 27 27    308   305
## 28 28    302   310
## 29 29    289   295
## 30 30    313   319

Perform Paired t-test

# Two-sided paired t-test on the within-subject differences (after - before).
t.test(x = after, y = before, alternative = "two.sided", paired = TRUE)
## 
##  Paired t-test
## 
## data:  after and before
## t = 7.7811, df = 29, p-value = 1.398e-08
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  4.373779 7.492888
## sample estimates:
## mean difference 
##        5.933333

Visualization

library(reshape2)
library(ggpubr)

# Build the wide before/after table, then reshape it to long format
# (columns ID, variable, value) so each measurement is its own row.
df <- data.frame(ID = seq_len(30), Before = before, After = after)
df_long <- melt(df, id.vars = "ID")

# One point per measurement, with a line joining each ID's two values.
ggplot(df_long, aes(x = variable, y = value, group = ID)) +
  geom_point(aes(color = variable), size = 3) +
  geom_line(alpha = 0.5) +
  labs(
    title = "Paired Samples (Before vs After)",
    y = "Values",
    x = "Condition"
  ) +
  theme_minimal()

# Paired box plot of the two conditions, annotated with the paired t-test result.
ggpaired(
  df_long,
  x = "variable", y = "value",
  color = "variable", palette = "jco",
  line.color = "gray", line.size = 0.4
) +
  stat_compare_means(method = "t.test", paired = TRUE)