# Hypothesis Testing (2-Sample t-test)

# Class Example

### Read the Excel File ###
# Loads the height data and exposes the two samples used by the tests below
# as plain variables `school1` and `school2`.
library(readxl)
# height_data <- read_excel(file.choose())
# NOTE(review): the path below is machine-specific; use the file.choose() line
# above (or adjust the path) when running on another machine.
height_data <- read_excel("C:\\Users\\samy_\\Desktop\\R_Python_Machine Learning DataSets\\height_data.xlsx") ## Select height_data
#colnames(height_data)
#View(height_data)

# Fail early with a clear message if the expected columns are missing.
stopifnot(all(c("school1", "school2") %in% names(height_data)))

# Extract the samples explicitly instead of attach()-ing the data frame:
# attach() puts every column on the search path, where it can be silently
# masked by (or mask) other objects of the same name.
# NOTE(review): if code elsewhere relies on other columns of height_data being
# visible by bare name, extract them here as well.
school1 <- height_data[["school1"]]
school2 <- height_data[["school2"]]

######### Step 1: Normality Test ##############
# Shapiro-Wilk normality test on the first sample (H0: the data are normal).
shapiro.test(x = school1)

## 
##  Shapiro-Wilk normality test
## 
## data:  school1
## W = 0.99164, p-value = 0.7946

# Normal Q-Q plot as a visual companion to the test above.
qqnorm(y = school1)

# p-value = 0.7946 > 0.05, so we fail to reject H0 ("p high, null fly"):
# school1 is consistent with a normal distribution.

## Same normality check for the second sample (H0: the data are normal).
shapiro.test(x = school2)

## 
##  Shapiro-Wilk normality test
## 
## data:  school2
## W = 0.99091, p-value = 0.7375

# Normal Q-Q plot as a visual companion to the test above.
qqnorm(y = school2)

# p-value = 0.7375 > 0.05, so we fail to reject H0 ("p high, null fly"):
# school2 is consistent with a normal distribution.

######### Step 2: Variance Test ###############
# F test comparing the variances of the two samples (H0: variance ratio is 1).
# The arguments after `y` are the function's defaults, spelled out for clarity.
var.test(
  x = school1,
  y = school2,
  ratio = 1,
  alternative = "two.sided",
  conf.level = 0.95
)

## 
##  F test to compare two variances
## 
## data:  school1 and school2
## F = 1.0828, num df = 99, denom df = 99, p-value = 0.6932
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.7285324 1.6092486
## sample estimates:
## ratio of variances 
##            1.08277

# p-value = 0.6932 > 0.05, so we fail to reject H0 ("p high, null fly"):
# no evidence the variances differ, so equal variances are assumed in Step 3.

######## Step 3: 2 sample t-test #############
# Two-sided pooled-variance t-test.
#   H0: mean(school1) == mean(school2)
#   H1: mean(school1) != mean(school2)
# var.equal = TRUE follows from the variance test in Step 2. TRUE is spelled
# out because `T` is an ordinary variable that can be reassigned.
t.test(school1, school2, alternative = "two.sided", var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  school1 and school2
## t = -6.5876, df = 198, p-value = 3.942e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -32.91169 -17.74694
## sample estimates:
## mean of x mean of y 
##  156.0444  181.3737

# p-value = 3.942e-10 < 0.05, so we reject H0 ("p low, null go"):
# the data support the alternative that the two means are not equal.

## Repeat with a one-sided hypothesis:
##   Null Hyp (H0):        mean 1 <= mean 2
##   Alternative Hyp (H1): mean 1 >  mean 2
# TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
t.test(school1, school2, alternative = "greater", var.equal = TRUE)

## 
##  Two Sample t-test
## 
## data:  school1 and school2
## t = -6.5876, df = 198, p-value = 1
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -31.68348       Inf
## sample estimates:
## mean of x mean of y 
##  156.0444  181.3737

# p-value = 1 > 0.05, so we fail to reject H0 ("p high, null fly"):
# there is no evidence that mean 1 is greater than mean 2. Note that failing
# to reject H0 (mean 1 <= mean 2) does not by itself prove mean 1 < mean 2;
# that conclusion comes from the two-sided test and the sample means above.