# Class Example
### Read the Excel File ###
# Loads the height workbook and exposes the two samples used by the analysis
# below as plain numeric vectors named school1 and school2.
library(readxl)
# To pick the workbook interactively instead of using the fixed path:
# height_data <- read_excel(file.choose())
# NOTE: the path below is specific to one machine; adjust it before running elsewhere.
height_data <- read_excel("C:\\Users\\samy_\\Desktop\\R_Python_Machine Learning DataSets\\height_data.xlsx") ## Select height_data
# colnames(height_data)
# View(height_data)

# Extract the samples explicitly rather than attach()-ing the data frame:
# attach() puts every column on the search path, where an existing global
# variable with the same name silently takes precedence over the column.
# Fail early with a clear error if the expected columns are missing.
stopifnot(all(c("school1", "school2") %in% names(height_data)))
school1 <- height_data[["school1"]]
school2 <- height_data[["school2"]]
######### Step 1: Normality Test ##############
# Shapiro-Wilk Normality Test
# H0: school1 is drawn from a normal distribution.
shapiro.test(school1)
##
## Shapiro-Wilk normality test
##
## data: school1
## W = 0.99164, p-value = 0.7946
# Visual check: normal Q-Q plot. qqline() adds the reference line through the
# first and third quartiles, so departures from normality show up as points
# bending away from the line.
qqnorm(school1)
qqline(school1)

# p-value = 0.7946 > 0.05, so we fail to reject H0 ("p high, null fly") => no evidence against normality for school1
## Repeat the test for school2
# H0: school2 is drawn from a normal distribution.
shapiro.test(school2)
##
## Shapiro-Wilk normality test
##
## data: school2
## W = 0.99091, p-value = 0.7375
# Visual check: normal Q-Q plot. qqline() adds the reference line through the
# first and third quartiles, so departures from normality show up as points
# bending away from the line.
qqnorm(school2)
qqline(school2)

# p-value = 0.7375 > 0.05, so we fail to reject H0 ("p high, null fly") => no evidence against normality for school2
######### Step 2: Variance Test ###############
# F test comparing the two sample variances.
# H0: var(school1) / var(school2) == 1; two-sided, 95% confidence interval.
# The arguments after x and y are var.test()'s defaults, written out explicitly.
var.test(
  x = school1,
  y = school2,
  ratio = 1,
  alternative = "two.sided",
  conf.level = 0.95
)
##
## F test to compare two variances
##
## data: school1 and school2
## F = 1.0828, num df = 99, denom df = 99, p-value = 0.6932
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.7285324 1.6092486
## sample estimates:
## ratio of variances
## 1.08277
# p-value = 0.6932 > 0.05, so we fail to reject H0 ("p high, null fly") => no evidence that the variances differ; treat them as equal
######## Step 3: 2 sample t-test #############
# Pooled-variance (Student) two-sample t-test.
# H0: mean(school1) == mean(school2); H1: the means differ.
# TRUE is spelled out because T is an ordinary variable that can be reassigned,
# which would silently switch this to the Welch (unequal-variance) test.
t.test(school1, school2, alternative = "two.sided", var.equal = TRUE)
##
## Two Sample t-test
##
## data: school1 and school2
## t = -6.5876, df = 198, p-value = 3.942e-10
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -32.91169 -17.74694
## sample estimates:
## mean of x mean of y
## 156.0444 181.3737
# p-value = 3.942e-10 < 0.05, so we reject H0 ("p low, null go") => the means are not equal
## Repeat with new Hypothesis : Null Hyp: mean 1 <= mean 2
## Alternative Hyp: mean 1 > mean 2
# One-sided pooled-variance two-sample t-test.
# H0: mean(school1) <= mean(school2); H1: mean(school1) > mean(school2).
# TRUE is spelled out because T is an ordinary variable that can be reassigned,
# which would silently switch this to the Welch (unequal-variance) test.
t.test(school1, school2, alternative = "greater", var.equal = TRUE)
##
## Two Sample t-test
##
## data: school1 and school2
## t = -6.5876, df = 198, p-value = 1
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## -31.68348 Inf
## sample estimates:
## mean of x mean of y
## 156.0444 181.3737
# p-value = 1 > 0.05, so we fail to reject H0 ("p high, null fly") => no evidence that mean 1 > mean 2 (consistent with mean 1 <= mean 2)