SESSION 5 HYPOTHESIS TESTING

ONE-SAMPLE T-TEST

# Observed sample values and the hypothesised population mean to test against
sample_data <- c(22, 24, 26, 28, 30, 32, 34, 36)
known_mean <- 30

Perform 1 sample t-test

# Test H0: the population mean equals known_mean (two-sided by default)
result <- t.test(sample_data, mu = known_mean)

# Display the test summary
print(result)

    One Sample t-test

data:  sample_data
t = -0.57735, df = 7, p-value = 0.5818
alternative hypothesis: true mean is not equal to 30
95 percent confidence interval:
 24.90435 33.09565
sample estimates:
mean of x 
       29 

#—————————————————————————– # Two sample t-test #——————————————————————————

# Two independent samples to compare
group1 <- c(22, 24, 26, 28, 30)
group2 <- c(32, 34, 36, 38, 40)

# Two-sample t-test; with default arguments t.test() runs the Welch variant
result <- t.test(group1, group2)
print(result)

    Welch Two Sample t-test

data:  group1 and group2
t = -5, df = 8, p-value = 0.001053
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -14.612008  -5.387992
sample estimates:
mean of x mean of y 
       26        36 

#—————————————————————————- # Paired t-test #—————————————————————————-

# Paired measurements: element i of `before` and `after` belong together
before <- c(21, 24, 26, 28, 30)
after <- c(24, 26, 27, 30, 32)

# Paired t-test on the within-pair differences (before - after)
result <- t.test(before, after, paired = TRUE)
print(result)

    Paired t-test

data:  before and after
t = -6.3246, df = 4, p-value = 0.003198
alternative hypothesis: true mean difference is not equal to 0
95 percent confidence interval:
 -2.877989 -1.122011
sample estimates:
mean difference 
             -2 

Chi-square test for independence

# 2x2 table of counts, filled row by row, with labels attached at construction
data <- matrix(
  c(20, 10, 15, 25),
  nrow = 2,
  byrow = TRUE,
  dimnames = list(
    c("group1", "group2"),
    c("categoryA", "categoryB")
  )
)
print(data)
       categoryA categoryB
group1        20        10
group2        15        25
# Chi-square test of independence on the 2x2 table
# (for 2x2 input chisq.test() applies Yates' continuity correction by default)
result <- chisq.test(data)
print(result)

    Pearson's Chi-squared test with Yates' continuity correction

data:  data
X-squared = 4.725, df = 1, p-value = 0.02973

#—————————————————————————– # Hands-on Exercise # Install and load dplyr, graphics and MASS

#Load the library
library(dplyr)
library(tidyverse)
library(MASS)
library(rio)

#————————————————————————— # Set a working directory #————————————————————————–

# Make the course folder the working directory so the relative file name used
# by the import step below is looked up inside it.
# NOTE(review): this path is machine-specific — confirm the folder exists before running.
setwd("~/R- HYPOTHESIS TEST REG ANOVA")

Import a data set

# Read the GSS subset; the file name is relative to the working directory
gss <- import("GSSsubset.csv")

# Inspect the data: open the viewer, then show the first and last rows
View(gss)
head(gss)
tail(gss)

Perform one-sample t-test on age

# One-sample t-test: does the mean age differ from 30?
age_result <- t.test(gss$age, mu = 30)
print(age_result)

    One Sample t-test

data:  gss$age
t = 34.799, df = 993, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 30
95 percent confidence interval:
 43.67189 45.30598
sample estimates:
mean of x 
 44.48893 

# One-sample t-test: does the mean income differ from 25000?
income_result <- t.test(gss$income, mu = 25000)
print(income_result)

    One Sample t-test

data:  gss$income
t = 10.8, df = 993, p-value < 2.2e-16
alternative hypothesis: true mean is not equal to 25000
95 percent confidence interval:
 34727.28 39047.16
sample estimates:
mean of x 
 36887.22 

Two sample t-test

# Split the ages into two groups according to the recorded sex
group1 <- gss$age[gss$sex == "MALE"]
group2 <- gss$age[gss$sex == "FEMALE"]

# Two-sample t-test on age between the groups (Welch variant by default)
result <- t.test(group1, group2)
print(result)

    Welch Two Sample t-test

data:  group1 and group2
t = 0.95244, df = 989.14, p-value = 0.3411
alternative hypothesis: true difference in means is not equal to 0
95 percent confidence interval:
 -0.8414227  2.4284689
sample estimates:
mean of x mean of y 
 44.87771  44.08419 

#—————————————————————————- # chisquare test for independence #————————————————————————-

# Cross-tabulate sex (rows) against marital status (columns)
contingency_table <- table(gss$sex, gss$marital)

print(contingency_table)
        
         DIVORCED MARRIED NEVER MARRIED SEPARATED WIDOWED
  FEMALE       85     220           140        19      23
  MALE         87     260           129        18      13
# Chi-square test of independence between sex and marital status
result <- chisq.test(contingency_table)
print(result)

    Pearson's Chi-squared test

data:  contingency_table
X-squared = 6.2113, df = 4, p-value = 0.1839
# Create a contingency table of degree (rows) and marital status (columns)
contingency_table1 = table(gss$degree, gss$marital)
print(contingency_table1)
                
                 DIVORCED MARRIED NEVER MARRIED SEPARATED WIDOWED
  BACHELOR             28     122            52         1       6
  GRADUATE             19      75            25         5       1
  HIGH SCHOOL          93     203           151        20      24
  JUNIOR COLLEGE       19      41            26         5       1
  LT HIGH SCHOOL       13      39            15         6       4
# Chi-square test of independence on the degree-by-marital-status table
result <- chisq.test(contingency_table1)
Warning in chisq.test(contingency_table1) :
  Chi-squared approximation may be incorrect
# Display the chi-square result computed from contingency_table1
print(result)

    Pearson's Chi-squared test

data:  contingency_table1
X-squared = 41.217, df = 16, p-value = 0.0005158

#—————————————————————————— # Check expected frequencies # If any expected frequency is less than five, you may need to combine categories or use Fisher's exact test

# Show the expected cell counts stored in the chi-square result
print(result[["expected"]])
                
                 DIVORCED   MARRIED NEVER MARRIED SEPARATED   WIDOWED
  BACHELOR       36.16499 100.92555      56.56036  7.779678  7.569416
  GRADUATE       21.62978  60.36217      33.82797  4.652918  4.527163
  HIGH SCHOOL    84.96177 237.10262     132.87626 18.276660 17.782696
  JUNIOR COLLEGE 15.91952  44.42656      24.89738  3.424547  3.331992
  LT HIGH SCHOOL 13.32394  37.18310      20.83803  2.866197  2.788732
# Fisher's exact test on the degree-by-marital-status table (left disabled)
# NOTE(review): the reason for disabling it is not recorded here — confirm
# before re-enabling.
#fisher_test_result = fisher.test(contingency_table1)

#print(fisher_test_result)

# Rebuild the sex-by-marital-status contingency table and display it
contingency_table <- table(gss$sex, gss$marital)

print(contingency_table)
        
         DIVORCED MARRIED NEVER MARRIED SEPARATED WIDOWED
  FEMALE       85     220           140        19      23
  MALE         87     260           129        18      13
# Chi-square test of independence on the sex-by-marital-status table
result <- chisq.test(contingency_table)
print(result)

    Pearson's Chi-squared test

data:  contingency_table
X-squared = 6.2113, df = 4, p-value = 0.1839
# Show the expected cell counts stored in the chi-square result
print(result[["expected"]])
        
         DIVORCED MARRIED NEVER MARRIED SEPARATED  WIDOWED
  FEMALE 84.26962 235.171      131.7938  18.12777 17.63783
  MALE   87.73038 244.829      137.2062  18.87223 18.36217
# Fisher's exact test on the sex-by-marital-status table
fisher_test_result <- fisher.test(contingency_table)

print(fisher_test_result)

    Fisher's Exact Test for Count Data

data:  contingency_table
p-value = 0.184
alternative hypothesis: two.sided

#—————————————————————————-

#HOMEWORK

# Load the mtcars data set into the workspace and open it in the data viewer
data(mtcars)
View(mtcars)
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQojIFNFU1NJT04gNSBIWVBPVEhFU0lTIFRFU1RJTkcgDQojIE9ORSBTQU1QTEUgVC0gdGVzdA0KDQpgYGB7cn0NCnNhbXBsZV9kYXRhID0gYygyMiwgMjQsIDI2LCAyOCwgMzAsIDMyLCAzNCwgMzYpDQprbm93bl9tZWFuID0gMzANCg0KYGBgDQoNCg0KIyBQZXJmb3JtIDEgc2FtcGxlIHQtdGVzdA0KDQpgYGB7cn0NCnJlc3VsdCA9IHQudGVzdCAoc2FtcGxlX2RhdGEsIG11ID0ga25vd25fbWVhbikNCg0KIyBwcmludCByZXN1bHRzDQpwcmludChyZXN1bHQpDQpgYGANCg0KIy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tDQojIFR3byBzYW1wbGUgdC10ZXN0DQojLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tDQoNCmBgYHtyfQ0KZ3JvdXAxID0gYygyMiwgMjQsIDI2LCAyOCwgMzApDQpncm91cDIgPSBjKDMyLCAzNCwgMzYsIDM4LCA0MCkNCg0KIyBQZXJmb3JtIDIgc2FtcGxlIHQtdGVzdA0KcmVzdWx0ID0gdC50ZXN0KGdyb3VwMSwgZ3JvdXAyKQ0KcHJpbnQocmVzdWx0KQ0KYGBgDQoNCiMtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tDQojIFBhaXJlZCB0LXRlc3QNCiMtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tDQoNCmBgYHtyfQ0KIyBFeGFtcGxlIHBhaXJlZCB0LXRlc3QNCmJlZm9yZSA9IGMoMjEsIDI0LCAyNiwgMjgsIDMwKQ0KYWZ0ZXIgPSBjKDI0LCAyNiwgMjcsIDMwLCAzMikNCg0KIyBQZXJmb3JtIHBhaXJlZCB0LXRlc3QNCnJlc3VsdCA9IHQudGVzdCAoYmVmb3JlLCBhZnRlciwgcGFpcmVkID0gVFJVRSkNCnByaW50KHJlc3VsdCkNCmBgYA0KDQojIENoaSBzcXVhcmUgdGVzdCBmb3IgaW5kaXBlbmRlbmNlDQpgYGB7cn0NCmRhdGEgPSBtYXRyaXgoYygyMCwgMTAsIDE1LCAyNSksIG5yb3cgPSAyLCBieXJvdyA9IFRSVUUpDQpyb3duYW1lcyhkYXRhKSA9IGMoImdyb3VwMSIsICJncm91cDIiKQ0KY29sbmFtZXMoZGF0YSkgPSBjKCJjYXRlZ29yeUEiLCAiY2F0ZWdvcnlCIikNCnByaW50KGRhdGEpDQpgYGANCg0KDQpgYGB7cn0NCnJlc3VsdCA9IGNoaXNxLnRlc3QoZGF0YSkNCnByaW50KHJlc3VsdCkNCmBgYA0KDQojLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0NCiMgSGFuZHMgb24gRXhjZXJzaWNlDQojIEluc3RhbGwgYW5kIGxvYWQgZHBseXIsIGdyYXBoaWNzIGFuZCBNQVNTDQoNCmBgYHtyfQ0KI0xvYWQgdGhlIGxpYnJhcnkNCmxpYnJhcnkoZHBseXIpDQpsaWJyYXJ5
KHRpZHl2ZXJzZSkNCmxpYnJhcnkoTUFTUykNCmxpYnJhcnkocmlvKQ0KYGBgDQoNCiMtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0NCiMgU2V0IGEgd29ya2luZyBkaXJlY3RvcnkNCiMtLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQ0KYGBge3J9DQpzZXR3ZCgifi9SLSBIWVBPVEhFU0lTIFRFU1QgUkVHIEFOT1ZBIikNCmBgYA0KDQojIEltcG9ydCBhIGRhdGEgc2V0DQpgYGB7cn0NCmdzcyA9IGltcG9ydCgiR1NTc3Vic2V0LmNzdiIpDQpWaWV3KGdzcykNCmhlYWQoZ3NzKQ0KdGFpbChnc3MpDQpgYGANCg0KIyBQZXJmb3JtIG9uZS1zYW1wbGUgdC10ZXN0IG9uIGFnZQ0KYGBge3J9DQphZ2VfcmVzdWx0ID0gdC50ZXN0KGdzcyRhZ2UsIG11ID0gMzApDQpwcmludChhZ2VfcmVzdWx0KQ0KDQpgYGANCiNQZXJmb3JtIG9uZSBzYW1wbGUgdC10ZXN0IG9uIGluY29tZQ0KYGBge3J9DQppbmNvbWVfcmVzdWx0ID0gdC50ZXN0KGdzcyRpbmNvbWUsIG11ID0gMjUwMDApDQpwcmludChpbmNvbWVfcmVzdWx0KQ0KYGBgDQoNCiMgVHdvIHNhbXBsZSB0LXRlc3QNCmBgYHtyfQ0KIyBwZXJmb3JtIHR3byBzYW1wbGUgdC10ZXN0IG9uIGFnZSBieSBnZW5kZXINCmdyb3VwMSA9IGdzcyRhZ2VbZ3NzJHNleCA9PSAiTUFMRSJdDQpncm91cDIgPSBnc3MkYWdlW2dzcyRzZXg9PSAiRkVNQUxFIl0NCg0KcmVzdWx0ID0gdC50ZXN0KGdyb3VwMSwgZ3JvdXAyKQ0KcHJpbnQocmVzdWx0KQ0KDQpgYGANCg0KIy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0NCiMgY2hpc3F1YXJlIHRlc3QgZm9yIGluZGVwZW5kZW5jZQ0KIy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0NCg0KYGBge3J9DQojIENyZWF0ZSBhIGNvbnRpbmdlbmN5IHRhYmxlIG9mIGdlbmRlciBhbmQgbWFyaXRhbCBzdGF0dXMNCmNvbnRpbmdlbmN5X3RhYmxlID0gdGFibGUoZ3NzJHNleCwgZ3NzJG1hcml0YWwpDQoNCnByaW50KGNvbnRpbmdlbmN5X3RhYmxlKQ0KYGBgDQoNCg0KYGBge3J9DQojIFBlcmZvcm0gY2hpLXNxdWFyZSBvbiBjb250aW5nZW5jeSB0YWJsZQ0KcmVzdWx0ID0gY2hpc3EudGVzdChjb250aW5nZW5jeV90YWJsZSkNCnByaW50KHJlc3VsdCkNCmBgYA0KDQoNCmBgYHtyfQ0KIyBDcmVhdGUgY29udGluZ2VuY3kgb24gZ2VuZGVyIGFuZCBkZWdyZWUNCmNvbnRpbmdlbmN5X3RhYmxlMSA9IHRhYmxlKGdzcyRkZWdyZWUsIGdzcyRtYXJpdGFsKQ0KcHJpbnQoY29udGluZ2VuY3lfdGFibGUxKQ0KYGBgDQoNCg0KYGBge3J9DQojIHBlcmZvcm0gY2hpLXNxdWFyZQ0KcmVzdWx0ID0gY2hpc3EudGVzdChjb250aW5nZW5jeV90YWJsZTEpDQpw
cmludChyZXN1bHQpDQpgYGANCg0KIy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLQ0KIyBDaGVjayBleHBlY3RlZCBmcmVxdWVuY2llcw0KIyBpZiBhbnkgZXhwZWN0ZWQgZnJlcXVlbmN5IGlzIGxlc3MgdGhhbiBmaXZlIHlvdSBtYXkgbmVlZCB0byBjb21iaW5lIGNhdGVnb3JpZXMgb3IgdXNlIGZpc2hlcnMgdGVzdA0KDQpgYGB7cn0NCiMgQ2hlY2sgZXhwZWN0ZWQgZnJlcXVlbmNpZXMNCnByaW50KHJlc3VsdCRleHBlY3RlZCkNCmBgYA0KDQpgYGB7cn0NCiMgUGVyZm9ybSBGaXNoZXJzIGV4YWN0IHRlc3QNCiNmaXNoZXJfdGVzdF9yZXN1bHQgPSBmaXNoZXIudGVzdChjb250aW5nZW5jeV90YWJsZTEpDQoNCiNwcmludChmaXNoZXJfdGVzdF9yZXN1bHQpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KY29udGluZ2VuY3lfdGFibGUgPSB0YWJsZShnc3Mkc2V4LCBnc3MkbWFyaXRhbCkNCg0KcHJpbnQoY29udGluZ2VuY3lfdGFibGUpDQoNCnJlc3VsdCA9IGNoaXNxLnRlc3QoY29udGluZ2VuY3lfdGFibGUpDQpwcmludChyZXN1bHQpDQpgYGANCg0KDQpgYGB7cn0NCiNDaGVjayBleHBlY3RlZCBmcmVxdWVuY2llcw0KcHJpbnQocmVzdWx0JGV4cGVjdGVkKQ0KDQojIFBlcmZvcm0gRmlzaGVycyBleGFjdCB0ZXN0DQpmaXNoZXJfdGVzdF9yZXN1bHQgPSBmaXNoZXIudGVzdChjb250aW5nZW5jeV90YWJsZSkNCg0KcHJpbnQoZmlzaGVyX3Rlc3RfcmVzdWx0KQ0KYGBgDQoNCg0KIy0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0tLS0NCg0KI0hPTUVXT1JLDQpgYGB7cn0NCmRhdGEoIm10Y2FycyIpDQpWaWV3KG10Y2FycykNCmBgYA0KDQoNCg0KDQoNCmBgYHtyfQ0KDQpgYGANCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg0KDQoNCg==