# One sample t-test: does the mean of sample_data differ from known_mean?
sample_data = c(22, 24, 26, 28, 30, 32, 34, 36)
known_mean = 30
# No `alternative` is given; the recorded output shows the two-sided test
# ("true mean is not equal to 30").
result = t.test(sample_data, mu = known_mean)
# print results
print(result)
# Recorded console output, kept as comments so the script still parses:
#> One Sample t-test
#> data: sample_data
#> t = -0.57735, df = 7, p-value = 0.5818
#> alternative hypothesis: true mean is not equal to 30
#> 95 percent confidence interval:
#> 24.90435 33.09565
#> sample estimates:
#> mean of x
#> 29
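#------------------------------------------------------------------------------
# Two sample t-test
#------------------------------------------------------------------------------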
# Two independent samples to compare
group1 = c(22, 24, 26, 28, 30)
group2 = c(32, 34, 36, 38, 40)
# Perform 2 sample t-test
# (no var.equal argument is passed; the recorded output shows the Welch variant)
result = t.test(group1, group2)
print(result)
# Recorded console output, kept as comments so the script still parses:
#> Welch Two Sample t-test
#> data: group1 and group2
#> t = -5, df = 8, p-value = 0.001053
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#> -14.612008 -5.387992
#> sample estimates:
#> mean of x mean of y
#> 26 36
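#------------------------------------------------------------------------------
# Paired t-test
#------------------------------------------------------------------------------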
# Example paired t-test
# `before` and `after` are matched element by element (same length, same order)
before = c(21, 24, 26, 28, 30)
after = c(24, 26, 27, 30, 32)
# Perform paired t-test (tests the mean of the pairwise differences before - after)
result = t.test(before, after, paired = TRUE)
print(result)
# Recorded console output, kept as comments so the script still parses:
#> Paired t-test
#> data: before and after
#> t = -6.3246, df = 4, p-value = 0.003198
#> alternative hypothesis: true mean difference is not equal to 0
#> 95 percent confidence interval:
#> -2.877989 -1.122011
#> sample estimates:
#> mean difference
#> -2
# Chi-square test of independence on a 2 x 2 table of counts
# byrow = TRUE: the first two numbers fill row 1, the next two fill row 2
# NOTE(review): `data` also names the function data() that this script calls
# later as data("mtcars"); that call still works because R skips non-function
# objects when resolving a call, but a more specific name would be clearer.
data = matrix(c(20, 10, 15, 25), nrow = 2, byrow = TRUE)
rownames(data) = c("group1", "group2")
colnames(data) = c("categoryA", "categoryB")
print(data)
# Recorded console output, kept as comments so the script still parses:
#> categoryA categoryB
#> group1 20 10
#> group2 15 25
result = chisq.test(data)
print(result)
# Recorded console output (Yates' continuity correction was applied to this
# 2 x 2 table, as the title line shows):
#> Pearson's Chi-squared test with Yates' continuity correction
#> data: data
#> X-squared = 4.725, df = 1, p-value = 0.02973
#------------------------------------------------------------------------------
# Hands on Exercise
# Install and load dplyr, graphics and MASS
#Load the library
library(dplyr)
# NOTE(review): tidyverse attaches dplyr itself, so the separate call above is
# redundant (harmless).
library(tidyverse)
# NOTE(review): MASS is attached after dplyr, so MASS::select() masks
# dplyr::select(); write dplyr::select() explicitly if select() is ever needed.
library(MASS)
# presumably the source of import(), which is used to read the CSV
library(rio)
#------------------------------------------------------------------------------
# Set a working directory
#------------------------------------------------------------------------------
# NOTE(review): setwd() with a machine-specific path ties the script to one
# computer; it is kept because later code may rely on the working directory.
setwd("~/R- HYPOTHESIS TEST REG ANOVA")
# Read the survey subset from the working directory
gss = import("GSSsubset.csv")
# View() opens a data viewer, so only call it in an interactive session;
# unguarded it can abort a non-interactive run (e.g. Rscript).
if (interactive()) {
  View(gss)
}
# Explicit print() so the previews also appear when the file is run through
# source(), matching the explicit print() calls used everywhere else.
print(head(gss))
print(tail(gss))
# Perform one sample t-test on age (null hypothesis: mean age equals 30)
age_result = t.test(gss$age, mu = 30)
print(age_result)
# Recorded console output, kept as comments so the script still parses:
#> One Sample t-test
#> data: gss$age
#> t = 34.799, df = 993, p-value < 2.2e-16
#> alternative hypothesis: true mean is not equal to 30
#> 95 percent confidence interval:
#> 43.67189 45.30598
#> sample estimates:
#> mean of x
#> 44.48893
# Perform one sample t-test on income (null hypothesis: mean income equals 25000)
income_result = t.test(gss$income, mu = 25000)
print(income_result)
# Recorded console output, kept as comments so the script still parses:
#> One Sample t-test
#> data: gss$income
#> t = 10.8, df = 993, p-value < 2.2e-16
#> alternative hypothesis: true mean is not equal to 25000
#> 95 percent confidence interval:
#> 34727.28 39047.16
#> sample estimates:
#> mean of x
#> 36887.22
# perform two sample t-test on age by gender
# Split the ages by the value of gss$sex; these assignments overwrite the
# group1/group2 vectors used in the earlier toy example.
group1 = gss$age[gss$sex == "MALE"]
group2 = gss$age[gss$sex == "FEMALE"]
result = t.test(group1, group2)
print(result)
# Recorded console output, kept as comments so the script still parses
# (x = MALE, y = FEMALE):
#> Welch Two Sample t-test
#> data: group1 and group2
#> t = 0.95244, df = 989.14, p-value = 0.3411
#> alternative hypothesis: true difference in means is not equal to 0
#> 95 percent confidence interval:
#> -0.8414227 2.4284689
#> sample estimates:
#> mean of x mean of y
#> 44.87771 44.08419
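#------------------------------------------------------------------------------
# Chi-square test for independence
#------------------------------------------------------------------------------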
# Create a contingency table of gender and marital status
# (rows = gss$sex, columns = gss$marital)
contingency_table = table(gss$sex, gss$marital)
print(contingency_table)
# Recorded console output, kept as comments so the script still parses:
#> DIVORCED MARRIED NEVER MARRIED SEPARATED WIDOWED
#> FEMALE 85 220 140 19 23
#> MALE 87 260 129 18 13
# Perform chi-square on contingency table
result = chisq.test(contingency_table)
print(result)
# Recorded console output:
#> Pearson's Chi-squared test
#> data: contingency_table
#> X-squared = 6.2113, df = 4, p-value = 0.1839
# Create a contingency table of degree and marital status
# (rows = gss$degree, columns = gss$marital)
contingency_table1 = table(gss$degree, gss$marital)
print(contingency_table1)
# Recorded console output, kept as comments so the script still parses:
#> DIVORCED MARRIED NEVER MARRIED SEPARATED WIDOWED
#> BACHELOR 28 122 52 1 6
#> GRADUATE 19 75 25 5 1
#> HIGH SCHOOL 93 203 151 20 24
#> JUNIOR COLLEGE 19 41 26 5 1
#> LT HIGH SCHOOL 13 39 15 6 4
# perform chi-square
result = chisq.test(contingency_table1)
# Recorded warning from the call above (R's hint that the chi-squared
# approximation is unreliable for this table):
#> Warning in chisq.test(contingency_table1) :
#> Chi-squared approximation may be incorrect
print(result)
# Recorded console output:
#> Pearson's Chi-squared test
#> data: contingency_table1
#> X-squared = 41.217, df = 16, p-value = 0.0005158
#------------------------------------------------------------------------------
# Check expected frequencies
# if any expected frequency is less than five you may need to combine
# categories or use Fisher's test
#------------------------------------------------------------------------------
# Check expected frequencies
# (`result` is the chisq.test() fit of contingency_table1 at this point)
print(result$expected)
# Recorded console output, kept as comments so the script still parses.
# Several cells in the SEPARATED and WIDOWED columns are below five.
#> DIVORCED MARRIED NEVER MARRIED SEPARATED WIDOWED
#> BACHELOR 36.16499 100.92555 56.56036 7.779678 7.569416
#> GRADUATE 21.62978 60.36217 33.82797 4.652918 4.527163
#> HIGH SCHOOL 84.96177 237.10262 132.87626 18.276660 17.782696
#> JUNIOR COLLEGE 15.91952 44.42656 24.89738 3.424547 3.331992
#> LT HIGH SCHOOL 13.32394 37.18310 20.83803 2.866197 2.788732
# Perform Fishers exact test
# NOTE(review): left disabled as in the original. Presumably the exact
# computation on this 5 x 5 table is too expensive for fisher.test()'s default
# workspace -- TODO confirm; fisher.test(contingency_table1,
# simulate.p.value = TRUE) would be a Monte Carlo alternative.
#fisher_test_result = fisher.test(contingency_table1)
#print(fisher_test_result)
# Rebuild the gender x marital status table and repeat the chi-square test;
# `result` was overwritten by the degree x marital status analysis above.
contingency_table = table(gss$sex, gss$marital)
print(contingency_table)
# Recorded console output, kept as comments so the script still parses:
#> DIVORCED MARRIED NEVER MARRIED SEPARATED WIDOWED
#> FEMALE 85 220 140 19 23
#> MALE 87 260 129 18 13
result = chisq.test(contingency_table)
print(result)
# Recorded console output:
#> Pearson's Chi-squared test
#> data: contingency_table
#> X-squared = 6.2113, df = 4, p-value = 0.1839
#Check expected frequencies
print(result$expected)
# Recorded console output (every expected count here is above five):
#> DIVORCED MARRIED NEVER MARRIED SEPARATED WIDOWED
#> FEMALE 84.26962 235.171 131.7938 18.12777 17.63783
#> MALE 87.73038 244.829 137.2062 18.87223 18.36217
# Perform Fishers exact test
fisher_test_result = fisher.test(contingency_table)
print(fisher_test_result)
# Recorded console output:
#> Fisher's Exact Test for Count Data
#> data: contingency_table
#> p-value = 0.184
#> alternative hypothesis: two.sided
#------------------------------------------------------------------------------
#HOMEWORK
# Load the mtcars data set into the workspace
data("mtcars")
# View() opens a data viewer, so only call it in an interactive session;
# unguarded it can abort a non-interactive run (e.g. Rscript).
if (interactive()) {
  View(mtcars)
}