# One-sample z-test for a proportion, right-tailed.
# Observed: 335 out of n = 6000; hypothesised proportion p0 = 0.05.
n <- 6000
p0 <- 0.05
p_bar <- 335 / n
# The standard error is built from the hypothesised proportion p0,
# and the sample size is taken from `n` instead of being repeated inline.
z <- (p_bar - p0) / sqrt(p0 * (1 - p0) / n)
z
## [1] 2.073221
# Upper-tail area P(Z > z); same quantity as 1 - pnorm(z).
p <- pnorm(z, lower.tail = FALSE)
p
## [1] 0.01907586
# Hypotheses for the length-of-service column of the call center data:
#   H0: mu <= 2
#   H1: mu > 2
# Decision rule: right-tailed (upper-tail) test.
library(readxl)
calcenter <- read_xlsx(
  "/home/student/RCodes/Stats With R/Data_Files/Call Center Data.xlsx",
  sheet = 1,
  range = "A4:E74"
)
# One-sample t-test of the column mean against mu = 2, upper tail only.
t.test(
  calcenter$`Length of Service (years)`,
  mu = 2,
  alternative = "greater"
)
##
## One Sample t-test
##
## data: calcenter$`Length of Service (years)`
## t = -0.80599, df = 69, p-value = 0.7885
## alternative hypothesis: true mean is greater than 2
## 95 percent confidence interval:
## 1.67537 Inf
## sample estimates:
## mean of x
## 1.894207
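Result : We do not reject Ho because the p-value = 0.7885 > 0.05 at the 5% level of significance.
Conclusion : There is insufficient evidence against the reporter's claim that the average tenure is no more than 2 years, so the claim cannot be rejected.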
# One-sample z-test for a proportion, two-tailed.
# H0: p = 0.6
# H1: p != 0.6
# Observed: 35 out of n = 50.
n <- 50
p0 <- 0.6
p_bar <- 35 / n
z <- (p_bar - p0) / sqrt(p0 * (1 - p0) / n)
z
## [1] 1.443376
# Both tail areas are measured from |z| so that the two-tailed p-value stays
# valid (and never exceeds 1) when the statistic happens to be negative.
pval_left <- pnorm(-abs(z))
pval_left
## [1] 0.07445734
pval_right <- pnorm(abs(z), lower.tail = FALSE)
pval_right
## [1] 0.07445734
twotailedPval <- pval_left + pval_right
twotailedPval
## [1] 0.1489147
Result : We don't reject Ho as the p-value = 0.1489147 > 0.05 at the 5% level of significance.
Conclusion : The manager's claim that 60% of shoppers entering the store leave without making a purchase may be true.
# Two-sample z-test for the difference between two proportions.
# Sample 1: 63 out of n1 = 80; sample 2: 70 out of n2 = 120.
n1 <- 80
p1 <- 63 / n1
n2 <- 120
p2 <- 70 / n2
# Pooled proportion: total count of interest over total sample size.
p <- (n1 * p1 + n2 * p2) / (n1 + n2)
# Standard error of the difference is sqrt(p * (1 - p) * (1/n1 + 1/n2)).
# The previous version closed a parenthesis too early and evaluated
# sqrt(p * (1 - p) / n1 + 1 / n2), which gave z = 1.936293.
z <- (p1 - p2) / sqrt(p * (1 - p) * (1 / n1 + 1 / n2))
z
## [1] 2.9969
# Paired t-test on the "Cont" rows of the anorexia data set from MASS:
# compares the Prewt column with the Postwt column (two-sided, mu = 0).
library(MASS)
data("anorexia")
contAnorex <- subset(anorexia, Treat == "Cont")
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
t.test(contAnorex$Prewt, contAnorex$Postwt, mu = 0, paired = TRUE)
##
## Paired t-test
##
## data: contAnorex$Prewt and contAnorex$Postwt
## t = 0.28723, df = 25, p-value = 0.7763
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.776708 3.676708
## sample estimates:
## mean of the differences
## 0.45
# Paired t-test on the "CBT" rows of the anorexia data set:
# compares the Prewt column with the Postwt column (two-sided, mu = 0).
contAnorexT2 <- subset(anorexia, Treat == "CBT")
# `TRUE` is spelled out: `T` is an ordinary variable that can be reassigned.
t.test(contAnorexT2$Prewt, contAnorexT2$Postwt, mu = 0, paired = TRUE)
##
## Paired t-test
##
## data: contAnorexT2$Prewt and contAnorexT2$Postwt
## t = -2.2156, df = 28, p-value = 0.03502
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -5.7869029 -0.2268902
## sample estimates:
## mean of the differences
## -3.006897
# Paired t-test on the "FT" rows of the anorexia data set, left-tailed:
# the alternative is that the mean of (Prewt - Postwt) is below 0.
contAnorexT3 <- subset(anorexia, Treat == "FT")
# `TRUE` is spelled out (`T` can be reassigned) and the alternative is passed
# as a plain string rather than a length-one c() vector.
t.test(
  contAnorexT3$Prewt,
  contAnorexT3$Postwt,
  mu = 0,
  paired = TRUE,
  alternative = "less"
)
##
## Paired t-test
##
## data: contAnorexT3$Prewt and contAnorexT3$Postwt
## t = -4.1849, df = 16, p-value = 0.0003501
## alternative hypothesis: true difference in means is less than 0
## 95 percent confidence interval:
## -Inf -4.233975
## sample estimates:
## mean of the differences
## -7.264706
# Paired t-tests on the Ohio education performance spreadsheet.
library(readxl)
ohio <- read_excel(
  "/home/student/RCodes/Stats With R/Data_Files/Ohio Education Performance.xlsx",
  sheet = 1,
  range = "A3:G34"
)
# Writing vs Reading, two-sided. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
t.test(ohio$Writing, ohio$Reading, paired = TRUE)
##
## Paired t-test
##
## data: ohio$Writing and ohio$Reading
## t = 3.1503, df = 30, p-value = 0.00368
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 1.282107 6.008216
## sample estimates:
## mean of the differences
## 3.645161
# Math vs Science, two-sided.
t.test(ohio$Math, ohio$Science, paired = TRUE)
##
## Paired t-test
##
## data: ohio$Math and ohio$Science
## t = -7.7103, df = 30, p-value = 1.334e-08
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -9.506963 -5.525296
## sample estimates:
## mean of the differences
## -7.516129
# Right-tailed paired t-test: is Current greater than Previous on average?
library(readxl)
earnings <- read_excel(
  "/home/student/RCodes/Stats With R/Data_Files/Earnings2005.xlsx",
  sheet = 1,
  range = "A2:C27"
)
# The alternative is written out in full instead of the abbreviation "g",
# and `TRUE` replaces the reassignable shorthand `T`.
t.test(
  earnings$Current,
  earnings$Previous,
  alternative = "greater",
  paired = TRUE
)
##
## Paired t-test
##
## data: earnings$Current and earnings$Previous
## t = 3.8891, df = 24, p-value = 0.0003485
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
## 0.1156002 Inf
## sample estimates:
## mean of the differences
## 0.2064
# F test for equality of variances between the "A" and "B" observations,
# passing the two groups as separate numeric vectors.
y <- c(
  14.3, 23.2, 22.7, 33.9, 22.9,
  22.9, 10.4, 9.3, 2.4
)
category <- factor(c("A", "A", "B", "B", "A", "B", "A", "B", "B"))
# Pull out each group's observations by its label.
ay <- subset(y, category == "A")
by <- subset(y, category == "B")
var.test(ay, by)
##
## F test to compare two variances
##
## data: ay and by
## F = 0.26366, num df = 3, denom df = 4, p-value = 0.3022
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.02642062 3.98147359
## sample estimates:
## ratio of variances
## 0.2636567
# Same F test as a formula call: the response `y` is split by the labels in
# `category`, which is left as a plain character vector here.
y <- c(
  14.3, 23.2, 22.7, 33.9, 22.9,
  22.9, 10.4, 9.3, 2.4
)
category <- c(
  "A", "A", "B", "B", "A",
  "B", "A", "B", "B"
)
var.test(y ~ category)
##
## F test to compare two variances
##
## data: y by category
## F = 0.26366, num df = 3, denom df = 4, p-value = 0.3022
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.02642062 3.98147359
## sample estimates:
## ratio of variances
## 0.2636567
# F test comparing the variance of the Midterm column with that of the
# Final Exam column in the student grades spreadsheet.
library(readxl)
stud <- read_excel(
  "/home/student/RCodes/Stats With R/Data_Files/Student Grades.xlsx",
  sheet = 1,
  range = "A3:C59"
)
var.test(
  stud$Midterm,
  stud$`Final Exam`
)
##
## F test to compare two variances
##
## data: stud$Midterm and stud$`Final Exam`
## F = 0.78418, num df = 55, denom df = 55, p-value = 0.3701
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.4596921 1.3377145
## sample estimates:
## ratio of variances
## 0.7841791
Result : Do not reject Ho because the p-value = 0.3701 > 0.05.
Conclusion : There is no significant evidence that the variances differ, so they can be treated as equal.
# Facebook survey: compare weekly hours online between the Gender groups.
library(readxl)
fb <- read_excel(
  "/home/student/RCodes/Stats With R/Data_Files/Facebook Survey.xlsx",
  sheet = 1,
  range = "A3:D36"
)
# Step 1: F test for equality of the two group variances.
var.test(fb$`Hours online/week` ~ fb$Gender)
##
## F test to compare two variances
##
## data: fb$`Hours online/week` by fb$Gender
## F = 0.97782, num df = 19, denom df = 12, p-value = 0.9347
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.3164907 2.6592598
## sample estimates:
## ratio of variances
## 0.9778224
# Step 2: the F test above does not reject equal variances (p = 0.9347), so
# the pooled-variance t-test is used. `TRUE` is spelled out because `T` is an
# ordinary variable that can be reassigned.
t.test(fb$`Hours online/week` ~ fb$Gender, var.equal = TRUE)
##
## Two Sample t-test
##
## data: fb$`Hours online/week` by fb$Gender
## t = -0.20704, df = 31, p-value = 0.8373
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -2.545736 2.076506
## sample estimates:
## mean in group female mean in group male
## 6.150000 6.384615
# Graduate school survey: compare undergraduate GPA between the Gender groups,
# restricted to respondents whose "Plan to attend graduate school" is "yes".
library(readxl)
# NOTE(review): `df` shadows stats::df(); the name is kept so that any later
# code referring to it keeps working.
df <- read_excel(
  "/home/student/RCodes/Stats With R/Data_Files/Graduate School Survey.xlsx",
  sheet = 1,
  range = "A3:D33"
)
ss <- subset(df, `Plan to attend graduate school` == "yes")
# Step 1: F test for equality of the two group variances.
var.test(ss$`Undergraduate GPA` ~ ss$Gender)
##
## F test to compare two variances
##
## data: ss$`Undergraduate GPA` by ss$Gender
## F = 0.76687, num df = 7, denom df = 10, p-value = 0.7453
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.1941521 3.6511422
## sample estimates:
## ratio of variances
## 0.7668668
# Step 2: equal variances are not rejected (p = 0.7453), so the pooled
# t-test is used. `TRUE` is spelled out because `T` can be reassigned.
t.test(ss$`Undergraduate GPA` ~ ss$Gender, var.equal = TRUE)
##
## Two Sample t-test
##
## data: ss$`Undergraduate GPA` by ss$Gender
## t = 1.3753, df = 17, p-value = 0.1869
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.1474802 0.6997529
## sample estimates:
## mean in group F mean in group M
## 3.612500 3.336364
# Accounting professionals: compare two columns between the Gender groups.
library(readxl)
# NOTE(review): `df` shadows stats::df(); the name is kept so that any later
# code referring to it keeps working.
df <- read_excel(
  "/home/student/RCodes/Stats With R/Data_Files/Accounting Professionals.xlsx",
  sheet = 1,
  range = "A3:G30"
)
# a) Years of Service by Gender.
# F test first: equal variances are rejected at the 5% level (p = 0.02816).
var.test(df$`Years of Service` ~ df$Gender)
##
## F test to compare two variances
##
## data: df$`Years of Service` by df$Gender
## F = 0.27419, num df = 13, denom df = 12, p-value = 0.02816
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 0.08464546 0.86456574
## sample estimates:
## ratio of variances
## 0.2741889
# Unequal variances, so the Welch t-test is requested. `FALSE` is spelled
# out because `F` is an ordinary variable that can be reassigned.
t.test(df$`Years of Service` ~ df$Gender, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: df$`Years of Service` by df$Gender
## t = -3.6911, df = 17.822, p-value = 0.001695
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -15.100886 -4.140872
## sample estimates:
## mean in group F mean in group M
## 10.07143 19.69231
# b) Years Undergraduate Study by Gender.
# F test first: equal variances are rejected at the 5% level (p = 0.04176).
var.test(df$`Years Undergraduate Study` ~ df$Gender)
##
## F test to compare two variances
##
## data: df$`Years Undergraduate Study` by df$Gender
## F = 3.3994, num df = 13, denom df = 12, p-value = 0.04176
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 1.049421 10.718748
## sample estimates:
## ratio of variances
## 3.399351
# Unequal variances again, so the Welch t-test is used.
t.test(df$`Years Undergraduate Study` ~ df$Gender, var.equal = FALSE)
##
## Welch Two Sample t-test
##
## data: df$`Years Undergraduate Study` by df$Gender
## t = -1.462, df = 20.331, p-value = 0.159
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -1.5058458 0.2640875
## sample estimates:
## mean in group F mean in group M
## 3.071429 3.692308