# Clear the workspace
rm(list = ls()) # Clear environment
gc() # Clear unused memory
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 522973 28.0 1164015 62.2 660385 35.3
## Vcells 953924 7.3 8388608 64.0 1769723 13.6
cat("\f") # Clear the console
Function reject or not
myp=function(p, alpha){
if(p<alpha){print('REJECT Ho')}else{print('FAIL 2 REJECT')}
}
#Shade norm
shadenorm = function(below=NULL, above=NULL, pcts = c(0.025,0.975), mu=0, sig=1, numpts = 500, color = "gray", dens = 40, justabove= FALSE, justbelow = FALSE, lines=FALSE,between=NULL,outside=NULL){
if(is.null(between)){
below = ifelse(is.null(below), qnorm(pcts[1],mu,sig), below)
above = ifelse(is.null(above), qnorm(pcts[2],mu,sig), above)
}
if(is.null(outside)==FALSE){
below = min(outside)
above = max(outside)
}
lowlim = mu - 4*sig # min point plotted on x axis
uplim = mu + 4*sig # max point plotted on x axis
x.grid = seq(lowlim,uplim, length= numpts)
dens.all = dnorm(x.grid,mean=mu, sd = sig)
if(lines==FALSE){
plot(x.grid, dens.all, type="l", xlab="X", ylab="Density") # label y and x axis
}
if(lines==TRUE){
lines(x.grid,dens.all)
}
if(justabove==FALSE){
x.below = x.grid[x.grid<below]
dens.below = dens.all[x.grid<below]
polygon(c(x.below,rev(x.below)),c(rep(0,length(x.below)),rev(dens.below)),col=color,density=dens)
}
if(justbelow==FALSE){
x.above = x.grid[x.grid>above]
dens.above = dens.all[x.grid>above]
polygon(c(x.above,rev(x.above)),c(rep(0,length(x.above)),rev(dens.above)),col=color,density=dens)
}
if(is.null(between)==FALSE){
from = min(between)
to = max(between)
x.between = x.grid[x.grid>from&x.grid<to]
dens.between = dens.all[x.grid>from&x.grid<to]
polygon(c(x.between,rev(x.between)),c(rep(0,length(x.between)),rev(dens.between)),col=color,density=dens)
}
}
#Shade t
shadet = function(below=NULL, above=NULL, pcts = c(0.025,0.975), df=1, numpts = 500, color = "gray", dens = 40, justabove= FALSE, justbelow = FALSE, lines=FALSE,between=NULL,outside=NULL){
if(is.null(between)){
below = ifelse(is.null(below), qt(pcts[1],df), below)
above = ifelse(is.null(above), qt(pcts[2],df), above)
}
if(is.null(outside)==FALSE){
below = min(outside)
above = max(outside)
}
lowlim = -4
uplim = 4
x.grid = seq(lowlim,uplim, length= numpts)
dens.all = dt(x.grid,df)
if(lines==FALSE){
plot(x.grid, dens.all, type="l", xlab="X", ylab="Density")
}
if(lines==TRUE){
lines(x.grid,dens.all)
}
if(justabove==FALSE){
x.below = x.grid[x.grid<below]
dens.below = dens.all[x.grid<below]
polygon(c(x.below,rev(x.below)),c(rep(0,length(x.below)),rev(dens.below)),col=color,density=dens)
}
if(justbelow==FALSE){
x.above = x.grid[x.grid>above]
dens.above = dens.all[x.grid>above]
polygon(c(x.above,rev(x.above)),c(rep(0,length(x.above)),rev(dens.above)),col=color,density=dens)
}
if(is.null(between)==FALSE){
from = min(between)
to = max(between)
x.between = x.grid[x.grid>from&x.grid<to]
dens.between = dens.all[x.grid>from&x.grid<to]
polygon(c(x.between,rev(x.between)),c(rep(0,length(x.between)),rev(dens.between)),col=color,density=dens)
}
}
#Shade Chisquare
shadechi = function(below=NULL, above=NULL, pcts = c(0.025,0.975), df=1, numpts = 500, color = "gray", dens = 40, justabove= FALSE, justbelow = FALSE, lines=FALSE,between=NULL,outside=NULL){
if(is.null(between)){
below = ifelse(is.null(below), qchisq(pcts[1],df), below)
above = ifelse(is.null(above), qchisq(pcts[2],df), above)
}
if(is.null(outside)==FALSE){
below = min(outside)
above = max(outside)
}
lowlim = 0
uplim = qchisq(.99,df)
x.grid = seq(lowlim,uplim, length= numpts)
dens.all = dchisq(x.grid,df)
if(lines==FALSE){
plot(x.grid, dens.all, type="l", xlab="X", ylab="Density")
}
if(lines==TRUE){
lines(x.grid,dens.all)
}
if(justabove==FALSE){
x.below = x.grid[x.grid<below]
dens.below = dens.all[x.grid<below]
polygon(c(x.below,rev(x.below)),c(rep(0,length(x.below)),rev(dens.below)),col=color,density=dens)
}
if(justbelow==FALSE){
x.above = x.grid[x.grid>above]
dens.above = dens.all[x.grid>above]
polygon(c(x.above,rev(x.above)),c(rep(0,length(x.above)),rev(dens.above)),col=color,density=dens)
}
if(is.null(between)==FALSE){
from = min(between)
to = max(between)
x.between = x.grid[x.grid>from&x.grid<to]
dens.between = dens.all[x.grid>from&x.grid<to]
polygon(c(x.between,rev(x.between)),c(rep(0,length(x.between)),rev(dens.between)),col=color,density=dens)
}
}
# Ho: Mu=109, Ha: Mu<>109
n = 190 # Sample population
m = 109 # Population Mean
sm = 110 # Sample mean
sd = 6 # Standard deviation of population
se = sd/sqrt(n) #Standard error
alpha = 0.05 #Level of significance
z = (sm - m)/(se)
z
## [1] 2.297341
p = round(2*(1 - pnorm(z)),4)
p
## [1] 0.0216
myp(p,alpha)
## [1] "REJECT Ho"
p value < alpha, reject null
shadenorm( mu = 109, sig = 6/sqrt(190), pcts = c(0.025,0.975), color = "red") # shades significance level gates
lines(x=rep(110,10), y=seq(0,1,length.out=10), col='blue')
# Ho: Mu=5, Ha: Mu<>5
n = 5 # Sample population
m = 5.3 # Population Mean
smean = 5.0 # Sample mean
std = 1.1 # Standard deviation of population
se = std/sqrt(n) #standard error
alpha = 0.05 # Level of significance
z = (smean - m)/(se)
z
## [1] -0.6098367
df = n-1
p = qt(p=alpha/2, df, lower.tail=FALSE)
p
## [1] 2.776445
interval = c(smean - p * se, smean + p * se)
interval
## [1] 3.63417 6.36583
The level of ozone normally found is between 3.63417 and 6.36583 ppm. The interval is within 5.3, so we cannot reject the null hypothesis. And the data does not support the claim
myp(p,alpha)
## [1] "FAIL 2 REJECT"
# Ho: Mu=7.3,
#Ha: Mu<>7.3
n <- 51 # Sample population
m <- 7.3 # Population Mean
Samp <- 7.1 # Sample mean
var <- 0.49 # Variance of population
sd <- sqrt(var) # Standard deviation of population
se <- sd/sqrt(n) # Standard deviation of sample (standard error)
alpha <- 0.01 # Level of significance
t <- (7.1 - 7.3) / se
t
## [1] -2.040408
?pt
## starting httpd help server ... done
df = n-1
p = 2 * pt(q = t,
df)
p
## [1] 0.04660827
interval = c(Samp - p * se, Samp + p * se)
interval
## [1] 7.095431 7.104569
myp(p, alpha)
## [1] "FAIL 2 REJECT"
The level of ozone normally found is between 7.095431 and 7.104569 ppm. 7.3 is outside of the interval, so we can reject the null hypothesis and assume that the data supports the researcherβs claim.
shadet(df = n-1,
pcts = c(.005 , 0.995),
color = "lightblue")
lines(x = rep(t,10),
y = seq(from = 0,
to = 1,
length.out = 10
),
col='gray'
)
# Ho: pi>=0.36, Ha: pi<0.36
#No standard deviation so t distribution
n = 100
pr = 0.36
sam = 0.29
q = 1-pr
alpha <- 0.02 # Level of significance
se <- sqrt(pr*q/n)
Z = (sam - pr) / se
Z
## [1] -1.458333
p = pnorm(Z)
pnorm(Z)
## [1] 0.07237434
myp(p, alpha)
## [1] "FAIL 2 REJECT"
shadenorm(mu = .36,
sig = se,
pcts = c(.02),
color = 'red'
)
lines(x = rep(.29,10),
y = seq(from = 0,
to = 20,
length.out=10),
col='blue')
# Ho: pi>=0.31
#Ha: pi<0.31
hd = .31
n = 380
unins = .95
df = n-1
lev = -0.05
SE<-sqrt((hd*(1-hd))/n)
SE
## [1] 0.0237254
z<-qnorm(1-.05/2)
z
## [1] 1.959964
myp(p,alpha)
## [1] "FAIL 2 REJECT"
upper<-unins+Z*SE
upper
## [1] 0.9154005
lower<-unins-Z*SE
lower
## [1] 0.9845995
The percent of uninsured patients is between 90.34991 and 99.65009 percent.Because 31 is equal to or less than the upper bound, we cannot reject the null hypothesis. The data does not support the claim.
Act<-112
Lat<-102
sd1<-24
sd2<-15.4387
n=22
alpha<-0.1
meandifference<-Act-Lat
meandifference
## [1] 10
SE<-sqrt((sd1^2/n)+(sd2^2/n))
SE
## [1] 6.084083
df<-n-1
tvdf<-qt(p=alpha/2,df,lower.tail = FALSE)
tvdf
## [1] 1.720743
upper<-Lat+(tvdf*SE)
upper
## [1] 112.4691
lower<-Lat-(tvdf*SE)
lower
## [1] 91.53086
upper1<-sd2+(tvdf*SE)
upper1
## [1] 25.90784
lower1<-sd2-(tvdf*SE)
lower1
## [1] 4.969557
Fail to reject null.
# Ho: Mu1-Mu2=0
# Ha: Mu1-Mu2<>0
#Let smoker group be indexed by 1, non-smoker group by 2.
mu1 <- 87
mu2 <- 84
alpha = 0.1
n1 = 32
n2 = 31
df1 = n1 - 1
df2 = n2 - 1
sd1 = 9
sd2 = 10
var1 = sd1^2
var2 = sd2^2
pointestdiff = (mu1-mu2)
denSe = sqrt(var1/n1 + var2/n2)
t = (pointestdiff - 0) / denSe
t
## [1] 1.25032
numdf = (var1/n1 + var2/n2)^2
dendf = (var1/n1)^2 / df1 + (var2/n2)^2 / df2
df = numdf / dendf
shadet(df = df, pcts = c(.05,.95))
lines(rep(t,10), seq(0,1,length.out=10),col='red')
p_value = 2 * ( 1 - pt(q = t, df = numdf/dendf))
p_value
## [1] 0.2160473
myp(p = p_value,alpha)
## [1] "FAIL 2 REJECT"
p_value_robust = 2 * ( 1 - pt(q = t, df = min(df1, df2)))
p_value_robust
## [1] 0.220848
myp(p = p_value_robust, alpha )
## [1] "FAIL 2 REJECT"
Fail to reject null
# Ho: xbar1 - xbar2 = 0,
# Ha: xbar1 - xbar2 <> 0
alpha = 0.05
xbar1 = 127
xbar2 = 157
n1 = 11
n2 = 18
df1 = n1-1
df2 = n2-1
s1 = 33
s2 = 27
var1 = s1^2
var2 = s2^2
numdf = (var1 / n1 + var2 / n2)^2
dendf = (var1 / n1)^2 / df1 +
(var2 / n2)^2 / df2
df = numdf / dendf
df
## [1] 18.0759
delta = xbar1 - xbar2
delta
## [1] -30
t = qt(p = 0.975, df = df)
t
## [1] 2.10029
Se = sqrt( var1/n1 + var2/n2 )
Se
## [1] 11.81101
interval = c( delta - t * Se , delta + t * Se )
interval
## [1] -54.806548 -5.193452
CI is -54.806548 and -5.193452.
pval = 2*(pt(q = delta/Se, df = numdf/dendf))
pval
## [1] 0.02048034
myp(p = pval,alpha)
## [1] "REJECT Ho"
pval2 = 2*( pt(q = delta/Se, df = min(df1, df2)))
pval2
## [1] 0.02936303
myp(p = pval2,alpha)
## [1] "REJECT Ho"
Day
Mo Tu Wed Th F Mo Tu Wed Th F
Route I
32 27 34 24 31 25 30 23 27 35
Route II
28 28 33 25 26 29 33 27 25 33
Using this data, find the 98% confidence interval for the true mean difference between the average travel time for route I and the average travel time for route II.
Let π = (πππ’π‘π πΌ π‘πππ£ππ π‘πππ) β (πππ’π‘π πΌπΌ π‘πππ£ππ π‘πππ). Assume that the populations of travel times are normally distributed for both routes. Show all work and hypothesis testing steps.
r1 <- c(32, 27, 34, 24, 31, 25, 30, 23, 27, 35)
r2 <- c(28, 28, 33, 25, 26, 29, 33, 27, 25, 33)
alpha <- 0.02
xbar1 <- mean(r1)
xbar2 <- mean(r2)
n1 <- 10
n2 <- 10
df1 <- n1 - 1
df2 <- n2 - 1
?sd
s1 <- sd(r1)
s2 <- sd(r2)
var1 <- s1^2
var2 <- s2^2
Se = sqrt(var1/n1 + var2/n2)
Se
## [1] 1.678955
delta <- xbar1-xbar2
delta
## [1] 0.1
t = qt(p = alpha/2, df = min(df1,df2))
t
## [1] -2.821438
t = qt(p = 1-alpha/2, df = min(df1,df2))
t
## [1] 2.821438
interval = c( delta - t * Se , delta + t * Se )
interval
## [1] -4.637066 4.837066
numdf = (var1/n1 + var2/n2)^2
dendf = (var1/n1)^2 / df1 + (var2/n2)^2 / df2
df = numdf / dendf
t1 <- qt(p = (1-alpha/2), df = df)
t1
## [1] 2.568883
interval = c(delta - t1 * Se ,
delta + t1 * Se )
interval
## [1] -4.213039 4.413039
# Ho: p1 - p2 <= 0,
# Ha: p1 - p2 > 0
alpha=0.05
n1 = 391
n2 = 510
x1 = 195
x2 = 193
p1 = x1/n1
p2 = x2/n2
p1-p2
## [1] 0.1202899
Se = sqrt (p1 * (1-p1) / n1 + p2 * (1-p2) / n2)
Se
## [1] 0.03317529
Z = (p1 - p2) / Se
Z
## [1] 3.625887
p = pnorm(q = Z,
lower.tail = FALSE)
p
## [1] 0.0001439855
myp(p, alpha)
## [1] "REJECT Ho"
Reject null.