# Clearing workspace: start the session from a clean state.
rm(list = ls()) # Remove every object listed by ls() from the current environment
gc() # Run garbage collection; the memory usage table it returns is echoed below
## used (Mb) gc trigger (Mb) max used (Mb)
## Ncells 524301 28.1 1167757 62.4 660491 35.3
## Vcells 955822 7.3 8388608 64.0 1769514 13.6
cat("\f") # Print a form-feed character (clears the console in front ends that honor it)
# Report the decision of a hypothesis test.
#
# Arguments:
#   p      P-value of the test.
#   alpha  Significance level.
#
# Prints, and invisibly returns, "REJECT Ho" when `p` is strictly below
# `alpha`, and "FAIL 2 REJECT" otherwise.
myp <- function(p, alpha) {
  decision <- if (p < alpha) "REJECT Ho" else "FAIL 2 REJECT"
  print(decision)
}
# Test 1: a p-value below alpha must report a rejection of Ho
myp(.01, .05) # p is less than alpha
## [1] "REJECT Ho"
# Test 2: a p-value above alpha must report a failure to reject Ho
myp(.1, .05) # p is greater than alpha
## [1] "FAIL 2 REJECT"
# Plot a normal density curve and shade selected regions under it.
#
# Arguments:
#   below, above  Cut points: the area left of `below` and right of `above`
#                 is shaded. When NULL (and `between` is NULL) they default
#                 to the `pcts` quantiles of the distribution.
#   pcts          Probabilities for the default cut points: pcts[1] gives
#                 `below`, pcts[2] gives `above`. With a single value only
#                 the lower tail is shaded.
#   mu, sig       Mean and standard deviation of the normal distribution.
#   numpts        Number of grid points used to draw the curve.
#   color, dens   Passed to polygon() as `col` and `density`.
#   justabove     If TRUE, the lower tail is not shaded.
#   justbelow     If TRUE, the upper tail is not shaded.
#   lines         If TRUE, add the curve to the current plot instead of
#                 starting a new one.
#   between       Optional range; the area strictly inside it is shaded.
#   outside       Optional range; overrides `below`/`above` with its min/max.
#
# Called for its plotting side effect; returns NULL invisibly.
shadenorm <- function(below = NULL, above = NULL, pcts = c(0.025, 0.975),
                      mu = 0, sig = 1, numpts = 500, color = "gray",
                      dens = 40, justabove = FALSE, justbelow = FALSE,
                      lines = FALSE, between = NULL, outside = NULL) {
  # Cut points default to the `pcts` quantiles unless only `between` is wanted.
  if (is.null(between)) {
    below <- if (is.null(below)) qnorm(pcts[1], mu, sig) else below[1]
    above <- if (is.null(above)) qnorm(pcts[2], mu, sig) else above[1]
  }
  if (!is.null(outside)) {
    below <- min(outside)
    above <- max(outside)
  }

  # Evaluate the density on a grid spanning four standard deviations each way.
  x_grid <- seq(mu - 4 * sig, mu + 4 * sig, length.out = numpts)
  dens_all <- dnorm(x_grid, mean = mu, sd = sig)
  if (lines) {
    lines(x_grid, dens_all)
  } else {
    plot(x_grid, dens_all, type = "l", xlab = "X", ylab = "Density")
  }

  # Shade the area under the curve over the grid points flagged by `keep`.
  # which() drops NA flags, so an undefined cut point (for example a missing
  # second element of `pcts`) shades nothing instead of feeding NAs to
  # polygon().
  shade <- function(keep) {
    idx <- which(keep)
    if (length(idx) > 0) {
      xs <- x_grid[idx]
      polygon(c(xs, rev(xs)), c(rep(0, length(xs)), rev(dens_all[idx])),
              col = color, density = dens)
    }
    invisible(NULL)
  }

  if (!justabove && !is.null(below)) shade(x_grid < below)
  if (!justbelow && !is.null(above)) shade(x_grid > above)
  if (!is.null(between)) {
    shade(x_grid > min(between) & x_grid < max(between))
  }
  invisible(NULL)
}
# TEST THE FUNCTION: standard normal curve with the tails beyond the 2.5% and 97.5% quantiles shaded
shadenorm(mu = 0, sig = 1, pcts = c(0.025,0.975))
# Plot a Student t density curve on [-4, 4] and shade selected regions.
#
# Arguments:
#   below, above  Cut points: the area left of `below` and right of `above`
#                 is shaded. When NULL (and `between` is NULL) they default
#                 to the `pcts` quantiles of the distribution.
#   pcts          Probabilities for the default cut points: pcts[1] gives
#                 `below`, pcts[2] gives `above`. With a single value only
#                 the lower tail is shaded.
#   df            Degrees of freedom of the t distribution.
#   numpts        Number of grid points used to draw the curve.
#   color, dens   Passed to polygon() as `col` and `density`.
#   justabove     If TRUE, the lower tail is not shaded.
#   justbelow     If TRUE, the upper tail is not shaded.
#   lines         If TRUE, add the curve to the current plot instead of
#                 starting a new one.
#   between       Optional range; the area strictly inside it is shaded.
#   outside       Optional range; overrides `below`/`above` with its min/max.
#
# Called for its plotting side effect; returns NULL invisibly.
shadet <- function(below = NULL, above = NULL, pcts = c(0.025, 0.975),
                   df = 1, numpts = 500, color = "gray", dens = 40,
                   justabove = FALSE, justbelow = FALSE, lines = FALSE,
                   between = NULL, outside = NULL) {
  # Cut points default to the `pcts` quantiles unless only `between` is wanted.
  if (is.null(between)) {
    below <- if (is.null(below)) qt(pcts[1], df) else below[1]
    above <- if (is.null(above)) qt(pcts[2], df) else above[1]
  }
  if (!is.null(outside)) {
    below <- min(outside)
    above <- max(outside)
  }

  # The curve is always drawn on the fixed range [-4, 4].
  x_grid <- seq(-4, 4, length.out = numpts)
  dens_all <- dt(x_grid, df)
  if (lines) {
    lines(x_grid, dens_all)
  } else {
    plot(x_grid, dens_all, type = "l", xlab = "X", ylab = "Density")
  }

  # Shade the area under the curve over the grid points flagged by `keep`.
  # which() drops NA flags, so an undefined cut point (for example a missing
  # second element of `pcts`) shades nothing instead of feeding NAs to
  # polygon().
  shade <- function(keep) {
    idx <- which(keep)
    if (length(idx) > 0) {
      xs <- x_grid[idx]
      polygon(c(xs, rev(xs)), c(rep(0, length(xs)), rev(dens_all[idx])),
              col = color, density = dens)
    }
    invisible(NULL)
  }

  if (!justabove && !is.null(below)) shade(x_grid < below)
  if (!justbelow && !is.null(above)) shade(x_grid > above)
  if (!is.null(between)) {
    shade(x_grid > min(between) & x_grid < max(between))
  }
  invisible(NULL)
}
# TEST THE FUNCTION
shadet(df = 4, pcts = c(0.025,0.975)) # t curve with 4 df: see how the shaded 2.5% tails start further away from the mean 0
# Plot a chi-squared density curve and shade selected regions under it.
# The curve is drawn from 0 up to the 99th percentile of the distribution.
#
# Arguments:
#   below, above  Cut points: the area left of `below` and right of `above`
#                 is shaded. When NULL (and `between` is NULL) they default
#                 to the `pcts` quantiles of the distribution.
#   pcts          Probabilities for the default cut points: pcts[1] gives
#                 `below`, pcts[2] gives `above`. With a single value only
#                 the lower tail is shaded.
#   df            Degrees of freedom of the chi-squared distribution.
#   numpts        Number of grid points used to draw the curve.
#   color, dens   Passed to polygon() as `col` and `density`.
#   justabove     If TRUE, the lower tail is not shaded.
#   justbelow     If TRUE, the upper tail is not shaded.
#   lines         If TRUE, add the curve to the current plot instead of
#                 starting a new one.
#   between       Optional range; the area strictly inside it is shaded.
#   outside       Optional range; overrides `below`/`above` with its min/max.
#
# Called for its plotting side effect; returns NULL invisibly.
shadechi <- function(below = NULL, above = NULL, pcts = c(0.025, 0.975),
                     df = 1, numpts = 500, color = "gray", dens = 40,
                     justabove = FALSE, justbelow = FALSE, lines = FALSE,
                     between = NULL, outside = NULL) {
  # Cut points default to the `pcts` quantiles unless only `between` is wanted.
  if (is.null(between)) {
    below <- if (is.null(below)) qchisq(pcts[1], df) else below[1]
    above <- if (is.null(above)) qchisq(pcts[2], df) else above[1]
  }
  if (!is.null(outside)) {
    below <- min(outside)
    above <- max(outside)
  }

  # Grid from 0 to the 99th percentile, so the far right tail is cut off.
  x_grid <- seq(0, qchisq(.99, df), length.out = numpts)
  dens_all <- dchisq(x_grid, df)
  if (lines) {
    lines(x_grid, dens_all)
  } else {
    plot(x_grid, dens_all, type = "l", xlab = "X", ylab = "Density")
  }

  # Shade the area under the curve over the grid points flagged by `keep`.
  # which() drops NA flags, so an undefined cut point (for example a missing
  # second element of `pcts`) shades nothing instead of feeding NAs to
  # polygon().
  shade <- function(keep) {
    idx <- which(keep)
    if (length(idx) > 0) {
      xs <- x_grid[idx]
      polygon(c(xs, rev(xs)), c(rep(0, length(xs)), rev(dens_all[idx])),
              col = color, density = dens)
    }
    invisible(NULL)
  }

  if (!justabove && !is.null(below)) shade(x_grid < below)
  if (!justbelow && !is.null(above)) shade(x_grid > above)
  if (!is.null(between)) {
    shade(x_grid > min(between) & x_grid < max(between))
  }
  invisible(NULL)
}
# TEST THE FUNCTION
shadechi(df = 2, pcts=c(.05)) # only one percentile is given, so pcts[2] is NA and just the lower 5% tail is shaded; change pcts and see what happens
Null and Alternative Hypothesis
Ho (Null): The mean number of hours needed to obtain a driver's license with the new computer-aided instruction (CAI) training method IS equal to the mean number of hours needed with the original traditional method.
Ha (Alternative): The mean number of hours needed to obtain a driver's license with the new CAI training method is NOT equal to the mean number of hours needed with the original traditional method.
Level of Significance: 0.05
Test Statistic: Z, since the population standard deviation is known; the test is two-sided.
# Setting our Parameters
n1 <- 190 # Sample size
pm1 <- 109 # Hypothesized population mean
sm1 <- 110 # Sample mean
sd1 <- 6 # Population standard deviation (known)
a1 <- 0.05 # Alpha (significance level)
# Computing Z Score: distance of the sample mean from the hypothesized mean,
# measured in standard errors
Z1 <- (sm1 - pm1) / (sd1 / sqrt(n1))
Z1
## [1] 2.297341
# Computing the two-sided P Value. Using -abs(Z1) keeps the result valid
# whichever side of the hypothesized mean the sample mean falls on; the
# previous 2 * (1 - pnorm(Z1)) form exceeds 1 for a negative Z.
p.value <- 2 * pnorm(q = -abs(Z1),
                     mean = 0,
                     sd = 1)
round(p.value, digits = 4)
## [1] 0.0216
As we can see here, this p-value is less than our alpha, but just to double-check, let's pass it to the rejection function we set up.
# Testing Rejection: compare the two-sided p-value with alpha
myp(p = p.value,
alpha = a1 )
## [1] "REJECT Ho"
As we can see above, we would reject our NULL hypothesis. This tells us that the two means are NOT equal.
# Plotting This Distribution: normal curve centered at 109 with standard
# deviation 6 / sqrt(190); both 2.5% tails are shaded in blue.
shadenorm(mu = 109, sig = 6 / sqrt(190), pcts = c(0.025, 0.975), color = "blue")
# Vertical green segment from y = 0 to y = 1 at x = 110, marking the point
# estimate from the sample.
lines(x = c(110, 110), y = c(0, 1), col = "green")
As we can see from this graph, the green line marking our sample mean falls inside the shaded rejection region as well.
Null and Alternative Hypothesis:
Ho (Null Hypothesis): The level of the Ozone is equal to 5.3 parts/million (ppm).
Ha: The level of the Ozone is less than 5.3 parts/million (ppm), i.e. it is at an insufficient level.
Significance Level: 0.05
Test Statistic: T, since the population standard deviation is not known and there are fewer than 30 observations. The test is one-sided (lower tail).
# Setting our Parameters
n2 <- 5 # Sample size
pm2 <- 5.3 # Hypothesized population mean
sm2 <- 5.0 # Sample mean
sd2 <- 1.1 # Sample standard deviation
a2 <- 0.05 # Alpha (significance level)
# Computing the test statistic. It is compared against a t distribution
# below, so it is a t statistic even though the variable is named Z2.
Z2 <- (sm2 - pm2) / (sd2 / sqrt(n2))
Z2
## [1] -0.6098367
# Computing the lower-tail P Value from the t distribution with n2 - 1
# degrees of freedom. (The interactive `?pt` help lookup was removed: it has
# no place in a script that is meant to run unattended.)
P2 <- pt(q = Z2,
         df = n2 - 1,
         lower.tail = TRUE)
round(P2, digits = 4)
## [1] 0.2875
# Testing Rejection: compare the lower-tail p-value with alpha
myp(p = P2,
alpha = a2 )
## [1] "FAIL 2 REJECT"
As seen above, our p-value of 0.2875 is larger than the 0.05 significance level. Plugging it into our rejection function confirms that we fail to reject our null hypothesis. This means we do not have sufficient evidence that the Ozone level is at an insufficient level (below 5.3 parts/million), so based on our question we can treat the level as sufficient.
# Plotting This: the p-value above is a lower-tail (one-sided) probability,
# so the rejection region is the lower a2 tail only. justbelow = TRUE keeps
# the upper tail unshaded.
shadet(df = n2 - 1,
       pcts = c(a2, 1 - a2),
       justbelow = TRUE) # shades the one-sided significance level gate
# Mark the observed t statistic with a vertical green line, as the other
# plots in this file do for their sample estimates.
lines(x = rep(Z2, 2),
      y = c(0, 1),
      col = "green")
Null and Alternative Hypothesis:
Ho: The level of the Ozone is 7.3 parts/million (ppm)
Ha: The level of the Ozone is not at a normal level, i.e. it is not equal to 7.3 ppm
Significance Level: 0.01
Test Statistic: T distribution since we do not know the standard deviation.
# Setting our Parameters
n3 <- 51 # Sample size
pm3 <- 7.3 # Hypothesized population mean
sm3 <- 7.1 # Sample mean
var3 <- 1.1 # Variance
a3 <- 0.01 # Alpha (significance level)
# Computing the Standard Deviation from the variance
sd3 <- sqrt(var3)
sd3
## [1] 1.048809
# Computing Standard Error of the mean
se3 <- sd3 / sqrt(n3)
se3
## [1] 0.1468626
# Computing the test statistic. It is compared against a t distribution
# below, so it is a t statistic even though the variable is named z3.
z3 <- (sm3 - pm3) / se3
z3
## [1] -1.361817
# Calculating the two-sided P Value. Using -abs(z3) keeps the result valid
# for a statistic of either sign; 2 * pt(z3, lower.tail = TRUE) would exceed
# 1 for a positive statistic.
p3 <- 2 * pt(q = -abs(z3),
             df = n3 - 1,
             lower.tail = TRUE)
p3
## [1] 0.1793602
# Testing Rejection: compare the two-sided p-value with alpha
myp(p = p3,
alpha = a3)
## [1] "FAIL 2 REJECT"
We fail to reject our NULL hypothesis after doing our calculations. Our p-value of 0.18 is greater than our significance level of 0.01, so we do not have sufficient evidence that the Ozone level differs from 7.3 ppm.
Null and Alternative Hypothesis
Ho: Our null hypothesis is 36% or more of readers own a laptop
Ha: Our alternative hypothesis is that less than 36% of readers own a laptop
Level of significance: 0.02
Test Statistic: Z distribution but need to find the standard deviation/error on our own
# Setting our Parameters
n4 <- 100 # Sample size
pm4 <- 0.36 # Hypothesized population proportion
sm4 <- 0.29 # Sample proportion
a4 <- 0.02 # Alpha (significance level)
# Calculating Standard Error of the proportion under the hypothesized value
se4 <- sqrt((pm4 * (1 - pm4)) / n4)
se4
## [1] 0.048
# Calculating Z Score: sample proportion minus hypothesized proportion, in
# standard errors
z4 <- (sm4 - pm4) / se4
z4
## [1] -1.458333
# Calculating P Value: lower-tail probability of the standard normal
p4 <- pnorm(q = z4, mean = 0, sd = 1, lower.tail = TRUE)
p4
## [1] 0.07237434
# Testing Rejection: compare the lower-tail p-value with alpha
myp(p = p4,
alpha = a4)
## [1] "FAIL 2 REJECT"
We again fail to reject our null hypothesis. Our p-value is 0.072, which is clearly above our significance level of 0.02. We therefore do not have sufficient evidence that fewer than 36% of the publisher's readers own a laptop.
# Plotting This: normal curve centered at .36 with standard deviation se4.
# Only one percentile is supplied, so just the lower 2% tail is shaded.
shadenorm(mu = 0.36, sig = se4, pcts = c(0.02), color = "lightblue")
# Vertical green segment from y = 0 to y = 20 at x = .29.
lines(x = c(0.29, 0.29), y = c(0, 20), col = "green")
Null and Alternative Hypothesis
Ho: The null hypothesis is that 31% or more of patients are uninsured.
Ha: The alternative hypothesis is that less than 31% of patients are uninsured.
Level of Significance: 0.05
Test Statistic: Z distribution
# Setting Our Parameters
n5 <- 380 # Sample size
pm5 <- 0.31 # Hypothesized population proportion
amt5 <- 95 # Number of people in the sample who are uninsured
sm5 <- amt5 / n5 # Sample proportion
a5 <- 0.05 # Alpha (significance level)
# Calculating Standard Error of the proportion under the hypothesized value
se5 <- sqrt((pm5 * (1 - pm5)) / n5)
se5
## [1] 0.0237254
# Calculating Z Score: sample proportion minus hypothesized proportion, in
# standard errors
z5 <- (sm5 - pm5) / se5
z5
## [1] -2.528935
# Calculating P Value: lower-tail probability of the standard normal
p5 <- pnorm(q = z5, mean = 0, sd = 1, lower.tail = TRUE)
p5
## [1] 0.005720462
# Testing Rejection: compare the lower-tail p-value with alpha
myp(p = p5,
alpha = a5)
## [1] "REJECT Ho"
As we can see from the above, we can reject our NULL hypothesis, meaning that we can conclude that less than 31% of patients are uninsured. We confirmed this by plugging the p-value into our rejection function, which told us we can reject Ho. In this problem, we were not given the sample % directly but could easily calculate it from our sample statistics, so I included it in the initial parameters for this question.
# Plotting This: normal curve centered at .31 with standard deviation se5.
# Only one percentile is supplied, so just the lower 5% tail is shaded.
shadenorm(mu = 0.31, sig = se5, pcts = c(0.05), color = "lightblue")
# Vertical green segment from y = 0 to y = 20 at the sample proportion.
lines(x = c(sm5, sm5), y = c(0, 20), col = "green")
Null and Alternative Hypothesis
Ho: The null hypothesis is that standard deviation of test scores did not decrease from 24
Ha: The alternative hypothesis is that the standard deviation of test scores did decrease from 24
Level of significance: 0.1
Test Statistic: Chi-Squared
# Setting up Parameters
hist.mean <- 112 # Historical Mean
hist.sd <- 24 # Historical standard deviation
n6 <- 22 # Sample size
s.mean <- 102 # Sample mean
s.std <- 15.4387 # Sample Standard Deviation
a6 <- 0.1 # Alpha (significance level)
# Compute Test Statistic: (n - 1) * s^2 / sigma0^2
chi_square6 <- ((n6 - 1) * s.std^2) / hist.sd^2
chi_square6
## [1] 8.68997
# Determining Critical Value. The alternative is that the standard deviation
# DECREASED, so this is a lower-tail test: Ho is rejected when the statistic
# falls BELOW the a6 quantile of the chi-squared distribution. The upper
# 1 - a6 quantile used previously belongs to an upper-tail test and reverses
# the decision.
critical_value6 <- qchisq(a6,
                          df = n6 - 1
)
critical_value6 # roughly 13.24 for 21 degrees of freedom
# Lower-tail P Value for the same test
p6 <- pchisq(chi_square6,
             df = n6 - 1)
p6
# Decision: TRUE means the statistic lies in the rejection region
chi_square6 < critical_value6
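As we can see above, we calculated both the test statistic and a critical value. Our test statistic was 8.68997. Because Ha states that the standard deviation decreased, this is a lower-tail test, so the statistic must be compared with the lower 10% critical value, qchisq(0.1, df = 21) ≈ 13.24, rather than with the upper-tail value 29.61509. Since our test statistic is less than 13.24, it falls in the rejection region and we REJECT our null hypothesis. This tells us there is evidence that the standard deviation of test scores decreased from 24.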
# Plotting This: chi-squared curve with n6 - 1 degrees of freedom. Only one
# percentile is supplied, so just the lower 10% tail is shaded.
shadechi(df = n6 - 1, pcts = c(0.10), color = "lightblue")
Null and Alternative Hypothesis
Ho: The pulse rate for smokers and non-smokers is not different
Ha: The pulse rate for smokers and non-smokers is different
Level of Significance: 0.1
Test Statistic: T
# Setting our Parameters
# Smokers
n.smoke <- 32 # Sample size, smokers
m.smoke <- 87 # Sample mean, smokers
sd.smoke <- 9 # Sample standard deviation, smokers
# Non Smokers
n.nonsmoke <- 31 # Sample size, non smokers
m.nonsmoke <- 84 # Sample mean, non smokers
sd.nonsmoke <- 10 # Sample standard deviation, non smokers
# Total
n7 <- n.smoke + n.nonsmoke # Total sample size
a7 <- 0.1 # Alpha (significance level)
# Calculating Variance of each group
var.smoke <- sd.smoke^2
var.nonsmoke <- sd.nonsmoke^2
# Standard Error of the difference in means (variances not pooled)
se7 <- sqrt((var.smoke / n.smoke) + (var.nonsmoke / n.nonsmoke))
se7
## [1] 2.399387
# t statistic for the difference in means.
# NOTE: the variable name `t` masks base R's t() as a value; calls to t()
# still resolve to the function.
t <- (m.smoke - m.nonsmoke) / se7
t
## [1] 1.25032
# Two-sided P Value. abs(t) keeps the result valid for a statistic of either
# sign. The degrees of freedom are the smaller group size minus one.
p7 <- 2 * pt(abs(t),
             df = min(n.smoke, n.nonsmoke) - 1,
             lower.tail = FALSE)
p7
## [1] 0.220848
# Testing Rejection: compare the two-sided p-value with alpha
myp(p = p7,
alpha = a7)
## [1] "FAIL 2 REJECT"
Per our calculations, we fail to reject our NULL hypothesis. In this case, we do not have sufficient evidence that the pulse rate is different for smokers and non-smokers.
$n_1 = 11$, $\bar{x}_1 = 127$, $s_1 = 33$, $n_2 = 18$, $\bar{x}_2 = 157$, $s_2 = 27$
Null and Alternative Hypothesis
Ho: The Population variances are not equal
Ha: The population variances are equal
Level of Significance: 0.05
Test Statistic: T distribution
# Setting Up Parameters (all listed in our question)
n8.1 <- 11 # Size of sample 1
xbar1 <- 127 # Mean of sample 1
sigma1 <- 33 # Standard deviation of sample 1
n8.2 <- 18 # Size of sample 2
xbar2 <- 157 # Mean of sample 2
sigma2 <- 27 # Standard deviation of sample 2
alpha <- 0.05 # 1 - confidence level
var1 <- sigma1^2
var2 <- sigma2^2
# Degrees of freedom: the smaller sample size minus one
df <- min(n8.1 - 1, n8.2 - 1)
mdiff <- xbar1 - xbar2 # Difference in sample means
# Standard error of the difference (variances not pooled)
se8 <- sqrt((var1 / n8.1) + (var2 / n8.2))
tstat <- mdiff / se8
# Critical t value for a two-sided interval, taken from `alpha` instead of a
# hard-coded 0.025 so the two cannot drift apart
t8 <- qt(alpha / 2,
         df,
         lower.tail = FALSE
)
margin.error <- t8 * se8
low.b <- mdiff - margin.error
up.b <- mdiff + margin.error
# CI
cat("95% Confidence interval:", round(low.b, 4), ",", round(up.b, 4))
## 95% Confidence interval: -56.3166 , -3.6834
We can see here that our 95% confidence interval for the difference between the two means is (-56.3166, -3.6834).
# Create data sets for each route
route1 <- c(32, 27, 34, 24, 31, 25, 30, 23, 27, 35)
route2 <- c(28, 28, 33, 25, 26, 29, 33, 27, 25, 33)
# Setting up parameters
# Define Variables: sizes come from the data instead of being typed in, so
# they stay correct if observations are added or removed
n9.1 <- length(route1)
xbar1.9 <- mean(route1)
s1 <- sd(route1)
var9.1 <- s1^2
n9.2 <- length(route2)
xbar2.9 <- mean(route2)
s2 <- sd(route2)
var9.2 <- s2^2
# Degrees of freedom: the smaller sample size minus one
df <- min(n9.1, n9.2) - 1
a9 <- 0.02 # 1 - confidence level
mdiff9 <- xbar1.9 - xbar2.9 # Difference in sample means
# Standard error of the difference (variances not pooled)
se9 <- sqrt((var9.1 / n9.1) + (var9.2 / n9.2))
t9 <- mdiff9 / se9
# Critical t value for a two-sided interval, taken from `a9` instead of a
# hard-coded .01
tdf <- qt(p = a9 / 2,
          df,
          lower.tail = FALSE
)
up.b9 <- mdiff9 + (tdf * se9)
low.b9 <- mdiff9 - (tdf * se9)
cat("(Lower: ",low.b9,", Upper:",up.b9,")")
## (Lower: -4.637066 , Upper: 4.837066 )
Null and Alternative Hypothesis
Ho: The Null hypothesis is that the percentage of unemployed workers is more than employed workers.
Ha: The alternate hypothesis is the percentage of employed workers is more than unemployed workers.
# Setting the Parameters
# NOTE(review): the hypotheses for this problem are stated in terms of
# percentages, yet 195 and 193 are used below as group means with a standard
# deviation of 1. If they are counts out of 391 and 510, a two-proportion
# z test would be the appropriate method - confirm against the question.
n1.10 <- 391 # Sample size, employed group
xbar1.10 <- 195
s1.10 <- 1
var1.10 <- s1.10^2
n2.10 <- 510 # Sample size, unemployed group
xbar2.10 <- 193
s2.10 <- 1
var2.10 <- s2.10^2
# Degrees of freedom: the smaller group size minus one
df <- min(n1.10-1,n2.10-1)
a.10 <- 0.05
# Difference between the two group values and its standard error
mdiff.10 <- xbar1.10 - xbar2.10
se.10 <- sqrt((var1.10/n1.10)+(var2.10/n2.10))
t.10 <- mdiff.10/se.10
# Upper 2.5% critical t value, giving a two-sided 95% interval
tdf.10<-qt(p=.025,
df,
lower.tail=FALSE
)
up.b10<- mdiff.10 + (tdf.10*se.10)
low.b10<- mdiff.10 - (tdf.10*se.10)
cat("(Lower: ",low.b10,", Upper:",up.b10,")")
## (Lower: 1.867844 , Upper: 2.132156 )
# Two-sided p-value: twice the upper-tail probability of t.10.
# NOTE(review): the stated hypotheses read as one-sided - confirm which is intended.
p.10<-2*pt(t.10,
df,
lower.tail = FALSE)
p.10
## [1] 2.234708e-102
# Testing Rejection: compare the p-value with alpha
myp(p = p.10,
alpha = a.10)
## [1] "REJECT Ho"