Function to Reject or Not

# Report the hypothesis-test decision for a p-value.
#   p      p-value (a vector is handled element by element)
#   alpha  significance level
# Returns a noquote character vector: 'REJECT Ho' where p < alpha,
# otherwise 'FAIL 2 REJECT'.
myp <- function(p, alpha) {
  # Plain vectorised ifelse: calling return() inside the ifelse branches made
  # the function collapse any vector of p-values to a single verdict.
  noquote(ifelse(p < alpha, 'REJECT Ho', 'FAIL 2 REJECT'))
}

Function for Shading Normal

# Plot a normal density curve and shade selected regions under it.
#
# Arguments:
#   below, above  Cutoffs for the lower / upper shaded tails. When NULL (and
#                 `between` is not given) they default to the normal
#                 quantiles at pcts[1] and pcts[2].
#   pcts          Tail probabilities used for the default cutoffs. If only one
#                 value is supplied there is no upper cutoff, so only the
#                 lower tail is shaded.
#   mu, sig       Mean and standard deviation of the distribution.
#   numpts        Number of grid points used to draw the curve.
#   color, dens   Passed to polygon() as `col` and `density`.
#   justabove     If TRUE, the lower tail is not shaded.
#   justbelow     If TRUE, the upper tail is not shaded.
#   lines         If TRUE, add the curve to the current plot instead of
#                 starting a new one.
#   between       Shade the area between min(between) and max(between).
#   outside       Shade the tails outside min(outside) and max(outside);
#                 overrides below/above.
# Returns NULL invisibly; the function is called for its plotting side effect.
shadenorm <- function(below=NULL, above=NULL,
                      pcts = c(0.025,0.975), mu=0, sig=1,
                      numpts = 500, color = "gray", dens = 40,
                      justabove= FALSE, justbelow = FALSE,
                      lines=FALSE,between=NULL,outside=NULL)
  {

    # Default the tail cutoffs from pcts unless a `between` region was asked for.
    if (is.null(between)) {
        if (is.null(below)) below <- qnorm(pcts[1], mu, sig)
        if (is.null(above)) above <- qnorm(pcts[2], mu, sig)
    }
    if (!is.null(outside)) {
        below <- min(outside)
        above <- max(outside)
    }

    # Evaluate the density on a grid spanning mu +/- 4 standard deviations.
    lowlim <- mu - 4*sig
    uplim  <- mu + 4*sig
    x.grid <- seq(lowlim, uplim, length.out = numpts)
    dens.all <- dnorm(x.grid, mean = mu, sd = sig)

    # `lines` the argument is a logical; the call lines(...) still resolves to
    # the graphics function because R skips non-functions when looking up a call.
    if (lines) {
        lines(x.grid, dens.all)
    } else {
        plot(x.grid, dens.all, type="l", xlab="X", ylab="Density")
    }

    # Shade the grid points flagged by `keep`. which() drops NA and FALSE, so
    # a missing (NULL) or NA cutoff simply results in nothing being drawn.
    shade <- function(keep) {
        idx <- which(keep)
        if (length(idx) == 0) return(invisible(NULL))
        xs <- x.grid[idx]
        polygon(c(xs, rev(xs)),
                c(rep(0, length(xs)), rev(dens.all[idx])),
                col = color, density = dens)
    }

    if (!justabove) shade(x.grid < below[1])
    if (!justbelow) shade(x.grid > above[1])
    if (!is.null(between)) {
        shade(x.grid > min(between) & x.grid < max(between))
    }
    invisible(NULL)
}

Function for Shading t

# Plot a Student t density curve and shade selected regions under it.
#
# Arguments:
#   below, above  Cutoffs for the lower / upper shaded tails. When NULL (and
#                 `between` is not given) they default to the t quantiles at
#                 pcts[1] and pcts[2].
#   pcts          Tail probabilities used for the default cutoffs. If only one
#                 value is supplied there is no upper cutoff, so only the
#                 lower tail is shaded.
#   df            Degrees of freedom.
#   numpts        Number of grid points used to draw the curve.
#   color, dens   Passed to polygon() as `col` and `density`.
#   justabove     If TRUE, the lower tail is not shaded.
#   justbelow     If TRUE, the upper tail is not shaded.
#   lines         If TRUE, add the curve to the current plot instead of
#                 starting a new one.
#   between       Shade the area between min(between) and max(between).
#   outside       Shade the tails outside min(outside) and max(outside);
#                 overrides below/above.
# Returns NULL invisibly; the function is called for its plotting side effect.
shadet <- function(below=NULL, above=NULL, pcts = c(0.025,0.975), df=1,
                   numpts = 500, color = "gray", dens = 40,
                   justabove= FALSE, justbelow = FALSE,
                   lines=FALSE,between=NULL,outside=NULL){

    # Default the tail cutoffs from pcts unless a `between` region was asked for.
    if (is.null(between)) {
        if (is.null(below)) below <- qt(pcts[1], df)
        if (is.null(above)) above <- qt(pcts[2], df)
    }
    if (!is.null(outside)) {
        below <- min(outside)
        above <- max(outside)
    }

    # The curve is always drawn on the fixed range [-4, 4].
    lowlim <- -4
    uplim  <- 4
    x.grid <- seq(lowlim, uplim, length.out = numpts)
    dens.all <- dt(x.grid, df)

    # `lines` the argument is a logical; the call lines(...) still resolves to
    # the graphics function because R skips non-functions when looking up a call.
    if (lines) {
        lines(x.grid, dens.all)
    } else {
        plot(x.grid, dens.all, type="l", xlab="X", ylab="Density")
    }

    # Shade the grid points flagged by `keep`. which() drops NA and FALSE, so
    # a missing (NULL) or NA cutoff simply results in nothing being drawn.
    shade <- function(keep) {
        idx <- which(keep)
        if (length(idx) == 0) return(invisible(NULL))
        xs <- x.grid[idx]
        polygon(c(xs, rev(xs)),
                c(rep(0, length(xs)), rev(dens.all[idx])),
                col = color, density = dens)
    }

    if (!justabove) shade(x.grid < below[1])
    if (!justbelow) shade(x.grid > above[1])
    if (!is.null(between)) {
        shade(x.grid > min(between) & x.grid < max(between))
    }
    invisible(NULL)
}

Function for Shading Chi Square

# Plot a chi-square density curve and shade selected regions under it.
#
# Arguments:
#   below, above  Cutoffs for the lower / upper shaded tails. When NULL (and
#                 `between` is not given) they default to the chi-square
#                 quantiles at pcts[1] and pcts[2].
#   pcts          Tail probabilities used for the default cutoffs. If only one
#                 value is supplied there is no upper cutoff, so only the
#                 lower tail is shaded.
#   df            Degrees of freedom.
#   numpts        Number of grid points used to draw the curve.
#   color, dens   Passed to polygon() as `col` and `density`.
#   justabove     If TRUE, the lower tail is not shaded.
#   justbelow     If TRUE, the upper tail is not shaded.
#   lines         If TRUE, add the curve to the current plot instead of
#                 starting a new one.
#   between       Shade the area between min(between) and max(between).
#   outside       Shade the tails outside min(outside) and max(outside);
#                 overrides below/above.
# Returns NULL invisibly; the function is called for its plotting side effect.
shadechi <- function(below=NULL, above=NULL, pcts = c(0.025,0.975), df=1,
                     numpts = 500, color = "gray", dens = 40,
                     justabove= FALSE, justbelow = FALSE,
                     lines=FALSE,between=NULL,outside=NULL){

    # Default the tail cutoffs from pcts unless a `between` region was asked for.
    if (is.null(between)) {
        if (is.null(below)) below <- qchisq(pcts[1], df)
        if (is.null(above)) above <- qchisq(pcts[2], df)
    }
    if (!is.null(outside)) {
        below <- min(outside)
        above <- max(outside)
    }

    # The curve is drawn from 0 up to the 99th percentile of the distribution.
    lowlim <- 0
    uplim  <- qchisq(.99, df)
    x.grid <- seq(lowlim, uplim, length.out = numpts)
    dens.all <- dchisq(x.grid, df)

    # `lines` the argument is a logical; the call lines(...) still resolves to
    # the graphics function because R skips non-functions when looking up a call.
    if (lines) {
        lines(x.grid, dens.all)
    } else {
        plot(x.grid, dens.all, type="l", xlab="X", ylab="Density")
    }

    # Shade the grid points flagged by `keep`. which() drops NA and FALSE, so
    # a missing (NULL) or NA cutoff simply results in nothing being drawn.
    shade <- function(keep) {
        idx <- which(keep)
        if (length(idx) == 0) return(invisible(NULL))
        xs <- x.grid[idx]
        polygon(c(xs, rev(xs)),
                c(rep(0, length(xs)), rev(dens.all[idx])),
                col = color, density = dens)
    }

    if (!justabove) shade(x.grid < below[1])
    if (!justbelow) shade(x.grid > above[1])
    if (!is.null(between)) {
        shade(x.grid > min(between) & x.grid < max(between))
    }
    invisible(NULL)
}

Question 1-Reject, 2T, Z, 0.021

Using traditional methods, it takes 109 hours to receive a basic driving license. A new license training method using Computer Aided Instruction (CAI) has been proposed. A researcher used the technique with 190 students and observed that they had a mean of 110 hours. Assume the standard deviation is known to be 6. A level of significance of 0.05 will be used to determine if the technique performs differently than the traditional method. Make a decision to reject or fail to reject the null hypothesis. Show all work in R.

# Two-sided one-sample Z test of the mean training time.
# Ho:  Mu = 109, Ha:  Mu <> 109
alpha <- .05
# Distribution: Z (the population SD is given as known)
Z <- (110 - 109) / (6 / sqrt(190))
# Sampling distribution of the mean under Ho with the default 2.5% tails
# shaded; the red line marks the observed sample mean.
shadenorm(mu = 109, sig = 6 / sqrt(190))
lines(x = rep(110, 10), y = seq(0, 1, length.out = 10), col = 'red')

# Two-sided p-value. Using -abs(Z) keeps it correct whichever side of the
# hypothesised mean the sample mean falls on.
(answer <- 2 * pnorm(-abs(Z)))

## [1] 0.0215993

# Monte Carlo check: draw sample means under Ho (centred at 109, as the
# simulations for the other questions do) and count those at least as far
# above 109 as the observed mean. The printed value is from one run and
# changes slightly from run to run.
temp <- rnorm(100000, 109, 6 / sqrt(190))
2 * sum(temp >= 110) / length(temp)

## [1] 0.02288

myp(answer, alpha)

## [1] REJECT Ho

Question 2-FTR, 1T, t, 0.287

Our environment is very sensitive to the amount of ozone in the upper atmosphere. The level of ozone normally found is 5.3 parts/million (ppm). A researcher believes that the current ozone level is at an insufficient level. The mean of 5 samples is 5.0 ppm with a standard deviation of 1.1. Does the data support the claim at the 0.05 level? Assume the population distribution is approximately normal.

# Lower-tailed one-sample t test of the mean ozone level.
# Ho:  Mu >= 5.3, Ha:  Mu < 5.3
Mu <- 5.3
alpha <- .05
# Distribution: t (population SD not known)
n <- 5
df <- n - 1
sd <- 1.1
Se <- sd / sqrt(n)
t <- (5 - Mu) / Se
# Shade the lower 5% tail and mark the observed statistic in red.
shadet(df = df, pcts = .05)
lines(x = rep(t, 10), y = seq(0, 1, length.out = 10), col = 'red')

# Lower-tail p-value.
ans <- pt(t, df)
print(ans)

## [1] 0.2874568

# Simulation check: proportion of simulated sample means at or below 5.
temp <- rt(100000, df) * Se + Mu
sum(temp <= 5) / length(temp)

## [1] 0.2887

myp(ans, alpha)

## [1] FAIL 2 REJECT

Question 3-FTR, 2T, t, 0.047

Our environment is very sensitive to the amount of ozone in the upper atmosphere. The level of ozone normally found is 7.3 parts/million (ppm). A researcher believes that the current ozone level is not at a normal level. The mean of 51 samples is 7.1 ppm with a variance of 0.49. Assume the population is normally distributed. A level of significance of 0.01 will be used. Show all work and hypothesis testing steps.

# Two-sided one-sample t test of the mean ozone level.
# Ho:  Mu = 7.3, Ha:  Mu <> 7.3
Mu <- 7.3
alpha <- .01
# Distribution: t
n <- 51
df <- n - 1
sd <- sqrt(.49)   # the question gives the variance
Se <- sd / sqrt(n)
t <- (7.1 - 7.3) / Se
# Shade both 0.5% tails and mark the observed statistic in red.
shadet(df = df, pcts = c(.005, .995))
lines(x = rep(t, 10), y = seq(0, 1, length.out = 10), col = 'red')

# Two-sided p-value. It must be stored in `ans`: previously it was only
# printed, so the decision below was made with the p-value left over from
# Question 2.
(ans <- 2 * pt(-abs(t), df))

## [1] 0.04660827

# Simulation check: twice the proportion of simulated means at or below 7.1.
temp <- rt(100000, df) * Se + Mu
2 * (sum(temp <= 7.1) / length(temp))

## [1] 0.04692

myp(ans, alpha)

## [1] FAIL 2 REJECT

Question 4-FTR, 1T, Z, 0.072

A publisher reports that 36% of their readers own a laptop. A marketing executive wants to test the claim that the percentage is actually less than the reported percentage. A random sample of 100 found that 29% of the readers owned a laptop. Is there sufficient evidence at the 0.02 level to support the executive’s claim? Show all work and hypothesis testing steps.

# Lower-tailed one-sample Z test for a proportion.
# Ho:  pi >= .36, Ha:  pi < .36
p <- .36
q <- 1 - p
alpha <- .02
n <- 100
# Distribution: Z
Se <- sqrt(p * q / n)
Z <- (.29 - .36) / Se
# Shade the lower 2% tail of the null sampling distribution and mark the
# observed sample proportion in red.
shadenorm(mu = .36, sig = Se, pcts = c(.02))
lines(x = rep(.29, 10), y = seq(0, 20, length.out = 10), col = 'red')

# Lower-tail p-value.
ans <- pnorm(Z)
print(ans)

## [1] 0.07237434

# Simulation check: proportion of simulated sample proportions below .29.
temp <- rnorm(100000, p, Se)
sum(temp < .29) / length(temp)

## [1] 0.0737

myp(ans, alpha)

## [1] FAIL 2 REJECT

Question 5-Reject, 1T, Z for Prop, 0.006

A hospital director is told that 31% of the treated patients are uninsured. The director wants to test the claim that the percentage of uninsured patients is less than the expected percentage. A sample of 380 patients found that 95 were uninsured. Make the decision to reject or fail to reject the null hypothesis at the 0.05 level. Show all work and hypothesis testing steps.

# Lower-tailed one-sample Z test for a proportion.
# Ho:  pi >= .31, Ha:  pi < .31
p <- .31
q <- 1 - p
n <- 380
x <- 95
phat <- x / n
alpha <- .05
Se <- sqrt(p * q / n)
# Shade the lower 5% tail of the null sampling distribution and mark the
# observed sample proportion in red.
shadenorm(mu = .31, sig = Se, pcts = c(.05))
lines(x = rep(phat, 10), y = seq(0, 20, length.out = 10), col = 'red')

# Test statistic and lower-tail p-value.
Z <- (phat - p) / Se
ans <- pnorm(Z)
print(ans)

## [1] 0.005720462

# Simulation check: proportion of simulated sample proportions at or below
# the observed one.
temp <- rnorm(1000000, p, Se)
sum(temp <= phat) / length(temp)

## [1] 0.005569

myp(ans, alpha)

## [1] REJECT Ho

# Reject: p < alpha

Question 6

Omitted

Question 7-Reject, 1T, Chi, 0.009

A standardized test is given to a sixth-grade class. Historically the mean score has been 112 with a standard deviation of 24. The superintendent believes that the standard deviation of performance may have recently decreased. She randomly sampled 22 students and found a mean of 102 with a standard deviation of 15.4387. Is there evidence that the standard deviation has decreased at the \(\alpha = 0.1\) level? Show all work and hypothesis testing steps.

# Lower-tailed chi-square test for a decrease in the standard deviation.
# Ho:  sigma >= 24, Ha:  sigma < 24
sigma <- 24
sigma_2 <- sigma^2
alpha <- .1
n <- 22
s <- 15.4387
s_2 <- s^2

# Chi-square statistic: (n - 1) * s^2 / sigma^2
df <- n - 1
chi <- df * s_2 / sigma_2
# Shade the lower 10% tail and mark the observed statistic in red.
shadechi(df = df, pcts = c(.1))
lines(rep(chi, 10), seq(0, 20, length.out = 10), col = 'red')

# Lower-tail p-value (TRUE spelled out: T is an ordinary variable that can be
# reassigned).
(ans <- pchisq(chi, df, lower.tail = TRUE))

## [1] 0.008549436

# Reject Ho: p < alpha
# Simulation check: proportion of simulated chi-square draws at or below the
# observed statistic.
temp <- rchisq(100000, df)
sum(temp <= chi) / length(temp)

## [1] 0.00856

myp(ans, alpha)

## [1] REJECT Ho

For 8 and 9, I use the Satterthwaite approximation. Alternatively, you may select the smaller of the two degrees of freedom.

Question 8-FTR, 2T, t, 0.216

A medical researcher wants to compare the pulse rates of smokers and non-smokers. He believes that the pulse rate for smokers and non-smokers is different and wants to test this claim at the 0.1 level of significance. The researcher checks 32 smokers and finds that they have a mean pulse rate of 87, and 31 non-smokers have a mean pulse rate of 84. The standard deviation of the pulse rates is found to be 9 for smokers and 10 for non-smokers. Let \(\mu_1\) be the true mean pulse rate for smokers and \(\mu_2\) be the true mean pulse rate for non-smokers. Show all work and hypothesis testing steps.

# Two-sided two-sample t test with unequal variances.
# Ho:  Mu1 - Mu2 = 0, Ha:  Mu1 - Mu2 <> 0
alpha <- .1
# Distribution: t
n1 <- 32
n2 <- 31
df1 <- n1 - 1
df2 <- n2 - 1
sd1 <- 9
sd2 <- 10
var1 <- sd1^2
var2 <- sd2^2

# Test statistic: difference in sample means over its standard error.
num <- 87 - 84
den <- sqrt(var1 / n1 + var2 / n2)
t <- num / den
# Satterthwaite approximation to the degrees of freedom.
numdf <- (var1 / n1 + var2 / n2)^2
dendf <- (var1 / n1)^2 / df1 + (var2 / n2)^2 / df2
df <- numdf / dendf
# Shade both 5% tails and mark the observed statistic in red.
shadet(df = df, pcts = c(.05, .95))
lines(rep(t, 10), seq(0, 1, length.out = 10), col = 'red')

# Two-sided p-value.
ans <- 2 * (1 - pt(t, df))
print(ans)

## [1] 0.2160473

myp(ans, alpha)

## [1] FAIL 2 REJECT

Question 9, 2T, t, -37.6, -22.7

Given two independent random samples with the following results: n1=11, xbar1=127, s1=33, n2=18, xbar2=157, s2=27

Use this data to find the 95% confidence interval for the true difference between the population means. Assume that the population variances are not equal and that the two populations are normally distributed.

# 95% confidence interval for Mu1 - Mu2 with unequal variances.
xbar1 <- 127
xbar2 <- 157
n1 <- 11
n2 <- 18
df1 <- n1 - 1
df2 <- n2 - 1
s1 <- 33
s2 <- 27
v1 <- s1^2
v2 <- s2^2
# Satterthwaite degrees of freedom from THIS question's variances (v1, v2).
# The earlier version referred to var1/var2, which were not defined here and
# silently picked up the values left over from Question 8.
numdf <- (v1 / n1 + v2 / n2)^2
dendf <- (v1 / n1)^2 / df1 + (v2 / n2)^2 / df2
df <- numdf / dendf

delta <- xbar1 - xbar2
t <- qt(.975, df)
# The question states that the variances are not equal, so use the unpooled
# standard error rather than a pooled standard deviation.
Se <- sqrt(v1 / n1 + v2 / n2)
shadet(df = df, pcts = c(.025, .975))
(interval <- c(delta - t * Se, delta + t * Se))

## approximately -54.81  -5.19
## (the previously reported -37.63216 -22.36784, also quoted in the question
##  heading, came from the stale Question 8 variances and a pooled SE)

text(x = -3, y = .05, round(interval[1], 3))
text(x = 3, y = .05, round(interval[2], 3))

This is a paired t-test!

Question 10, 2T, paired t (one-sample), -2.767, 2.967

Two men, A and B, who usually commute to work together decide to conduct an experiment to see whether one route is faster than the other. The men feel that their driving habits are approximately the same, so each morning for two weeks one driver is assigned to route I and the other to route II. The times, recorded to the nearest minute, are shown in the following table. Using this data, find the 98% confidence interval for the true mean difference between the average travel time for route I and the average travel time for route II. Let d = (route I travel time) − (route II travel time). Assume that the populations of travel times are normally distributed for both routes. Show all work and hypothesis testing steps.

# 98% confidence interval for the mean paired difference (route I - route II).
r1 <- c(32, 27, 34, 24, 31, 25, 30, 23, 27, 35)
r2 <- c(28, 28, 33, 25, 26, 29, 33, 27, 25, 33)
delta <- r1 - r2

# Interval: xbar +/- t * Se
xbar <- mean(delta)
n <- 10
df <- n - 1
t <- qt(.99, df)
Se <- sd(delta) / sqrt(length(delta))
# Shade both 1% tails of the t distribution.
shadet(df = df, pcts = c(.01, .99))

interval <- xbar + c(-1, 1) * t * Se
print(interval)

## [1] -2.766534  2.966534

# Label the two tails of the plot with the interval endpoints.
text(x = -3, y = .05, round(interval[1], 3))
text(x = 3, y = .05, round(interval[2], 3))

Question 11, 1T, Z, Reject, 0.00015

The U.S. Census Bureau conducts annual surveys to obtain information on the percentage of the voting-age population that is registered to vote. Suppose that 391 employed persons and 510 unemployed persons are independently and randomly selected, and that 195 of the employed persons and 193 of the unemployed persons have registered to vote. Can we conclude that the percentage of employed workers p1 who have registered to vote, exceeds the percentage of unemployed workers p2 who have registered to vote? Use a significance level of 0.05 for the test. Show all work and hypothesis testing steps.

# Upper-tailed two-sample Z test for a difference in proportions.
# Ho:  pi1 - pi2 <= 0, Ha:  pi1 - pi2 > 0
alpha <- .05
# Distribution: Z

n1 <- 391
n2 <- 510
x1 <- 195
x2 <- 193
p1 <- x1 / n1
p2 <- x2 / n2
# Pooled proportion under Ho and the resulting standard error.
pbar <- (x1 + x2) / (n1 + n2)
qbar <- 1 - pbar
correction <- 1 / n1 + 1 / n2
Se <- sqrt(pbar * qbar * correction)
Z <- (p1 - p2) / Se
# The test is one-tailed, so shade only the upper 5% rejection region
# (the default would shade both 2.5% tails). The red line marks the
# observed difference in sample proportions.
shadenorm(mu = 0, sig = Se, pcts = c(.05, .95), justabove = TRUE)
lines(rep(p1 - p2, 10), seq(0, 20, length.out = 10), col = 'red')

# Upper-tail p-value, stored so the decision can be reported with myp().
(ans <- pnorm(Z, lower.tail = FALSE))

## [1] 0.000150744

myp(ans, alpha)

## [1] REJECT Ho

# Reject Ho: p < alpha

Homework 5, Data Analysis