Function to Reject or Not

# Decide whether to reject the null hypothesis from a p-value.
#
# Arguments:
#   p      p-value, or a vector of p-values.
#   alpha  significance level.
#
# Returns a noquote character vector with one entry per p-value:
# 'REJECT Ho' when p < alpha (strict), otherwise 'FAIL 2 REJECT'.
myp <- function(p, alpha) {
  # ifelse() picks between plain strings, so every p-value gets its own
  # decision instead of the first TRUE/FALSE deciding for the whole vector.
  noquote(ifelse(p < alpha, 'REJECT Ho', 'FAIL 2 REJECT'))
}

Function for Shading Normal

# Plot a normal density curve and shade tail and/or central regions.
#
# Arguments:
#   below, above  Cut-offs of the lower / upper shaded tails. When `between`
#                 is NULL, a missing cut-off defaults to the normal quantile
#                 at pcts[1] / pcts[2].
#   pcts          Lower and upper probabilities used for the default cut-offs.
#   mu, sig       Mean and standard deviation of the distribution.
#   numpts        Number of grid points between mu - 4*sig and mu + 4*sig.
#   color, dens   Passed to polygon() as `col` and `density`.
#   justabove     If TRUE, the lower tail is not shaded.
#   justbelow     If TRUE, the upper tail is not shaded.
#   lines         If TRUE, add the curve to the current plot instead of
#                 opening a new one.
#   between       Optional range; the area strictly between its min and max
#                 is shaded.
#   outside       Optional range; its min / max replace `below` / `above`.
#   test_stat     x position(s) of a red vertical marker. Non-finite values
#                 (such as the default Inf) are not drawn.
#
# Returns NULL invisibly; the function is called for its plot.
shadenorm <- function(below = NULL, above = NULL,
                      pcts = c(0.025, 0.975), mu = 0, sig = 1,
                      numpts = 500, color = "gray", dens = 40,
                      justabove = FALSE, justbelow = FALSE,
                      lines = FALSE, between = NULL, outside = NULL,
                      test_stat = Inf) {
  if (!is.numeric(sig) || length(sig) != 1 || is.na(sig) || sig <= 0) {
    stop("`sig` must be a single positive number", call. = FALSE)
  }

  # Default tail cut-offs are only filled in when no `between` range is given,
  # so a `between` call shades the central region alone.
  if (is.null(between)) {
    if (is.null(below)) below <- qnorm(pcts[1], mu, sig)
    if (is.null(above)) above <- qnorm(pcts[2], mu, sig)
  }
  if (!is.null(outside)) {
    below <- min(outside)
    above <- max(outside)
  }

  x.grid <- seq(mu - 4 * sig, mu + 4 * sig, length.out = numpts)
  dens.all <- dnorm(x.grid, mean = mu, sd = sig)

  # `lines` is also the name of a parameter, hence the explicit namespace.
  if (isTRUE(as.logical(lines))) {
    graphics::lines(x.grid, dens.all)
  } else {
    plot(x.grid, dens.all, type = "l", xlab = "X", ylab = "Density")
  }

  # Hatch the area under the curve over the grid points selected by `keep`.
  shade <- function(keep) {
    idx <- which(keep)  # which() also drops NA comparisons
    if (length(idx) == 0) {
      return(invisible(NULL))
    }
    xs <- x.grid[idx]
    polygon(c(xs, rev(xs)), c(rep(0, length(xs)), rev(dens.all[idx])),
            col = color, density = dens)
  }
  # A cut-off is usable when it exists and is not NA (e.g. pcts of length 1
  # leaves the upper quantile NA).
  usable <- function(cut) length(cut) > 0 && !anyNA(cut)

  if (!isTRUE(as.logical(justabove)) && usable(below)) {
    shade(x.grid < below)
  }
  if (!isTRUE(as.logical(justbelow)) && usable(above)) {
    shade(x.grid > above)
  }
  if (!is.null(between)) {
    shade(x.grid > min(between) & x.grid < max(between))
  }

  # Draw the test-statistic marker once, on top of the shading, whichever
  # regions were shaded.
  for (mark in test_stat[is.finite(test_stat)]) {
    graphics::lines(x = c(mark, mark), y = c(0, max(dens.all)), col = "red")
  }
  invisible(NULL)
}

Function for Shading t

# Plot a Student t density curve on [-5, 5] and shade tail and/or central
# regions.
#
# Arguments:
#   below, above  Cut-offs of the lower / upper shaded tails. When `between`
#                 is NULL, a missing cut-off defaults to the t quantile at
#                 pcts[1] / pcts[2].
#   pcts          Lower and upper probabilities used for the default cut-offs.
#   df            Degrees of freedom of the t distribution.
#   numpts        Number of grid points between -5 and 5.
#   color, dens   Passed to polygon() as `col` and `density`.
#   justabove     If TRUE, the lower tail is not shaded.
#   justbelow     If TRUE, the upper tail is not shaded.
#   lines         If TRUE, add the curve to the current plot instead of
#                 opening a new one.
#   between       Optional range; the area strictly between its min and max
#                 is shaded.
#   outside       Optional range; its min / max replace `below` / `above`.
#   test_stat     x position(s) of a red vertical marker. Non-finite values
#                 (such as the default Inf) are not drawn.
#
# Returns NULL invisibly; the function is called for its plot.
shadet <- function(below = NULL, above = NULL, pcts = c(0.025, 0.975), df = 1,
                   numpts = 500, color = "gray", dens = 40,
                   justabove = FALSE, justbelow = FALSE, lines = FALSE,
                   between = NULL, outside = NULL, test_stat = Inf) {
  # Default tail cut-offs are only filled in when no `between` range is given,
  # so a `between` call shades the central region alone.
  if (is.null(between)) {
    if (is.null(below)) below <- qt(pcts[1], df)
    if (is.null(above)) above <- qt(pcts[2], df)
  }
  if (!is.null(outside)) {
    below <- min(outside)
    above <- max(outside)
  }

  x.grid <- seq(-5, 5, length.out = numpts)
  dens.all <- dt(x.grid, df)

  # `lines` is also the name of a parameter, hence the explicit namespace.
  if (isTRUE(as.logical(lines))) {
    graphics::lines(x.grid, dens.all)
  } else {
    plot(x.grid, dens.all, type = "l", xlab = "X", ylab = "Density")
  }

  # Hatch the area under the curve over the grid points selected by `keep`.
  shade <- function(keep) {
    idx <- which(keep)  # which() also drops NA comparisons
    if (length(idx) == 0) {
      return(invisible(NULL))
    }
    xs <- x.grid[idx]
    polygon(c(xs, rev(xs)), c(rep(0, length(xs)), rev(dens.all[idx])),
            col = color, density = dens)
  }
  # A cut-off is usable when it exists and is not NA (e.g. pcts of length 1
  # leaves the upper quantile NA).
  usable <- function(cut) length(cut) > 0 && !anyNA(cut)

  if (!isTRUE(as.logical(justabove)) && usable(below)) {
    shade(x.grid < below)
  }
  if (!isTRUE(as.logical(justbelow)) && usable(above)) {
    shade(x.grid > above)
  }
  if (!is.null(between)) {
    shade(x.grid > min(between) & x.grid < max(between))
  }

  # Draw the test-statistic marker once, on top of the shading, whichever
  # regions were shaded.
  for (mark in test_stat[is.finite(test_stat)]) {
    graphics::lines(x = c(mark, mark), y = c(0, max(dens.all)), col = "red")
  }
  invisible(NULL)
}

Function for Shading Chi Square

# Plot a chi-square density curve on [0, qchisq(.995, df)] and shade tail
# and/or central regions.
#
# Arguments:
#   below, above  Cut-offs of the lower / upper shaded tails. When `between`
#                 is NULL, a missing cut-off defaults to the chi-square
#                 quantile at pcts[1] / pcts[2].
#   pcts          Lower and upper probabilities used for the default cut-offs.
#   df            Degrees of freedom of the chi-square distribution.
#   numpts        Number of grid points between 0 and qchisq(.995, df).
#   color, dens   Passed to polygon() as `col` and `density`.
#   justabove     If TRUE, the lower tail is not shaded.
#   justbelow     If TRUE, the upper tail is not shaded.
#   lines         If TRUE, add the curve to the current plot instead of
#                 opening a new one.
#   between       Optional range; the area strictly between its min and max
#                 is shaded.
#   outside       Optional range; its min / max replace `below` / `above`.
#   test_stat     x position(s) of a red vertical marker. Non-finite values
#                 (such as the default Inf) are not drawn.
#
# Returns NULL invisibly; the function is called for its plot.
shadechi <- function(below = NULL, above = NULL, pcts = c(0.025, 0.975),
                     df = 1, numpts = 500, color = "gray", dens = 40,
                     justabove = FALSE, justbelow = FALSE, lines = FALSE,
                     between = NULL, outside = NULL, test_stat = Inf) {
  # Default tail cut-offs are only filled in when no `between` range is given,
  # so a `between` call shades the central region alone.
  if (is.null(between)) {
    if (is.null(below)) below <- qchisq(pcts[1], df)
    if (is.null(above)) above <- qchisq(pcts[2], df)
  }
  if (!is.null(outside)) {
    below <- min(outside)
    above <- max(outside)
  }

  x.grid <- seq(0, qchisq(.995, df), length.out = numpts)
  dens.all <- dchisq(x.grid, df)
  # dchisq() is infinite at x = 0 when df < 2 (this includes the default
  # df = 1). Drop such points so the marker height and the shading stay finite.
  finite <- is.finite(dens.all)
  x.grid <- x.grid[finite]
  dens.all <- dens.all[finite]

  # `lines` is also the name of a parameter, hence the explicit namespace.
  if (isTRUE(as.logical(lines))) {
    graphics::lines(x.grid, dens.all)
  } else {
    plot(x.grid, dens.all, type = "l", xlab = "X", ylab = "Density")
  }

  # Hatch the area under the curve over the grid points selected by `keep`.
  shade <- function(keep) {
    idx <- which(keep)  # which() also drops NA comparisons
    if (length(idx) == 0) {
      return(invisible(NULL))
    }
    xs <- x.grid[idx]
    polygon(c(xs, rev(xs)), c(rep(0, length(xs)), rev(dens.all[idx])),
            col = color, density = dens)
  }
  # A cut-off is usable when it exists and is not NA (e.g. pcts of length 1
  # leaves the upper quantile NA).
  usable <- function(cut) length(cut) > 0 && !anyNA(cut)

  if (!isTRUE(as.logical(justabove)) && usable(below)) {
    shade(x.grid < below)
  }
  if (!isTRUE(as.logical(justbelow)) && usable(above)) {
    shade(x.grid > above)
  }
  if (!is.null(between)) {
    shade(x.grid > min(between) & x.grid < max(between))
  }

  # Draw the test-statistic marker once, on top of the shading, whichever
  # regions were shaded.
  for (mark in test_stat[is.finite(test_stat)]) {
    graphics::lines(x = c(mark, mark), y = c(0, max(dens.all)), col = "red")
  }
  invisible(NULL)
}

Question 1

Using traditional methods it takes 108 hours to receive an advanced driving license. A new training technique using Computer Aided Instruction (CAI) has been proposed. A researcher believes the new technique may lengthen training time and decides to perform a hypothesis test. After performing the test on 200 students, the researcher fails to reject the null hypothesis at a 0.02 level of significance.

What is the conclusion?

A) There is sufficient evidence at the 0.02 level of significance that the new technique lengthens training time.

B) There is not sufficient evidence at the 0.02 level of significance that the new technique lengthens training time.

By definition, failing to reject the null means there is not sufficient evidence at the designated \(\alpha\) to reject the null hypothesis. Statement B is correct.

Question 2

A sample of 1600 computer chips revealed that 49% of the chips do not fail in the first 1000 hours of their use. The company’s promotional literature states that 51% of the chips do not fail in the first 1000 hours of their use. The quality control manager wants to test the claim that the actual percentage that do not fail is different from the stated percentage. Is there enough evidence at the 0.02 level to support the manager’s claim?

A) There is sufficient evidence to support the claim that the percentage of chips that do not fail is different from 51%.

B) There is not sufficient evidence to support the claim that the percentage of chips that do not fail is different from 51%.

\(H_o: \pi=0.51\)

\(H_a: \pi \ne 0.51\)

\(\alpha =0.02\)

Test Statistic: Z (normal approximation)

\(Z=\frac{p-\pi}{\sqrt{\pi (1-\pi)/n}}\)

# Two-sided p-value for the one-proportion z test (lower-tail area doubled).
PZ <- 2 * pnorm(q = (.49 - .51) / sqrt(.51 * .49 / 1600))
noquote(paste0('p-value: ', PZ))

## [1] p-value: 0.109527590943306

# Binomial 1% and 99% quantiles, expressed as sample proportions.
noquote(c('Interval: ', qbinom(c(.01, .99), size = 1600, prob = .51) / 1600))

## [1] Interval:  0.480625   0.53875

# Compare the two-sided p-value with alpha = 0.02.
myp(p = PZ, alpha = .02)

## [1] FAIL 2 REJECT

# Sampling distribution of the proportion under Ho, with 1% rejection tails
# and the observed proportion marked.
shadenorm(pcts = c(.01, .99), mu = 0.51, sig = sqrt(.51 * .49 / 1600),
          test_stat = .49)

Since the p-value \(0.11 > \alpha = 0.02\), we fail to reject \(H_o\): there is not sufficient evidence. Equivalently, the sample proportion \(p = 0.49\) lies inside the interval, so there is not sufficient evidence. B is correct.

Question 3

Our environment is very sensitive to the amount of ozone in the upper atmosphere. The level of ozone normally found is 4.4 parts/million (ppm). A researcher believes that the current ozone level is not at a normal level. The mean of 51 samples is 4.8 ppm with a standard deviation of 1.2. Assume the population is normally distributed. A level of significance of 0.01 will be used. State the null and alternative hypotheses.

\(H_o: \mu=4.4\)

\(H_a: \mu \ne 4.4\)

# Two-sided t test at alpha = 0.01 with df = n - 1 = 50. The standard error
# uses the sample standard deviation (1.2), not the sample mean.
shadet(df = 50, pcts = c(.005, .995),
       test_stat = (4.8 - 4.4) / (1.2 / sqrt(51)))

Question 4

In a school district, all sixth grade students take the same standardized test. The superintendent of the school district takes a random sample of 23 scores from all of the students who took the test. She sees that the mean score is 159 with a standard deviation of 9.6841. The superintendent wants to know if the standard deviation has changed this year. Previously, the population standard deviation was 10. Is there evidence that the standard deviation of test scores has decreased at the α = 0.025 level? Assume the population is normally distributed.

Step 1:

\(H_o: \sigma \ge 10\)

\(H_a: \sigma < 10\)

\(\alpha=.025\)

Distribution: \(\chi^2\)

Step 2:

Critical Value (lower tail, 22 degrees of freedom): \(\chi{^2_{0.025}}=10.982\)

Step 3:

Test Statistic: \(\chi{^2_{22}}= \frac {df \times s^2}{\sigma^2}=20.632\)

We evaluate whether the Test Statistic is LESS than the Critical Value.

Step 4:

Fail to Reject (see R Code)

# Test statistic next to the lower-tail critical value.
noquote(c(22 * 9.6841^2 / 10^2, " is not less than ",
          qchisq(p = 0.025, df = 22), " Fail to Reject"))

## [1] 20.6319944182       is not less than  10.9823207344737    Fail to Reject

# Lower-tail p-value of the chi-square statistic compared with alpha = 0.025.
myp(p = pchisq(q = 22 * 9.6841^2 / 10^2, df = 22), alpha = .025)

## [1] FAIL 2 REJECT

Step 5:

There is not sufficient evidence to show that the standard deviation of the test scores has decreased.

# Lower-tail test: shade only the region below the 2.5% quantile. Asking for
# it with justbelow = TRUE avoids passing a length-1 `pcts`, which leaves the
# upper cut-off NA.
shadechi(pcts = c(.025, .975), df = 22, numpts = 500, justbelow = TRUE,
         test_stat = 22 * 9.6841^2 / 10^2)

Question 5

A highway department executive claims that the number of fatal accidents which occur in her state does not vary from month to month. The results of a study of 170 fatal accidents were recorded. Is there enough evidence to reject the highway department executive’s claim about the distribution of fatal accidents between each month?

\[ \begin{matrix} & Jan & Feb & Mar & Apr & May & Jun & Jul & Aug & Sep & Oct & Nov & Dec\\ Fatalities & 11 & 19 & 24 & 16 &11 & 7 & 7 & 17 & 9 &19 & 18 & 12 \end{matrix} \]

Step 1:

\(H_o: \pi_1=\pi_2=\cdots=\pi_{12}\) \(H_a: \pi_i \ne \pi_j\) for some \(i \ne j\)

These hypotheses are equivalent to B.

\(H_o:\) Number of fatal accidents does not vary from month to month. \(H_a:\) Number of fatal accidents does vary from month to month.

Step 2.

What does the null hypothesis indicate about the proportions of fatal accidents during each month? The null hypothesis, \(H_o\), implies that A) The proportions of fatal accidents during each month are all thought to be equal.

Step 3.

See Step 1.

Steps 4 and 5.

Find the expected value for the number of fatal accidents that occurred in January. Round your answer to two decimal places.

The expected number for each month is equivalent under the uniform distribution assumption, \(\sum_i{X_i}/12=170/12=14.167\).

# Observed fatal accidents, January through December.
O <- c(11, 19, 24, 16, 11, 7, 7, 17, 9, 19, 18, 12)
# Expected count per month when all twelve months are equally likely.
sum(O) / 12

## [1] 14.16667

Step 6.

Find the value of the test statistic. Round your answer to three decimal places.

The distribution of the test statistic is \(\chi^2\) with \(k-1\) degrees of freedom, where \(k\) is the number of categories (here, the 12 months).

The test statistic is \(\sum_i{\frac {(O_i-E_i)^2}{E_i}}\), where \(O\) are the observed values and \(E\) are the expected. That value is 22.84706.

# Pearson goodness-of-fit statistic against equal expected counts. The
# expected count comes from O itself, so it cannot drift from the data.
(test_stat <- local({
  expected <- sum(O) / length(O)
  sum((O - expected)^2 / expected)
}))

## [1] 22.84706

Step 7.

Find the degrees of freedom associated with the test statistic for this problem.

The degrees of freedom are the number of categories minus one (\(12-1 = 11\)) for this distributional test.

Step 8.

Find the critical value of the test at the 0.01 level of significance. Round your answer to three decimal places. NOTE: \(\chi^2\) is not a symmetric distribution. The distributional test is always conducted in the upper tail to look for extremes.

# Upper-tail rejection region at alpha = 0.01, with the test statistic marked.
shadechi(pcts = c(0, .99), df = 11, numpts = 1000, test_stat = test_stat)

# Upper-tail critical value at alpha = 0.01 with 11 degrees of freedom.
qchisq(p = .99, df = 11)

## [1] 24.72497

Step 9.

Make the decision to reject or fail to reject the null hypothesis at the 0.01 level of significance.

A) Fail to Reject Null Hypothesis

B) Reject Null Hypothesis

Since the test statistic is less than the critical value, we fail to reject the null, option A.

Step 10.

State the conclusion of the hypothesis test at the 0.01 level of significance.

A) There is not enough evidence to reject the claim that the number of fatal accidents does not vary from month to month. This must be since we failed to reject the null.

B) There is enough evidence to reject the claim that the number of fatal accidents does not vary from month to month.

Question 6

A manufacturer claims that the calling range (in feet) of its 900-MHz cordless telephone is greater than that of its leading competitor. A sample of 19 phones from the manufacturer had a mean range of 1060 feet with a standard deviation of 41 feet. A sample of 13 similar phones from its competitor had a mean range of 1000 feet with a standard deviation of 24 feet. Do the results support the manufacturer’s claim? Let μ_1 be the true mean range of the manufacturer’s cordless telephone and μ_2 be the true mean range of the competitor’s cordless telephone. Use a significance level of α = 0.01 for the test. Assume that the population variances are equal and that the two populations are normally distributed.

Step 1.

State the null and alternative hypotheses for the test.

\(H_o: \mu_1-\mu_2 \le 0\)

\(H_a: \mu_1-\mu_2 > 0\)

\(\alpha = 0.01\)

Test Distribution: equal variance Student’s t-distribution (sample sd and small samples, assumed normal)

Test Statistic:

\(t=\frac {(\bar x_1-\bar x_2)-(\mu_1-\mu_2)}{\sqrt{s{^2_p}(\frac{1}{n_1}+\frac{1}{n_2})}}\)

\(s{^2_p}=\frac{df_1 \times s{^2_1}+df_2 \times s{^2_2}}{n_1+n_2-2}\)

\(df=n_1+n_2-2\)

Step 2.

Compute the value of the t test statistic. Round your answer to three decimal places.

# Pooled-variance two-sample t statistic. Each sample contributes n - 1
# degrees of freedom, so df = n1 + n2 - 2 = 30.
n1 <- 19; n2 <- 13
df1 <- n1 - 1; df2 <- n2 - 1; df <- df1 + df2
mu1 <- 1060; mu2 <- 1000
sd1 <- 41; sd2 <- 24
s2p <- (df1 * sd1^2 + df2 * sd2^2) / df
(t <- (mu1 - mu2) / sqrt(s2p * (1 / n1 + 1 / n2)))

## [1] 4.735753

Step 3.

Determine the decision rule for rejecting the null hypothesis H_0. Round your answer to three decimal places.

Reject the null if the test statistic is greater than 2.462 (see R code below) or if the p-value calculated in the upper tail is less than 0.01.

# Upper-tail critical value at alpha = 0.01 for the pooled degrees of freedom.
qt(p = .99, df = df)

## [1] 2.462021

Step 4.

State the test’s conclusion.

A) Reject Null Hypothesis B) Fail to Reject Null Hypothesis

Since the test statistic (4.69) is greater than the critical value (2.46) and the test is a ‘greater than’ test, we reject the null. Additionally, we can calculate the p-value in the upper tail and determine that the p-value is less than alpha.

# Upper-tail p-value of the rounded test statistic.
1 - pt(q = 4.69, df = df)

## [1] 3.000061e-05

# Shade the rejection region from the critical value to the edge of the plot.
shadet(df = df, between = c(2.462021, 5), pcts = c(.01, .99))

Question 7

Given two dependent random samples with the following results:

\[ \begin{matrix} Population 1 & 29 & 47 & 29 & 40 & 36 & 17 & 37\\ Population 2 & 44 & 38 & 21 & 49 & 49 & 25 & 35\\ \end{matrix} \]

Use this data to find the 98% confidence interval for the true difference between the population means.
Let d = (Population 1 entry) - (Population 2 entry). Assume that both populations are normally distributed.

Step 1.

Find the mean of the paired differences, \(\bar{d}\). Round your answer to one decimal place.

# Paired samples: row 1 is Population 1, row 2 is Population 2.
(mymat <- matrix(c(29, 47, 29, 40, 36, 17, 37,
                   44, 38, 21, 49, 49, 25, 35),
                 nrow = 2, byrow = TRUE))

##      [,1] [,2] [,3] [,4] [,5] [,6] [,7]
## [1,]   29   47   29   40   36   17   37
## [2,]   44   38   21   49   49   25   35

# Mean of the paired differences d = Population 1 - Population 2.
(dbar <- mean(mymat[1, ] - mymat[2, ]))

## [1] -3.714286

Step 2.

Find the critical value that should be used in constructing the confidence interval. Round your answer to three decimal places.

Legitimately, there are two critical values, but by symmetry, we use the positive one.

# Lower and upper t critical values for a 98% interval with n - 1 = 6 df.
(cv <- round(qt(c(.01, .99), df = 6), digits = 3))

## [1] -3.143  3.143

Step 3.

Find the standard deviation of the paired differences to be used in constructing the confidence interval. Round your answer to one decimal place.

# Sample standard deviation of the paired differences, to one decimal place.
(mysd <- round(sd(mymat[1, ] - mymat[2, ]), digits = 1))

## [1] 9.9

Step 4.

Construct the 98% confidence interval. Round your answers to one decimal place.

# 98% confidence interval for the mean paired difference. Both limits are
# rounded to one decimal place, and n is taken from the data.
(ci <- round(dbar + cv * mysd / sqrt(ncol(mymat)), 1))

## [1] -15.5   8.0

# Shade the central 98% of the t distribution with 6 degrees of freedom.
shadet(df = 6, between = c(-3.143, 3.143), pcts = c(.01, .99))

Question 8

A standardized test is given to a fifth grade class and an eighth grade class. The superintendent believes that the variance in performance from the fifth grade class is different than the variance in performance from the eighth grade class. The sample variance of a sample of 11 test scores from the fifth grade class is 26.57. The sample variance of a sample of 4 test scores from the eighth grade class is 30.78. Test the claim using a 0.05 level of significance. Let σ_1^2 represent the population variance for fifth grade class.

Step 1.

State the null and alternative hypotheses for the test.

\(H_o: \frac{\sigma{^2_1}}{\sigma{^2_2}}=1\)

\(H_a: \frac{\sigma{^2_1}}{\sigma{^2_2}} \ne 1\)

This is the same as the following

\(H_o: \sigma{^2_1}=\sigma{^2_2}\)

\(H_a: \sigma{^2_1}\ne \sigma{^2_2}\)

Step 2.

Determine the critical value(s) of the test statistic. If the test is two-tailed, separate the values with a comma. Round your answer(s) to four decimal places.

# Two-tailed F critical values at alpha = 0.05; numerator df = 11 - 1 and
# denominator df = 4 - 1.
(cvs <- round(qf(c(.025, .975), df1 = 10, df2 = 3), digits = 4))

## [1]  0.2072 14.4189

Step 3.

Compute the value of the test statistic. Round your answer to four decimal places.

# F statistic: ratio of the sample variances stated in the problem (26.57 for
# fifth grade, 30.78 for eighth grade). Stored as f_stat so the built-in
# alias F (FALSE) is not overwritten.
(f_stat <- round(26.57 / 30.78, 4))

## [1] 0.8632

Step 4.

Make a decision.

A) Reject Null Hypothesis

B) Fail to Reject Null Hypothesis

Because the test statistic lies within the interval, we fail to reject the null.

Step 5.

State the test’s conclusion. Does the evidence support the claim?

B) No

There is not sufficient evidence to conclude that the variances are different.

Question 9,

Consider the following table: \[ \begin{matrix} & SS & DF & MS & F\\ Among & 2209.62 & ?? & 736.54 & 1.65\\ Between & ?? & ?? & ?? & ??\\ Total & 7129.15 & 14 & ?? & ??\\ \end{matrix} \]

Step 1.

Calculate the sum of squares of experimental error. Please round your answer to two decimal places.

\(SS_{Between}=SS_{Total}-SS_{Among}=7129.15-2209.62=4919.53\)

# Error sum of squares = total sum of squares minus the among-treatments part.
noquote(
  paste0('SS_Between: ', 7129.15 - 2209.62)
)

## [1] SS_Between: 4919.53

\[ \begin{matrix} & SS & DF & MS & F\\ Among & 2209.62 & ?? & 736.54 & 1.65\\ Between & 4919.53 & ?? & ?? & ??\\ Total & 7129.15 & 14 & ?? & ??\\ \end{matrix} \]

Step 2.

Calculate the degrees of freedom among treatments.

Easy.
Calculate the \(df_{Among}\) among treatments by dividing \(\frac{SS_{Among}}{MS_{Among}}=\frac {2209.62}{736.54}=3.\)

# Degrees of freedom among treatments = SS / MS.
noquote(
  paste0('df_Among: ', 2209.62 / 736.54)
)

## [1] df_Among: 3

\[ \begin{matrix} & SS & DF & MS & F\\ Among & 2209.62 & 3 & 736.54 & 1.65\\ Between & 4919.53 & ?? & ?? & ??\\ Total & 7129.15 & 14 & ?? & ??\\ \end{matrix} \]

Step 3.

Calculate the degrees of freedom of experimental error.

The \(df_{Between}= df_{Total}-df_{Among}=14-3=11\).

# Error degrees of freedom = total df minus among-treatments df.
noquote(
  paste0('df_Between: ', 14 - 3)
)

## [1] df_Between: 11

\[ \begin{matrix} & SS & DF & MS & F\\ Among & 2209.62 & 3 & 736.54 & 1.65\\ Between & 4919.53 & 11 & ?? & ??\\ Total & 7129.15 & 14 & ?? & ??\\ \end{matrix} \]

Step 4.

Calculate the mean square of the experimental error. Please round your answer to two decimal places.

\(MS_{Between}=SS_{Between}/df_{Between}=4919.53/11=447.23\)

# Error mean square = error sum of squares / error degrees of freedom.
noquote(
  paste0('MS_Between: ', 4919.53 / 11)
)

## [1] MS_Between: 447.23

\[ \begin{matrix} & SS & DF & MS & F\\ Among & 2209.62 & 3 & 736.54 & 1.65\\ Between & 4919.53 & 11 & 447.23 & ??\\ Total & 7129.15 & 14 & ?? & ??\\ \end{matrix} \]

Step 5.

What is the sum of squares of sample means about the grand mean? Please round your answer to two decimal places.

This is \(SS_{Among}=\sum_i n_i(\bar x_i-\bar{\bar x})^2=2209.62\), where \(n_i\) is the number of observations in group \(i\).

Step 6.

What is the variation of the individual measurements about their respective means? Please round your answer to two decimal places.

\(\sum_{i,j}(x_{ij}-\bar{x_i})^2\)

This is the \(SS_{Between}=4919.53\).

Step 7.

What is the critical value of F at the 0.05 level? Please round your answer to four decimal places, if necessary.

Let’s complete the table for fun, shall we? \(MS_{Total}=7129.15/14=509.23\).

# Total mean square = total sum of squares / total degrees of freedom.
7129.15 / 14

## [1] 509.225

\[ \begin{matrix} & SS & DF & MS & F\\ Among & 2209.62 & 3 & 736.54 & 1.65\\ Between & 4919.53 & 11 & 447.23 & \\ Total & 7129.15 & 14 & 509.23 & \\ \end{matrix} \]

# Upper 5% point of F with (3, 11) degrees of freedom.
noquote(
  paste0('Critical Value of F: ', qf(p = .95, df1 = 3, df2 = 11))
)

## [1] Critical Value of F: 3.58743370242049

Step 8.

Is F significant at 0.05?

B) No

Because the F-statistic is less than the critical value at the specified \(\alpha\) level, the statistic is not statistically significant.

HW 5, 2023 Fall Data Analysis

lvf

Some Day

Function to Reject or Not

Function for Shading Normal

Function for Shading t

Function for Shading Chi Square

Question 1

Question 2

Question 3

Question 4

Step 1:

Step 3:

Step 4:

Step 5:

Question 5

Step 1:

Step 2.

Step 3.

Steps 4 and 5.

Step 6.

Step 7.

Step 8.

Step 9.

Step 10.

Question 6

Step 1.

Step 2.

Step 3.

Step 4.

Question 7

Step 1.

Step 2.

Step 3.

Step 4.

Question 8

Step 1.

Step 2.

Step 3.

Step 4.

Step 5.

Question 9,

Step 2.

Step 3.

Step 5.

Step 6.

Step 7.

Step 8.