HW5

Problem 1(a)

alpha <- 0.01
sigma <- 0.001
n <- 15
xbar <- 74.036

#level of significance(Z0.05/2)

(z.alpha0.5 <- qnorm(alpha/2,lower.tail = F))

## [1] 2.575829

#construct Confidence Interval:

lb <- xbar-z.alpha0.5 *sigma/sqrt(n)
ub <- xbar+z.alpha0.5*sigma/sqrt(n)
CI <- c(round(lb,4),round(ub,4))
CI

## [1] 74.0353 74.0367

level of significance(Z0.05/2)

alpha <- 0.05
sigma <- 0.001
n <- 15
xbar <- 74.036

# Step 1: We need to get level of significance(Z0.05) as only lb is asked for
(z.alpha0.5 <- qnorm(alpha,lower.tail = F))

## [1] 1.644854

#Step 2: Need t construct Confidence Interval:
#Lower/upper confidence limits:
lb <- xbar-z.alpha0.5 *sigma/sqrt(n)
CI <- c(round(lb,5),"inf")
CI

## [1] "74.03558" "inf"

Problem 2 (a) What is the type I error probability if the critical region is defined as ¯x < 11.5 kilograms?

# The formula for the Type-I error is as follows:
# alpha = P(Type1- error)

sigma <- 0.5
n <- 4
xbar <- 12
mu0 <- 11.5

z0 <- (xbar-mu0)/(sigma/sqrt(n))
z0

## [1] 2

(p.value <- pnorm(z0,lower.tail = F))

## [1] 0.02275013

# From the Type I error probability there is 2.275% chance of rejecting H0.

(b)The formula for the Type-II error is as follows:
alpha = P(TypeII Error)

sigma <- 0.5
n <- 4
xbar <- 12
mu0 <- 11.5
mu1 <- 11.25
# For P(xbar>11.5 when mu1 = 11.25)
z0 <- (mu0-mu1)/(sigma/sqrt(n))
z0

## [1] 1

(p.value <- pnorm(z0,lower.tail = F))

## [1] 0.1586553

#From the Type I error probability there is 15.867% chance of rejecting H0.

Problem 3 (a)

lb <-  98.5
ub <-  101.5
mu <- 100
sigma <- 2
n <- 9

z1 <- (lb-mu)/(sigma/sqrt(n))
z1

## [1] -2.25

z2 <- (ub-mu)/(sigma/sqrt(n))
z2

## [1] 2.25

# alpha = 1 − P(98.5 ≤ xbar ≤ 101.5|mu = 100) = P(Z ≤ −2.25) + P(Z > 2.25)
alpha <- pnorm(z1) + pnorm(z2, lower.tail = FALSE)
alpha

## [1] 0.02444895

lb <-  101.5
ub <-  98.5
mu <- 103
sigma <- 2
n <- 9

z1 <- (lb-mu)/(sigma/sqrt(n))
z1

## [1] -2.25

z2 <- (ub-mu)/(sigma/sqrt(n))
z2

## [1] -6.75

# beta = P(98.5 ≤ xbar ≤ 101.5|mu0 = 103) = P(−6.75 ≤ Z ≤ −2.25) = P(Z ≤ −2.25) − P(Z ≤ −6.75)
beta <- pnorm(z1) - pnorm(z2)
beta

## [1] 0.01222447

lb <-  101.5
ub <-  98.5
mu <- 105
sigma <- 2
n <- 9

z1 <- (lb-mu)/(sigma/sqrt(n))
z1

## [1] -5.25

z2 <- (ub-mu)/(sigma/sqrt(n))
z2

## [1] -9.75

# beta = P(98.5 ≤ xbar ≤ 101.5|mu0 = 105) = P(−6.75 ≤ Z ≤ −2.25) = P(Z ≤ −2.25) − P(Z ≤ −6.75)
beta <- pnorm(z1) - pnorm(z2)
beta

## [1] 7.604961e-08

# Remark: 
# The value of beta is decreasing as the sample mean increases (till 105). Hence it is easier to detect the difference in means within sample and population.

Problem 4 (a)

# To support whether the compression strength is normally distributed we can use the normal- Q-Q(quantile-quantile) plot.

specimen <- c(2216, 2237, 2225, 2301, 2318, 2255,
              2249, 2204, 2281, 2263, 2275, 2295)
specimen

##  [1] 2216 2237 2225 2301 2318 2255 2249 2204 2281 2263 2275 2295

#qq-plot of the specimen

qqnorm(specimen)

qqline(specimen)

#This implies that compression strength is normally distributed.

Test the normality of the data by Shapiro-Wilk test?

# Checking normality of the data
concrete_sample_test <- shapiro.test(specimen)
concrete_sample_test

## 
##  Shapiro-Wilk normality test
## 
## data:  specimen
## W = 0.97909, p-value = 0.9798

#Since p value> alpha=0.05, we accept that data provided is normally distributed,

Construct a 95% two-sided confidence interval on the mean strength.

xbar <- mean(specimen)
n <- length(specimen)
s <- sd(specimen)
alpha <- 0.05

t.alpha0.5 <- qt(alpha/2,n-1,lower.tail = F)
t.alpha0.5

## [1] 2.200985

lb <- xbar-t.alpha0.5*s/sqrt(n)
ub <- xbar+t.alpha0.5*s/sqrt(n)

CI <- c(lb,ub)
CI

## [1] 2237.317 2282.516

Construct a 95% lower-confidence bound on the mean strength.

t.alpha0.5 <- qt(alpha,n-1,lower.tail = F)
t.alpha0.5

## [1] 1.795885

lb <- xbar-t.alpha0.5*s/sqrt(n)
CI <- c(lb,"inf")
CI

## [1] "2241.47657706579" "inf"

Problem 5(a)

alpha <- 0.05
sigma <- 0.9
n <- 15
xbar <- 2.78
mu0 <- 3

z0 <- (xbar-mu0)/(sigma/sqrt(n))
z0

## [1] -0.9467293

(z.alpha0.5 <- qnorm(alpha/2,lower.tail = F))

## [1] 1.959964

# Since abs(z0)<z.alpha0.5 we fail to reject H0.

What is the power of this test if mu = 3.25?

library(pwr)
mu_1 <- 3.25
pwr.test <- pwr.norm.test(d=(mu0-mu_1)/sigma,n=15,sig.level=0.05)
pwr.test

## 
##      Mean power calculation for normal distribution with known variance 
## 
##               d = 0.2777778
##               n = 15
##       sig.level = 0.05
##           power = 0.1895111
##     alternative = two.sided

What sample size would be required to detect a true mean of 3.75 if we wanted the power to be at least 0.9?

mu_2 <- 3.75
pwr.test <- pwr.norm.test(d=(mu0-mu_2)/sigma,sig.level=0.05,power=0.9)
pwr.test

## 
##      Mean power calculation for normal distribution with known variance 
## 
##               d = 0.8333333
##               n = 15.13068
##       sig.level = 0.05
##           power = 0.9
##     alternative = two.sided

# Hence n = 15.13068 or approx 16
# n=16 for a power of 0.9

Explain how the question in part (a) could be answered by using p-value and confidence interval.

alpha <- 0.05
sigma <- 0.9
n <- 15
xbar <- 2.78
mu0 <- 3

p.value <- 2*pnorm(abs(mu0),lower.tail= F)

lb <- xbar-z.alpha0.5*sigma/sqrt(n)
ub <- xbar+z.alpha0.5*sigma/sqrt(n)
CI <- c(lb,ub)
CI

## [1] 2.324546 3.235454

# As mu0 belongs to the range c(lb,ub) of CI, we cannot reject H0

Problem 6(a) H0: mu0 lesser than or equal to 40 H1: mu0 greater 40

# Critical region for Type 1 error(giving speeding ticket when driver is not speeding)
threshold <- qnorm(0.04, 40, 5/sqrt(3), lower.tail = F)
threshold

## [1] 45.0538

# Threshold should be set at 45.0538 to have only 4% error

mu <- 45
mu0 <- 40
sigma <- 5  
(d=(mu-mu0)/sigma)

## [1] 1

pwr=pnorm(threshold, mu, sigma/sqrt(3), lower.tail = F)
pwr

## [1] 0.492566

pwr.norm.test(d=d,sig.level = 0.04,n=3,alternative="greater")

## 
##      Mean power calculation for normal distribution with known variance 
## 
##               d = 1
##               n = 3
##       sig.level = 0.04
##           power = 0.492566
##     alternative = greater

# As can be seen from above, both the methods give same output

pwr.norm.test(d=d,sig.level = 0.04,power=0.9,alternative="greater")

## 
##      Mean power calculation for normal distribution with known variance 
## 
##               d = 1
##               n = 9.194466
##       sig.level = 0.04
##           power = 0.9
##     alternative = greater

# From above, N=10

Problem 7 (a) Use alpha = 0.01.

library(readxl)
rainfall<-read_excel("/Users/seventails/Downloads/Stat/Week 7/HW5_data.xlsx",sheet = "Problem 7")
rainfall1 <- rainfall$Rainfall
rainfall1

##  [1] 18.0 30.7 19.8 27.1 22.3 18.8 31.8 23.4 21.2 27.9 31.9 27.1 25.0 24.7 26.9
## [16] 21.8 29.2 34.8 26.7 31.6

xbar <- mean(rainfall1)
n <- length(rainfall1)
s <- sd(rainfall1)
alpha <- 0.01
mu0 <- 25
 
t0=(xbar-mu0)/(s/sqrt(n))
t.alpha=qt(alpha,n-1,lower.tail = F)
print(paste("t0=",round(t0,4),", t.alpha=",round(t.alpha,4)))

## [1] "t0= 0.9674 , t.alpha= 2.5395"

# Since t0 = 0.9674 < t.t.alpha = 2.5395 we can’t reject H0.Hence we will accept the mean rainfall

par(mfrow=c(2,1))
hist(rainfall1)
qqnorm(rainfall1)
qqline(rainfall1)

# to check normalcy in data 
shapiro.test(rainfall1)

## 
##  Shapiro-Wilk normality test
## 
## data:  rainfall1
## W = 0.97025, p-value = 0.7601

# We can reject the null hypothesis and say that data is normally distributed

xbar <- mean(rainfall1)
n <- length(rainfall1)
s <- sd(rainfall1)
alpha <- 0.01
mu0 <- 25
mu1 <- 27

pwr.t.test(d=(mu1-mu0)/s, n = length(rainfall1), sig.level = 0.01,alternative = "greater", type = "one.sample")

## 
##      One-sample t test power calculation 
## 
##               n = 20
##               d = 0.4179934
##       sig.level = 0.01
##           power = 0.2780838
##     alternative = greater

xbar <- mean(rainfall1)
n <- length(rainfall1)
s <- sd(rainfall1)
alpha <- 0.01
mu0 <- 25
mu1 <- 27.5

pwr.t.test(d=(mu1-mu0)/s, sig.level = 0.01,power=0.9,alternative = "greater", type = "one.sample")

## 
##      One-sample t test power calculation 
## 
##               n = 50.43191
##               d = 0.5224917
##       sig.level = 0.01
##           power = 0.9
##     alternative = greater

95% CI for part(a)

lb=round(xbar-t.alpha*s/sqrt(n),3)
print(paste(1-alpha,"% lower CI for mu is [",lb,", inf)"))

## [1] "0.99 % lower CI for mu is [ 23.318 , inf)"

p.value=pt(t0,n-1,lower.tail = F)
p.value

## [1] 0.1727551

# Since P-value > α(0.01), we can’t reject H0

Problem 8 (a)

xbar <- 11
mu0 <- 10
n <- 16
s <- 2
alpha <- 0.05

t0=(xbar-mu0)/(s/sqrt(n))
t0

## [1] 2

t.alpha0.5 <- qt(alpha/2, n-1, lower.tail = F)
t.alpha0.5

## [1] 2.13145

# As abs(t0)<t.alpha0.5 , we can't reject H0

# This is a one-sided t-test

t0 <- qt(alpha, n-1, lower.tail = F)
t0

## [1] 1.75305

# We reject H0

Problem 9 (a)

n <- 10
s <- 4.8
sigma2 <- 18
alpha=0.05

# Hypothesis
# H0: Variance =18
# H1:Variance !=18
# The test-statistic we are going to use is Chi-Square
(chisq0 <- (n-1)*s^2/sigma2)

## [1] 11.52

#Getting upper and lower boundaries to accpet or reject H0 using chisq0
chisqLB=qchisq(alpha/2,n-1)
chisqUB=qchisq(alpha/2,n-1,lower.tail = F)
print(paste("chisq0=",chisq0,", chisqLB=",
            round(chisqLB,4),", chisqUB",round(chisqUB,4)))

## [1] "chisq0= 11.52 , chisqLB= 2.7004 , chisqUB 19.0228"

# Since chisq0 leis b/w chisqLB & chisqUB, we can't reject H0 hypothesis

p.value <- 2*min(c((1 - pchisq(11.52, 10-1)), pchisq(11.52, 10-1)))
p.value

## [1] 0.4834818

lb=round((n-1)*s^2/chisqUB,3)
ub=round((n-1)*s^2/chisqLB,3)
print(paste(1-alpha,"% CI for variance is [",lb,",",ub,"]"))

## [1] "0.95 % CI for variance is [ 10.901 , 76.789 ]"

lb=round(sqrt((n-1)*s^2/chisqUB),3)
ub=round(sqrt((n-1)*s^2/chisqLB),3)
print(paste(1-alpha,"% CI for sigma is [",lb,",",ub,"]"))

## [1] "0.95 % CI for sigma is [ 3.302 , 8.763 ]"

alpha=0.1
lb=round((n-1)*s^2/qchisq(alpha,n-1,lower.tail = F),3)
print(paste(1-alpha,"% lower CI for variance is [",lb,", + inf)"))

## [1] "0.9 % lower CI for variance is [ 14.122 , + inf)"

lb=round(sqrt((n-1)*s^2/qchisq(alpha,n-1,lower.tail = F)),3)
print(paste(1-alpha,"% lower CI for sigma is [",lb,", + inf)"))

## [1] "0.9 % lower CI for sigma is [ 3.758 , + inf)"

Because the 95% Confidence interval does include sigma2=18, we can’t reject H0.

lambda=sqrt(40/18);alpha=0.05;
beta<-function(n){
  p<-pchisq(qchisq(alpha/2,n-1,lower.tail = F)/lambda^2,n-1)-
    pchisq(qchisq(1-alpha/2,n-1,lower.tail = F)/lambda^2,n-1)
  return(p)
}
beta(24)

## [1] 0.1973324

beta(25)

## [1] 0.1832983

curve(beta, from=10, to=50,n=41, xlab="n",ylab="beta(n)")
abline(h=0.1,col="red",lty=2)
abline(v=33,col="blue",lty=2)

HW5

Yashashri Kulkarni

10/31/2021