# One-sample, two-sided z-test (population standard deviation known).
# H0 (mu = 109): mean number of hours to obtain the driving license with the
#   CAI equals the mean with the traditional method (109).
# Ha (mu != 109): mean number of hours to obtain the driving license with the
#   CAI is NOT equal to the mean with the traditional method (109).

# Define variables
n <- 190      # sample size
mu0 <- 109    # hypothesized population mean (not called `mean`, to avoid masking base::mean)
xbar <- 110   # sample mean
sigma <- 6    # population standard deviation
alpha <- 0.05 # level of significance
se <- sigma / sqrt(n) # standard error of the sample mean

# Test statistic
z <- (xbar - mu0) / se

# Two-sided p-value. Using -abs(z) keeps the formula valid whether the sample
# mean falls above or below the hypothesized mean.
p_value <- 2 * pnorm(-abs(z))
cat("The probability is", round(p_value, 4), ".\n")
## The probability is 0.0216 .

# Decide against the declared alpha, using the unrounded p-value.
if (p_value < alpha) {
  decision <- "Reject the null hypothesis"
} else {
  decision <- "Fail to reject the null hypothesis"
}

cat("Test Statistic (z):", z, "\n")
## Test Statistic (z): 2.297341
cat("P-value:", round(p_value, 4), "\n")
## P-value: 0.0216
cat("Decision:", decision, "\n")
## Decision: Reject the null hypothesis
# One-sample, left-tailed z-test.
# H0: mu = 5.3 ppm
# Ha: mu < 5.3 ppm

# Inputs
n <- 5        # sample size
m <- 5.3      # hypothesized population mean
xbar <- 5.0   # sample mean
sigma <- 1.1  # population standard deviation
se <- sigma / sqrt(n) # standard error of the sample mean
alpha <- 0.05 # level of significance

# Compute the statistic and its lower-tail probability once, then report.
z_stat <- (xbar - m) / se
p_lower <- pnorm(z_stat)

cat("Test Statistic (z):", z_stat, "\n")
## Test Statistic (z): -0.6098367
cat("P-value:", p_lower, "\n")
## P-value: 0.270985

if (p_lower < alpha) {
  verdict <- "Reject the null hypothesis"
} else {
  verdict <- "Fail to reject the null hypothesis"
}
cat("Decision:", verdict, "\n\n")
## Decision: Fail to reject the null hypothesis
# One-sample, left-tailed t-test (population variance unknown).
# H0: mean ppm of ozone is equal to 7.3 ppm.
# Ha: mean ppm of ozone is less than 7.3 ppm.

mu <- 7.3               # hypothesized population mean ozone level
sample_mean <- 7.1      # sample mean
sample_variance <- 0.49 # sample variance
sample_size <- 51       # number of samples
alpha <- 0.01           # significance level

# Standard error of the sample mean
standard_error <- sqrt(sample_variance / sample_size)

# Test statistic
t_value <- (sample_mean - mu) / standard_error

# Degrees of freedom
df <- sample_size - 1

# Ha is "less than", so the whole rejection region (area alpha) lies in the
# lower tail: a single critical value, not the two-tailed pair.
critical_t_values <- qt(alpha, df)

cat("Test Statistic (t):", t_value, "\n")
## Test Statistic (t): -2.040408
cat("Critical T-values:", critical_t_values, "\n")
# Prints the lower-tail critical value, approximately -2.403.

# Decision rule for a left-tailed test: reject H0 when the statistic falls
# below the (negative) critical value. Prints and invisibly returns the verdict.
myp <- function(t_value, critical_t_values) {
  if (t_value < critical_t_values) {
    print("Reject Null hypothesis")
  } else {
    print("FAIL to Reject")
  }
}

# Compare the statistic against the critical value (not against alpha).
myp(t_value, critical_t_values)
## [1] "FAIL to Reject"
# One-proportion, left-tailed z-test using the critical-value approach.
# H0: the proportion of readers who own a laptop is equal to 36%.
# Ha: the proportion of readers who own a laptop is less than 36%.

# Inputs
p_population <- 0.36 # hypothesized population proportion
p_sample <- 0.29     # sample proportion
n <- 100             # sample size
alpha <- 0.02        # significance level

# Standard error under H0
standard_error <- sqrt((p_population * (1 - p_population)) / n)

# Observed statistic and the lower-tail cutoff, each computed once
z_observed <- (p_sample - p_population) / standard_error
z_cutoff <- qnorm(alpha, lower.tail = TRUE)

cat("Test Statistic (z):", z_observed, "\n")
## Test Statistic (z): -1.458333
cat(" z_value:", z_cutoff, "\n")
## z_value: -2.053749

# Reject only when the statistic lies below the lower-tail cutoff.
if (z_observed < z_cutoff) {
  outcome <- "Reject the null hypothesis"
} else {
  outcome <- "Fail to reject the null hypothesis"
}
cat("Decision:", outcome, "\n")
## Decision: Fail to reject the null hypothesis
#5 A hospital director is told that 31% of the treated patients are uninsured.
# The director wants to test the claim that the percentage of uninsured
# patients is less than the expected percentage. A sample of 380 patients found
# that 95 were uninsured. Make the decision to reject or fail to reject the
# null hypothesis at the 0.05 level. Show all work and hypothesis testing steps.
#
# H0: the proportion of uninsured patients is equal to 31%.
# Ha: the proportion of uninsured patients is less than 31%.
# One-proportion, left-tailed z-test.

alpha <- 0.05 # level of significance
n <- 380      # sample size
pop <- 0.31   # hypothesized population proportion
sam <- 95 / n # sample proportion

se <- sqrt((pop * (1 - pop)) / n) # standard error under H0
z <- (sam - pop) / se             # test statistic
p <- pnorm(z, lower.tail = TRUE)  # lower-tail p-value

cat("Test Statistic (z):", z, "\n")
## Test Statistic (z): -2.528935
cat("The probability is", round(p, 4), ".\n")
## The probability is 0.0057 .
cat("Critical Z-value:", qnorm(alpha, lower.tail = TRUE), "\n")
## Critical Z-value: -1.644854

# p-value decision rule: prints (and invisibly returns) the verdict.
myp <- function(p, alpha) {
  if (p < alpha) {
    print("Reject null hypothesis")
  } else {
    print("FAIL to Reject")
  }
}
myp(p, alpha)
## [1] "Reject null hypothesis"
# Left-tailed chi-square test for a single standard deviation.
# H0: the standard deviation of tests is equal to 24.
# Ha: the standard deviation of tests is less than 24.

# Initializing the variables
historical_mean <- 112
historical_sd <- 24        # hypothesized population standard deviation
sample_mean <- 102
sample_sd <- 15.4387       # sample standard deviation
sample_size <- 22
sample_variance <- sample_sd^2
alpha <- 0.1               # significance level
chi_square_df <- sample_size - 1

# Test statistic: (n - 1) * s^2 / sigma0^2, computed from the inputs instead of
# being typed in by hand (evaluates to about 8.68997).
chi_square_statistic <- chi_square_df * sample_variance / historical_sd^2

# Ha is "less than", so the rejection region is the LOWER tail: the critical
# value is the alpha quantile (about 13.24), not the upper-tail value 29.61509.
chi_square_value <- qchisq(alpha, df = chi_square_df)

# Decision rule for a left-tailed test: reject H0 when the statistic falls
# below the critical value. Prints and invisibly returns the verdict.
myp <- function(chi_square_value, chi_square_statistic) {
  if (chi_square_statistic < chi_square_value) {
    print("Reject null hypothesis")
  } else {
    print("FAIL to Reject")
  }
}
myp(chi_square_value, chi_square_statistic)
## [1] "Reject null hypothesis"
# Decision : reject the null hypothesis
# Two-sample, two-sided t-test (unequal variances) on mean pulse rate.
# H0: the mean pulse rate for smokers and non-smokers is equal.
# Ha: the mean pulse rate for smokers and non-smokers is not equal.
# H0: mu1 - mu2 = 0, Ha: mu1 - mu2 <> 0
mu1 <- 87    # sample mean, group 1
mu2 <- 84    # sample mean, group 2
alpha <- 0.1 # significance level
# dist = t
n1 <- 32
n2 <- 31
df1 <- n1 - 1
df2 <- n2 - 1
sd1 <- 9
sd2 <- 10
# Derive the variances from the standard deviations so the two cannot drift apart.
var1 <- sd1^2
var2 <- sd2^2

num_point_estimate_diff <- (mu1 - mu2) # point estimate of the difference
# Standard error built from the sample (not population) standard deviations
den_Se <- sqrt(var1 / n1 + var2 / n2)
t <- num_point_estimate_diff / den_Se

# Satterthwaite degrees of freedom (can be replaced with the smaller of df1, df2)
numdf <- (var1 / n1 + var2 / n2)^2
dendf <- (var1 / n1)^2 / df1 + (var2 / n2)^2 / df2
df <- numdf / dendf

# Lower-tail probability P(T <= t) with the Satterthwaite df, shown for reference
pt(t, df, lower.tail = TRUE, log.p = FALSE)
## [1] 0.8919763

# Two-sided p-value with the conservative df = min(df1, df2). Using -abs(t)
# keeps the formula valid whichever group has the larger mean.
p_value_robust <- 2 * pt(-abs(t), df = min(df1, df2))
p_value_robust # a slightly different p-value from the Satterthwaite df, but the same decision
## [1] 0.220848

# p-value decision rule: prints (and invisibly returns) the verdict.
myp <- function(p_value_robust, alpha) {
  if (p_value_robust < alpha) {
    print("Reject null hypothesis")
  } else {
    print("FAIL to Reject")
  }
}
myp(p_value_robust, alpha)
## [1] "FAIL to Reject"
# 95% confidence interval for the difference of two means (unequal variances).
# H0: mu1 - mu2 = 0
# Ha: mu1 - mu2 <> 0
alpha <- 0.05

# Sample summaries
xbar1 <- 127 # mean, sample 1
xbar2 <- 157 # mean, sample 2
s1 <- 33     # standard deviation, sample 1
s2 <- 27     # standard deviation, sample 2
n1 <- 11     # size, sample 1
n2 <- 18     # size, sample 2

var1 <- s1^2 # variance, sample 1
var2 <- s2^2 # variance, sample 2
df1 <- n1 - 1 # degrees of freedom, sample 1
df2 <- n2 - 1 # degrees of freedom, sample 2

# Satterthwaite degrees of freedom (can be replaced with the smaller of df1, df2)
numdf <- (var1 / n1 + var2 / n2)^2
dendf <- (var1 / n1)^2 / df1 + (var2 / n2)^2 / df2
df <- numdf / dendf
df
## [1] 18.0759

# Point estimate of the difference
delta <- xbar1 - xbar2
delta
## [1] -30

# Critical value for a two-sided test at the 5% level
t <- qt(p = .975, df = df)
t
## [1] 2.10029

# Standard error built from the sample (not population) standard deviations
Se <- sqrt(var1 / n1 + var2 / n2)
Se
## [1] 11.81101

# Interval = point estimate -/+ critical value * standard error
interval <- delta + c(-1, 1) * t * Se
interval
## [1] -54.806548 -5.193452
# 95% CI is from -54.806548 to -5.193452
# Confidence interval for the mean paired difference in travel time between
# two routes (paired t interval).
route_I <- c(32, 27, 34, 24, 31, 25, 30, 23, 27, 35)
route_II <- c(28, 28, 33, 25, 26, 29, 33, 27, 25, 33)

# Single source of truth for the confidence level: both the critical value and
# the printed label derive from it, so they cannot disagree.
# NOTE(review): the previous code used 1 - 0.01 / 2 (a 99% critical value)
# under a "98%" label; 98% is assumed to be the intended level - confirm
# against the assignment and change conf_level if 99% was required.
conf_level <- 0.98

# Calculate the differences
differences <- route_I - route_II
n_pairs <- length(differences)

# Calculate sample mean and standard deviation of differences
mean_d <- mean(differences)
sd_d <- sd(differences)

# Calculate the standard error
se_d <- sd_d / sqrt(n_pairs)

# Two-tailed critical t-value: (1 - conf_level) / 2 in each tail
t_critical <- qt(1 - (1 - conf_level) / 2, df = n_pairs - 1)

# Calculate the margin of error
margin_of_error <- t_critical * se_d

# Calculate the confidence interval
confidence_interval_lower <- mean_d - margin_of_error
confidence_interval_upper <- mean_d + margin_of_error

# Display the results
cat("Sample Mean of Differences:", mean_d, "\n")
## Sample Mean of Differences: 0.1
cat("Standard Deviation of Differences:", sd_d, "\n")
## Standard Deviation of Differences: 3.212822
cat("Standard Error of Differences:", se_d, "\n")
## Standard Error of Differences: 1.015983
cat("Critical t-value:", t_critical, "\n")
## Critical t-value: 2.821438
cat("Margin of Error:", margin_of_error, "\n")
# Prints approximately 2.8665.
cat(paste0(100 * conf_level, "% Confidence Interval:"), "[", confidence_interval_lower, ",", confidence_interval_upper, "]\n")
# Prints the 98% interval, approximately [ -2.7665 , 2.9665 ].
#11 The U.S. Census Bureau conducts annual surveys to obtain information on the
# percentage of the voting-age population that is registered to vote. Suppose
# that 391 employed persons and 510 unemployed persons are independently and
# randomly selected, and that 195 of the employed persons and 193 of the
# unemployed persons have registered to vote. Can we conclude that the
# percentage of employed workers (p1) who have registered to vote, exceeds the
# percentage of unemployed workers (p2) who have registered to vote? Use a
# significance level of 0.05 for the test. Show all work and hypothesis testing
# steps.
#
# H0: pi1 - pi2 <= 0
# Ha: pi1 - pi2 > 0 (employed workers are registered at a higher rate)
# Right-tailed two-proportion z-test at the 0.05 level.

# Sample counts
x1 <- 195 # employed persons who have registered to vote
n1 <- 391 # employed persons sampled
x2 <- 193 # unemployed persons who have registered to vote
n2 <- 510 # unemployed persons sampled

# Sample proportions
p1 <- x1 / n1
p2 <- x2 / n2

# Pooled proportion and the standard error built from it
p <- (x1 + x2) / (n1 + n2)
se <- sqrt(p * (1 - p) * (1/n1 + 1/n2))

# Test statistic and upper-tail critical value
z <- (p1 - p2) / se
critical_z <- qnorm(1 - 0.05)

cat("Critical Z-value:", critical_z, "\n")
## Critical Z-value: 1.644854
cat("Test Statistic:", z, "\n")
## Test Statistic: 3.614018

# Pick the conclusion first, then print it once.
if (z > critical_z) {
  conclusion <- "Reject the null hypothesis. There is evidence that the percentage of employed workers who have registered to vote exceeds the percentage of unemployed workers who have registered to vote.\n"
} else {
  conclusion <- "Fail to reject the null hypothesis. There is no significant evidence that the percentage of employed workers who have registered to vote exceeds the percentage of unemployed workers who have registered to vote.\n"
}
cat(conclusion)
## Reject the null hypothesis. There is evidence that the percentage of employed workers who have registered to vote exceeds the percentage of unemployed workers who have registered to vote.