X <- c(1, 0, 2, 0, 3, 1, 0, 1, 2, 0)
Y <- c(16, 9, 17, 12, 22, 13, 8, 15, 19, 11)
data <- data.frame(Transfers = X, Broken = Y)
#Problem 1
# (a)
model <- lm(Broken ~ Transfers, data = data)
summary_model <- summary(model)
summary_model
##
## Call:
## lm(formula = Broken ~ Transfers, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2 -1.2 0.3 0.8 1.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.2000 0.6633 15.377 3.18e-07 ***
## Transfers 4.0000 0.4690 8.528 2.75e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.483 on 8 degrees of freedom
## Multiple R-squared: 0.9009, Adjusted R-squared: 0.8885
## F-statistic: 72.73 on 1 and 8 DF, p-value: 2.749e-05
coefficients <- coef(model)
coefficients
## (Intercept) Transfers
## 10.2 4.0
#Estimated function: Y_hat= 10.2+4* X_hat
#Both R-squared and adjusted R-squared values are also pretty high
# (b)
Y_hat_1 <- predict(model, newdata = data.frame(Transfers = 1))
round(Y_hat_1, 4)
## 1
## 14.2
#Expected broken ampules when X=1:14.2
# (c) Increase from 1 to 2 transfers
increase <- coefficients[2] * (2 - 1)
increase
## Transfers
## 4
# the increase in the expected number of ampules broken when there are
# 2 transfers as compared to 1 transfer:4
# (d)
X_bar <- mean(X)
Y_bar <- mean(Y)
Y_hat_bar <- coefficients[1] + coefficients[2] * X_bar
X_bar
## [1] 1
Y_bar
## [1] 14.2
Y_hat_bar
## (Intercept)
## 14.2
cat("Line passes through (X̄, Ȳ):", abs(Y_bar - Y_hat_bar) < 1e-10, "\n")
## Line passes through (X̄, Ȳ): TRUE
#Verified that the line passes through X bar, Y bar
# Plot
plot(X, Y, xlab = "Number of Transfers", ylab = "Broken Ampules",
main = "Broken Ampules vs Transfers", pch = 19, col = "blue")
abline(model, col = "red", lwd = 2)
#problem 2
# (a)95% CI for beta1
ci_beta1 <- confint(model, "Transfers", level = 0.95)
ci_beta1
## 2.5 % 97.5 %
## Transfers 2.918388 5.081612
# Interpretation: We are 95% confident that each additional transfer increases
# broken ampules by between 2.918 and 5.082 units
# (b)Two-sided test
t_stat <- summary_model$coefficients["Transfers", "t value"]
p_val_two <- summary_model$coefficients["Transfers", "Pr(>|t|)"]
df <- summary_model$df[2]
critical_t <- qt(0.975, df)
cat("Conclusion:", ifelse(p_val_two < 0.05, "Reject Null - Significant linear association",
"Fail to reject Null - No significant association"), "\n\n")
## Conclusion: Reject Null - Significant linear association
# (c) One-sided test for positive association
p_val_one <- p_val_two / 2
critical_t_one <- qt(0.95, df)
cat("Conclusion:", ifelse(p_val_one < 0.05, "Reject Null - Significant positive association",
"Fail to reject Null - No significant positive association"), "\n\n")
## Conclusion: Reject Null - Significant positive association
# (d) 95% CI for beta0
ci_beta0 <- confint(model, "(Intercept)", level = 0.95)
ci_beta0
## 2.5 % 97.5 %
## (Intercept) 8.67037 11.72963
#We are 95% confident that the mean broken ampules with
# no transfers is between 8.670 and 11.727
# (e) Test beta0 ≤ 9.0
beta0_hat <- coefficients[1]
se_beta0 <- summary_model$coefficients["(Intercept)", "Std. Error"]
t_stat_beta0 <- (beta0_hat - 9.0) / se_beta0
p_val_beta0 <- pt(t_stat_beta0, df, lower.tail = FALSE)
critical_t_beta0 <- qt(0.975, df)
p_val_beta0
## (Intercept)
## 0.05402227
cat("Conclusion:", ifelse(p_val_beta0 < 0.025, "Reject Null - Mean > 9.0",
"Fail to reject Null - Mean ≤ 9.0"), "\n")
## Conclusion: Fail to reject Null - Mean ≤ 9.0
#Here, p value is not less than 0.025, we couldn't reject the null hypothesis
#Problem3
# (a) 99% Confidence intervals for mean response
X_new <- data.frame(Transfers = c(2, 4))
conf_intervals <- predict(model, newdata = X_new,
interval = "confidence", level = 0.99)
cat("(a) 99% CONFIDENCE INTERVALS FOR MEAN BREAKAGE:\n")
## (a) 99% CONFIDENCE INTERVALS FOR MEAN BREAKAGE:
for(i in 1:nrow(X_new)) {
cat("X =", X_new$Transfers[i], "transfers:\n")
cat(" Predicted mean =", round(conf_intervals[i, "fit"], 4), "\n")
cat(" 99% CI: (", round(conf_intervals[i, "lwr"], 4), ",",
round(conf_intervals[i, "upr"], 4), ")\n")
cat(" Interpretation: We are 99% confident that the true mean number of\n")
cat(" broken ampules for all shipments with", X_new$Transfers[i],
"transfers is between", round(conf_intervals[i, "lwr"], 3),
"and", round(conf_intervals[i, "upr"], 3), "ampules.\n\n")
}
## X = 2 transfers:
## Predicted mean = 18.2
## 99% CI: ( 15.9743 , 20.4257 )
## Interpretation: We are 99% confident that the true mean number of
## broken ampules for all shipments with 2 transfers is between 15.974 and 20.426 ampules.
##
## X = 4 transfers:
## Predicted mean = 26.2
## 99% CI: ( 21.2232 , 31.1768 )
## Interpretation: We are 99% confident that the true mean number of
## broken ampules for all shipments with 4 transfers is between 21.223 and 31.177 ampules.
# (b) 99% Prediction interval for single observation at X = 2
pred_interval_single <- predict(model,
newdata = data.frame(Transfers = 2),
interval = "prediction", level = 0.99)
cat("(b) 99% PREDICTION INTERVAL FOR SINGLE SHIPMENT:\n")
## (b) 99% PREDICTION INTERVAL FOR SINGLE SHIPMENT:
cat("X = 2 transfers:\n")
## X = 2 transfers:
cat(" Predicted value =", round(pred_interval_single[1], 4), "\n")
## Predicted value = 18.2
cat(" 99% Prediction Interval: (", round(pred_interval_single[2], 4), ",",
round(pred_interval_single[3], 4), ")\n")
## 99% Prediction Interval: ( 12.7481 , 23.6519 )
cat(" Interpretation: We are 99% confident that a single shipment with 2 transfers\n")
## Interpretation: We are 99% confident that a single shipment with 2 transfers
cat(" will have between", round(pred_interval_single[2], 3), "and",
round(pred_interval_single[3], 3), "broken ampules.\n\n")
## will have between 12.748 and 23.652 broken ampules.
# (c)
m <- 3
X_star <- 2
X_matrix <- model.matrix(model)
n <- nrow(X_matrix)
p <- ncol(X_matrix)
X_mean <- mean(data$Transfers)
SXX <- sum((data$Transfers - X_mean)^2)
#SE for mean of m new observations
se_mean_m <- summary_model$sigma * sqrt(1/m + (X_star - X_mean)^2 / SXX)
#Predicted value
Y_hat <- coefficients[1] + coefficients[2] * X_star
#t-value for 99% confidence
t_val <- qt(0.995, df = n - p)
#Prediction interval
lower <- Y_hat - t_val * se_mean_m
upper <- Y_hat + t_val * se_mean_m
#We predict that the AVERAGE number of broken ampules
#across 3 future shipments (each with 2 transfers) will be between
#14.92 and 21.48 ampules
#Problem 4
#(a) ANOVA table
anova_model=anova(model)
print(anova_model)
## Analysis of Variance Table
##
## Response: Broken
## Df Sum Sq Mean Sq F value Pr(>F)
## Transfers 1 160.0 160.0 72.727 2.749e-05 ***
## Residuals 8 17.6 2.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
SSR <- anova_model$"Sum Sq"[1]
SSE <- anova_model$"Sum Sq"[2]
SST <- SSR + SSE
dfR <- anova_model$Df[1]
dfE <- anova_model$Df[2]
dfT <- dfR + dfE
MSR <- anova_model$"Mean Sq"[1]
MSE <- anova_model$"Mean Sq"[2]
F_stat <- anova_model$"F value"[1]
cat("Additive elements: Sum of Squares and Degrees of Freedom are additive\n")
## Additive elements: Sum of Squares and Degrees of Freedom are additive
cat("Mean Squares are not additive \n\n")
## Mean Squares are not additive
# (b) F-test for linear association
alpha <- 0.05
F_critical <- qf(1 - alpha, dfR, dfE)
p_value_F <- anova_model$"Pr(>F)"[1]
p_value_F
## [1] 2.748669e-05
cat("Conclusion:", ifelse(F_stat > F_critical,
"Reject H₀ - Significant linear association",
"Fail to reject H₀ - No significant association"), "\n\n")
## Conclusion: Reject H₀ - Significant linear association
#Strong evidence of linear association
# (c) t-statistic and equivalence to F
t_stat <- summary_model$coefficients["Transfers", "t value"]
t_squared <- t_stat^2
cat("Difference =", round(abs(t_squared - F_stat), 6), "\n")
## Difference = 0
#Numerical equivalence demonstrated that t sqr= F
# (d) R-squared and correlation
R_squared <- summary_model$r.squared
r <- cor(X, Y)
r_squared_from_cor <- r^2
#R² and r: R² = 0.9009, r = 0.949
#Interpretation: 90.1% of the variation in broken ampules is explained by the linear relationship with number of transfers