Problem 1
copier <- read.csv("/Users/caelynsobie/Downloads/MAT613/copier-maintenance.csv")
extra <- data.frame(y = c(132, 166), x = c(6, 5))
enlarged <- rbind(copier,extra)
lm <- lm(copier)
x <- copier$x
y <- copier$y
plot(x,y,pch=4)
abline(lm, col='green',lwd = 2)
en_model <- lm(enlarged)
X <- enlarged$x
Y <- enlarged$y
plot(X,Y,pch=4)
abline(en_model, col='blue',lwd = 2)
summary(en_model)
##
## Call:
## lm(formula = enlarged)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.980 -6.761 -2.323 3.903 88.567
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.8860 5.3672 0.351 0.727
## x 15.1094 0.9266 16.307 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.12 on 45 degrees of freedom
## Multiple R-squared: 0.8553, Adjusted R-squared: 0.852
## F-statistic: 265.9 on 1 and 45 DF, p-value: < 2.2e-16
res <- en_model$residuals
med_res <- median(res)
m <- abs(res - med_res)
MAD_en <- (1/.6745)*median(m)
en_ui <- res/MAD_en
k <- 1.345
huber_weights_en <- numeric(length(en_ui))
for (i in seq_along(en_ui)) {
if (abs(en_ui[i]) <= k) {
huber_weights_en[i] <- 1
} else {
huber_weights_en[i] <- k / abs(en_ui[i])
}
}
huber_weights_en
## [1] 0.9523743 1.0000000 1.0000000 1.0000000 1.0000000 0.7214324 1.0000000
## [8] 0.9966371 0.8871036 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [15] 1.0000000 0.5064928 1.0000000 1.0000000 0.4437413 1.0000000 1.0000000
## [22] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 0.8173313 1.0000000
## [29] 1.0000000 1.0000000 1.0000000 0.9173322 0.9523743 1.0000000 0.8237182
## [36] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [43] 1.0000000 1.0000000 1.0000000 0.2921668 0.1301640
w_en_model <- lm(enlarged, weights = huber_weights_en)
summary(w_en_model)
##
## Call:
## lm(formula = enlarged, weights = huber_weights_en)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -17.072 -5.918 -0.497 5.115 32.524
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.9235 3.1753 -0.291 0.773
## x 15.3552 0.5571 27.562 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.79 on 45 degrees of freedom
## Multiple R-squared: 0.9441, Adjusted R-squared: 0.9428
## F-statistic: 759.7 on 1 and 45 DF, p-value: < 2.2e-16
#obtain residuals
res_w_en <- w_en_model$residuals
#calculate ui with new residuals
med_res_w <- median(res_w_en)
m_w <- abs(res_w_en - med_res_w)
MAD_en_w <- (1/.6745)*median(m_w)
en_ui_w <- res_w_en/MAD_en_w
#get weights
huber_weights_en_w <- numeric(length(en_ui_w))
for (i in seq_along(en_ui_w)) {
if (abs(en_ui_w[i]) <= k) {
huber_weights_en_w[i] <- 1
huber_weights_en_w[i] <- k / abs(en_ui_w[i])
}
}
huber_weights_en_w
## [1] 1.176856 23.156488 13.426909 1.027185 4.736427 0.000000 1.466756
## [8] 0.000000 1.104114 5.204564 1.712296 3.416398 6.426704 1.259858
## [15] 1.001325 0.000000 7.368954 1.268239 0.000000 3.293269 1.169016
## [22] 2.070430 4.726289 6.217120 3.518572 2.118430 0.000000 4.037665
## [29] 2.595879 2.403465 10.647277 0.000000 1.176856 6.445463 1.007531
## [36] 3.293269 1.135055 2.184123 2.118430 0.000000 1.835969 2.733865
## [43] 4.132750 22.916178 10.038244 0.000000 0.000000
#second iteration
w_en_model2 <- lm(enlarged, weights = huber_weights_en_w)
#obtain residuals
res_w_en2 <- w_en_model2$residuals
#calculate ui with new residuals
med_res_w2 <- median(res_w_en2)
m_w2 <- abs(res_w_en2 - med_res_w2)
MAD_en_w2 <- (1/.6745)*median(m_w2)
en_ui_w2 <- res_w_en2/MAD_en_w2
#get weights
huber_weights_en_w2 <- numeric(length(en_ui_w2))
for (i in seq_along(en_ui_w2)) {
if (abs(en_ui_w2[i]) <= k) {
huber_weights_en_w2[i] <- 1
huber_weights_en_w2[i] <- k / abs(en_ui_w2[i])
}
}
huber_weights_en_w2
## [1] 1.269071 112.028626 6.951622 0.000000 10.764606 0.000000
## [7] 1.420439 0.000000 1.225537 3.371215 2.030207 6.288707
## [13] 7.276000 1.315810 0.000000 0.000000 5.099248 1.359209
## [19] 0.000000 3.570433 1.130230 1.793262 6.044911 6.184142
## [25] 2.431442 2.290530 1.032262 3.966871 2.888535 2.447442
## [31] 79.046531 0.000000 1.269071 15.406708 1.103324 3.570433
## [37] 1.083537 1.689056 2.290530 0.000000 1.465351 2.094693
## [43] 6.439535 12.276319 9.136768 0.000000 0.000000
#do regression again
w_en_model3 <- lm(enlarged, weights = huber_weights_en_w2)
summary(w_en_model3)
##
## Call:
## lm(formula = enlarged, weights = huber_weights_en_w2)
##
## Weighted Residuals:
## Min 1Q Median 3Q Max
## -10.188 -6.734 0.000 4.186 10.843
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3.9176 0.9858 -3.974 0.000336 ***
## x 15.9002 0.1740 91.395 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7.246 on 35 degrees of freedom
## Multiple R-squared: 0.9958, Adjusted R-squared: 0.9957
## F-statistic: 8353 on 1 and 35 DF, p-value: < 2.2e-16
plot(X,Y,pch=4)
lines(X, en_model$fitted.values, col = rgb(70/255, 130/255, 180/255), lwd = 2, pch = 1) # OLS fit
lines(X, w_en_model3$fitted.values, col = rgb(95/255, 158/255, 72/255), pch = 1) # IRLS fit
legend("topleft", legend = c("OLS Fit", "IRLS Fit"),
col = c( rgb(70/255, 130/255, 180/255), rgb(95/255, 158/255, 72/255)), lwd = 2)
PROBLEM 2
logistic_function <- function(X, beta0, beta1) {
1 / (1 + exp(-(beta0 + beta1 * X)))
}
log_x <- seq(0, 120, length.out = 100)
# Parameters
beta0 <- 20
beta1 <- -0.2
mean_response <- logistic_function(log_x, beta0, beta1)
plot(log_x, mean_response,type = "l", col = rgb(95/255, 158/255, 72/255), lwd = 2,
xlab = "X",
ylab = "Mean Response",
main = "Logistic Mean Response Function")
odds_125 <- exp(beta0 + beta1 * 125)
odds_126 <- exp(beta0 + beta1 * 126)
odds_ratio <- odds_126 / odds_125
exp_beta_1 <- exp(beta1)
cat("Odds at X = 125:", odds_125, "\n")
## Odds at X = 125: 0.006737947
cat("Odds at X = 126:", odds_126, "\n")
## Odds at X = 126: 0.005516564
cat("Odds Ratio:", odds_ratio, "\n")
## Odds Ratio: 0.8187308
cat("exp(beta1):", exp_beta_1, "\n")
## exp(beta1): 0.8187308
PROBLEM 3
performance <- read.csv("/Users/caelynsobie/Downloads/MAT613/performance-ability.csv")
model_perform <- glm(performance, family = binomial)
beta_0 <- model_perform$coefficients[1]
beta_1 <- model_perform$coefficients[2]
print(paste("Estimated Intercept beto0:", beta_0))
## [1] "Estimated Intercept beto0: -10.3089251781477"
print(paste("Estimated Slope beta1:", beta_1))
## [1] "Estimated Slope beta1: 0.0189198293852957"
print(paste("Fitted Response Function: logit =", beta_0, "+", beta_1, "x"))
## [1] "Fitted Response Function: logit = -10.3089251781477 + 0.0189198293852957 x"
#lowess model
lowess <- loess(performance)
# Plot the original data
plot(performance$X, performance$Y)
seq <- seq(min(performance$X), max(performance$X), length.out = 100)
fitted_probs <- predict(model_perform, newdata = data.frame(X = seq), type = "response")
fitted_lowess <- predict(lowess, newdata = data.frame(X = seq), type = "response")
lines(seq, fitted_probs, lwd = 2, col = "green")
lines(seq, fitted_lowess, lwd = 2, col = "blue")
c)
exp(beta_1)
## X
## 1.0191
logistic_function(550,beta_0,beta_1)
## (Intercept)
## 0.5242263
(log(.7/.3) - beta_0)/beta_1
## (Intercept)
## 589.6577
PROBLEM 4
below <- data.frame(
value = c(29,42,38,40,43,40,30,42),
group = "Below_Average"
)
avg <- data.frame(
value = c(30,35,39,28,31,31,29,35,29,33),
group = "Average"
)
above <- data.frame(
value = c(26,32,21,20,23,22),
group = "Above_Average"
)
rehab <- rbind(below, avg, above)
# Create strip chart
stripchart(value ~ group,
data = rehab,
pch = 1,
xlab = "Prior Physical Fitness Status",
ylab = "Days to Recovery",
vertical = TRUE)
Below_Average = c(29,42,38,40,43,40,30,42, NA, NA)
Average = c(30,35,39,28,31,31,29,35,29,33)
Above_Average = c(26,32,21,20,23,22, NA, NA, NA, NA)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
knee <- bind_cols(
Below_Average = Below_Average,
Average = Average,
Above_Average = Above_Average
)
#fitted values
anov <- aov(value ~ group, data = rehab)
anov_fit <- anov$fitted.values
anov_fit
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## 38 38 38 38 38 38 38 38 32 32 32 32 32 32 32 32 32 32 24 24 24 24 24 24
anov_res <- anov$residuals
sum(anov_res)
## [1] -2.642331e-14
#anova table
anov_table <- summary(anov)
anov_table
## Df Sum Sq Mean Sq F value Pr(>F)
## group 2 672 336.0 16.96 4.13e-05 ***
## Residuals 21 416 19.8
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#H_0 : mu1 = mu2 = mu3
#H_a : not all mu_i are equal
MSTr <- anov_table[[1]]$`Mean Sq`[1]
MSE <- anov_table[[1]]$`Mean Sq`[2]
# Calculate the test statistic
test_stat <- MSTr / MSE
r <- 3
nt<- 8+10+6
df1 <- r-1
df2 <-nt - r
alpha <- 0.01
#critical value
critical_value <- pf(1-alpha, df1, df2)
print(paste("Test Statistic: ", test_stat))
## [1] "Test Statistic: 16.9615384615385"
print(paste("Critical Value: ", critical_value))
## [1] "Critical Value: 0.611735808180911"
print("Test Statistic > Critical Value. Hence conclude Ha. The mean number of days required for successful rehabilitation is not the same for the
three fitness groups.")
## [1] "Test Statistic > Critical Value. Hence conclude Ha. The mean number of days required for successful rehabilitation is not the same for the\nthree fitness groups."