Problem 1

copier <- read.csv("/Users/caelynsobie/Downloads/MAT613/copier-maintenance.csv")
extra <- data.frame(y = c(132, 166), x = c(6, 5))
enlarged <- rbind(copier,extra)
lm <- lm(copier)
x <- copier$x
y <- copier$y
plot(x,y,pch=4)
abline(lm, col='green',lwd = 2)

en_model <- lm(enlarged)
X <- enlarged$x
Y <- enlarged$y
plot(X,Y,pch=4)
abline(en_model, col='blue',lwd = 2)

summary(en_model)
## 
## Call:
## lm(formula = enlarged)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -25.980  -6.761  -2.323   3.903  88.567 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   1.8860     5.3672   0.351    0.727    
## x            15.1094     0.9266  16.307   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.12 on 45 degrees of freedom
## Multiple R-squared:  0.8553, Adjusted R-squared:  0.852 
## F-statistic: 265.9 on 1 and 45 DF,  p-value: < 2.2e-16
res <- en_model$residuals
med_res <- median(res)
m <- abs(res - med_res)
MAD_en <- (1/.6745)*median(m)
en_ui <- res/MAD_en
k <- 1.345

huber_weights_en <- numeric(length(en_ui))
for (i in seq_along(en_ui)) {
  if (abs(en_ui[i]) <= k) {
  huber_weights_en[i] <- 1
  } else {
  huber_weights_en[i] <- k / abs(en_ui[i])
  }
}
huber_weights_en
##  [1] 0.9523743 1.0000000 1.0000000 1.0000000 1.0000000 0.7214324 1.0000000
##  [8] 0.9966371 0.8871036 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [15] 1.0000000 0.5064928 1.0000000 1.0000000 0.4437413 1.0000000 1.0000000
## [22] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 0.8173313 1.0000000
## [29] 1.0000000 1.0000000 1.0000000 0.9173322 0.9523743 1.0000000 0.8237182
## [36] 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000 1.0000000
## [43] 1.0000000 1.0000000 1.0000000 0.2921668 0.1301640
w_en_model <- lm(enlarged, weights = huber_weights_en)
summary(w_en_model)
## 
## Call:
## lm(formula = enlarged, weights = huber_weights_en)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.072  -5.918  -0.497   5.115  32.524 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -0.9235     3.1753  -0.291    0.773    
## x            15.3552     0.5571  27.562   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.79 on 45 degrees of freedom
## Multiple R-squared:  0.9441, Adjusted R-squared:  0.9428 
## F-statistic: 759.7 on 1 and 45 DF,  p-value: < 2.2e-16
#obtain residuals
res_w_en <- w_en_model$residuals

#calculate ui with new residuals
med_res_w <- median(res_w_en)
m_w <- abs(res_w_en - med_res_w)
MAD_en_w <- (1/.6745)*median(m_w)
en_ui_w <- res_w_en/MAD_en_w

#get weights
huber_weights_en_w <- numeric(length(en_ui_w))
for (i in seq_along(en_ui_w)) {
  if (abs(en_ui_w[i]) <= k) {
  huber_weights_en_w[i] <- 1
  
  huber_weights_en_w[i] <- k / abs(en_ui_w[i])
  }
}
huber_weights_en_w
##  [1]  1.176856 23.156488 13.426909  1.027185  4.736427  0.000000  1.466756
##  [8]  0.000000  1.104114  5.204564  1.712296  3.416398  6.426704  1.259858
## [15]  1.001325  0.000000  7.368954  1.268239  0.000000  3.293269  1.169016
## [22]  2.070430  4.726289  6.217120  3.518572  2.118430  0.000000  4.037665
## [29]  2.595879  2.403465 10.647277  0.000000  1.176856  6.445463  1.007531
## [36]  3.293269  1.135055  2.184123  2.118430  0.000000  1.835969  2.733865
## [43]  4.132750 22.916178 10.038244  0.000000  0.000000
#second iteration
w_en_model2 <- lm(enlarged, weights = huber_weights_en_w)

#obtain residuals
res_w_en2 <- w_en_model2$residuals

#calculate ui with new residuals
med_res_w2 <- median(res_w_en2)
m_w2 <- abs(res_w_en2 - med_res_w2)
MAD_en_w2 <- (1/.6745)*median(m_w2)
en_ui_w2 <- res_w_en2/MAD_en_w2

#get weights
huber_weights_en_w2 <- numeric(length(en_ui_w2))
for (i in seq_along(en_ui_w2)) {
  if (abs(en_ui_w2[i]) <= k) {
  huber_weights_en_w2[i] <- 1
  
  huber_weights_en_w2[i] <- k / abs(en_ui_w2[i])
  }
}
huber_weights_en_w2
##  [1]   1.269071 112.028626   6.951622   0.000000  10.764606   0.000000
##  [7]   1.420439   0.000000   1.225537   3.371215   2.030207   6.288707
## [13]   7.276000   1.315810   0.000000   0.000000   5.099248   1.359209
## [19]   0.000000   3.570433   1.130230   1.793262   6.044911   6.184142
## [25]   2.431442   2.290530   1.032262   3.966871   2.888535   2.447442
## [31]  79.046531   0.000000   1.269071  15.406708   1.103324   3.570433
## [37]   1.083537   1.689056   2.290530   0.000000   1.465351   2.094693
## [43]   6.439535  12.276319   9.136768   0.000000   0.000000
#do regression again

w_en_model3 <- lm(enlarged, weights = huber_weights_en_w2)
summary(w_en_model3)
## 
## Call:
## lm(formula = enlarged, weights = huber_weights_en_w2)
## 
## Weighted Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.188  -6.734   0.000   4.186  10.843 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  -3.9176     0.9858  -3.974 0.000336 ***
## x            15.9002     0.1740  91.395  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.246 on 35 degrees of freedom
## Multiple R-squared:  0.9958, Adjusted R-squared:  0.9957 
## F-statistic:  8353 on 1 and 35 DF,  p-value: < 2.2e-16
plot(X,Y,pch=4)
lines(X, en_model$fitted.values, col = rgb(70/255, 130/255, 180/255), lwd = 2, pch = 1)  # OLS fit
lines(X, w_en_model3$fitted.values, col = rgb(95/255, 158/255, 72/255), pch = 1)  # IRLS fit

legend("topleft", legend = c("OLS Fit", "IRLS Fit"),
       col = c( rgb(70/255, 130/255, 180/255), rgb(95/255, 158/255, 72/255)), lwd = 2)

PROBLEM 2

logistic_function <- function(X, beta0, beta1) {
  1 / (1 + exp(-(beta0 + beta1 * X)))
}

log_x <- seq(0, 120, length.out = 100)

# Parameters
beta0 <- 20
beta1 <- -0.2
mean_response <- logistic_function(log_x, beta0, beta1)

plot(log_x, mean_response,type = "l", col = rgb(95/255, 158/255, 72/255), lwd = 2,
     xlab = "X",
     ylab = "Mean Response",
     main = "Logistic Mean Response Function")

odds_125 <- exp(beta0 + beta1 * 125)
odds_126 <- exp(beta0 + beta1 * 126)
odds_ratio <- odds_126 / odds_125

exp_beta_1 <- exp(beta1)

cat("Odds at X = 125:", odds_125, "\n")
## Odds at X = 125: 0.006737947
cat("Odds at X = 126:", odds_126, "\n")
## Odds at X = 126: 0.005516564
cat("Odds Ratio:", odds_ratio, "\n")
## Odds Ratio: 0.8187308
cat("exp(beta1):", exp_beta_1, "\n")
## exp(beta1): 0.8187308

PROBLEM 3

performance <- read.csv("/Users/caelynsobie/Downloads/MAT613/performance-ability.csv")
model_perform <- glm(performance,  family = binomial)
beta_0 <- model_perform$coefficients[1]  

beta_1 <- model_perform$coefficients[2]  



print(paste("Estimated Intercept beto0:", beta_0))
## [1] "Estimated Intercept beto0: -10.3089251781477"
print(paste("Estimated Slope beta1:", beta_1))
## [1] "Estimated Slope beta1: 0.0189198293852957"
print(paste("Fitted Response Function: logit =", beta_0, "+", beta_1, "x"))
## [1] "Fitted Response Function: logit = -10.3089251781477 + 0.0189198293852957 x"
#lowess model

lowess <- loess(performance)
# Plot the original data
plot(performance$X, performance$Y)

seq <- seq(min(performance$X), max(performance$X), length.out = 100)

fitted_probs <- predict(model_perform, newdata = data.frame(X = seq), type = "response")
fitted_lowess <- predict(lowess, newdata = data.frame(X = seq), type = "response")

lines(seq, fitted_probs, lwd = 2, col = "green")
lines(seq, fitted_lowess, lwd = 2, col = "blue")

c)

exp(beta_1)
##      X 
## 1.0191
logistic_function(550,beta_0,beta_1)
## (Intercept) 
##   0.5242263
(log(.7/.3) - beta_0)/beta_1
## (Intercept) 
##    589.6577

PROBLEM 4

below <- data.frame(
  value = c(29,42,38,40,43,40,30,42),
  group = "Below_Average"
)

avg <- data.frame(
  value = c(30,35,39,28,31,31,29,35,29,33),
  group = "Average"
)

above <- data.frame(
  value = c(26,32,21,20,23,22),
  group = "Above_Average"
)

rehab <- rbind(below, avg, above)

# Create strip chart
stripchart(value ~ group,
           data = rehab,
           pch = 1,
           xlab = "Prior Physical Fitness Status",
           ylab = "Days to Recovery",
           vertical = TRUE)

Below_Average = c(29,42,38,40,43,40,30,42, NA, NA)
Average = c(30,35,39,28,31,31,29,35,29,33)
Above_Average = c(26,32,21,20,23,22, NA, NA, NA, NA)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
knee <- bind_cols(
  Below_Average = Below_Average,
  Average = Average,
  Above_Average = Above_Average
)
#fitted values
anov <- aov(value ~ group, data = rehab)
anov_fit <- anov$fitted.values
anov_fit
##  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 
## 38 38 38 38 38 38 38 38 32 32 32 32 32 32 32 32 32 32 24 24 24 24 24 24
anov_res <- anov$residuals
sum(anov_res)
## [1] -2.642331e-14
#anova table

anov_table <- summary(anov)
anov_table
##             Df Sum Sq Mean Sq F value   Pr(>F)    
## group        2    672   336.0   16.96 4.13e-05 ***
## Residuals   21    416    19.8                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#H_0 : mu1 = mu2 = mu3 
#H_a : not all mu_i are equal


MSTr <- anov_table[[1]]$`Mean Sq`[1]
MSE <- anov_table[[1]]$`Mean Sq`[2]

# Calculate the test statistic
test_stat <- MSTr / MSE
r <- 3
nt<- 8+10+6
  
df1 <- r-1
df2 <-nt - r

alpha <- 0.01

#critical value
critical_value <- pf(1-alpha, df1, df2)

print(paste("Test Statistic: ", test_stat))
## [1] "Test Statistic:  16.9615384615385"
print(paste("Critical Value: ", critical_value))
## [1] "Critical Value:  0.611735808180911"
print("Test Statistic > Critical Value. Hence conclude Ha. The mean number of days required for successful rehabilitation is not the same for the
three fitness groups.")
## [1] "Test Statistic > Critical Value. Hence conclude Ha. The mean number of days required for successful rehabilitation is not the same for the\nthree fitness groups."