# Predictor values and the observed outcome for a single observation.
eth <- 0
sex <- 1
lrn <- 1
actual_missed_days_of_school <- 2

# Predicted outcome from the linear equation: one coefficient per predictor
# plus the constant term 18.93.
calculated_missed_days_of_school <-
  (-9.11 * eth) +
  (3.10 * sex) +
  (2.15 * lrn) +
  18.93
calculated_missed_days_of_school
## [1] 24.18

# Residual: observed value minus the value given by the equation.
actual_missed_days_of_school - calculated_missed_days_of_school
## [1] -22.18
# Constants used by the adjustment below
# (presumably n = number of observations, k = number of predictors -- confirm).
n <- 146
k <- 3

# Share of the variance that is left unexplained, as a ratio of two variances.
proportion_variance_not_explained <- 240.57 / 264.17

# The adjustment inflates the unexplained share by (n - 1) / (n - k - 1).
adjustment_factor <- (n - 1) / (n - k - 1)

# R-squared is one minus the unexplained share; the adjusted version uses the
# inflated unexplained share instead.
unadjusted_Rsquared <- 1 - proportion_variance_not_explained
adjusted_Rsquared <- 1 - (proportion_variance_not_explained * adjustment_factor)

round(unadjusted_Rsquared, digits = 4)
## [1] 0.0893
round(adjusted_Rsquared, digits = 4)
## [1] 0.0701
# Probability of O-ring failure given by the logistic equation, evaluated on a
# grid of temperatures from 0 to 100 in steps of 5, then plotted.
#
# The grid is defined once and the probabilities are computed for the whole
# vector in a single expression, instead of growing the result with c() inside
# a loop and repeating the grid definition in the plot call.
temperature_grid <- seq(from = 0, to = 100, by = 5)

# Log-odds from the equation (slope -0.2162 per degree, intercept 11.6630).
log_odds <- (-0.2162 * temperature_grid) + 11.6630

# Inverse logit: exp(x) / (1 + exp(x)), applied element-wise.
calculated_probabilities <- exp(log_odds) / (1 + exp(log_odds))

plot(
  temperature_grid,
  calculated_probabilities,
  type = "o",
  xlab = "Temperature",
  ylab = "Calculated probability of O-ring failure"
)
# Observed data: one row per observation, giving the temperature and the
# number of damaged items recorded at that temperature.
original_data <- data.frame(
  Temperature = c(
    53, 57, 58, 63, 66, 67, 67, 67, 68, 69, 70, 70,
    70, 70, 72, 73, 75, 75, 76, 76, 78, 79, 81
  ),
  Num.damaged = c(
    5, 1, 1, 1,
    rep(0, times = 6),
    1, 0, 1,
    rep(0, times = 4),
    1,
    rep(0, times = 5)
  ),
  stringsAsFactors = FALSE
)

# Observed failure rate: damaged count divided by 6
# (presumably the number of O-rings per observation -- confirm).
original_data$Failure.rate <- original_data$Num.damaged / 6
# Probability given by the logistic equation at temperatures 51, 53, ..., 71.
#
# Computed for the whole temperature vector at once rather than appending to
# the result with c() on every loop iteration.
log_odds <- (-0.2162 * seq(from = 51, to = 71, by = 2)) + 11.6630

# Inverse logit: exp(x) / (1 + exp(x)), applied element-wise.
calculated_probabilities <- exp(log_odds) / (1 + exp(log_odds))

# Show the first three values as a sanity check.
calculated_probabilities[1:3]
## [1] 0.6540297 0.5509228 0.4432456
# Observed failure rates (semi-transparent black points) overlaid with the
# probabilities from the logistic equation (red points plus a red smoother).
#
# The calculated probabilities are paired with the 51-71 temperature grid once
# and reused by both red layers.
predicted_curve <- data.frame(
  Temperature = seq(from = 51, to = 71, by = 2),
  Failure.rate = calculated_probabilities
)

ggplot(original_data, aes(Temperature, Failure.rate)) +
  geom_point(alpha = 1/10, size = 4) +
  xlab("Temperature (Fahrenheit)") +
  ylab("Probability of damage") +
  # A plot has a single coordinate system: adding coord_cartesian() twice makes
  # the second call replace the first, discarding the x limits. Both limits
  # must therefore be set in one call.
  coord_cartesian(xlim = c(51, 81), ylim = c(0, 1)) +
  geom_point(data = predicted_curve, size = 4, colour = "red") +
  geom_smooth(data = predicted_curve, colour = "red")
## `geom_smooth()` using method = 'loess'
Let’s also plot actual and predicted logit(p) vs. temperature.
# Log-odds transform: maps a probability p to log(p / (1 - p)).
# Works element-wise on vectors; p = 0 gives -Inf and p = 1 gives Inf.
logit <- function(p) {
  log(p / (1 - p))
}
# Logit of the observed failure rates (rates of 0 become -Inf).
actual_logit_p <- logit(original_data$Failure.rate)

# Probability given by the logistic equation at each observed temperature.
# Computed for the whole column at once rather than appending to the result
# with c() on every loop iteration.
log_odds <- (-0.2162 * original_data$Temperature) + 11.6630
calculated_probabilities <- exp(log_odds) / (1 + exp(log_odds))

# Logit of the calculated probabilities, for comparison with the observed ones.
calculated_logit_p <- logit(calculated_probabilities)
logit(0) = -Inf, so observations with a failure rate of 0 won’t plot. Let’s plot these as if the actual probability were very low but non-zero, using the smallest predicted logit(p) as a stand-in.
# Observed logits of -Inf cannot be drawn, so substitute the smallest
# calculated logit for them before plotting.
actual_logit_p <- replace(
  actual_logit_p,
  actual_logit_p == -Inf,
  min(calculated_logit_p)
)

# Scatter plot of the observed logits against temperature.
plot(
  x = original_data$Temperature,
  y = actual_logit_p,
  xlab = "Temperature",
  ylab = "Logit(p)"
)

# Overlay the calculated logits as a red line with point markers.
lines(
  x = original_data$Temperature,
  y = calculated_logit_p,
  col = "red",
  type = "o"
)
# Dashed horizontal guides at logit(1/6) ... logit(5/6), each labelled at x = 80.
for (i in seq_len(5)) {
  level <- logit((i / 6))
  abline(h = level, lty = 2)
  text(80, level, paste0("logit(", i, "/6)"))
}

# One more dashed guide at the logit of the smallest calculated probability,
# labelled slightly above the line at x = 55 with the probability rounded to
# two significant digits.
lowest_probability <- min(calculated_probabilities)
abline(h = logit(lowest_probability), lty = 2)
text(
  55,
  logit(lowest_probability) + 0.1,
  paste0("logit(", signif(lowest_probability, 2), ")")
)