upload dataset

# Load the hubble data set that ships with the gamair package
library(gamair)
data(hubble)

# Reproduce Figure 1.1 of Wood (2006): velocity (y) against distance (x)
plot(
  x = hubble$x,
  y = hubble$y,
  xlab = "Distance (Mpc)",
  ylab = expression("Velocity (km" * s^{-1} * ")")
)

1a) Fit a linear model that corresponds to Hubble’s Law. Estimate all parameter(s) using maximum likelihood estimation. Please show all relevant R code.

# Regression through the origin (the "- 1" drops the intercept), so the
# fitted velocity is proportional to distance
m1 <- lm(formula = y ~ x - 1, data = hubble)

# Coefficient table and fit statistics for m1
summary(object = m1)
## 
## Call:
## lm(formula = y ~ x - 1, data = hubble)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -736.5 -132.5  -19.0  172.2  558.0 
## 
## Coefficients:
##   Estimate Std. Error t value Pr(>|t|)    
## x   76.581      3.965   19.32 1.03e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 258.9 on 23 degrees of freedom
## Multiple R-squared:  0.9419, Adjusted R-squared:  0.9394 
## F-statistic: 373.1 on 1 and 23 DF,  p-value: 1.032e-15

1b) Using the parameter estimates from part a, what is the most likely age of the universe in years? Note that distance is measured in megaparsecs (Mpc). One megaparsec is 3.09e19 km.

# Helper: convert a duration in seconds to 365-day years
seconds_to_years <- function(seconds) {
  seconds / 60 / 60 / 24 / 365
}

# Kilometres in one megaparsec (given in the question). Despite its name this
# is a unit-conversion factor; the Hubble constant itself is the slope of m1.
hubble_constant <- 3.09e19
print(hubble_constant)
## [1] 3.09e+19
# Estimated slope of m1 (velocity in km/s per Mpc of distance)
beta1 <- coef(m1)["x"]
print(beta1)
##        x 
## 76.58117
# Age in seconds: inverse slope (s * Mpc / km) times km per Mpc
age <- (1 / beta1) * hubble_constant
print(age)
##            x 
## 4.034934e+17
# Age expressed in years
age_universe <- seconds_to_years(age)

# Estimated age of the universe in years
print(age_universe)
##           x 
## 12794692825

1c) Calculate a 95% confidence interval for the estimated age from part b.

# 95% confidence interval for the age of the universe.
# Age is proportional to 1 / slope, so the endpoints swap: the UPPER
# confidence limit of the slope gives the LOWER limit of the age, and the
# LOWER confidence limit of the slope gives the UPPER limit of the age.
slope_ci <- confint(m1)

# CI Lower Limit age of universe (seconds), from the upper slope limit
confLL <- 1 / slope_ci["x", "97.5 %"] * hubble_constant
confLL
## [1] 3.6446e+17
# Year of Universe Conversion (seconds -> 365-day years)
LLage_universe <- confLL / 60 / 60 / 24 / 365

# Lower limit of the age in years
LLage_universe
## [1] 11556949874
# CI Upper Limit age of universe (seconds), from the lower slope limit
confUL <- 1 / slope_ci["x", "2.5 %"] * hubble_constant
confUL
## [1] 4.518907e+17
# Year of Universe Conversion (seconds -> 365-day years)
ULage_universe <- confUL / 60 / 60 / 24 / 365

# Upper limit of the age in years
ULage_universe
## [1] 14329359295

1d) Explain how to interpret the 95% confidence interval from part c.

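# Based on the 95% confidence interval from Model 1 (m1), we can be 95% confident that the age of the universe is between 11,556,949,874 and 14,329,359,295 years (approximately 11.56 to 14.33 billion years). In other words, if the data collection were repeated many times, about 95% of the intervals constructed this way would contain the true age.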

Bootstrap data 1000 times

# Bootstrap the slope of the through-origin model 1000 times.
# mosaic supplies do() and resample(); the lines starting with "##" directly
# below are the package start-up messages captured when this was knitted.
library(mosaic)
## Registered S3 method overwritten by 'mosaic':
##   method                           from   
##   fortify.SpatialPolygonsDataFrame ggplot2
## 
## The 'mosaic' package masks several functions from core packages in order to add 
## additional features.  The original behavior of these functions should not be affected by this.
## 
## Attaching package: 'mosaic'
## The following objects are masked from 'package:dplyr':
## 
##     count, do, tally
## The following object is masked from 'package:Matrix':
## 
##     mean
## The following object is masked from 'package:ggplot2':
## 
##     stat
## The following objects are masked from 'package:stats':
## 
##     binom.test, cor, cor.test, cov, fivenum, IQR, median, prop.test,
##     quantile, sd, t.test, var
## The following objects are masked from 'package:base':
## 
##     max, mean, min, prod, range, sample, sum
# Fix the random seed so the resampling below is reproducible
set.seed(705)
# Each of the 1000 replicates refits y ~ x - 1 on resample(hubble) and keeps
# the first (only) coefficient; the slopes are read back later as bs$x.
bs<- do(1000)*coef(lm(y ~ x -1, data = resample(hubble)))[1]

# Distribution and mean of the 1000 bootstrapped slopes
hist(bs$x)

mean(bs$x)
## [1] 76.86499
# The inverse slope is proportional to the age of the universe (it still has
# to be multiplied by km per Mpc to give seconds)
hist(1/bs$x)

mean(1/bs$x)
## [1] 0.0130614
# Percentile bootstrap 95% interval for the inverse slope.
# The argument is spelled out as `probs`; the earlier `pro =` only worked
# through partial argument matching.
bs_inverse_ci <- quantile(1/bs$x, probs = c(0.025, 0.975))
bs_inverse_ci
##       2.5%      97.5% 
## 0.01159727 0.01477001
# Keep each limit separately; single-bracket indexing preserves the
# "2.5%" / "97.5%" names
LLbsconfint <- bs_inverse_ci[1]
ULbsconfint <- bs_inverse_ci[2]
ULbsconfint
##      97.5% 
## 0.01477001
LLbsconfint
##       2.5% 
## 0.01159727
# Helper: convert a duration in seconds to 365-day years
seconds_to_years <- function(seconds) {
  seconds / 60 / 60 / 24 / 365
}

# Lower bootstrap limit of the age in seconds: the 2.5% quantile of
# 1 / slope scaled by hubble_constant (km per Mpc)
bsconfLL <- LLbsconfint * hubble_constant
print(bsconfLL)
##         2.5% 
## 3.583557e+17
# Lower bootstrap limit in years
bsLLage_universe <- seconds_to_years(bsconfLL)

# Show the lower limit in years
print(bsLLage_universe)
##        2.5% 
## 11363385705
# Upper bootstrap limit of the age in seconds: the 97.5% quantile of
# 1 / slope scaled by hubble_constant (km per Mpc)
bsconfUL <- ULbsconfint * hubble_constant
print(bsconfUL)
##        97.5% 
## 4.563933e+17
# Upper bootstrap limit in years
bsULage_universe <- seconds_to_years(bsconfUL)

# Show the upper limit in years
print(bsULage_universe)
##       97.5% 
## 14472135418

Extra Bootstrap Confidence Interval for age of universe

# The bootstrap (bs), which produced 1000 resampled slopes for our model, gives a slightly wider 95% confidence interval than the m1 model: we can be 95% confident that the true age of the universe is between 11,363,385,705 and 14,472,135,418 years (approximately 11.36 to 14.47 billion years).

Question 2

# Download the Challenger O-ring data as a data frame
df.challenger <- read.csv(
  file = "https://www.dropbox.com/s/ezxj8d48uh7lzhr/challenger.csv?dl=1"
)

# Scatter plot of O-ring incidents against launch temperature
with(
  df.challenger,
  plot(Temp, O.ring, xlab = "Temperature", ylab = "Number of incidents")
)

2a) Refer to url link for image of the model written out

# Link to the image showing the written-out model for part 2a
url <- c("https://drive.google.com/file/d/1lLYyPM5Mw8U8uOLTmYoOqqg2WERxu5F4/view?usp=sharing")

2b) Fit the linear model from (a) to the data from Dalal et al. (1989).

# Fit the simple linear regression for part 2b on the Challenger data
data <- data.frame(df.challenger)
y <- data[, 2] # temp
x <- data[, 4] # o.ring
# NOTE(review): as coded, temperature is the response and the O-ring count is
# the predictor, whereas parts 2c-2f ask how temperature influences the number
# of O-ring incidents -- confirm whether the roles of x and y should be swapped.
m2 <- lm(y ~ x, data = data)
# Summarise the Challenger model m2. The earlier call was summary(m1), which
# re-printed the Hubble fit instead of the model fitted on the line above.
summary(m2)
## 
## Call:
## lm(formula = y ~ x, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.723 -4.252 -1.752  3.763 13.306 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   71.752      1.431  50.132  < 2e-16 ***
## x             -5.029      1.618  -3.108  0.00532 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.978 on 21 degrees of freedom
## Multiple R-squared:  0.3151, Adjusted R-squared:  0.2825 
## F-statistic: 9.661 on 1 and 21 DF,  p-value: 0.005321

2c) Obtain 95% confidence intervals for all parameter(s) that will enable you to determine if temperature has an influence on the number of O-rings having some thermal distress.

# 95% confidence intervals for the intercept and slope of the Challenger
# model m2. The earlier call used m1, which reports the Hubble slope interval
# (68.38 to 84.78) and says nothing about the O-ring data.
confint(m2, level = 0.95)
# NOTE: the pasted output must be regenerated by re-running this chunk.
# From the summary(m2) coefficient table (estimate +/- qt(0.975, 21) * SE)
# the intervals are approximately:
#   (Intercept)  68.8 to 74.7
#   x            -8.39 to -1.66
# The slope interval excludes 0.

2d) Two-Tailed Hypothesis Testing: NHT for H0; beta = 0, H1; beta ≠ 0

# Two-sided t-test of H0: beta1 = 0 against H1: beta1 != 0 for the slope of m2
beta1.null <- 0
beta1.hat <- coef(m2)[2]
sigma2.beta1.hat <- vcov(m2)[2, 2]
t <- (beta1.hat - beta1.null) / sqrt(sigma2.beta1.hat)
# Take the sample size and parameter count from the fitted model rather than
# hard-coding them (23 observations, 2 coefficients for this fit)
n <- nobs(m2)
p <- length(coef(m2))

# Two-sided p-value: twice the upper-tail area beyond |t|. The earlier form
# pt(t, lower) + pt(|t|, upper) is only correct when t is negative.
2 * pt(abs(t), n - p, lower.tail = FALSE)
##          x 
## 0.00532143
#OR p.value from:
summary(m2) 
## 
## Call:
## lm(formula = y ~ x, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.723 -4.252 -1.752  3.763 13.306 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   71.752      1.431  50.132  < 2e-16 ***
## x             -5.029      1.618  -3.108  0.00532 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.978 on 21 degrees of freedom
## Multiple R-squared:  0.3151, Adjusted R-squared:  0.2825 
## F-statistic: 9.661 on 1 and 21 DF,  p-value: 0.005321

2e) One-Tailed Hypothesis Testing: NHT for H0; beta =0, H1; beta<0

# One-sided t-test of H0: beta1 = 0 against H1: beta1 < 0 for the slope of m2
beta1.null <- 0
beta1.hat <- coef(m2)[2]
sigma2.beta1.hat <- vcov(m2)[2, 2]
t <- (beta1.hat - beta1.null) / sqrt(sigma2.beta1.hat)
n <- nobs(m2)
# m2 estimates two coefficients (intercept and slope), so the residual degrees
# of freedom are n - 2 = 21, matching summary(m2). The earlier p <- 1 gave 22.
p <- length(coef(m2))

# Lower-tail area below the observed t statistic
pt(t, n - p, lower.tail = TRUE)
# Expected result: about 0.00266, i.e. half of the two-sided p-value
# 0.00532143. The previously pasted 0.002562902 came from 22 degrees of
# freedom; re-run this chunk to refresh the printed output.

2f) Write 3-5 sentences explaining what influence, if any, temperature has on O-ring failure. Please make sure reference any statistical evidence obtained from parts a-e.

#Based on the above statistical findings, we conclude that temperature and O-ring thermal distress are inversely related. Both the two-tailed (p = .005) and one-tailed (p = .003) t-tests support the alternative hypothesis that temperature is a significant negative predictor of the number of O-ring thermal distress incidents, which can lead to catastrophic failure of said O-rings. The more thermal distress incidents, the higher the chance of O-ring failure during launch. As temperature increased, the number of incidents decreased: for every one-degree increase in temperature, the expected number of O-ring thermal distress incidents decreases by 0.06266.