Import Earthquake Data

The data set give the locations of 1000 seismic events of MB > 4.0. The events occurred in a cube near Fiji since 1964.

library(tidyverse)
data(quakes)

Always plot your data!

## EX1
ggplot(quakes, aes(x = mag, y = stations)) +
  geom_jitter()

Use the lm() function

## EX2
m1 <- lm(stations ~ mag, data = quakes)
summary(m1)
## 
## Call:
## lm(formula = stations ~ mag, data = quakes)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -48.871  -7.102  -0.474   6.783  50.244 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -180.4243     4.1899  -43.06   <2e-16 ***
## mag           46.2822     0.9034   51.23   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 11.5 on 998 degrees of freedom
## Multiple R-squared:  0.7245, Adjusted R-squared:  0.7242 
## F-statistic:  2625 on 1 and 998 DF,  p-value: < 2.2e-16
slope <- m1$coefficients[2]
slope
##      mag 
## 46.28221
X_bar<-mean(quakes$mag)

beta1 <- m1$coefficients[2]
beta1
##      mag 
## 46.28221
# verify the standard error calculation
beta1SE <- sqrt((sum(m1$residuals^2))/998/sum((quakes$mag - X_bar)^2))
beta1SE
## [1] 0.9033955
testStat<- beta1/beta1SE
testStat
##     mag 
## 51.2314
pt(-abs(testStat), 998, lower.tail=TRUE)*2
##           mag 
## 1.212549e-281
intercept <- m1$coefficients[1]
intercept
## (Intercept) 
##   -180.4243

Add the fitted line

ggplot(quakes, aes(x = mag, y = stations)) + 
  geom_point() + 
  geom_abline(slope = slope, intercept = intercept, color = "blue" )

Verify estimates

## EX4
X_bar <- mean(quakes$mag)
Y_bar <- mean(quakes$stations)
beta1 <- sum((quakes$mag - X_bar)*(quakes$stations - Y_bar)) / sum((quakes$mag - X_bar)^2)
beta0 <- Y_bar - beta1 * X_bar
beta1
## [1] 46.28221
beta0
## [1] -180.4243

Construct Confidence Interval for Slope

## EX5

## HEATHER's SOLUTION
beta1SE <- sqrt((sum(m1$residuals^2))/998/sum((quakes$mag - X_bar)^2))

beta1CI<- beta1+c(-1,1)*qt(0.975, 1000-2) * beta1SE
beta1CI
## [1] 44.50944 48.05498
confint(m1)
##                  2.5 %     97.5 %
## (Intercept) -188.64628 -172.20238
## mag           44.50944   48.05498

Predict A Single New Value

## EX6
newdata=data.frame(mag=7)
predict(m1, newdata)
##        1 
## 143.5511

Adding Confidence and Prediction Bands

confBand<-predict(m1, interval="confidence")
predBand<-predict(m1, interval="predict")
## Warning in predict.lm(m1, interval = "predict"): predictions on current data refer to _future_ responses
colnames(predBand)<-c("fit2", "lwr2", "upr2")


newDF<-cbind(quakes, confBand, predBand)

ggplot(newDF, aes(x=mag, y=stations))+
  geom_point(alpha=.3)+
  geom_abline(slope=m1$coefficients[2], intercept=m1$coefficients[1],
              color="blue", lty=2, lwd=1)+
  geom_line(aes(y=lwr), color="green", lty=2, lwd=1)+
  geom_line(aes(y=upr), color="green", lty=2, lwd=1)+
  geom_line(aes(y=lwr2), color="red", lty=2, lwd=1)+
  geom_line(aes(y=upr2), color="red", lty=2, lwd=1)+
  theme_bw()

ANOVA

anova(m1)
## Analysis of Variance Table
## 
## Response: stations
##            Df Sum Sq Mean Sq F value    Pr(>F)    
## mag         1 347148  347148  2624.7 < 2.2e-16 ***
## Residuals 998 132000     132                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1