The data set give the locations of 1000 seismic events of MB > 4.0. The events occurred in a cube near Fiji since 1964.
library(tidyverse)
data(quakes)
## EX1
ggplot(quakes, aes(x = mag, y = stations)) +
geom_jitter()
## EX2
m1 <- lm(stations ~ mag, data = quakes)
summary(m1)
##
## Call:
## lm(formula = stations ~ mag, data = quakes)
##
## Residuals:
## Min 1Q Median 3Q Max
## -48.871 -7.102 -0.474 6.783 50.244
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -180.4243 4.1899 -43.06 <2e-16 ***
## mag 46.2822 0.9034 51.23 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 11.5 on 998 degrees of freedom
## Multiple R-squared: 0.7245, Adjusted R-squared: 0.7242
## F-statistic: 2625 on 1 and 998 DF, p-value: < 2.2e-16
slope <- m1$coefficients[2]
slope
## mag
## 46.28221
X_bar<-mean(quakes$mag)
beta1 <- m1$coefficients[2]
beta1
## mag
## 46.28221
# verify the standard error calculation
beta1SE <- sqrt((sum(m1$residuals^2))/998/sum((quakes$mag - X_bar)^2))
beta1SE
## [1] 0.9033955
testStat<- beta1/beta1SE
testStat
## mag
## 51.2314
pt(-abs(testStat), 998, lower.tail=TRUE)*2
## mag
## 1.212549e-281
intercept <- m1$coefficients[1]
intercept
## (Intercept)
## -180.4243
ggplot(quakes, aes(x = mag, y = stations)) +
geom_point() +
geom_abline(slope = slope, intercept = intercept, color = "blue" )
## EX4
X_bar <- mean(quakes$mag)
Y_bar <- mean(quakes$stations)
beta1 <- sum((quakes$mag - X_bar)*(quakes$stations - Y_bar)) / sum((quakes$mag - X_bar)^2)
beta0 <- Y_bar - beta1 * X_bar
beta1
## [1] 46.28221
beta0
## [1] -180.4243
## EX5
## HEATHER's SOLUTION
beta1SE <- sqrt((sum(m1$residuals^2))/998/sum((quakes$mag - X_bar)^2))
beta1CI<- beta1+c(-1,1)*qt(0.975, 1000-2) * beta1SE
beta1CI
## [1] 44.50944 48.05498
confint(m1)
## 2.5 % 97.5 %
## (Intercept) -188.64628 -172.20238
## mag 44.50944 48.05498
## EX6
newdata=data.frame(mag=7)
predict(m1, newdata)
## 1
## 143.5511
confBand<-predict(m1, interval="confidence")
predBand<-predict(m1, interval="predict")
## Warning in predict.lm(m1, interval = "predict"): predictions on current data refer to _future_ responses
colnames(predBand)<-c("fit2", "lwr2", "upr2")
newDF<-cbind(quakes, confBand, predBand)
ggplot(newDF, aes(x=mag, y=stations))+
geom_point(alpha=.3)+
geom_abline(slope=m1$coefficients[2], intercept=m1$coefficients[1],
color="blue", lty=2, lwd=1)+
geom_line(aes(y=lwr), color="green", lty=2, lwd=1)+
geom_line(aes(y=upr), color="green", lty=2, lwd=1)+
geom_line(aes(y=lwr2), color="red", lty=2, lwd=1)+
geom_line(aes(y=upr2), color="red", lty=2, lwd=1)+
theme_bw()
anova(m1)
## Analysis of Variance Table
##
## Response: stations
## Df Sum Sq Mean Sq F value Pr(>F)
## mag 1 347148 347148 2624.7 < 2.2e-16 ***
## Residuals 998 132000 132
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1