This is a hypothesis test on the ‘quakes’ data set in R, to determine the correlation between the depth of an earthquake (the distance from its focus to the surface) and the magnitude of the earthquake.
# Flag every row of `quakes` that contains at least one missing value.
row.has.na3 <- !complete.cases(quakes)
sum(row.has.na3)
## [1] 0
# Keep only the complete rows. Index the data frame itself, not the logical
# flag vector, so the result is the NA-free data rather than a vector of FALSE.
filter.quakes <- quakes[!row.has.na3, , drop = FALSE]
library(plyr)
library(ggplot2)
# Show the structure (column names, types and leading values) of the data set.
str(object = quakes)
## 'data.frame': 1000 obs. of 5 variables:
## $ lat : num -20.4 -20.6 -26 -18 -20.4 ...
## $ long : num 182 181 184 182 182 ...
## $ depth : int 562 650 42 626 649 195 82 194 211 622 ...
## $ mag : num 4.8 4.2 5.4 4.1 4 4 4.8 4.4 4.7 4.3 ...
## $ stations: int 41 15 43 19 11 12 43 15 35 19 ...
# Correlation test between depth and magnitude (Pearson is the default method).
cor.test(x = quakes$depth, y = quakes$mag)
##
## Pearson's product-moment correlation
##
## data: quakes$depth and quakes$mag
## t = -7.488, df = 998, p-value = 1.535e-13
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
## -0.2885057 -0.1710909
## sample estimates:
## cor
## -0.2306377
# Scatter plot of magnitude against depth.
plot(x = quakes$depth, y = quakes$mag)
# Correlation coefficients for the same pair of columns, one method at a time.
with(quakes, cor(depth, mag, method = "spearman"))
## [1] -0.2666593
with(quakes, cor(depth, mag, method = "kendall"))
## [1] -0.1863759
with(quakes, cor(depth, mag, method = "pearson"))
## [1] -0.2306377
There is a negative but weak correlation between the variables (r ≈ -0.23); although small in magnitude, it is statistically significant (p < 0.001). Plotting graphs:
library(graphics)
# Scatter-plot matrix of all column pairs with a smoothed trend in each panel.
pairs(x = quakes, main = "quakes data", panel = panel.smooth)
## Understanding the accuracy with Observed and Predicted
Assessing the goodness of fit of a linear model to the data.
# Simple linear regression of depth on magnitude.
# The formula uses bare column names that are looked up in `data`. Writing
# `quakes$depth ~ quakes$mag` instead binds the model to the global vectors, so
# `data` is bypassed and predict(mod1, newdata = ...) cannot score new rows.
mod1 <- lm(depth ~ mag, data = quakes)
summary(mod1)
##
## Call:
## lm(formula = depth ~ mag, data = quakes)
##
## Residuals:
## Min 1Q Median 3Q Max
## -318.26 -191.44 -57.56 213.42 473.56
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 881.63 76.44 11.533 < 2e-16 ***
## mag -123.42 16.48 -7.488 1.54e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 209.8 on 998 degrees of freedom
## Multiple R-squared: 0.05319, Adjusted R-squared: 0.05225
## F-statistic: 56.07 on 1 and 998 DF, p-value: 1.535e-13
# Standard diagnostic plots for the fitted model.
plot(mod1)
# Multiple linear regression of depth on magnitude and number of stations.
# Bare column names are resolved through `data`, so the fitted model can be
# used with predict(mod2, newdata = ...); `quakes$...` terms in the formula
# would bypass `data` and tie the model to the global vectors.
mod2 <- lm(depth ~ mag + stations, data = quakes)
summary(mod2)
##
## Call:
## lm(formula = depth ~ mag + stations, data = quakes)
##
## Residuals:
## Min 1Q Median 3Q Max
## -415.45 -174.36 -50.73 200.94 452.06
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1673.1340 125.5069 13.331 < 2e-16 ***
## mag -326.4578 30.4971 -10.705 < 2e-16 ***
## stations 4.3869 0.5609 7.822 1.32e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 203.8 on 997 degrees of freedom
## Multiple R-squared: 0.1079, Adjusted R-squared: 0.1061
## F-statistic: 60.31 on 2 and 997 DF, p-value: < 2.2e-16
# Summary statistics of the residuals of mod1.
summary(residuals(mod1))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -318.30 -191.40 -57.56 0.00 213.40 473.60
# Histogram of the same residuals; the title and axis label are given
# explicitly so they read exactly as hist(mod1$residuals) would label them.
hist(
  residuals(mod1),
  main = "Histogram of mod1$residuals",
  xlab = "mod1$residuals"
)
## Plot the correlation between the magnitude of the earthquake and the depth of its focus below the surface
# Magnitude against depth: points, a line joining the observations, and a
# LOESS smoother drawn on top.
ggplot(data = quakes, mapping = aes(x = depth, y = mag)) +
  geom_point() +
  geom_line() +
  stat_smooth(method = "loess", formula = y ~ x, size = 1, colour = "blue") +
  labs(
    x = "depth",
    y = "mag",
    title = "Relationship between 'depth' and 'mag'"
  )