Analysis of the quakes data set:
Locations of Earthquakes off Fiji
The data set gives the locations of 1000 seismic events of MB > 4.0. The events occurred in a cube near Fiji since 1964.
#install.packages("ggplot2")
require(ggplot2)
## Loading required package: ggplot2
#Inspect data: print the first rows of the quakes data frame
head(quakes)
## lat long depth mag stations
## 1 -20.42 181.62 562 4.8 41
## 2 -20.62 181.03 650 4.2 15
## 3 -26.00 184.10 42 5.4 43
## 4 -17.97 181.66 626 4.1 19
## 5 -20.42 181.96 649 4.0 11
## 6 -19.68 184.31 195 4.0 12
#show the structure of quakes: number of observations, column names and types
str(quakes)
## 'data.frame': 1000 obs. of 5 variables:
## $ lat : num -20.4 -20.6 -26 -18 -20.4 ...
## $ long : num 182 181 184 182 182 ...
## $ depth : int 562 650 42 626 649 195 82 194 211 622 ...
## $ mag : num 4.8 4.2 5.4 4.1 4 4 4.8 4.4 4.7 4.3 ...
## $ stations: int 41 15 43 19 11 12 43 15 35 19 ...
#print per-column summary statistics (min, quartiles, mean, max) for quakes
summary(quakes)
## lat long depth mag
## Min. :-38.59 Min. :165.7 Min. : 40.0 Min. :4.00
## 1st Qu.:-23.47 1st Qu.:179.6 1st Qu.: 99.0 1st Qu.:4.30
## Median :-20.30 Median :181.4 Median :247.0 Median :4.60
## Mean :-20.64 Mean :179.5 Mean :311.4 Mean :4.62
## 3rd Qu.:-17.64 3rd Qu.:183.2 3rd Qu.:543.0 3rd Qu.:4.90
## Max. :-10.72 Max. :188.1 Max. :680.0 Max. :6.40
## stations
## Min. : 10.00
## 1st Qu.: 18.00
## Median : 27.00
## Mean : 33.42
## 3rd Qu.: 42.00
## Max. :132.00
#boxplot quake magnitudes: few outliers
boxplot(quakes$mag)
#histogram of magnitude frequency: clear bell curve with longer right tail
#note: bar width is set with `binwidth`; `bin` is not a geom_histogram()
#parameter, so ggplot2 ignores it and falls back to its default binning
ggplot(data = quakes) +
  geom_histogram(aes(x = mag), binwidth = 0.1, fill = "grey30")
#boxplot quake depths: no outliers, large IQR
boxplot(quakes$depth)
#histogram with frequency of depth: large IQR due to two different depth curves
#note: bar width is set with `binwidth`; `bin` is not a geom_histogram()
#parameter, so ggplot2 ignores it and falls back to its default binning
ggplot(quakes) +
  geom_histogram(aes(x = depth), binwidth = 7, fill = "grey30")
#boxplot quake stations reporting: large group of outliers...
boxplot(quakes$stations)
#histogram of stations reporting: most quakes under 50 reports, but long tail of higher counts
#note: bar width is set with `binwidth`; `bin` is not a geom_histogram()
#parameter, so ggplot2 ignores it and falls back to its default binning
ggplot(quakes) +
  geom_histogram(aes(x = stations), binwidth = 3, fill = "grey30")
#scatter plot of magnitude (y) against depth (x): no clear eye-catching pattern
ggplot(quakes, aes(x = depth, y = mag)) +
  geom_point()
#split the events at depth 400 so each depth curve can be studied on its own
#note the naming: low_depth_quakes holds the events with depth > 400,
#high_depth_quakes holds the events with depth <= 400
low_depth_quakes <- subset(quakes, depth > 400)
high_depth_quakes <- subset(quakes, depth <= 400)
#weak negative Pearson correlation between depth and magnitude for the
#depth <= 400 group (high_depth_quakes)
with(high_depth_quakes, cor(depth, mag))
## [1] -0.3296122
#nearly zero Pearson correlation between depth and magnitude for the
#depth > 400 group (low_depth_quakes)
with(low_depth_quakes, cor(depth, mag))
## [1] 0.001745948
#scatter plot of stations reporting (y) against depth (x): no clear eye-catching pattern
ggplot(quakes, aes(x = depth, y = stations)) +
  geom_point()
#very weak negative Pearson correlation between depth and stations reporting
#for the depth <= 400 group (high_depth_quakes)
with(high_depth_quakes, cor(depth, stations))
## [1] -0.1712382
#nearly zero Pearson correlation between depth and stations reporting
#for the depth > 400 group (low_depth_quakes)
with(low_depth_quakes, cor(depth, stations))
## [1] -0.005546236
#scatter plot of stations reporting (y) against magnitude (x): the number of
#reporting stations appears to rise with magnitude
ggplot(quakes, aes(x = mag, y = stations)) +
  geom_point()
#strong positive Pearson correlation between magnitude and stations reporting
with(quakes, cor(mag, stations))
## [1] 0.8511824
#long/lat scatter plot coloured by magnitude, one facet per magnitude value:
#lower magnitude quakes are more typically east of mainland
ggplot(quakes, aes(x = long, y = lat, color = mag)) +
  geom_point() +
  facet_wrap(~mag)
#bin depth into 9 intervals with cut_number() and store them as a new column
quakes[["depthbin9"]] <- cut_number(quakes[["depth"]], n = 9)
#long/lat scatter plot coloured by depth bin, one facet per bin:
#higher depth quakes are typically east of mainland
ggplot(quakes, aes(x = long, y = lat, color = depthbin9)) +
  geom_point() +
  facet_wrap(~depthbin9)