Analysis of the quakes data set:

Locations of Earthquakes off Fiji

The data set gives the locations of 1000 seismic events of MB > 4.0. The events occurred in a cube near Fiji since 1964.

# Plotting dependency. Install once with install.packages("ggplot2") if needed.
# library() stops with an error when the package is missing, whereas require()
# only returns FALSE and lets the script fail later at the first ggplot() call.
library(ggplot2)
# First look at the data: leading rows, column types, per-column summaries.
utils::head(quakes, n = 6L)
##      lat   long depth mag stations
## 1 -20.42 181.62   562 4.8       41
## 2 -20.62 181.03   650 4.2       15
## 3 -26.00 184.10    42 5.4       43
## 4 -17.97 181.66   626 4.1       19
## 5 -20.42 181.96   649 4.0       11
## 6 -19.68 184.31   195 4.0       12
utils::str(object = quakes)
## 'data.frame':    1000 obs. of  5 variables:
##  $ lat     : num  -20.4 -20.6 -26 -18 -20.4 ...
##  $ long    : num  182 181 184 182 182 ...
##  $ depth   : int  562 650 42 626 649 195 82 194 211 622 ...
##  $ mag     : num  4.8 4.2 5.4 4.1 4 4 4.8 4.4 4.7 4.3 ...
##  $ stations: int  41 15 43 19 11 12 43 15 35 19 ...
summary(object = quakes)
##       lat              long           depth            mag      
##  Min.   :-38.59   Min.   :165.7   Min.   : 40.0   Min.   :4.00  
##  1st Qu.:-23.47   1st Qu.:179.6   1st Qu.: 99.0   1st Qu.:4.30  
##  Median :-20.30   Median :181.4   Median :247.0   Median :4.60  
##  Mean   :-20.64   Mean   :179.5   Mean   :311.4   Mean   :4.62  
##  3rd Qu.:-17.64   3rd Qu.:183.2   3rd Qu.:543.0   3rd Qu.:4.90  
##  Max.   :-10.72   Max.   :188.1   Max.   :680.0   Max.   :6.40  
##     stations     
##  Min.   : 10.00  
##  1st Qu.: 18.00  
##  Median : 27.00  
##  Mean   : 33.42  
##  3rd Qu.: 42.00  
##  Max.   :132.00
# Box plot of quake magnitudes: few outliers.
boxplot(quakes$mag)

# Histogram of magnitudes in bins 0.1 wide: clear bell curve with a longer
# right tail. The width has to be given as `binwidth`; `bin` is not a
# geom_histogram() parameter, and current ggplot2 ignores it (with a warning)
# and falls back to its default of 30 bins.
ggplot(data = quakes) +
  geom_histogram(aes(x = mag), binwidth = 0.1, fill = "grey30")

# Box plot of quake depths: no outliers, large IQR.
boxplot(quakes$depth)

# Histogram of depths in bins 7 wide: the large IQR comes from two separate
# depth curves. The width has to be given as `binwidth`; `bin` is not a
# geom_histogram() parameter, and current ggplot2 ignores it (with a warning)
# and falls back to its default of 30 bins.
ggplot(quakes) +
  geom_histogram(aes(x = depth), binwidth = 7, fill = "grey30")

# Box plot of the number of stations reporting: large group of outliers.
boxplot(quakes$stations)

# Histogram of stations reporting in bins 3 wide: most quakes have under 50
# reports, with a long tail of higher counts. The width has to be given as
# `binwidth`; `bin` is not a geom_histogram() parameter, and current ggplot2
# ignores it (with a warning) and falls back to its default of 30 bins.
ggplot(quakes) +
  geom_histogram(aes(x = stations), binwidth = 3, fill = "grey30")

# Magnitude plotted against depth: nothing obvious stands out.
ggplot(quakes, aes(x = depth, y = mag)) +
  geom_point()

# Split the events at depth 400 so each depth curve can be studied on its own.
# Mind the names: low_depth_quakes holds depth > 400 and high_depth_quakes
# holds depth <= 400.
low_depth_quakes <- subset(quakes, depth > 400)
high_depth_quakes <- subset(quakes, depth <= 400)

# Pearson correlation of depth with magnitude for depth <= 400: weak, negative.
with(high_depth_quakes, cor(depth, mag))
## [1] -0.3296122
# Same correlation for depth > 400: nearly zero.
with(low_depth_quakes, cor(depth, mag))
## [1] 0.001745948
# Stations reporting plotted against depth: nothing obvious stands out.
ggplot(quakes, aes(x = depth, y = stations)) +
  geom_point()

# Pearson correlation of depth with stations reporting, using the subset held
# in high_depth_quakes: very weak, negative.
with(high_depth_quakes, cor(depth, stations))
## [1] -0.1712382
# Same correlation for the subset held in low_depth_quakes: nearly zero.
with(low_depth_quakes, cor(depth, stations))
## [1] -0.005546236
# Stations reporting plotted against magnitude: the number of reporting
# stations appears to rise with magnitude.
ggplot(quakes, aes(x = mag, y = stations)) +
  geom_point()

# Pearson correlation of magnitude with stations reporting: strong, positive.
with(quakes, cor(mag, stations))
## [1] 0.8511824
# Map of the events (longitude by latitude), one panel per magnitude value
# and coloured by magnitude: lower-magnitude quakes are more typically east
# of the mainland.
ggplot(quakes, aes(x = long, y = lat, color = mag)) +
  geom_point() +
  facet_wrap(~mag)

# Cut depth into 9 bins with cut_number() and keep the result as a new column.
quakes[["depthbin9"]] <- cut_number(quakes[["depth"]], n = 9)

# Map of the events (longitude by latitude), one panel per depth bin and
# coloured by bin: higher-depth quakes are typically east of the mainland.
ggplot(quakes, aes(x = long, y = lat, color = depthbin9)) +
  geom_point() +
  facet_wrap(~depthbin9)