First, let’s read the data.

# Load the earthquake catalogue, keeping only the columns used below.
data <- read.csv("earthquakes.csv")[
  ,
  c(
    "time", "latitude", "longitude",
    "depth", "mag", "nst", "gap", "dmin", "rms"
  )
]
# Print per-column summary statistics (including NA counts).
summary(data)
##                   time           latitude        longitude     
##  2017-01-01 03:51:07:     2   Min.   :-64.99   Min.   :-180.0  
##  2017-01-01 07:40:33:     2   1st Qu.: 35.64   1st Qu.:-149.0  
##  2017-01-01 22:20:16:     2   Median : 38.79   Median :-119.0  
##  2017-01-02 01:07:42:     2   Mean   : 40.49   Mean   :-111.9  
##  2017-01-03 05:11:52:     2   3rd Qu.: 57.83   3rd Qu.:-116.5  
##  2017-01-03 07:19:29:     2   Max.   : 87.00   Max.   : 180.0  
##  (Other)            :126943                                    
##      depth             mag              nst              gap       
##  Min.   : -3.47   Min.   :-9.990   Min.   :  0.00   Min.   :  9.0  
##  1st Qu.:  3.70   1st Qu.: 0.790   1st Qu.:  8.00   1st Qu.: 75.0  
##  Median :  8.60   Median : 1.300   Median : 14.00   Median :113.0  
##  Mean   : 25.44   Mean   : 1.599   Mean   : 18.17   Mean   :128.7  
##  3rd Qu.: 17.30   3rd Qu.: 2.000   3rd Qu.: 23.00   3rd Qu.:168.0  
##  Max.   :664.03   Max.   : 8.200   Max.   :259.00   Max.   :359.0  
##                   NA's   :16       NA's   :55580    NA's   :37947  
##       dmin             rms        
##  Min.   :  0.00   Min.   :0.0000  
##  1st Qu.:  0.03   1st Qu.:0.0916  
##  Median :  0.07   Median :0.1900  
##  Mean   :  0.65   Mean   :0.3168  
##  3rd Qu.:  0.26   3rd Qu.:0.4900  
##  Max.   :141.16   Max.   :4.0700  
##  NA's   :39084    NA's   :49

Let’s try to plot the data on a world map.

library(ggmap)
## Loading required package: ggplot2
# Draw the world outline first, then overlay one small translucent red dot
# per earthquake so that dense regions show up as darker clusters.
ggplot(data) +
  borders(database = "world", fill = "gray50", colour = "gray50") +
  geom_point(
    mapping = aes(x = longitude, y = latitude),
    colour = "red",
    size = 0.5,
    alpha = 0.1
  )

It seems that a lot of earthquakes occur either along the continental coastlines of the Pacific Ocean or along the boundaries of tectonic plates in the ocean.

Now let’s plot the number of earthquakes by date.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Count earthquakes per calendar day and plot the daily totals,
# labelling the x axis with one abbreviated month name per month.
data %>%
  mutate(date = as.Date(time)) %>%
  group_by(date) %>%
  summarise(count = n()) %>%
  ggplot(mapping = aes(x = date, y = count)) +
  geom_point() +
  scale_x_date(name = 'date', date_breaks = '1 month', date_labels = '%b')

We can see a seasonal pattern: fewer earthquakes in February, more in May, fewer in June, more in August, etc.

Let’s see how the numeric variables are correlated.

library(GGally)
## 
## Attaching package: 'GGally'
## The following object is masked from 'package:dplyr':
## 
##     nasa
# Keep the numeric measurement columns only.
df <- data[c("depth", "mag", "nst", "gap", "dmin", "rms")]
# Drop every row that has a missing value in any of those columns.
df <- df[rowSums(is.na(df)) == 0, ]
# Scatterplot matrix with pairwise correlations.
ggpairs(data = df)

We don’t see much correlation here.

Now we can try to group the earthquakes by geographic zone according to their latitude.

#' Classify latitudes into geographic zones.
#'
#' The absolute value of each latitude is compared against the thresholds
#' 23.5, 35 and 66.5: values below 23.5 are "tropics", below 35 are
#' "subtropics", below 66.5 are "temperate", and everything else is "frigid".
#'
#' @param lat Numeric vector of latitudes (any length, including zero).
#' @return An ordered factor of the same length as `lat` with levels
#'   tropics < subtropics < temperate < frigid. `NA` latitudes yield `NA`.
as.geo.zone <- function(lat) {
  levels <- c("tropics", "subtropics", "temperate", "frigid")
  # Lower bounds (on |lat|) of the second, third and fourth zone.
  breaks <- c(23.5, 35, 66.5)

  # findInterval() returns how many breaks are <= |lat| (0..3), which is
  # exactly the strict "<" cascade of the scalar if/else version, but it
  # works on whole vectors and propagates NA instead of erroring.
  zone_index <- findInterval(abs(lat), breaks) + 1L

  factor(levels[zone_index], levels = levels, ordered = TRUE)
}

# Tag every earthquake with the geographic zone of its latitude.
data$zone <- unlist(lapply(data$latitude, as.geo.zone))

# Daily earthquake counts per zone. Columns are referenced by bare name so
# that dplyr resolves them inside the piped data frame rather than reaching
# back to the global `data` object; ungroup() drops the grouping that
# summarise() would otherwise leave on the result.
df2 <- data %>%
  group_by(date = as.Date(time), zone) %>%
  summarise(count = n()) %>%
  ungroup()

# Map colour to the `zone` column itself (not `df2$zone`), so the aesthetic
# follows the plot data and the legend is titled "zone".
ggplot(df2, aes(x = date, y = count, color = zone)) +
  geom_point() +
  scale_x_date(name = 'date', date_breaks = '1 month', date_labels = '%b') +
  scale_color_brewer(palette = "Set1")

We see that most earthquakes happen in the temperate geographic zone, and that the daily counts there are also the most variable.