data source

https://earthquake.usgs.gov/earthquakes/feed/v1.0/csv.php

Magnitude classifications : Low, Middle, Moderate, High

## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:lubridate':
## 
##     hour, isoweek, mday, minute, month, quarter, second, wday, week,
##     yday, year
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following object is masked from 'package:purrr':
## 
##     transpose
## [1] 10264    22
## [1] 5988
## Classes 'data.table' and 'data.frame':   10264 obs. of  22 variables:
##  $ time           : chr  "2019-12-30T19:42:21.073Z" "2019-12-30T19:37:29.590Z" "2019-12-30T19:36:32.570Z" "2019-12-30T19:34:34.199Z" ...
##  $ latitude       : num  65.5 38.8 59.2 63.5 36 ...
##  $ longitude      : num  -154 -123 -136 -147 -117 ...
##  $ depth          : num  40.4 3.47 4.2 74.8 2.62 ...
##  $ mag            : num  2.7 0.81 2.6 1.6 1.37 2.06 2.62 1.9 2.51 2.5 ...
##  $ magType        : chr  "ml" "md" "ml" "ml" ...
##  $ nst            : int  NA 20 NA NA 19 29 16 NA 14 NA ...
##  $ gap            : num  NA 58 NA NA 97 148 45 NA 216 NA ...
##  $ dmin           : num  NA 0.0116 NA NA 0.1152 ...
##  $ rms            : num  0.98 0.02 0.84 0.66 0.13 0.11 0.18 0.59 0.18 0.52 ...
##  $ net            : chr  "ak" "nc" "ak" "ak" ...
##  $ id             : chr  "ak019gqb6lyf" "nc73321516" "ak019gqb5civ" "ak019gqb4x9b" ...
##  $ updated        : chr  "2019-12-30T19:46:18.329Z" "2019-12-30T19:47:01.850Z" "2019-12-30T19:46:17.853Z" "2019-12-30T19:40:05.615Z" ...
##  $ place          : chr  "91km WNW of Tanana, Alaska" "6km NW of The Geysers, CA" "45km W of Haines, Alaska" "80km SW of Delta Junction, Alaska" ...
##  $ type           : chr  "earthquake" "earthquake" "earthquake" "earthquake" ...
##  $ horizontalError: num  NA 0.23 NA NA 0.28 1.31 0.57 NA 0.66 NA ...
##  $ depthError     : num  20.8 0.59 0.2 1.9 1.01 1.13 1.35 0.1 0.4 0.5 ...
##  $ magError       : num  NA 0.06 NA NA 0.115 0.02 0.23 NA 0.25 NA ...
##  $ magNst         : int  NA 3 NA NA 13 6 17 NA 12 NA ...
##  $ status         : chr  "automatic" "automatic" "automatic" "automatic" ...
##  $ locationSource : chr  "ak" "nc" "ak" "ak" ...
##  $ magSource      : chr  "ak" "nc" "ak" "ak" ...
##  - attr(*, ".internal.selfref")=<externalptr>

- time: time when the event occurred
- latitude: decimal degrees latitude. Negative values for southern latitudes. Range is [-90.0, 90.0]
- longitude: decimal degrees longitude. Negative values for western longitudes. Range is [-180.0, 180.0]
- depth: depth of the event in kilometers
- mag: magnitude for the event. Range [-1.0, 10.0]
- magType: method or algorithm used to calculate the preferred magnitude for the event
- nst: total number of seismic stations used to determine earthquake location
- gap: largest azimuthal gap between azimuthally adjacent stations (in degrees)
- dmin: horizontal distance from the epicenter to the nearest station (in degrees)
- rms: root-mean-square (RMS) travel time residual, in sec, using all weights
- net: ID of a data contributor. Identifies the network considered to be the preferred source of information for this event
- id: a unique identifier for the event. This is the current preferred id for the event, and may change over time
- updated: time when the event was most recently updated
- place: textual description of named geographic region near to the event. This may be a city name, or a Flinn-Engdahl Region name
- type: type of seismic event
- horizontalError: uncertainty of reported location of the event in kilometers
- depthError: uncertainty of reported depth of the event in kilometers
- magError: uncertainty of reported magnitude of the event
- magNst: total number of seismic stations used to calculate the magnitude for this earthquake
- status: indicates whether the event has been reviewed by a human
- locationSource: network that originally authored the reported location of this event
- magSource: network that originally authored the reported magnitude for this event

Missing data:

- horizontalError: uncertainty of reported location of the event in kilometers
- nst: total number of seismic stations used to determine earthquake location
- dmin: horizontal distance from the epicenter to the nearest station (in degrees)
- magError: uncertainty of reported magnitude of the event
- magNst: total number of seismic stations used to calculate the magnitude for this earthquake
- gap: largest azimuthal gap between azimuthally adjacent stations (in degrees)

- rms: root-mean-square (RMS) travel time residual, in sec, using all weights
- mag: magnitude for the event. Range [-1.0, 10.0]

Missing data to be removed: rms and mag. These are independent and critical values.

The rest will be imputed, since they mostly reflect different methods of recording activity.

Grouping earthquake magnitudes into brackets

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Keep the events whose `place` description contains the literal text "CA",
# then print the structure of the resulting subset.
# NOTE(review): the pattern is unanchored, so it matches "CA" anywhere in the
# description — confirm that only California events are picked up.
ca <- all_clean[
  str_detect(string = all_clean[["place"]], pattern = "CA"),
]
str(object = ca)
## 'data.frame':    4060 obs. of  27 variables:
##  $ time           : chr  "2019-12-30T19:37:29.590Z" "2019-12-30T19:27:05.370Z" "2019-12-30T19:15:20.730Z" "2019-12-30T18:53:16.840Z" ...
##  $ latitude       : num  38.8 36 39.8 33.6 33.6 ...
##  $ longitude      : num  -123 -117 -122 -117 -117 ...
##  $ depth          : num  3.47 2.62 17.1 5.32 3.35 ...
##  $ mag            : num  0.81 1.37 2.62 0.74 2.51 0.93 0.13 0.57 0.92 0.81 ...
##  $ magType        : chr  "md" "ml" "md" "ml" ...
##  $ nst            : num  20 19 16 20 93 11 10 17 24 25 ...
##  $ gap            : num  58 97 45 75 15 100 100 60 68 70 ...
##  $ dmin           : num  0.0116 0.1152 0.1979 0.0383 0.0455 ...
##  $ rms            : num  0.02 0.13 0.18 0.12 0.23 0.03 0.07 0.03 0.24 0.19 ...
##  $ net            : chr  "nc" "ci" "nc" "ci" ...
##  $ id             : chr  "nc73321516" "ci39017855" "nc73321511" "ci39017839" ...
##  $ updated        : chr  "2019-12-30T19:47:01.850Z" "2019-12-30T19:30:48.527Z" "2019-12-30T19:36:28.521Z" "2019-12-30T18:56:56.463Z" ...
##  $ place          : chr  "6km NW of The Geysers, CA" "24km N of Searles Valley, CA" "5km NNE of Hamilton City, CA" "13km WNW of Anza, CA" ...
##  $ type           : chr  "earthquake" "earthquake" "earthquake" "earthquake" ...
##  $ horizontalError: num  0.23 0.28 0.57 0.45 0.18 0.96 0.62 0.25 0.36 0.52 ...
##  $ depthError     : num  0.59 1.01 1.35 0.82 0.59 1.16 0.9 0.52 1.01 0.68 ...
##  $ magError       : 'impute' num  0.06 0.115 0.23 0.275 0.107 0.29 0.242 0.01 0.116 0.216 ...
##   ..- attr(*, "imputed")= int  52 61 133 171 220 222 228 242 262 280 ...
##  $ magNst         : num  3 13 17 5 26 10 9 3 11 25 ...
##  $ status         : chr  "automatic" "automatic" "automatic" "automatic" ...
##  $ locationSource : chr  "nc" "ci" "nc" "ci" ...
##  $ magSource      : chr  "nc" "ci" "nc" "ci" ...
##  $ class          : Factor w/ 7 levels "Negative","Zero",..: 2 3 4 2 4 2 2 2 2 2 ...
##  $ date           : POSIXct, format: "2019-12-30 19:37:29" "2019-12-30 19:27:05" ...
##  $ year           : int  2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
##  $ month          : int  12 12 12 12 12 12 12 12 12 12 ...
##  $ day            : int  30 30 30 30 30 30 30 30 30 30 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Magnitude distribution of the `ca` subset: density curve, then histogram.
plot_density(ca[["mag"]])

plot_histogram(ca[["mag"]])

# Per-column summary statistics of the `ca` subset.
summary(ca)
## 
##  126 values imputed to 0.2008374
##      time              latitude       longitude          depth       
##  Length:4060        Min.   :32.55   Min.   :-124.7   Min.   :-2.350  
##  Class :character   1st Qu.:35.60   1st Qu.:-118.9   1st Qu.: 2.570  
##  Mode  :character   Median :35.86   Median :-117.7   Median : 5.040  
##                     Mean   :36.03   Mean   :-118.6   Mean   : 5.952  
##                     3rd Qu.:37.43   3rd Qu.:-117.5   3rd Qu.: 8.340  
##                     Max.   :41.52   Max.   :-115.5   Max.   :42.040  
##                                                                      
##       mag           magType               nst              gap        
##  Min.   :-0.680   Length:4060        Min.   :  0.00   Min.   : 11.00  
##  1st Qu.: 0.660   Class :character   1st Qu.: 12.00   1st Qu.: 62.00  
##  Median : 0.960   Mode  :character   Median : 17.00   Median : 84.00  
##  Mean   : 1.047                      Mean   : 20.35   Mean   : 96.74  
##  3rd Qu.: 1.340                      3rd Qu.: 25.00   3rd Qu.:124.00  
##  Max.   : 4.290                      Max.   :144.00   Max.   :354.00  
##                                                                       
##       dmin                rms             net                 id           
##  Min.   :0.0002729   Min.   :0.0000   Length:4060        Length:4060       
##  1st Qu.:0.0212175   1st Qu.:0.0600   Class :character   Class :character  
##  Median :0.0466200   Median :0.1400   Mode  :character   Mode  :character  
##  Mean   :0.0526344   Mean   :0.1266                                        
##  3rd Qu.:0.0736700   3rd Qu.:0.1800                                        
##  Max.   :0.6198000   Max.   :0.3800                                        
##                                                                            
##    updated             place               type           horizontalError 
##  Length:4060        Length:4060        Length:4060        Min.   :0.0900  
##  Class :character   Class :character   Class :character   1st Qu.:0.2400  
##  Mode  :character   Mode  :character   Mode  :character   Median :0.3200  
##                                                           Mean   :0.3911  
##                                                           3rd Qu.:0.4200  
##                                                           Max.   :7.0800  
##                                                                           
##    depthError         magError          magNst          status         
##  Min.   : 0.1000   Min.   :0.0010   Min.   :  0.00   Length:4060       
##  1st Qu.: 0.4500   1st Qu.:0.1210   1st Qu.:  6.75   Class :character  
##  Median : 0.6200   Median :0.1610   Median : 11.00   Mode  :character  
##  Mean   : 1.3229   Mean   :0.1673   Mean   : 14.37                     
##  3rd Qu.: 0.8625   3rd Qu.:0.2008   3rd Qu.: 19.00                     
##  Max.   :31.6100   Max.   :0.8300   Max.   :304.00                     
##                                                                        
##  locationSource      magSource              class     
##  Length:4060        Length:4060        Negative:  42  
##  Class :character   Class :character   Zero    :2140  
##  Mode  :character   Mode  :character   First   :1611  
##                                        Second  : 241  
##                                        Third   :  24  
##                                        Fourth  :   2  
##                                        Fifth   :   0  
##       date                          year          month            day       
##  Min.   :2019-11-30 19:58:15   Min.   :2019   Min.   :11.00   Min.   : 1.00  
##  1st Qu.:2019-12-08 03:52:45   1st Qu.:2019   1st Qu.:12.00   1st Qu.: 8.00  
##  Median :2019-12-15 03:13:44   Median :2019   Median :12.00   Median :15.00  
##  Mean   :2019-12-15 09:34:30   Mean   :2019   Mean   :11.99   Mean   :15.12  
##  3rd Qu.:2019-12-22 04:37:38   3rd Qu.:2019   3rd Qu.:12.00   3rd Qu.:22.00  
##  Max.   :2019-12-30 19:37:29   Max.   :2019   Max.   :12.00   Max.   :30.00  
## 
# Plot the missing-value profile of the `ca` subset.
plot_missing(ca)

# Tabulate, per column, the number and percentage of missing values.
profile_missing(ca)
##            feature num_missing pct_missing
## 1             time           0           0
## 2         latitude           0           0
## 3        longitude           0           0
## 4            depth           0           0
## 5              mag           0           0
## 6          magType           0           0
## 7              nst           0           0
## 8              gap           0           0
## 9             dmin           0           0
## 10             rms           0           0
## 11             net           0           0
## 12              id           0           0
## 13         updated           0           0
## 14           place           0           0
## 15            type           0           0
## 16 horizontalError           0           0
## 17      depthError           0           0
## 18        magError           0           0
## 19          magNst           0           0
## 20          status           0           0
## 21  locationSource           0           0
## 22       magSource           0           0
## 23           class           0           0
## 24            date           0           0
## 25            year           0           0
## 26           month           0           0
## 27             day           0           0
# Plot the structure of the `ca` subset, then print its column-by-column
# structure as text.
# NOTE(review): `str(ca)` was already printed right after `ca` was created —
# confirm the repeat is wanted.
plot_str(ca)
str(ca)
## 'data.frame':    4060 obs. of  27 variables:
##  $ time           : chr  "2019-12-30T19:37:29.590Z" "2019-12-30T19:27:05.370Z" "2019-12-30T19:15:20.730Z" "2019-12-30T18:53:16.840Z" ...
##  $ latitude       : num  38.8 36 39.8 33.6 33.6 ...
##  $ longitude      : num  -123 -117 -122 -117 -117 ...
##  $ depth          : num  3.47 2.62 17.1 5.32 3.35 ...
##  $ mag            : num  0.81 1.37 2.62 0.74 2.51 0.93 0.13 0.57 0.92 0.81 ...
##  $ magType        : chr  "md" "ml" "md" "ml" ...
##  $ nst            : num  20 19 16 20 93 11 10 17 24 25 ...
##  $ gap            : num  58 97 45 75 15 100 100 60 68 70 ...
##  $ dmin           : num  0.0116 0.1152 0.1979 0.0383 0.0455 ...
##  $ rms            : num  0.02 0.13 0.18 0.12 0.23 0.03 0.07 0.03 0.24 0.19 ...
##  $ net            : chr  "nc" "ci" "nc" "ci" ...
##  $ id             : chr  "nc73321516" "ci39017855" "nc73321511" "ci39017839" ...
##  $ updated        : chr  "2019-12-30T19:47:01.850Z" "2019-12-30T19:30:48.527Z" "2019-12-30T19:36:28.521Z" "2019-12-30T18:56:56.463Z" ...
##  $ place          : chr  "6km NW of The Geysers, CA" "24km N of Searles Valley, CA" "5km NNE of Hamilton City, CA" "13km WNW of Anza, CA" ...
##  $ type           : chr  "earthquake" "earthquake" "earthquake" "earthquake" ...
##  $ horizontalError: num  0.23 0.28 0.57 0.45 0.18 0.96 0.62 0.25 0.36 0.52 ...
##  $ depthError     : num  0.59 1.01 1.35 0.82 0.59 1.16 0.9 0.52 1.01 0.68 ...
##  $ magError       : 'impute' num  0.06 0.115 0.23 0.275 0.107 0.29 0.242 0.01 0.116 0.216 ...
##   ..- attr(*, "imputed")= int  52 61 133 171 220 222 228 242 262 280 ...
##  $ magNst         : num  3 13 17 5 26 10 9 3 11 25 ...
##  $ status         : chr  "automatic" "automatic" "automatic" "automatic" ...
##  $ locationSource : chr  "nc" "ci" "nc" "ci" ...
##  $ magSource      : chr  "nc" "ci" "nc" "ci" ...
##  $ class          : Factor w/ 7 levels "Negative","Zero",..: 2 3 4 2 4 2 2 2 2 2 ...
##  $ date           : POSIXct, format: "2019-12-30 19:37:29" "2019-12-30 19:27:05" ...
##  $ year           : int  2019 2019 2019 2019 2019 2019 2019 2019 2019 2019 ...
##  $ month          : int  12 12 12 12 12 12 12 12 12 12 ...
##  $ day            : int  30 30 30 30 30 30 30 30 30 30 ...
##  - attr(*, ".internal.selfref")=<externalptr>
# Partition the columns of `all_clean` by their exact class so each group can
# be explored with type-appropriate tools.
#
# The class of every column is looked up once, and vapply() guarantees a
# logical vector back no matter how many classes a column carries (sapply()
# silently switches to a list in that case).
#
# NOTE(review): a column is selected only when its class vector is exactly the
# target, so numeric data stored under another class (e.g. the 'impute' class
# that str() reports for magError) ends up in none of these groups — confirm
# that is intended.
column_classes <- lapply(all_clean, class)
columns_of_class <- function(target) {
  is_target <- vapply(
    column_classes,
    function(cls) identical(cls, target),
    logical(1)
  )
  names(which(is_target))
}

numeric_vars <- columns_of_class("numeric")
integer_vars <- columns_of_class("integer")
factor_vars <- columns_of_class("factor")
character_vars <- columns_of_class("character")

# Summary statistics for every numeric column, one column of output per
# variable (sapply() kept here because the result is only printed).
sapply(all_clean[, numeric_vars], summary)
##          latitude longitude     depth       mag       rms
## Min.    -60.75650 -179.9702 -10.00000 -1.380000 0.0000000
## 1st Qu.  33.77425 -148.8526   3.67000  0.870000 0.1200000
## Median   36.61913 -117.8538   7.68000  1.400000 0.1800000
## Mean     39.24526 -112.2350  20.03481  1.610608 0.2884168
## 3rd Qu.  52.53548 -116.2590  15.71500  2.050000 0.4100000
## Max.     83.32290  179.7916 623.63000  6.800000 4.4400000
# Extended descriptive statistics for the numeric columns of `all_clean`.
# NOTE(review): the printed columns (trimmed, mad, skew, kurtosis, se) look
# like psych::describe — confirm which package's describe() is attached.
describe(all_clean[, numeric_vars])
##           vars     n    mean    sd  median trimmed   mad     min    max  range
## latitude     1 10264   39.25 17.83   36.62   40.13 10.54  -60.76  83.32 144.08
## longitude    2 10264 -112.24 59.03 -117.85 -122.98 10.92 -179.97 179.79 359.76
## depth        3 10264   20.03 43.09    7.68   11.14  7.24  -10.00 623.63 633.63
## mag          4 10264    1.61  1.15    1.40    1.47  0.87   -1.38   6.80   8.18
## rms          5 10264    0.29  0.26    0.18    0.25  0.15    0.00   4.44   4.44
##            skew kurtosis   se
## latitude  -0.95     3.06 0.18
## longitude  3.11    10.66 0.58
## depth      7.34    77.40 0.43
## mag        1.17     1.54 0.01
## rms        1.91     8.37 0.00
# Scatterplot matrix of every pair of numeric columns in `all_clean`.
pairs(all_clean[, numeric_vars])

# Cumulative magnitude subsets of the `ca` events: each one keeps every event
# at or above its threshold, so the subsets overlap.
# NOTE(review): none of these four subsets is read in the code visible here —
# confirm they are used further down.
class_one <- filter(ca, mag >= 1)
class_two <- filter(ca, mag >= 2)
class_three <- filter(ca, mag >= 3)
class_four <- filter(ca, mag >= 4)
#hist_three <- class_three %>% select(mag, depth,class)
#ggplot(data = hist_three, aes(x = mag, y = depth,color=class)) + geom_point() + guides(fill=FALSE)


# Depth against magnitude for every positive-magnitude event in the full
# data set, coloured by magnitude class. `class_zero` is built from
# `all_clean` only: a previous assignment from `ca` was overwritten here
# before anything read it, so that dead store is gone.
class_zero <- filter(all_clean, mag > 0)
ggplot(data = class_zero, aes(x = mag, y = depth, color = class)) +
  geom_point()

Mapping of earthquakes

## Registered S3 method overwritten by 'xts':
##   method     from
##   as.zoo.xts zoo
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use

Time series: events grouped by date, with the average magnitude per date regardless of location.