Image

USGS Earthquake and Hazards Program, 10/17/1989

Introduction & Variable Definitions

The dataset comes from the United States Geological Survey (USGS) and contains 8394 observations of 18 variables. I chose this topic because I want to explore which factors tie into the significance of an earthquake, which leads to the question: what factors determine the significance value of an earthquake? This is important to examine because significance may not be as simple as many believe. Many people may view it as just the damage they notice, but more factors could be at play; while reports from people are one factor in significance, much more influences this value. Knowing this can help not only me but also others understand what makes one earthquake event more significant than another.

Variables

The variables are as follows. id is the name/ID given to the earthquake. impact.gap is the azimuthal gap; its value represents the largest angle between two stations, where smaller values (0-180) are more accurate and values that exceed 180 may include some uncertainty. impact.magnitude is the magnitude of the earthquake on a scale from 0 to 10, with 0 being weak (and possibly small) and 10 being powerful. impact.significance is how significant the event was on a scale from 0 to 1000, with larger numbers meaning the event was more significant and smaller numbers meaning it was less significant; significance is calculated from many factors such as magnitude, MMI (Modified Mercalli Intensity, which describes the effects of an earthquake, e.g. damage, structural damage, and human experiences), reports, and estimated impact. location.depth is the depth of the earthquake in km. location.distance is the distance of the earthquake from the reporting station, measured in degrees (1 degree is about 111.2 km); the smaller the number, the more reliable the calculated depth. location.full is the full name of the location of the earthquake. location.latitude is the latitude of the earthquake (-90 (south) to 90 (north)). location.longitude is the longitude of the earthquake (-180 (west) to 180 (east)). location.name is the name of the state or country in which the earthquake occurred. time.day is the day of the month on which the earthquake occurred. time.epoch is the time at which the earthquake occurred, in seconds since 1/1/1970. time.full is the full date and time of the event. time.hour, time.minute, and time.second are the hour, minute, and second at which the event occurred, and time.month and time.year are the month and year in which it occurred.

Load Libraries & Data

library(tidyverse)
library(highcharter)
library(RColorBrewer)
library(ggthemes)
library(GGally)

# Read the CSV by full path instead of calling setwd(), so the session's
# working directory is left untouched. Set the EARTHQUAKES_CSV environment
# variable to point at the file when running on another machine; otherwise
# the original download location is used.
eq_path <- Sys.getenv(
  "EARTHQUAKES_CSV",
  unset = file.path("C:/Users/wesle/Downloads", "earthquakes.csv")
)
eqds <- readr::read_csv(eq_path)

Cleaning the Dataset

# Count the missing values in each column (every count comes back as zero).
eqds |>
  is.na() |>
  colSums()
##                  id          impact.gap    impact.magnitude impact.significance 
##                   0                   0                   0                   0 
##      location.depth   location.distance       location.full   location.latitude 
##                   0                   0                   0                   0 
##  location.longitude       location.name            time.day          time.epoch 
##                   0                   0                   0                   0 
##           time.full           time.hour         time.minute          time.month 
##                   0                   0                   0                   0 
##         time.second           time.year 
##                   0                   0
# Swap every "." in the column names for "_" so words are separated by
# underscores (e.g. impact.gap becomes impact_gap).
names(eqds) <- chartr(".", "_", names(eqds))

Visualizations

# Eight-color "Dark2" palette from RColorBrewer, used to color the series.
colors <- brewer.pal(8, "Dark2")

# Column chart of earthquake significance by day of the month, with one
# series per month. Built from the full dataset, outlier included.
highchart() |>
  hc_add_series(
    data = eqds,
    type = "column",
    hcaes(x = time_day, y = impact_significance, group = time_month)
  ) |>
  hc_title(text = "Earthquake Significance vs Day separated by Month") |>
  hc_caption(
    text = "USGS (US Geological Survey), Earthquake and Hazards Program"
  ) |>
  hc_xAxis(title = list(text = "Day of the Month")) |>
  hc_yAxis(title = list(text = "Earthquake Significance")) |>
  hc_colors(colors) |>
  hc_add_theme(hc_theme_bloom())
# Drop the one outlier whose significance is over 1000 (2674).
eqds1 <- eqds |>
  filter(impact_significance <= 1000)

# Column chart of significance by day of the month, one series per month,
# drawn from the filtered data so the outlier no longer stretches the y-axis.
highchart() |>
  hc_add_series(
    data = eqds1,
    type = "column",
    hcaes(x = time_day, y = impact_significance, group = time_month)
  ) |>
  hc_title(text = "Earthquake Significance vs Day separated by Month") |>
  hc_caption(
    text = "USGS (US Geological Survey), Earthquake and Hazards Program"
  ) |>
  hc_xAxis(title = list(text = "Day of the Month")) |>
  hc_yAxis(title = list(text = "Earthquake Significance")) |>
  hc_colors(colors) |>
  hc_add_theme(hc_theme_alone())
# 31 named colors: enough for one per possible day of the month, since the
# chart below draws one series per time_day value.
colors1 <- c(
  "red", "orange", "yellow", "green", "blue", "purple", "pink", "brown",
  "black", "maroon", "mediumseagreen", "aquamarine", "violet", "gold",
  "darkslategrey", "khaki", "magenta", "tomato", "thistle", "turquoise",
  "olivedrab", "chartreuse", "chocolate", "darkolivegreen", "rosybrown",
  "salmon", "white", "firebrick", "skyblue", "dodgerblue", "indianred"
)

# Point chart of significance against magnitude (outlier removed), with the
# points grouped by day of the month.
highchart() |>
  hc_add_series(
    data = eqds1,
    type = "point",
    hcaes(x = impact_magnitude, y = impact_significance, group = time_day)
  ) |>
  hc_title(
    text = "Earthquake Significance vs Magnitude by Day of the Month"
  ) |>
  hc_caption(
    text = "USGS (US Geological Survey), Earthquake and Hazards Program"
  ) |>
  hc_xAxis(title = list(text = "Earthquake Magnitude")) |>
  hc_yAxis(title = list(text = "Earthquake Significance")) |>
  hc_colors(colors1) |>
  hc_add_theme(hc_theme_economist())

Correlation

# Correlation between significance and magnitude, outlier excluded.
with(eqds1, cor(impact_significance, impact_magnitude))
## [1] 0.9439965

Multiple Linear Regression Model

# Full model on the outlier-free data: significance regressed on the
# azimuthal gap, depth, latitude, longitude, station distance and magnitude.
mlrm <- lm(
  impact_significance ~ impact_gap + location_depth + location_latitude +
    location_longitude + location_distance + impact_magnitude,
  data = eqds1
)

# Print the coefficient table and fit statistics.
summary(mlrm)
## 
## Call:
## lm(formula = impact_significance ~ impact_gap + location_depth + 
##     location_latitude + location_longitude + location_distance + 
##     impact_magnitude, data = eqds1)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -136.68  -13.82   -2.87   10.82  619.34 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         2.606449   1.473678   1.769   0.0770 .  
## impact_gap         -0.025705   0.003714  -6.920 4.84e-12 ***
## location_depth      0.022874   0.006148   3.720   0.0002 ***
## location_latitude  -0.491819   0.019641 -25.040  < 2e-16 ***
## location_longitude  0.187942   0.005964  31.512  < 2e-16 ***
## location_distance   3.821198   0.223352  17.108  < 2e-16 ***
## impact_magnitude   62.745324   0.356850 175.831  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 25.9 on 8386 degrees of freedom
## Multiple R-squared:  0.9214, Adjusted R-squared:  0.9214 
## F-statistic: 1.639e+04 on 6 and 8386 DF,  p-value: < 2.2e-16

Equation

impact_significance = -0.025705(impact_gap) + 0.022874(location_depth) - 0.491819(location_latitude) + 0.187942(location_longitude) + 3.821198(location_distance) + 62.745324(impact_magnitude) + 2.606449

Correlation Plot

# Keep only the numeric variables used in the regression models. Columns are
# picked by name rather than by position so the selection is self-describing
# and does not silently change if the column order of the CSV changes.
eqds2 <- eqds1 |>
  select(
    impact_gap,
    impact_magnitude,
    impact_significance,
    location_depth,
    location_distance,
    location_latitude,
    location_longitude
  )

# Pairwise plot matrix for the selected variables.
ggpairs(eqds2)

# Reduced model on the outlier-free data: latitude, longitude and station
# distance are left out, keeping only gap, depth and magnitude.
mlrm1 <- lm(
  impact_significance ~ impact_gap + location_depth + impact_magnitude,
  data = eqds1
)

# Print the coefficient table and fit statistics.
summary(mlrm1)
## 
## Call:
## lm(formula = impact_significance ~ impact_gap + location_depth + 
##     impact_magnitude, data = eqds1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -47.29 -22.34  -4.21  17.57 576.19 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -62.751806   0.743295 -84.424  < 2e-16 ***
## impact_gap         0.036101   0.004088   8.830  < 2e-16 ***
## location_depth     0.028134   0.007184   3.916 9.06e-05 ***
## impact_magnitude  75.442243   0.319770 235.927  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 30.32 on 8389 degrees of freedom
## Multiple R-squared:  0.8923, Adjusted R-squared:  0.8923 
## F-statistic: 2.317e+04 on 3 and 8389 DF,  p-value: < 2.2e-16

Equation

impact_significance = 0.036101(impact_gap) + 0.028134(location_depth) + 75.442243(impact_magnitude) - 62.751806

Multiple Linear Regression Model (With outlier)

# Full six-predictor model fitted to the complete dataset (eqds), i.e. with
# the outlier left in, for comparison with the outlier-free fit.
mlrm2 <- lm(
  impact_significance ~ impact_gap + location_depth + location_latitude +
    location_longitude + location_distance + impact_magnitude,
  data = eqds
)

# Print the coefficient table and fit statistics.
summary(mlrm2)
## 
## Call:
## lm(formula = impact_significance ~ impact_gap + location_depth + 
##     location_latitude + location_longitude + location_distance + 
##     impact_magnitude, data = eqds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -124.39  -14.11   -3.19   11.18 2292.37 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -0.272406   2.050730  -0.133    0.894    
## impact_gap         -0.025124   0.005170  -4.860  1.2e-06 ***
## location_depth      0.009869   0.008555   1.154    0.249    
## location_latitude  -0.457998   0.027334 -16.756  < 2e-16 ***
## location_longitude  0.190716   0.008301  22.974  < 2e-16 ***
## location_distance   3.412481   0.310819  10.979  < 2e-16 ***
## impact_magnitude   64.403447   0.496021 129.840  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 36.05 on 8387 degrees of freedom
## Multiple R-squared:  0.861,  Adjusted R-squared:  0.8609 
## F-statistic:  8661 on 6 and 8387 DF,  p-value: < 2.2e-16
# Reduced three-predictor model (gap, depth, magnitude) fitted to the
# complete dataset (eqds), i.e. with the outlier left in.
mlrm3 <- lm(
  impact_significance ~ impact_gap + location_depth + impact_magnitude,
  data = eqds
)

# Print the coefficient table and fit statistics.
summary(mlrm3)
## 
## Call:
## lm(formula = impact_significance ~ impact_gap + location_depth + 
##     impact_magnitude, data = eqds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
##  -45.43  -22.91   -4.20   17.90 2261.30 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)      -63.768482   0.958905 -66.501  < 2e-16 ***
## impact_gap         0.034217   0.005275   6.486 9.29e-11 ***
## location_depth     0.015014   0.009266   1.620    0.105    
## impact_magnitude  76.631962   0.412084 185.962  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 39.12 on 8390 degrees of freedom
## Multiple R-squared:  0.8364, Adjusted R-squared:  0.8363 
## F-statistic: 1.429e+04 on 3 and 8390 DF,  p-value: < 2.2e-16

Source(s):

https://corgis-edu.github.io/corgis/csv/earthquakes/