The dataset comes from the United States Geological Survey (USGS) and contains 8394 observations of 18 variables. I chose this topic because I want to explore which factors tie into the significance of an earthquake, which leads to the question: what factors determine the significance value of an earthquake? This is important to examine because significance may not be as simple as many believe. Many people may view it as just the damage they notice, but more factors could be at play; while reports from people are one factor in significance, much more influences this value. Knowing this can help not only me but also others understand what makes one earthquake event more significant than another.
The variables are as follows. id is the name/ID given to the earthquake. impact.gap is the azimuthal gap, whose value represents the largest angle between two stations; smaller values (0-180) are more accurate, while values that exceed 180 may include some uncertainty. impact.magnitude is the magnitude of the earthquake on a scale from 0 to 10, with 0 being weak (and possibly small) and 10 being powerful. impact.significance is how significant the event was on a scale from 0 to 1000; larger numbers mean the event was more significant and smaller numbers mean it was less significant. Significance is calculated from many factors such as magnitude, MMI (Modified Mercalli Intensity: the effects of an earthquake, e.g. damage, structural damage, and human experiences), reports, and estimated impact. location.depth is the depth of the earthquake in km. location.distance is the distance of the earthquake from the reporting station, measured in degrees (1 degree is about 111.2 km); the smaller the number, the more reliable the calculated depth. location.full is the full name of the location of the earthquake. location.latitude is the latitude of the earthquake (-90 (south) to 90 (north)). location.longitude is the longitude of the earthquake (-180 (west) to 180 (east)). location.name is the name of the state or country the earthquake occurred in. time.day is the day of the month the earthquake occurred. time.epoch is the time that the earthquake occurred, in seconds from 1/1/1970. time.full is the full date and time of when the event occurred. time.hour, time.minute, time.month, time.second, and time.year are, respectively, the hour, minute, month, second, and year of when the event occurred.
library(tidyverse)
library(highcharter)
library(RColorBrewer)
library(ggthemes)
library(GGally)
# Load the USGS earthquake data from an explicit path rather than calling
# setwd(), so running this script does not change the session's working
# directory as a side effect.
# NOTE(review): the directory is still machine-specific; point `data_dir` at
# wherever earthquakes.csv lives (ideally a project-relative folder). If any
# later code relies on the working directory having been changed, it should
# build its paths from `data_dir` as well.
data_dir <- "C:/Users/wesle/Downloads"
eqds <- readr::read_csv(file.path(data_dir, "earthquakes.csv"))
# Count missing values per column; every count is 0, so there are no NAs.
eqds |>
  is.na() |>
  colSums()
## id impact.gap impact.magnitude impact.significance
## 0 0 0 0
## location.depth location.distance location.full location.latitude
## 0 0 0 0
## location.longitude location.name time.day time.epoch
## 0 0 0 0
## time.full time.hour time.minute time.month
## 0 0 0 0
## time.second time.year
## 0 0
# Rename columns so words are separated by "_" instead of "."
# (e.g. impact.gap becomes impact_gap).
eqds <- setNames(
  eqds,
  gsub(pattern = "\\.", replacement = "_", x = names(eqds))
)
# Eight-colour "Dark2" ColorBrewer palette used for the month-grouped charts.
colors <- RColorBrewer::brewer.pal(n = 8, name = "Dark2")
# Column chart of earthquake significance by day of the month, with one
# series per month. Uses the full data set (eqds).
# Fix: corrected the misspelled word "seperated" in the chart title.
highchart() |>
  hc_add_series(
    data = eqds,
    type = "column",
    mapping = hcaes(
      x = time_day,
      y = impact_significance,
      group = time_month
    )
  ) |>
  hc_title(text = "Earthquake Significance vs Day separated by Month") |>
  hc_caption(
    text = "USGS (US Geological Survey), Earthquake and Hazards Program"
  ) |>
  hc_xAxis(title = list(text = "Day of the Month")) |>
  hc_yAxis(title = list(text = "Earthquake Significance")) |>
  hc_colors(colors) |>
  hc_add_theme(hc_theme_bloom())
# Keep only events with significance of at most 1000; this drops the single
# outlier whose significance is above 1000 (2674).
eqds1 <- dplyr::filter(eqds, impact_significance <= 1000)
# Same column chart of significance by day of the month (one series per
# month), drawn from eqds1, i.e. without the outlier.
# Fix: corrected the misspelled word "seperated" in the chart title.
highchart() |>
  hc_add_series(
    data = eqds1,
    type = "column",
    mapping = hcaes(
      x = time_day,
      y = impact_significance,
      group = time_month
    )
  ) |>
  hc_title(text = "Earthquake Significance vs Day separated by Month") |>
  hc_caption(
    text = "USGS (US Geological Survey), Earthquake and Hazards Program"
  ) |>
  hc_xAxis(title = list(text = "Day of the Month")) |>
  hc_yAxis(title = list(text = "Earthquake Significance")) |>
  hc_colors(colors) |>
  hc_add_theme(hc_theme_alone())
# Palette of 31 named colours for the chart below, whose series are grouped
# by day of the month.
colors1 <- c(
  "red", "orange", "yellow", "green", "blue", "purple", "pink", "brown",
  "black", "maroon", "mediumseagreen", "aquamarine", "violet", "gold",
  "darkslategrey", "khaki", "magenta", "tomato", "thistle", "turquoise",
  "olivedrab", "chartreuse", "chocolate", "darkolivegreen", "rosybrown",
  "salmon", "white", "firebrick", "skyblue", "dodgerblue", "indianred"
)

# Significance against magnitude for the outlier-free data (eqds1), one
# series per day of the month.
highchart() |>
  hc_add_series(
    data = eqds1,
    type = "point",
    mapping = hcaes(
      x = impact_magnitude,
      y = impact_significance,
      group = time_day
    )
  ) |>
  hc_title(text = "Earthquake Significance vs Magnitude by Day of the Month") |>
  hc_caption(
    text = "USGS (US Geological Survey), Earthquake and Hazards Program"
  ) |>
  hc_xAxis(title = list(text = "Earthquake Magnitude")) |>
  hc_yAxis(title = list(text = "Earthquake Significance")) |>
  hc_colors(colors1) |>
  hc_add_theme(hc_theme_economist())
# Correlation between significance and magnitude in the outlier-free data.
with(eqds1, cor(impact_significance, impact_magnitude))
## [1] 0.9439965
# Multiple linear regression of significance on all six numeric predictors,
# fitted to the outlier-free data (eqds1).
mlrm <- lm(
  formula = impact_significance ~ impact_gap + location_depth +
    location_latitude + location_longitude + location_distance +
    impact_magnitude,
  data = eqds1
)
summary(mlrm)
##
## Call:
## lm(formula = impact_significance ~ impact_gap + location_depth +
## location_latitude + location_longitude + location_distance +
## impact_magnitude, data = eqds1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -136.68 -13.82 -2.87 10.82 619.34
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.606449 1.473678 1.769 0.0770 .
## impact_gap -0.025705 0.003714 -6.920 4.84e-12 ***
## location_depth 0.022874 0.006148 3.720 0.0002 ***
## location_latitude -0.491819 0.019641 -25.040 < 2e-16 ***
## location_longitude 0.187942 0.005964 31.512 < 2e-16 ***
## location_distance 3.821198 0.223352 17.108 < 2e-16 ***
## impact_magnitude 62.745324 0.356850 175.831 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 25.9 on 8386 degrees of freedom
## Multiple R-squared: 0.9214, Adjusted R-squared: 0.9214
## F-statistic: 1.639e+04 on 6 and 8386 DF, p-value: < 2.2e-16
impact_significance = -0.025705(impact_gap) + 0.022874(location_depth) - 0.491819(location_latitude) + 0.187942(location_longitude) + 3.821198(location_distance) + 62.745324(impact_magnitude) + 2.606449
# Pairwise plot matrix of the numeric model variables.
# Columns are selected by name instead of by position (previously
# select(2:6, 8:9)), so the selection keeps working if the column order of
# the input file changes. These are the same seven columns, in the same
# order, that positions 2-6 and 8-9 refer to.
eqds2 <- eqds1 |>
  select(
    impact_gap,
    impact_magnitude,
    impact_significance,
    location_depth,
    location_distance,
    location_latitude,
    location_longitude
  )
ggpairs(eqds2)
# Reduced model: significance on azimuthal gap, depth, and magnitude only,
# fitted to the outlier-free data (eqds1).
mlrm1 <- lm(
  formula = impact_significance ~ impact_gap + location_depth +
    impact_magnitude,
  data = eqds1
)
summary(mlrm1)
##
## Call:
## lm(formula = impact_significance ~ impact_gap + location_depth +
## impact_magnitude, data = eqds1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -47.29 -22.34 -4.21 17.57 576.19
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -62.751806 0.743295 -84.424 < 2e-16 ***
## impact_gap 0.036101 0.004088 8.830 < 2e-16 ***
## location_depth 0.028134 0.007184 3.916 9.06e-05 ***
## impact_magnitude 75.442243 0.319770 235.927 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 30.32 on 8389 degrees of freedom
## Multiple R-squared: 0.8923, Adjusted R-squared: 0.8923
## F-statistic: 2.317e+04 on 3 and 8389 DF, p-value: < 2.2e-16
impact_significance = 0.036101(impact_gap) + 0.028134(location_depth) + 75.442243(impact_magnitude) - 62.751806
# Same six-predictor model, fitted to the full data set (eqds), which still
# contains the high-significance outlier.
mlrm2 <- lm(
  formula = impact_significance ~ impact_gap + location_depth +
    location_latitude + location_longitude + location_distance +
    impact_magnitude,
  data = eqds
)
summary(mlrm2)
##
## Call:
## lm(formula = impact_significance ~ impact_gap + location_depth +
## location_latitude + location_longitude + location_distance +
## impact_magnitude, data = eqds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -124.39 -14.11 -3.19 11.18 2292.37
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.272406 2.050730 -0.133 0.894
## impact_gap -0.025124 0.005170 -4.860 1.2e-06 ***
## location_depth 0.009869 0.008555 1.154 0.249
## location_latitude -0.457998 0.027334 -16.756 < 2e-16 ***
## location_longitude 0.190716 0.008301 22.974 < 2e-16 ***
## location_distance 3.412481 0.310819 10.979 < 2e-16 ***
## impact_magnitude 64.403447 0.496021 129.840 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 36.05 on 8387 degrees of freedom
## Multiple R-squared: 0.861, Adjusted R-squared: 0.8609
## F-statistic: 8661 on 6 and 8387 DF, p-value: < 2.2e-16
# Reduced three-predictor model (gap, depth, magnitude), fitted to the full
# data set (eqds), which still contains the high-significance outlier.
mlrm3 <- lm(
  formula = impact_significance ~ impact_gap + location_depth +
    impact_magnitude,
  data = eqds
)
summary(mlrm3)
##
## Call:
## lm(formula = impact_significance ~ impact_gap + location_depth +
## impact_magnitude, data = eqds)
##
## Residuals:
## Min 1Q Median 3Q Max
## -45.43 -22.91 -4.20 17.90 2261.30
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -63.768482 0.958905 -66.501 < 2e-16 ***
## impact_gap 0.034217 0.005275 6.486 9.29e-11 ***
## location_depth 0.015014 0.009266 1.620 0.105
## impact_magnitude 76.631962 0.412084 185.962 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 39.12 on 8390 degrees of freedom
## Multiple R-squared: 0.8364, Adjusted R-squared: 0.8363
## F-statistic: 1.429e+04 on 3 and 8390 DF, p-value: < 2.2e-16