names(faults)
## [1] "ogc_fid" "accuracy" "average_dip"
## [4] "is_active" "name" "dip_dir"
## [7] "downthrown_side_id" "last_movement" "net_slip_rate"
## [10] "strike_slip_rate" "vert_slip_rate" "fs_name"
## [13] "slip_type" "average_rake" "reference"
## [16] "exposure_quality" "epistemic_quality" "shortening_rate"
## [19] "notes" "geometry"
# Quick Plot
plot(st_geometry(faults), col = "red", lwd = 1)
parse_tuple_first_chr <- function(x) {
out <- rep(NA_real_, length(x))
idx <- which(!is.na(x))
if (length(idx) == 0) return(out)
s <- gsub("[()\\s]", "", x[idx])
out[idx] <- vapply(strsplit(s, ",", fixed = TRUE), function(parts) {
parts <- parts[parts != ""]
nums <- suppressWarnings(as.numeric(parts))
nums <- nums[!is.na(nums)]
if (length(nums) >= 1) nums[1] else NA_real_
}, numeric(1))
out
}
library(sf)
library(dplyr)
faults_df <- faults %>%
st_drop_geometry() %>%
mutate(
strike_pref = parse_tuple_first_chr(strike_slip_rate),
vert_pref = parse_tuple_first_chr(vert_slip_rate),
short_pref = parse_tuple_first_chr(shortening_rate),
net_pref = parse_tuple_first_chr(net_slip_rate),
dip_pref = parse_tuple_first_chr(average_dip)
)
summary(faults_df$short_pref)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -13.000 -0.100 -0.050 1.037 0.500 75.000 185
quantile(faults_df$short_pref, probs = c(0.05, 0.25, 0.5, 0.75, 0.95), na.rm = TRUE)
## 5% 25% 50% 75% 95%
## -1.50 -0.10 -0.05 0.50 2.00
library(ggplot2)
library(dplyr)
q1 <- quantile(faults_df$short_pref, 0.25, na.rm = TRUE)
q3 <- quantile(faults_df$short_pref, 0.75, na.rm = TRUE)
iqr <- q3 - q1
lower_bound <- q1 - 1.5 * iqr
upper_bound <- q3 + 1.5 * iqr
# Remove outliers
faults_df_trim <- faults_df %>%
filter(
!is.na(short_pref),
short_pref >= lower_bound,
short_pref <= upper_bound
)
# Replot
ggplot(faults_df_trim, aes(short_pref)) +
geom_histogram(bins = 40, fill = "steelblue", alpha = 0.8) +
geom_vline(xintercept = 0, linetype = "dashed", color = "red") +
labs(
title = "Distribution of Shortening Rates (Outliers Removed)",
x = "Shortening Rate (negative = extension)",
y = "Count"
)
Now we start to import the earthquake data and do some EDA
library(sf)
library(dplyr)
library(readr)
## Warning: package 'readr' was built under R version 4.5.2
eq_path <- "/Users/leoli/Desktop/UPenn 2025-2026/Quant/DE Shaw Case/central_am_earthquakes.tsv"
eq <- read_tsv(eq_path, show_col_types = FALSE)
names(eq)
## [1] "Search Parameters" "Year"
## [3] "Mo" "Dy"
## [5] "Hr" "Mn"
## [7] "Sec" "Tsu"
## [9] "Vol" "Location Name"
## [11] "Latitude" "Longitude"
## [13] "Focal Depth (km)" "Mag"
## [15] "MMI Int" "Deaths"
## [17] "Death Description" "Missing"
## [19] "Missing Description" "Injuries"
## [21] "Injuries Description" "Damage ($Mil)"
## [23] "Damage Description" "Houses Destroyed"
## [25] "Houses Destroyed Description" "Houses Damaged"
## [27] "Houses Damaged Description" "Total Deaths"
## [29] "Total Death Description" "Total Missing"
## [31] "Total Missing Description" "Total Injuries"
## [33] "Total Injuries Description" "Total Damage ($Mil)"
## [35] "Total Damage Description" "Total Houses Destroyed"
## [37] "Total Houses Destroyed Description" "Total Houses Damaged"
## [39] "Total Houses Damaged Description"
LON_COL <- "Longitude"
LAT_COL <- "Latitude"
Now we need to convert earthquakes into sf points (EPSG:4326)
First inspect and filter out missing coordinates
eq_clean <- eq %>%
filter(
!is.na(.data[[LON_COL]]),
!is.na(.data[[LAT_COL]])
)
summary(eq_clean)
## Search Parameters Year Mo Dy
## Length:182 Min. :1565 Min. : 1.000 Min. : 1.00
## Class :character 1st Qu.:1884 1st Qu.: 4.000 1st Qu.: 6.00
## Mode :character Median :1950 Median : 7.000 Median :13.00
## Mean :1920 Mean : 6.646 Mean :14.44
## 3rd Qu.:1999 3rd Qu.:10.000 3rd Qu.:22.75
## Max. :2025 Max. :12.000 Max. :31.00
## NA's :7 NA's :12
## Hr Mn Sec Tsu Vol
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 507 Min. :3551
## 1st Qu.: 4.75 1st Qu.:13.00 1st Qu.: 9.60 1st Qu.:1748 1st Qu.:3580
## Median :10.00 Median :27.00 Median :28.00 Median :5062 Median :3662
## Mean :10.83 Mean :27.21 Mean :27.19 Mean :3668 Mean :3856
## 3rd Qu.:17.00 3rd Qu.:42.00 3rd Qu.:44.00 3rd Qu.:5583 3rd Qu.:3664
## Max. :23.00 Max. :59.00 Max. :58.90 Max. :6080 Max. :4824
## NA's :50 NA's :51 NA's :63 NA's :143 NA's :177
## Location Name Latitude Longitude Focal Depth (km)
## Length:182 Min. : 6.50 Min. :-92.64 Min. : 1.00
## Class :character 1st Qu.:10.11 1st Qu.:-89.37 1st Qu.: 10.50
## Mode :character Median :13.00 Median :-87.81 Median : 28.00
## Mean :12.25 Mean :-87.19 Mean : 34.38
## 3rd Qu.:13.83 3rd Qu.:-84.70 3rd Qu.: 44.50
## Max. :15.89 Max. :-77.31 Max. :135.00
## NA's :76
## Mag MMI Int Deaths Death Description
## Min. :4.400 Min. : 4.000 Min. : 1.0 Min. :1.000
## 1st Qu.:5.750 1st Qu.: 6.000 1st Qu.: 2.0 1st Qu.:1.000
## Median :6.500 Median : 8.000 Median : 7.0 Median :1.000
## Mean :6.388 Mean : 7.203 Mean : 429.4 Mean :1.831
## 3rd Qu.:7.150 3rd Qu.: 8.000 3rd Qu.: 300.0 3rd Qu.:3.000
## Max. :7.900 Max. :11.000 Max. :10000.0 Max. :4.000
## NA's :43 NA's :118 NA's :121 NA's :105
## Missing Missing Description Injuries Injuries Description
## Min. :17 Min. :1 Min. : 1.00 Min. :1.000
## 1st Qu.:17 1st Qu.:1 1st Qu.: 4.00 1st Qu.:1.000
## Median :17 Median :1 Median : 25.00 Median :1.000
## Mean :17 Mean :1 Mean : 932.82 Mean :1.804
## 3rd Qu.:17 3rd Qu.:1 3rd Qu.: 81.25 3rd Qu.:3.000
## Max. :17 Max. :1 Max. :20000.00 Max. :4.000
## NA's :181 NA's :181 NA's :138 NA's :126
## Damage ($Mil) Damage Description Houses Destroyed
## Min. : 0.05 Min. :1.0 Min. : 2
## 1st Qu.: 2.00 1st Qu.:1.0 1st Qu.: 7
## Median : 15.00 Median :2.0 Median : 84
## Mean : 307.35 Mean :2.2 Mean : 6894
## 3rd Qu.: 210.00 3rd Qu.:3.0 3rd Qu.: 522
## Max. :2968.00 Max. :4.0 Max. :108226
## NA's :161 NA's :17 NA's :159
## Houses Destroyed Description Houses Damaged Houses Damaged Description
## Min. :1.000 Min. : 2 Min. :1.000
## 1st Qu.:1.000 1st Qu.: 41 1st Qu.:2.000
## Median :2.000 Median : 86 Median :2.000
## Mean :2.328 Mean : 6987 Mean :2.434
## 3rd Qu.:3.000 3rd Qu.: 554 3rd Qu.:3.000
## Max. :4.000 Max. :169632 Max. :4.000
## NA's :124 NA's :153 NA's :129
## Total Deaths Total Death Description Total Missing
## Min. : 1.0 Min. :1.000 Min. :17
## 1st Qu.: 2.0 1st Qu.:1.000 1st Qu.:17
## Median : 7.0 Median :1.000 Median :17
## Mean : 390.7 Mean :1.819 Mean :17
## 3rd Qu.: 158.8 3rd Qu.:3.000 3rd Qu.:17
## Max. :10000.0 Max. :4.000 Max. :17
## NA's :124 NA's :110 NA's :181
## Total Missing Description Total Injuries Total Injuries Description
## Min. :1 Min. : 1.00 Min. :1.000
## 1st Qu.:1 1st Qu.: 4.25 1st Qu.:1.000
## Median :1 Median : 28.50 Median :1.000
## Mean :1 Mean : 1122.46 Mean :1.828
## 3rd Qu.:1 3rd Qu.: 100.00 3rd Qu.:3.000
## Max. :1 Max. :20000.00 Max. :4.000
## NA's :181 NA's :136 NA's :124
## Total Damage ($Mil) Total Damage Description Total Houses Destroyed
## Min. : 0.05 Min. :1.000 Min. : 2.0
## 1st Qu.: 2.00 1st Qu.:1.000 1st Qu.: 7.0
## Median : 21.25 Median :2.000 Median : 262.5
## Mean : 322.71 Mean :2.218 Mean : 6710.6
## 3rd Qu.: 244.62 3rd Qu.:3.000 3rd Qu.: 650.8
## Max. :2968.00 Max. :4.000 Max. :108226.0
## NA's :162 NA's :40 NA's :158
## Total Houses Destroyed Description Total Houses Damaged
## Min. :1.000 Min. : 2.00
## 1st Qu.:1.000 1st Qu.: 31.75
## Median :2.000 Median : 75.00
## Mean :2.344 Mean : 6724.60
## 3rd Qu.:3.000 3rd Qu.: 537.25
## Max. :4.000 Max. :169632.00
## NA's :121 NA's :152
## Total Houses Damaged Description
## Min. :1.000
## 1st Qu.:1.000
## Median :2.000
## Mean :2.283
## 3rd Qu.:3.000
## Max. :4.000
## NA's :129
Now convert into sf
eq_sf <- st_as_sf(
eq_clean,
coords = c(LON_COL, LAT_COL),
crs = 4326,
remove = FALSE
)
summary(eq_sf)
## Search Parameters Year Mo Dy
## Length:182 Min. :1565 Min. : 1.000 Min. : 1.00
## Class :character 1st Qu.:1884 1st Qu.: 4.000 1st Qu.: 6.00
## Mode :character Median :1950 Median : 7.000 Median :13.00
## Mean :1920 Mean : 6.646 Mean :14.44
## 3rd Qu.:1999 3rd Qu.:10.000 3rd Qu.:22.75
## Max. :2025 Max. :12.000 Max. :31.00
## NA's :7 NA's :12
## Hr Mn Sec Tsu Vol
## Min. : 0.00 Min. : 0.00 Min. : 0.00 Min. : 507 Min. :3551
## 1st Qu.: 4.75 1st Qu.:13.00 1st Qu.: 9.60 1st Qu.:1748 1st Qu.:3580
## Median :10.00 Median :27.00 Median :28.00 Median :5062 Median :3662
## Mean :10.83 Mean :27.21 Mean :27.19 Mean :3668 Mean :3856
## 3rd Qu.:17.00 3rd Qu.:42.00 3rd Qu.:44.00 3rd Qu.:5583 3rd Qu.:3664
## Max. :23.00 Max. :59.00 Max. :58.90 Max. :6080 Max. :4824
## NA's :50 NA's :51 NA's :63 NA's :143 NA's :177
## Location Name Latitude Longitude Focal Depth (km)
## Length:182 Min. : 6.50 Min. :-92.64 Min. : 1.00
## Class :character 1st Qu.:10.11 1st Qu.:-89.37 1st Qu.: 10.50
## Mode :character Median :13.00 Median :-87.81 Median : 28.00
## Mean :12.25 Mean :-87.19 Mean : 34.38
## 3rd Qu.:13.83 3rd Qu.:-84.70 3rd Qu.: 44.50
## Max. :15.89 Max. :-77.31 Max. :135.00
## NA's :76
## Mag MMI Int Deaths Death Description
## Min. :4.400 Min. : 4.000 Min. : 1.0 Min. :1.000
## 1st Qu.:5.750 1st Qu.: 6.000 1st Qu.: 2.0 1st Qu.:1.000
## Median :6.500 Median : 8.000 Median : 7.0 Median :1.000
## Mean :6.388 Mean : 7.203 Mean : 429.4 Mean :1.831
## 3rd Qu.:7.150 3rd Qu.: 8.000 3rd Qu.: 300.0 3rd Qu.:3.000
## Max. :7.900 Max. :11.000 Max. :10000.0 Max. :4.000
## NA's :43 NA's :118 NA's :121 NA's :105
## Missing Missing Description Injuries Injuries Description
## Min. :17 Min. :1 Min. : 1.00 Min. :1.000
## 1st Qu.:17 1st Qu.:1 1st Qu.: 4.00 1st Qu.:1.000
## Median :17 Median :1 Median : 25.00 Median :1.000
## Mean :17 Mean :1 Mean : 932.82 Mean :1.804
## 3rd Qu.:17 3rd Qu.:1 3rd Qu.: 81.25 3rd Qu.:3.000
## Max. :17 Max. :1 Max. :20000.00 Max. :4.000
## NA's :181 NA's :181 NA's :138 NA's :126
## Damage ($Mil) Damage Description Houses Destroyed
## Min. : 0.05 Min. :1.0 Min. : 2
## 1st Qu.: 2.00 1st Qu.:1.0 1st Qu.: 7
## Median : 15.00 Median :2.0 Median : 84
## Mean : 307.35 Mean :2.2 Mean : 6894
## 3rd Qu.: 210.00 3rd Qu.:3.0 3rd Qu.: 522
## Max. :2968.00 Max. :4.0 Max. :108226
## NA's :161 NA's :17 NA's :159
## Houses Destroyed Description Houses Damaged Houses Damaged Description
## Min. :1.000 Min. : 2 Min. :1.000
## 1st Qu.:1.000 1st Qu.: 41 1st Qu.:2.000
## Median :2.000 Median : 86 Median :2.000
## Mean :2.328 Mean : 6987 Mean :2.434
## 3rd Qu.:3.000 3rd Qu.: 554 3rd Qu.:3.000
## Max. :4.000 Max. :169632 Max. :4.000
## NA's :124 NA's :153 NA's :129
## Total Deaths Total Death Description Total Missing
## Min. : 1.0 Min. :1.000 Min. :17
## 1st Qu.: 2.0 1st Qu.:1.000 1st Qu.:17
## Median : 7.0 Median :1.000 Median :17
## Mean : 390.7 Mean :1.819 Mean :17
## 3rd Qu.: 158.8 3rd Qu.:3.000 3rd Qu.:17
## Max. :10000.0 Max. :4.000 Max. :17
## NA's :124 NA's :110 NA's :181
## Total Missing Description Total Injuries Total Injuries Description
## Min. :1 Min. : 1.00 Min. :1.000
## 1st Qu.:1 1st Qu.: 4.25 1st Qu.:1.000
## Median :1 Median : 28.50 Median :1.000
## Mean :1 Mean : 1122.46 Mean :1.828
## 3rd Qu.:1 3rd Qu.: 100.00 3rd Qu.:3.000
## Max. :1 Max. :20000.00 Max. :4.000
## NA's :181 NA's :136 NA's :124
## Total Damage ($Mil) Total Damage Description Total Houses Destroyed
## Min. : 0.05 Min. :1.000 Min. : 2.0
## 1st Qu.: 2.00 1st Qu.:1.000 1st Qu.: 7.0
## Median : 21.25 Median :2.000 Median : 262.5
## Mean : 322.71 Mean :2.218 Mean : 6710.6
## 3rd Qu.: 244.62 3rd Qu.:3.000 3rd Qu.: 650.8
## Max. :2968.00 Max. :4.000 Max. :108226.0
## NA's :162 NA's :40 NA's :158
## Total Houses Destroyed Description Total Houses Damaged
## Min. :1.000 Min. : 2.00
## 1st Qu.:1.000 1st Qu.: 31.75
## Median :2.000 Median : 75.00
## Mean :2.344 Mean : 6724.60
## 3rd Qu.:3.000 3rd Qu.: 537.25
## Max. :4.000 Max. :169632.00
## NA's :121 NA's :152
## Total Houses Damaged Description geometry
## Min. :1.000 POINT :182
## 1st Qu.:1.000 epsg:4326 : 0
## Median :2.000 +proj=long...: 0
## Mean :2.283
## 3rd Qu.:3.000
## Max. :4.000
## NA's :129
plot(
st_geometry(faults),
col = "red",
lwd = 1,
main = "Central America Faults (red) and Earthquakes (blue)"
)
plot(
st_geometry(eq_sf),
col = rgb(0, 0, 1, 0.5), # semi-transparent blue
pch = 16,
cex = 0.4,
add = TRUE
)
MAG_COL <- "Mag"
plot(
st_geometry(faults),
col = "red",
lwd = 1,
main = "Faults (red) and Earthquakes by Magnitude (blue)"
)
plot(
st_geometry(eq_sf),
col = rgb(0, 0, 1, 0.4),
pch = 16,
cex = scales::rescale(eq_sf[[MAG_COL]], to = c(0.4, 1.5)),
add = TRUE
)
Now we project this and find the shortest distance between earthquake and fault for each earthquake
faults_utm <- st_transform(faults, 32616)
eq_utm <- st_transform(eq_sf, 32616)
nearest_idx <- st_nearest_feature(eq_utm, faults_utm)
nearest_dist_m <- st_distance(eq_utm, faults_utm[nearest_idx, ], by_element = TRUE)
result <- eq_clean %>%
mutate(
quake_id = row_number(),
nearest_fault_row = nearest_idx,
dist_to_fault_m = as.numeric(nearest_dist_m)
)
resultFaultAttached <- result %>%
left_join(
faults %>% st_drop_geometry() %>% mutate(nearest_fault_row = row_number()),
by = "nearest_fault_row"
)
resultFaultAttached
## # A tibble: 182 × 61
## `Search Parameters` Year Mo Dy Hr Mn Sec Tsu Vol
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 <NA> 1565 8 30 NA NA NA NA 3580
## 2 <NA> 1566 5 NA NA NA NA NA NA
## 3 <NA> 1575 6 2 NA NA NA NA NA
## 4 <NA> 1576 NA NA NA NA NA NA NA
## 5 <NA> 1579 3 16 NA NA NA 5646 NA
## 6 <NA> 1581 NA NA NA NA NA NA NA
## 7 <NA> 1586 NA NA NA NA NA NA NA
## 8 <NA> 1594 4 21 NA NA NA NA NA
## 9 <NA> 1621 5 2 NA NA NA NA NA
## 10 <NA> 1648 NA NA NA NA NA NA NA
## # ℹ 172 more rows
## # ℹ 52 more variables: `Location Name` <chr>, Latitude <dbl>, Longitude <dbl>,
## # `Focal Depth (km)` <dbl>, Mag <dbl>, `MMI Int` <dbl>, Deaths <dbl>,
## # `Death Description` <dbl>, Missing <dbl>, `Missing Description` <dbl>,
## # Injuries <dbl>, `Injuries Description` <dbl>, `Damage ($Mil)` <dbl>,
## # `Damage Description` <dbl>, `Houses Destroyed` <dbl>,
## # `Houses Destroyed Description` <dbl>, `Houses Damaged` <dbl>, …
Quick check
summary(result$dist_to_fault_m)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 136.1 8700.9 17419.2 30471.5 43028.9 252665.2
head(result %>% select(quake_id, nearest_fault_row, dist_to_fault_m))
## # A tibble: 6 × 3
## quake_id nearest_fault_row dist_to_fault_m
## <int> <int> <dbl>
## 1 1 31 8701.
## 2 2 31 8701.
## 3 3 93 16319.
## 4 4 31 8701.
## 5 5 185 31190.
## 6 6 93 16319.
# drop geometry from faults and keep attributes
faults_attr <- faults %>%
st_drop_geometry() %>%
mutate(nearest_fault_row = row_number())
# combine earthquake + fault data
reg_df <- result %>%
left_join(faults_attr, by = "nearest_fault_row")
LM: Magnitude vs. Distance
reg_simple <- result %>%
select(
magnitude = all_of(MAG_COL),
dist_to_fault_m
) %>%
filter(
!is.na(magnitude),
!is.na(dist_to_fault_m)
) %>%
mutate(
dist_km = dist_to_fault_m / 1000
)
nrow(reg_simple)
## [1] 139
summary(reg_simple)
## magnitude dist_to_fault_m dist_km
## Min. :4.400 Min. : 136.1 Min. : 0.1361
## 1st Qu.:5.750 1st Qu.: 8727.8 1st Qu.: 8.7278
## Median :6.500 Median : 19239.3 Median : 19.2393
## Mean :6.388 Mean : 32371.4 Mean : 32.3714
## 3rd Qu.:7.150 3rd Qu.: 46987.0 3rd Qu.: 46.9870
## Max. :7.900 Max. :252665.2 Max. :252.6652
lm_simple <- lm(magnitude ~ dist_km, data = reg_simple)
summary(lm_simple)
##
## Call:
## lm(formula = magnitude ~ dist_km, data = reg_simple)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.08848 -0.58040 0.06461 0.68967 1.64926
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.236726 0.097438 64.007 <2e-16 ***
## dist_km 0.004688 0.001947 2.408 0.0174 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8761 on 137 degrees of freedom
## Multiple R-squared: 0.0406, Adjusted R-squared: 0.0336
## F-statistic: 5.798 on 1 and 137 DF, p-value: 0.01737
ggplot(reg_simple, aes(x = dist_km, y = magnitude)) +
geom_point(alpha = 0.4) +
geom_smooth(method = "lm", se = TRUE, color = "blue") +
labs(
x = "Distance to nearest fault (km)",
y = "Earthquake magnitude",
title = "Earthquake Magnitude vs Distance to Nearest Fault"
)
## `geom_smooth()` using formula = 'y ~ x'
names(reg_df)
## [1] "Search Parameters" "Year"
## [3] "Mo" "Dy"
## [5] "Hr" "Mn"
## [7] "Sec" "Tsu"
## [9] "Vol" "Location Name"
## [11] "Latitude" "Longitude"
## [13] "Focal Depth (km)" "Mag"
## [15] "MMI Int" "Deaths"
## [17] "Death Description" "Missing"
## [19] "Missing Description" "Injuries"
## [21] "Injuries Description" "Damage ($Mil)"
## [23] "Damage Description" "Houses Destroyed"
## [25] "Houses Destroyed Description" "Houses Damaged"
## [27] "Houses Damaged Description" "Total Deaths"
## [29] "Total Death Description" "Total Missing"
## [31] "Total Missing Description" "Total Injuries"
## [33] "Total Injuries Description" "Total Damage ($Mil)"
## [35] "Total Damage Description" "Total Houses Destroyed"
## [37] "Total Houses Destroyed Description" "Total Houses Damaged"
## [39] "Total Houses Damaged Description" "quake_id"
## [41] "nearest_fault_row" "dist_to_fault_m"
## [43] "ogc_fid" "accuracy"
## [45] "average_dip" "is_active"
## [47] "name" "dip_dir"
## [49] "downthrown_side_id" "last_movement"
## [51] "net_slip_rate" "strike_slip_rate"
## [53] "vert_slip_rate" "fs_name"
## [55] "slip_type" "average_rake"
## [57] "reference" "exposure_quality"
## [59] "epistemic_quality" "shortening_rate"
## [61] "notes"
reg_joint <- reg_df %>%
select(
magnitude = Mag,
depth_km = `Focal Depth (km)`,
dist_m = dist_to_fault_m
) %>%
filter(
!is.na(magnitude),
!is.na(depth_km),
!is.na(dist_m)
) %>%
mutate(
dist_km = dist_m / 1000
)
nrow(reg_joint)
## [1] 106
summary(reg_joint)
## magnitude depth_km dist_m dist_km
## Min. :4.400 Min. : 1.00 Min. : 136.1 Min. : 0.1361
## 1st Qu.:5.600 1st Qu.: 10.50 1st Qu.: 8771.9 1st Qu.: 8.7719
## Median :6.400 Median : 28.00 Median : 21426.3 Median : 21.4263
## Mean :6.281 Mean : 34.38 Mean : 35652.1 Mean : 35.6521
## 3rd Qu.:6.975 3rd Qu.: 44.50 3rd Qu.: 54362.2 3rd Qu.: 54.3622
## Max. :7.900 Max. :135.00 Max. :252665.2 Max. :252.6652
lm_joint <- lm(
magnitude ~ depth_km + dist_km,
data = reg_joint
)
summary(lm_joint)
##
## Call:
## lm(formula = magnitude ~ depth_km + dist_km, data = reg_joint)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.98688 -0.59763 0.00751 0.67077 1.75065
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.839945 0.138967 42.024 <2e-16 ***
## depth_km 0.007451 0.002923 2.549 0.0123 *
## dist_km 0.005190 0.002032 2.554 0.0121 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8514 on 103 degrees of freedom
## Multiple R-squared: 0.1361, Adjusted R-squared: 0.1193
## F-statistic: 8.111 on 2 and 103 DF, p-value: 0.0005357
Adjust for heteroskedastic
install.packages(“sandwich”) install.packages(“lmtest”)
library(sandwich)
library(lmtest)
## Loading required package: zoo
## Warning: package 'zoo' was built under R version 4.5.2
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
coeftest(lm_joint, vcov = vcovHC(lm_joint, type = "HC3"))
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.8399450 0.1366384 42.7401 < 2e-16 ***
## depth_km 0.0074514 0.0029520 2.5242 0.01312 *
## dist_km 0.0051898 0.0028494 1.8214 0.07145 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
So depth_km is statistically significant
# Partial regression (added-variable) plots
par(mfrow = c(1, 2))
# Effect of depth controlling for dist
avPlot_x <- function(fit, xname) {
# residualize y and x against the other predictors
others <- setdiff(names(fit$model)[-1], xname)
y_res <- resid(lm(fit$model[[1]] ~ ., data = fit$model[others]))
x_res <- resid(lm(fit$model[[xname]] ~ ., data = fit$model[others]))
plot(x_res, y_res,
xlab = paste0(xname, " (residualized)"),
ylab = "magnitude (residualized)",
main = paste("Added-variable plot:", xname),
pch = 16, col = rgb(0,0,1,0.4))
abline(lm(y_res ~ x_res), lwd = 2, col = "black")
}
avPlot_x(lm_joint, "depth_km")
avPlot_x(lm_joint, "dist_km")
par(mfrow = c(1, 1))
# Create prediction grids
depth_grid <- seq(min(reg_joint$depth_km, na.rm = TRUE),
max(reg_joint$depth_km, na.rm = TRUE),
length.out = 100)
dist_grid <- seq(min(reg_joint$dist_km, na.rm = TRUE),
max(reg_joint$dist_km, na.rm = TRUE),
length.out = 100)
depth_med <- median(reg_joint$depth_km, na.rm = TRUE)
dist_med <- median(reg_joint$dist_km, na.rm = TRUE)
# Predicted vs depth (distance fixed at median)
new_depth <- data.frame(depth_km = depth_grid, dist_km = dist_med)
pred_depth <- predict(lm_joint, newdata = new_depth, interval = "confidence")
plot(depth_grid, pred_depth[, "fit"], type = "l", lwd = 2,
xlab = "Depth (km)", ylab = "Predicted magnitude",
main = paste0("Predicted magnitude vs depth (dist_km = ", round(dist_med,1), ")"))
lines(depth_grid, pred_depth[, "lwr"], lty = 2)
lines(depth_grid, pred_depth[, "upr"], lty = 2)
# Predicted vs distance (depth fixed at median)
new_dist <- data.frame(depth_km = depth_med, dist_km = dist_grid)
pred_dist <- predict(lm_joint, newdata = new_dist, interval = "confidence")
plot(dist_grid, pred_dist[, "fit"], type = "l", lwd = 2,
xlab = "Distance to nearest fault (km)", ylab = "Predicted magnitude",
main = paste0("Predicted magnitude vs distance (depth_km = ", round(depth_med,1), ")"))
lines(dist_grid, pred_dist[, "lwr"], lty = 2)
lines(dist_grid, pred_dist[, "upr"], lty = 2)
library(dplyr)
library(sf)
# Fault attributes table with a join key
faults_attr <- faults %>%
st_drop_geometry() %>%
mutate(nearest_fault_row = row_number())
# Count earthquakes per fault (from your nearest-fault assignment)
eq_counts <- reg_df %>% # or use `result` if that's the one containing nearest_fault_row
filter(!is.na(nearest_fault_row)) %>%
count(nearest_fault_row, name = "n_quakes") %>%
arrange(desc(n_quakes))
# Join counts back onto fault attributes
faults_ranked <- eq_counts %>%
left_join(faults_attr, by = "nearest_fault_row") %>%
arrange(desc(n_quakes))
faults_with_counts <- faults_attr %>%
left_join(eq_counts, by = "nearest_fault_row") %>%
mutate(n_quakes = ifelse(is.na(n_quakes), 0L, n_quakes))
# Slip type comparison
faults_with_counts %>%
group_by(slip_type) %>%
summarise(
n_faults = n(),
mean_quakes = mean(n_quakes),
median_quakes = median(n_quakes),
total_quakes = sum(n_quakes)
) %>%
arrange(desc(mean_quakes))
## # A tibble: 16 × 5
## slip_type n_faults mean_quakes median_quakes total_quakes
## <chr> <int> <dbl> <dbl> <int>
## 1 Subduction Thrust 1 19 19 19
## 2 Dextral 15 4.67 4 70
## 3 Sinistral Normal 1 2 2 2
## 4 Dextral-Normal 13 1.23 0 16
## 5 <NA> 7 0.857 0 6
## 6 Sinistral-Reverse 5 0.6 0 3
## 7 Normal 69 0.551 0 38
## 8 Thrust 21 0.381 0 8
## 9 Normal-Dextral 4 0.25 0 1
## 10 Sinistral Transform 9 0.222 0 2
## 11 Sinistral-Normal 10 0.2 0 2
## 12 Sinistral 69 0.174 0 12
## 13 Normal-Sinistral 8 0.125 0 1
## 14 Reverse 25 0.08 0 2
## 15 Reverse-Sinistral 1 0 0 0
## 16 Spreading Ridge 1 0 0 0
# Is_Active
faults_with_counts %>%
group_by(is_active) %>%
summarise(
n_faults = n(),
mean_quakes = mean(n_quakes),
median_quakes = median(n_quakes),
total_quakes = sum(n_quakes)
) %>%
arrange(desc(mean_quakes))
## # A tibble: 3 × 5
## is_active n_faults mean_quakes median_quakes total_quakes
## <int> <int> <dbl> <int> <int>
## 1 1 149 1.15 0 171
## 2 NA 69 0.101 0 7
## 3 2 41 0.0976 0 4
parse_tuple_first_chr <- function(x) {
out <- rep(NA_real_, length(x))
idx <- which(!is.na(x))
if (length(idx) == 0) return(out)
s <- gsub("[()\\s]", "", x[idx])
out[idx] <- vapply(strsplit(s, ",", fixed = TRUE), function(parts) {
parts <- parts[parts != ""]
nums <- suppressWarnings(as.numeric(parts))
nums <- nums[!is.na(nums)]
if (length(nums) >= 1) nums[1] else NA_real_
}, numeric(1))
out
}
faults_num <- faults_with_counts %>%
mutate(
dip_pref = parse_tuple_first_chr(average_dip),
net_pref = parse_tuple_first_chr(net_slip_rate),
strike_pref = parse_tuple_first_chr(strike_slip_rate),
vert_pref = parse_tuple_first_chr(vert_slip_rate),
short_pref = parse_tuple_first_chr(shortening_rate)
)
# Simple correlations with quake counts
cor(faults_num$n_quakes, faults_num$strike_pref, use = "complete.obs")
## [1] 0.1090592
cor(faults_num$n_quakes, faults_num$net_pref, use = "complete.obs")
## [1] 0.5881437
cor(faults_num$n_quakes, faults_num$vert_pref, use = "complete.obs")
## [1] 0.5303874
cor(faults_num$n_quakes, faults_num$dip_pref, use = "complete.obs")
## [1] 0.07939976
cor(faults_num$n_quakes, faults_num$short_pref, use = "complete.obs")
## [1] 0.7344656
pca_df <- faults_num %>%
select(
strike_pref,
short_pref,
dip_pref
) %>%
filter(complete.cases(.))
pca_fit <- prcomp(pca_df, scale. = TRUE)
summary(pca_fit)
## Importance of components:
## PC1 PC2 PC3
## Standard deviation 1.507 0.8538 7.841e-17
## Proportion of Variance 0.757 0.2430 0.000e+00
## Cumulative Proportion 0.757 1.0000 1.000e+00
pca_fit$rotation
## PC1 PC2 PC3
## strike_pref 0.5692008 0.60202852 -0.5599751
## short_pref -0.6624806 -0.06755787 -0.7460264
## dip_pref 0.4869599 -0.79561146 -0.3603782