1. Load libraries, load dataset, set randomization seed.
## R markdown link: http://rpubs.com/jasohosk/1425924

## Load libraries
packages <- c("dplyr",
              "ggplot2",
              "GGally",
              "psych",
              "corrplot",
              "boot",
              "tidyr")

installed <- packages %in% rownames(installed.packages())
if (any(!installed)) {
  install.packages(packages[!installed])
}
lapply(packages, library, character.only = TRUE)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## corrplot 0.95 loaded
## 
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
## 
##     logit
## [[1]]
## [1] "dplyr"     "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [7] "methods"   "base"     
## 
## [[2]]
## [1] "ggplot2"   "dplyr"     "stats"     "graphics"  "grDevices" "utils"    
## [7] "datasets"  "methods"   "base"     
## 
## [[3]]
##  [1] "GGally"    "ggplot2"   "dplyr"     "stats"     "graphics"  "grDevices"
##  [7] "utils"     "datasets"  "methods"   "base"     
## 
## [[4]]
##  [1] "psych"     "GGally"    "ggplot2"   "dplyr"     "stats"     "graphics" 
##  [7] "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[5]]
##  [1] "corrplot"  "psych"     "GGally"    "ggplot2"   "dplyr"     "stats"    
##  [7] "graphics"  "grDevices" "utils"     "datasets"  "methods"   "base"     
## 
## [[6]]
##  [1] "boot"      "corrplot"  "psych"     "GGally"    "ggplot2"   "dplyr"    
##  [7] "stats"     "graphics"  "grDevices" "utils"     "datasets"  "methods"  
## [13] "base"     
## 
## [[7]]
##  [1] "tidyr"     "boot"      "corrplot"  "psych"     "GGally"    "ggplot2"  
##  [7] "dplyr"     "stats"     "graphics"  "grDevices" "utils"     "datasets" 
## [13] "methods"   "base"
## Import dataset
url <- "/Users/jasonhoskin/Desktop/final-combined-table.csv"
df <- read.csv(url)

## Remove unnecessary variables
rm(installed, packages, url)

## Filter out rows with any NAs
df1 <- df |> tidyr::drop_na(caregiver.burden, ipaq.continuous, network.size, 
                             effective.size, mean.degree, ego.density, 
                             work.hours.per.week, loneliness, qol)

## Set seed
set.seed(123)
  1. Generate scatterplot matrices for all continuous variable using Spearman correlations.
continuous_vars <- df1 |>
  select(
    caregiver.burden,
    ipaq.continuous,
    network.size,
    effective.size,
    mean.degree,
    ego.density,
    work.hours.per.week,
    loneliness,
    qol
  )

## Plot distributions with scatterplots and correlations.
ggpairs(
  continuous_vars,
  lower = list(
    continuous = wrap(
      "smooth",
      method = "loess",
      se = FALSE,
      alpha = 0.6,
      size = 1.5
    )
  ),
  upper = list(continuous = wrap(
    "cor", method = "spearman", size = 3
  )),
  diag  = list(continuous = wrap("densityDiag", alpha = 0.5))
) +
  theme_bw(base_size = 9) +
  theme(axis.text = element_text(size = 6), strip.text = element_text(size = 7)) +
  labs(title = "Scatterplot Matrix: All Continuous Variables", subtitle = "Spearman correlations (upper); LOESS smooth (lower); density (diagonal)")
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties

  1. Generate a focal plot that focuses on caregiver burden versus the other continuous variables.
burden_focal <- df1 |>
  select(
    caregiver.burden,
    ipaq.continuous,
    network.size,
    effective.size,
    mean.degree,
    ego.density,
    work.hours.per.week,
    loneliness,
    qol
  ) |>
  pivot_longer(
    cols = -caregiver.burden,
    names_to = "variable",
    values_to = "value"
  )

ggplot(burden_focal, aes(x = caregiver.burden, y = value)) +
  geom_point(alpha = 0.7, size = 2) +
  geom_smooth(
    method = "loess",
    se = TRUE,
    alpha = 0.2,
    color = "steelblue"
  ) +
  facet_wrap(~ variable, scales = "free_y", ncol = 3) +
  theme_bw() +
  labs(title = "Caregiver Burden vs. All Continuous Variables", x = "Caregiver Burden (Zarit)", y = "Value")
## `geom_smooth()` using formula = 'y ~ x'

  1. Use “psych” package to generate histograms with correlations sized by magnitude and LOESS curves on scatterplots.
pairs.panels(
  continuous_vars,
  method = "spearman",
  hist.col = "lightblue",
  density = TRUE,
  ellipses = FALSE,
  smooth = TRUE,
  lm = FALSE,
  cex.cor = 1,
  stars = FALSE
)

  1. Generate correlation heatmap.
cor_matrix <- cor(continuous_vars, method = "spearman", use = "complete.obs")

corrplot(
  cor_matrix,
  method = "color",
  type = "upper",
  order = "hclust",
  addCoef.col = "black",
  number.cex = 0.7,
  tl.col = "black",
  tl.srt = 45,
  diag = FALSE,
  title = "Spearman Correlation Heatmap",
  mar = c(0, 0, 2, 0)
)

  1. Assess network measures via a zoomed-in matrix.
network_vars <- df1 |>
  select(
    network.size,
    effective.size,
    mean.degree,
    ego.density,
    caregiver.burden,
    ipaq.continuous
  )

ggpairs(
  network_vars,
  lower = list(continuous = wrap(
    "smooth",
    method = "lm",
    se = TRUE,
    alpha = 0.5
  )),
  upper = list(continuous = wrap("cor", method = "spearman")),
  diag  = list(continuous = "densityDiag")
) +
  theme_bw() +
  labs(title = "Network Structure Measures + Burden + Activity", subtitle = "Linear fit shown to assess redundancy")
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties

  1. Generate bootstrap confidence intervals for all correlations to have bias-corrected accelerted (BCa) intervals.
# Function to compute Spearman correlation on a bootstrap sample
boot_spearman <- function(data, indices) {
  d <- data[indices, ]
  cor(d[[1]], d[[2]], method = "spearman")
}

# Function to run bootstrap for a pair of variables with BCa intervals (falls back to percentile if BCa fails)
run_boot_cor <- function(data, var1, var2, R = 10000) {
  pair_df <- data[, c(var1, var2)]
  b <- boot(pair_df, boot_spearman, R = R)
  ci <- tryCatch(
    boot.ci(b, type = "bca"),
    error = function(e)
      boot.ci(b, type = "perc")
  )
  ci_type <- if (!is.null(ci$bca))
    "BCa"
  else
    "percentile"
  ci_vals <- if (!is.null(ci$bca))
    ci$bca[4:5]
  else
    ci$percent[4:5]
  data.frame(
    var1 = var1,
    var2 = var2,
    rho  = round(b$t0, 3),
    lower = round(ci_vals[1], 3),
    upper = round(ci_vals[2], 3),
    ci_type = ci_type
  )
}

# Build all unique pairs of continuous variables
var_names <- names(continuous_vars)
pairs_grid <- t(combn(var_names, 2))

# Run bootstrap for every pair
boot_results <- do.call(rbind, lapply(seq_len(nrow(pairs_grid)), function(i) {
  run_boot_cor(continuous_vars, pairs_grid[i, 1], pairs_grid[i, 2], R = 10000)
}))

# Sort by absolute correlation strength
boot_results <- boot_results[order(-abs(boot_results$rho)), ]
print(boot_results, row.names = FALSE)
##                 var1                var2    rho  lower  upper ci_type
##         network.size      effective.size  0.893  0.623  0.965     BCa
##       effective.size         ego.density -0.845 -0.961 -0.652     BCa
##           loneliness                 qol -0.693 -0.876 -0.316     BCa
##         network.size         ego.density -0.642 -0.842 -0.306     BCa
##     caregiver.burden     ipaq.continuous -0.516 -0.792 -0.135     BCa
##         network.size         mean.degree  0.492  0.060  0.771     BCa
##     caregiver.burden          loneliness  0.472 -0.096  0.763     BCa
##     caregiver.burden work.hours.per.week -0.370 -0.769  0.147     BCa
##         network.size          loneliness  0.314 -0.194  0.697     BCa
##          mean.degree work.hours.per.week  0.303 -0.277  0.724     BCa
##       effective.size          loneliness  0.300 -0.160  0.669     BCa
##     caregiver.burden         ego.density -0.299 -0.683  0.167     BCa
##      ipaq.continuous work.hours.per.week  0.257 -0.357  0.714     BCa
##          ego.density work.hours.per.week  0.257 -0.208  0.639     BCa
##      ipaq.continuous         ego.density  0.242 -0.307  0.662     BCa
##      ipaq.continuous          loneliness -0.213 -0.623  0.266     BCa
##         network.size                 qol -0.213 -0.615  0.279     BCa
##       effective.size                 qol -0.183 -0.573  0.323     BCa
##       effective.size         mean.degree  0.171 -0.306  0.602     BCa
##     caregiver.burden      effective.size  0.170 -0.378  0.604     BCa
##      ipaq.continuous         mean.degree  0.161 -0.424  0.648     BCa
##     caregiver.burden        network.size  0.152 -0.392  0.606     BCa
##     caregiver.burden         mean.degree  0.148 -0.355  0.603     BCa
##          mean.degree         ego.density  0.141 -0.319  0.565     BCa
##     caregiver.burden                 qol -0.132 -0.560  0.353     BCa
##         network.size work.hours.per.week  0.125 -0.422  0.628     BCa
##          mean.degree          loneliness  0.123 -0.410  0.569     BCa
##          mean.degree                 qol -0.116 -0.484  0.379     BCa
##          ego.density          loneliness -0.107 -0.568  0.412     BCa
##      ipaq.continuous      effective.size -0.102 -0.583  0.432     BCa
##  work.hours.per.week                 qol  0.100 -0.359  0.556     BCa
##      ipaq.continuous        network.size  0.085 -0.440  0.551     BCa
##  work.hours.per.week          loneliness -0.055 -0.522  0.406     BCa
##          ego.density                 qol  0.049 -0.437  0.521     BCa
##      ipaq.continuous                 qol  0.045 -0.381  0.471     BCa
##       effective.size work.hours.per.week -0.024 -0.542  0.542     BCa
# Save full table
write.csv(boot_results, "bootstrap_correlations.csv", row.names = FALSE)

# ---- Focal bootstrap: burden vs. each variable, visualized ----
# Forest-plot-style display of burden's correlations with
# everything else, with bootstrap CIs.

burden_boot <- boot_results |>
  filter(var1 == "caregiver.burden" | var2 == "caregiver.burden") |>
  mutate(other_var = ifelse(var1 == "caregiver.burden", var2, var1)) |>
  arrange(rho)

burden_boot$other_var <- factor(burden_boot$other_var, levels = burden_boot$other_var)

ggplot(burden_boot, aes(x = rho, y = other_var)) +
  geom_vline(xintercept = 0,
             linetype = "dashed",
             color = "gray50") +
  geom_errorbar(aes(xmin = lower, xmax = upper), width = 0.2, orientation = "y") + 
  geom_point(size = 3, color = "steelblue") +
  theme_bw() +
  labs(
    title = "Caregiver Burden: Bootstrapped Spearman Correlations",
    subtitle = "Point estimates with 95% BCa bootstrap CIs (R = 10,000)",
    x = expression(Spearman ~ rho),
    y = NULL
  )

# ---- Focal bootstrap: IPAQ vs. each variable ----
ipaq_boot <- boot_results |>
  filter(var1 == "ipaq.continuous" | var2 == "ipaq.continuous") |>
  mutate(other_var = ifelse(var1 == "ipaq.continuous", var2, var1)) |>
  arrange(rho)

ipaq_boot$other_var <- factor(ipaq_boot$other_var, levels = ipaq_boot$other_var)

ggplot(ipaq_boot, aes(x = rho, y = other_var)) +
  geom_vline(xintercept = 0,
             linetype = "dashed",
             color = "gray50") +
  geom_errorbarh(aes(xmin = lower, xmax = upper), height = 0.2) +
  geom_point(size = 3, color = "darkorange") +
  theme_bw() +
  labs(
    title = "Physical Activity (IPAQ): Bootstrapped Spearman Correlations",
    subtitle = "Point estimates with 95% BCa bootstrap CIs (R = 10,000)",
    x = expression(Spearman ~ rho),
    y = NULL
  )
## Warning: `geom_errorbarh()` was deprecated in ggplot2 4.0.0.
## ℹ Please use the `orientation` argument of `geom_errorbar()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `height` was translated to `width`.