- Load libraries, load dataset, set randomization seed.
## R markdown link: http://rpubs.com/jasohosk/1425924
## Load libraries
packages <- c("dplyr",
"ggplot2",
"GGally",
"psych",
"corrplot",
"boot",
"tidyr")
installed <- packages %in% rownames(installed.packages())
if (any(!installed)) {
install.packages(packages[!installed])
}
lapply(packages, library, character.only = TRUE)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## corrplot 0.95 loaded
##
## Attaching package: 'boot'
## The following object is masked from 'package:psych':
##
## logit
## [[1]]
## [1] "dplyr" "stats" "graphics" "grDevices" "utils" "datasets"
## [7] "methods" "base"
##
## [[2]]
## [1] "ggplot2" "dplyr" "stats" "graphics" "grDevices" "utils"
## [7] "datasets" "methods" "base"
##
## [[3]]
## [1] "GGally" "ggplot2" "dplyr" "stats" "graphics" "grDevices"
## [7] "utils" "datasets" "methods" "base"
##
## [[4]]
## [1] "psych" "GGally" "ggplot2" "dplyr" "stats" "graphics"
## [7] "grDevices" "utils" "datasets" "methods" "base"
##
## [[5]]
## [1] "corrplot" "psych" "GGally" "ggplot2" "dplyr" "stats"
## [7] "graphics" "grDevices" "utils" "datasets" "methods" "base"
##
## [[6]]
## [1] "boot" "corrplot" "psych" "GGally" "ggplot2" "dplyr"
## [7] "stats" "graphics" "grDevices" "utils" "datasets" "methods"
## [13] "base"
##
## [[7]]
## [1] "tidyr" "boot" "corrplot" "psych" "GGally" "ggplot2"
## [7] "dplyr" "stats" "graphics" "grDevices" "utils" "datasets"
## [13] "methods" "base"
## Import dataset
url <- "/Users/jasonhoskin/Desktop/final-combined-table.csv"
df <- read.csv(url)
## Remove unnecessary variables
rm(installed, packages, url)
## Filter out rows with any NAs
df1 <- df |> tidyr::drop_na(caregiver.burden, ipaq.continuous, network.size,
effective.size, mean.degree, ego.density,
work.hours.per.week, loneliness, qol)
## Set seed
set.seed(123)
- Generate scatterplot matrices for all continuous variable using
Spearman correlations.
continuous_vars <- df1 |>
select(
caregiver.burden,
ipaq.continuous,
network.size,
effective.size,
mean.degree,
ego.density,
work.hours.per.week,
loneliness,
qol
)
## Plot distributions with scatterplots and correlations.
ggpairs(
continuous_vars,
lower = list(
continuous = wrap(
"smooth",
method = "loess",
se = FALSE,
alpha = 0.6,
size = 1.5
)
),
upper = list(continuous = wrap(
"cor", method = "spearman", size = 3
)),
diag = list(continuous = wrap("densityDiag", alpha = 0.5))
) +
theme_bw(base_size = 9) +
theme(axis.text = element_text(size = 6), strip.text = element_text(size = 7)) +
labs(title = "Scatterplot Matrix: All Continuous Variables", subtitle = "Spearman correlations (upper); LOESS smooth (lower); density (diagonal)")
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties

- Generate a focal plot that focuses on caregiver burden versus the
other continuous variables.
burden_focal <- df1 |>
select(
caregiver.burden,
ipaq.continuous,
network.size,
effective.size,
mean.degree,
ego.density,
work.hours.per.week,
loneliness,
qol
) |>
pivot_longer(
cols = -caregiver.burden,
names_to = "variable",
values_to = "value"
)
ggplot(burden_focal, aes(x = caregiver.burden, y = value)) +
geom_point(alpha = 0.7, size = 2) +
geom_smooth(
method = "loess",
se = TRUE,
alpha = 0.2,
color = "steelblue"
) +
facet_wrap(~ variable, scales = "free_y", ncol = 3) +
theme_bw() +
labs(title = "Caregiver Burden vs. All Continuous Variables", x = "Caregiver Burden (Zarit)", y = "Value")
## `geom_smooth()` using formula = 'y ~ x'

- Use “psych” package to generate histograms with correlations sized
by magnitude and LOESS curves on scatterplots.
pairs.panels(
continuous_vars,
method = "spearman",
hist.col = "lightblue",
density = TRUE,
ellipses = FALSE,
smooth = TRUE,
lm = FALSE,
cex.cor = 1,
stars = FALSE
)

- Generate correlation heatmap.
cor_matrix <- cor(continuous_vars, method = "spearman", use = "complete.obs")
corrplot(
cor_matrix,
method = "color",
type = "upper",
order = "hclust",
addCoef.col = "black",
number.cex = 0.7,
tl.col = "black",
tl.srt = 45,
diag = FALSE,
title = "Spearman Correlation Heatmap",
mar = c(0, 0, 2, 0)
)

- Assess network measures via a zoomed-in matrix.
network_vars <- df1 |>
select(
network.size,
effective.size,
mean.degree,
ego.density,
caregiver.burden,
ipaq.continuous
)
ggpairs(
network_vars,
lower = list(continuous = wrap(
"smooth",
method = "lm",
se = TRUE,
alpha = 0.5
)),
upper = list(continuous = wrap("cor", method = "spearman")),
diag = list(continuous = "densityDiag")
) +
theme_bw() +
labs(title = "Network Structure Measures + Burden + Activity", subtitle = "Linear fit shown to assess redundancy")
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties
## Warning in cor.test.default(x, y, method = method): Cannot compute exact
## p-value with ties

- Generate bootstrap confidence intervals for all correlations to have
bias-corrected accelerted (BCa) intervals.
# Function to compute Spearman correlation on a bootstrap sample
boot_spearman <- function(data, indices) {
d <- data[indices, ]
cor(d[[1]], d[[2]], method = "spearman")
}
# Function to run bootstrap for a pair of variables with BCa intervals (falls back to percentile if BCa fails)
run_boot_cor <- function(data, var1, var2, R = 10000) {
pair_df <- data[, c(var1, var2)]
b <- boot(pair_df, boot_spearman, R = R)
ci <- tryCatch(
boot.ci(b, type = "bca"),
error = function(e)
boot.ci(b, type = "perc")
)
ci_type <- if (!is.null(ci$bca))
"BCa"
else
"percentile"
ci_vals <- if (!is.null(ci$bca))
ci$bca[4:5]
else
ci$percent[4:5]
data.frame(
var1 = var1,
var2 = var2,
rho = round(b$t0, 3),
lower = round(ci_vals[1], 3),
upper = round(ci_vals[2], 3),
ci_type = ci_type
)
}
# Build all unique pairs of continuous variables
var_names <- names(continuous_vars)
pairs_grid <- t(combn(var_names, 2))
# Run bootstrap for every pair
boot_results <- do.call(rbind, lapply(seq_len(nrow(pairs_grid)), function(i) {
run_boot_cor(continuous_vars, pairs_grid[i, 1], pairs_grid[i, 2], R = 10000)
}))
# Sort by absolute correlation strength
boot_results <- boot_results[order(-abs(boot_results$rho)), ]
print(boot_results, row.names = FALSE)
## var1 var2 rho lower upper ci_type
## network.size effective.size 0.893 0.623 0.965 BCa
## effective.size ego.density -0.845 -0.961 -0.652 BCa
## loneliness qol -0.693 -0.876 -0.316 BCa
## network.size ego.density -0.642 -0.842 -0.306 BCa
## caregiver.burden ipaq.continuous -0.516 -0.792 -0.135 BCa
## network.size mean.degree 0.492 0.060 0.771 BCa
## caregiver.burden loneliness 0.472 -0.096 0.763 BCa
## caregiver.burden work.hours.per.week -0.370 -0.769 0.147 BCa
## network.size loneliness 0.314 -0.194 0.697 BCa
## mean.degree work.hours.per.week 0.303 -0.277 0.724 BCa
## effective.size loneliness 0.300 -0.160 0.669 BCa
## caregiver.burden ego.density -0.299 -0.683 0.167 BCa
## ipaq.continuous work.hours.per.week 0.257 -0.357 0.714 BCa
## ego.density work.hours.per.week 0.257 -0.208 0.639 BCa
## ipaq.continuous ego.density 0.242 -0.307 0.662 BCa
## ipaq.continuous loneliness -0.213 -0.623 0.266 BCa
## network.size qol -0.213 -0.615 0.279 BCa
## effective.size qol -0.183 -0.573 0.323 BCa
## effective.size mean.degree 0.171 -0.306 0.602 BCa
## caregiver.burden effective.size 0.170 -0.378 0.604 BCa
## ipaq.continuous mean.degree 0.161 -0.424 0.648 BCa
## caregiver.burden network.size 0.152 -0.392 0.606 BCa
## caregiver.burden mean.degree 0.148 -0.355 0.603 BCa
## mean.degree ego.density 0.141 -0.319 0.565 BCa
## caregiver.burden qol -0.132 -0.560 0.353 BCa
## network.size work.hours.per.week 0.125 -0.422 0.628 BCa
## mean.degree loneliness 0.123 -0.410 0.569 BCa
## mean.degree qol -0.116 -0.484 0.379 BCa
## ego.density loneliness -0.107 -0.568 0.412 BCa
## ipaq.continuous effective.size -0.102 -0.583 0.432 BCa
## work.hours.per.week qol 0.100 -0.359 0.556 BCa
## ipaq.continuous network.size 0.085 -0.440 0.551 BCa
## work.hours.per.week loneliness -0.055 -0.522 0.406 BCa
## ego.density qol 0.049 -0.437 0.521 BCa
## ipaq.continuous qol 0.045 -0.381 0.471 BCa
## effective.size work.hours.per.week -0.024 -0.542 0.542 BCa
# Save full table
write.csv(boot_results, "bootstrap_correlations.csv", row.names = FALSE)
# ---- Focal bootstrap: burden vs. each variable, visualized ----
# Forest-plot-style display of burden's correlations with
# everything else, with bootstrap CIs.
burden_boot <- boot_results |>
filter(var1 == "caregiver.burden" | var2 == "caregiver.burden") |>
mutate(other_var = ifelse(var1 == "caregiver.burden", var2, var1)) |>
arrange(rho)
burden_boot$other_var <- factor(burden_boot$other_var, levels = burden_boot$other_var)
ggplot(burden_boot, aes(x = rho, y = other_var)) +
geom_vline(xintercept = 0,
linetype = "dashed",
color = "gray50") +
geom_errorbar(aes(xmin = lower, xmax = upper), width = 0.2, orientation = "y") +
geom_point(size = 3, color = "steelblue") +
theme_bw() +
labs(
title = "Caregiver Burden: Bootstrapped Spearman Correlations",
subtitle = "Point estimates with 95% BCa bootstrap CIs (R = 10,000)",
x = expression(Spearman ~ rho),
y = NULL
)

# ---- Focal bootstrap: IPAQ vs. each variable ----
ipaq_boot <- boot_results |>
filter(var1 == "ipaq.continuous" | var2 == "ipaq.continuous") |>
mutate(other_var = ifelse(var1 == "ipaq.continuous", var2, var1)) |>
arrange(rho)
ipaq_boot$other_var <- factor(ipaq_boot$other_var, levels = ipaq_boot$other_var)
ggplot(ipaq_boot, aes(x = rho, y = other_var)) +
geom_vline(xintercept = 0,
linetype = "dashed",
color = "gray50") +
geom_errorbarh(aes(xmin = lower, xmax = upper), height = 0.2) +
geom_point(size = 3, color = "darkorange") +
theme_bw() +
labs(
title = "Physical Activity (IPAQ): Bootstrapped Spearman Correlations",
subtitle = "Point estimates with 95% BCa bootstrap CIs (R = 10,000)",
x = expression(Spearman ~ rho),
y = NULL
)
## Warning: `geom_errorbarh()` was deprecated in ggplot2 4.0.0.
## ℹ Please use the `orientation` argument of `geom_errorbar()` instead.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `height` was translated to `width`.
