This is a reusable exploration + modeling workflow for HRS 2010 Psychosocial (LB) data, focused on life satisfaction.
It explicitly:
Loads & cleans composites (Life Satisfaction, Mastery, Constraints, Affect, Loneliness) based on the HRS LB codebook.
Applies ≥50% completion rules to compute robust scale means.
Reverse-codes items where required to ensure intuitive directions.
Summarizes distributions for quick data checks.
Runs simple standardized regressions for each research question.
Provides clear visuals (scatterplots + trend lines) for interpretation.
Translates outputs into TL;DR insights in plain language.
Resources:
PDF codebook (2006–2010 LB) — reference for items, scales, and response options.
Online 2010 LB codebook (Section LB index):
https://hrs.isr.umich.edu/sites/default/files/meta/2010/core/codebook/h10lb_ri.htm
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the SPSS extract; change the file name here if the data lives elsewhere.
dat_raw <- haven::read_sav(file = "HRS2010_psych.sav")
# Variable names are lowercased so the lowercase "^mlb..." patterns can match.
dat_raw <- stats::setNames(dat_raw, tolower(names(dat_raw)))
# Report the number of rows, then the number of columns.
nrow(dat_raw)
ncol(dat_raw)
## [1] 8325
## [1] 327
# HRS LB variable names usually follow: module prefix "mlb" + question number
# + item letter. Every call below passes both name patterns and label
# fragments; the label fragments are intended as a fallback for file builds
# whose variable names differ (grab_by_name_or_label() is defined outside this
# section).

# Life Satisfaction (SWLS): Q03 a–e, 1–7 agreement scale
swls_items <- grab_by_name_or_label(
  dat_raw,
  patterns_name = c(
    "^mlb003a",
    "^mlb003b",
    "^mlb003c",
    "^mlb003d",
    "^mlb003e"
  ),
  patterns_label = c(
    "life is close to ideal",
    "conditions of my life are excellent",
    "satisfied with my life",
    "important things i want",
    "change almost nothing"
  )
)

# Mastery: Q23 a–e, 1–6 agreement scale (higher = more mastery)
mastery_items <- grab_by_name_or_label(
  dat_raw,
  patterns_name = c(
    "^mlb023a",
    "^mlb023b",
    "^mlb023c",
    "^mlb023d",
    "^mlb023e"
  ),
  patterns_label = c(
    "i can do just about anything",
    "find a way to succeed",
    "in my own hands",
    "depends on me",
    "i can do the things"
  )
)

# Constraints: Q22 a–e, 1–6 agreement scale (higher = more perceived
# constraints)
constraints_items <- grab_by_name_or_label(
  dat_raw,
  patterns_name = c(
    "^mlb022a",
    "^mlb022b",
    "^mlb022c",
    "^mlb022d",
    "^mlb022e"
  ),
  patterns_label = c(
    "feel helpless",
    "other people determine",
    "beyond my control",
    "little control",
    "no way i can solve"
  )
)
# Loneliness — Q20 a–k (1=Often, 2=Some, 3=Hardly ever/never; mix of pos/neg
# wording)
lonely_items <- grab_by_name_or_label(
  dat_raw,
  patterns_name = c(
    "^mlb020a", "^mlb020b", "^mlb020c", "^mlb020d", "^mlb020e",
    "^mlb020f", "^mlb020g", "^mlb020h", "^mlb020i", "^mlb020j", "^mlb020k"
  ),
  patterns_label = c(
    "lack companionship", "left out", "isolated", "in tune", "alone",
    "people you can talk to", "people you can turn to",
    "really understand you", "feel close to", "part of a group of friends",
    "a lot in common"
  )
)
# The selection can come back with columns from outside Q20: with this file,
# mlb021d is returned next to the eleven Q20 items (presumably through one of
# the short label fragments). When Q20 columns were found by name, keep only
# those so unrelated items cannot enter the loneliness score. If nothing
# matched by name, the label-based selection is left as returned.
lonely_by_name <- grepl("^mlb020[a-k]", names(lonely_items))
if (any(lonely_by_name)) {
  lonely_items <- lonely_items[lonely_by_name]
}
# Positive/Negative Affect — Q27 a–y (1=Very much ... 5=Not at all)
affect_items <- grab_by_name_or_label(
  dat_raw,
  patterns_name = c("^mlb027"),
  patterns_label = c(
    "afraid", "upset", "determined", "enthusiastic", "guilty", "active",
    "proud", "interested", "scared", "frustrated", "happy", "bored",
    "hostile", "jittery", "ashamed", "attentive", "content", "nervous",
    "sad", "inspired", "hopeful", "alert", "distressed", "calm", "excited"
  )
)
# The single-word label fragments also return columns that are not Q27 items
# (with this file: md116, mlb021d, mlb029b4, mlb030d, ... in addition to
# mlb027a–mlb027y). When Q27 columns were found by name, keep only those so
# the selection really is the affect battery. If nothing matched by name, the
# label-based selection is left as returned.
affect_by_name <- grepl("^mlb027", names(affect_items))
if (any(affect_by_name)) {
  affect_items <- affect_items[affect_by_name]
}
# Print the column names that ended up in each item set, as a named list.
lapply(
  list(
    swls = swls_items,
    mastery = mastery_items,
    constraints = constraints_items,
    loneliness = lonely_items,
    affect = affect_items
  ),
  names
)
## $swls
## [1] "mlb003a" "mlb003b" "mlb003c" "mlb003d" "mlb003e"
##
## $mastery
## [1] "mlb023a" "mlb023b" "mlb023c" "mlb023d" "mlb023e"
##
## $constraints
## [1] "mlb022a" "mlb022b" "mlb022c" "mlb022d" "mlb022e"
##
## $loneliness
## [1] "mlb020a" "mlb020b" "mlb020c" "mlb020d" "mlb020e" "mlb020f" "mlb020g"
## [8] "mlb020h" "mlb020i" "mlb020j" "mlb020k" "mlb021d"
##
## $affect
## [1] "md116" "mlb021d" "mlb027a" "mlb027b" "mlb027c" "mlb027d"
## [7] "mlb027e" "mlb027f" "mlb027g" "mlb027h" "mlb027i" "mlb027j"
## [13] "mlb027k" "mlb027l" "mlb027m" "mlb027n" "mlb027o" "mlb027p"
## [19] "mlb027q" "mlb027r" "mlb027s" "mlb027t" "mlb027u" "mlb027v"
## [25] "mlb027w" "mlb027x" "mlb027y" "mlb029b4" "mlb030d" "mlb033l"
## [31] "mlb033q" "mlb033u" "mlb035c" "mlb041b" "mlb050m" "md113"
### Build clean composites (≥50% items present) — FIXED
# Composite columns are added to `dat`; `dat_raw` itself is left unmodified.
dat <- dat_raw
# Convert every column of an item frame to a plain numeric vector.
#
# df: a data frame (or anything as.data.frame() accepts) holding item columns.
# Returns a data.frame with the same columns, each passed through
# as.character() and then as.numeric(). Values that cannot be read as numbers
# become NA; the coercion warnings are suppressed. An input without columns is
# returned right after the as.data.frame() conversion.
clean_numeric_df <- function(df) {
  items <- as.data.frame(df)
  if (ncol(items) == 0) {
    return(items)
  }
  to_number <- function(column) {
    suppressWarnings(as.numeric(as.character(column)))
  }
  # `items[] <-` replaces the columns in place and keeps the data.frame shell.
  items[] <- lapply(items, to_number)
  items
}
# The three composites below are plain item means with no reverse-coding.
# Each one is only created when at least three of its items were selected.
# row_mean_half() is defined outside this section; going by the section title
# it presumably applies the >=50%-of-items-present rule.

# --- SWLS (higher = more satisfied) ---
if (ncol(swls_items) >= 3) {
  swls_num <- clean_numeric_df(swls_items)
  dat[["swls_mean"]] <- row_mean_half(swls_num)
}

# --- Mastery (higher = more mastery) ---
if (ncol(mastery_items) >= 3) {
  mastery_num <- clean_numeric_df(mastery_items)
  dat[["mastery_mean"]] <- row_mean_half(mastery_num)
}

# --- Constraints (higher = more constraints) ---
if (ncol(constraints_items) >= 3) {
  constraints_num <- clean_numeric_df(constraints_items)
  dat[["constraints_mean"]] <- row_mean_half(constraints_num)
}
# --- Loneliness: reverse 20a,b,c,e, then average the Q20 items ---
# Responses run 1..3, so a reversed item is 4 - x.
#
# The item selection can contain columns from outside Q20 (mlb021d was
# selected with this file). Averaging such a column into the score let
# lonely_mean exceed the 1..3 response range, so the score is restricted to
# the Q20 a–k columns whenever they can be identified by name. If no column
# matches by name, all selected columns are used as before.
lonely_is_q20 <- grepl("^mlb020[a-k]", names(lonely_items))
lonely_keep <- if (any(lonely_is_q20)) {
  lonely_is_q20
} else {
  rep(TRUE, ncol(lonely_items))
}
# The >= 7 items requirement is applied to the columns actually averaged.
if (sum(lonely_keep) >= 7) {
  lon <- clean_numeric_df(lonely_items)[, lonely_keep, drop = FALSE]
  # identify a,b,c,e by name pattern
  rev_idx <- grepl("020a|020b|020c|020e", names(lon))
  # 4 - x already yields NA for missing responses
  lon[rev_idx] <- lapply(lon[rev_idx], function(x) 4 - x)
  dat$lonely_mean <- row_mean_half(lon)
}
# --- Affect: Positive and Negative indices (1=Very much .. 5=Not at all) ---
# Every selected column is flipped with 6 - x so that higher = more affect;
# the positive and negative item sets are then averaged separately.
if (ncol(affect_items) >= 10) {
  af <- clean_numeric_df(affect_items)
  af_rev <- af
  # Missing responses stay NA under 6 - x.
  af_rev[] <- lapply(af, function(item) 6 - item)
  # Positive affect items: Q27 c,d,f,g,h,k,p,q,t,u,v,x,y
  pos_keys <- grepl(
    "(027c|027d|027f|027g|027h|027k|027p|027q|027t|027u|027v|027x|027y)",
    names(af_rev)
  )
  # Negative affect items: Q27 a,b,e,i,j,l,m,n,o,r,s,w
  neg_keys <- grepl(
    "(027a|027b|027e|027i|027j|027l|027m|027n|027o|027r|027s|027w)",
    names(af_rev)
  )
  # An index is only created when at least six of its items are present.
  if (sum(pos_keys) >= 6) {
    dat$posaff_mean <- row_mean_half(af_rev[pos_keys])
  }
  if (sum(neg_keys) >= 6) {
    dat$negaff_mean <- row_mean_half(af_rev[neg_keys])
  }
}
# Analysis frame: just the six composite scores, followed by a summary of each.
an <- dplyr::select(
  dat,
  swls_mean, mastery_mean, constraints_mean,
  lonely_mean, posaff_mean, negaff_mean
)
summary(an)
## swls_mean mastery_mean constraints_mean lonely_mean
## Min. :1.000 Min. :1.00 Min. :1.000 Min. :1.000
## 1st Qu.:3.800 1st Qu.:4.20 1st Qu.:1.200 1st Qu.:1.250
## Median :5.200 Median :5.00 Median :2.000 Median :1.583
## Mean :4.839 Mean :4.75 Mean :2.227 Mean :1.626
## 3rd Qu.:6.000 3rd Qu.:5.60 3rd Qu.:3.000 3rd Qu.:1.917
## Max. :7.000 Max. :6.00 Max. :6.000 Max. :3.333
## NA's :71 NA's :109 NA's :120 NA's :115
## posaff_mean negaff_mean
## Min. :1.000 Min. :1.000
## 1st Qu.:3.000 1st Qu.:1.333
## Median :3.615 Median :1.667
## Mean :3.535 Mean :1.774
## 3rd Qu.:4.154 3rd Qu.:2.083
## Max. :5.000 Max. :5.000
## NA's :147 NA's :144
# Stack the composites into (measure, value) pairs so that one histogram per
# composite can be drawn; each panel gets its own axes (scales = "free").
desc_long <- pivot_longer(
  an,
  cols = everything(),
  names_to = "measure",
  values_to = "value"
)
ggplot(data = desc_long, mapping = aes(x = value)) +
  geom_histogram(bins = 30) +
  facet_wrap(facets = ~ measure, scales = "free") +
  labs(title = "Distributions of composites", x = NULL, y = "Count")
## Warning: Removed 706 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Correlations among the composites, computed on the rows that have all six
# scores present, rounded to two decimals.
complete_an <- drop_na(an)
cor_mat <- round(cor(complete_an), digits = 2)
print(cor_mat)
## swls_mean mastery_mean constraints_mean lonely_mean
## swls_mean 1.00 0.38 -0.36 -0.45
## mastery_mean 0.38 1.00 -0.43 -0.34
## constraints_mean -0.36 -0.43 1.00 0.48
## lonely_mean -0.45 -0.34 0.48 1.00
## posaff_mean 0.45 0.43 -0.51 -0.53
## negaff_mean -0.43 -0.33 0.44 0.48
## posaff_mean negaff_mean
## swls_mean 0.45 -0.43
## mastery_mean 0.43 -0.33
## constraints_mean -0.51 0.44
## lonely_mean -0.53 0.48
## posaff_mean 1.00 -0.43
## negaff_mean -0.43 1.00
**RQ1.** Are people with higher mastery and fewer perceived constraints more satisfied with life? Uses Mastery (Q23) and Constraints (Q22) → robust 5-item scales. Simple to interpret for the boss: “Build mastery, reduce barriers.”
# Rows with all three RQ1 variables present.
d1 <- tidyr::drop_na(
  dplyr::select(an, swls_mean, mastery_mean, constraints_mean)
)
# Outcome and predictors are z-scored inside the formula, so the slopes are
# on a common standardized scale and can be compared with each other.
m_rq1 <- lm(
  scale(swls_mean) ~ scale(mastery_mean) + scale(constraints_mean),
  data = d1
)
broom::tidy(m_rq1)
# One scatterplot per predictor with a straight-line fit and its band. These
# are bivariate (unadjusted) fits on the raw scale means, not partial effects
# from m_rq1.
p1 <- ggplot(data = d1, mapping = aes(x = mastery_mean, y = swls_mean)) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SWLS vs Mastery",
    x = "Mastery (mean)",
    y = "Life Satisfaction (SWLS)"
  )
p2 <- ggplot(data = d1, mapping = aes(x = constraints_mean, y = swls_mean)) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SWLS vs Constraints",
    x = "Constraints (mean)",
    y = "Life Satisfaction (SWLS)"
  )
p1
## `geom_smooth()` using formula = 'y ~ x'
p2
## `geom_smooth()` using formula = 'y ~ x'
**TL;DR:** Higher mastery is associated with higher life satisfaction; more
perceived constraints are associated with lower life satisfaction.
**RQ2.** Do people with more positive affect (and less negative affect) report higher life satisfaction? Uses Positive/Negative Affect (Q27) → clear emotional predictors.
# Rows with all three RQ2 variables present.
d2 <- tidyr::drop_na(
  dplyr::select(an, swls_mean, posaff_mean, negaff_mean)
)
# Standardized regression of life satisfaction on both affect indices.
m_rq2 <- lm(
  scale(swls_mean) ~ scale(posaff_mean) + scale(negaff_mean),
  data = d2
)
broom::tidy(m_rq2)
# Bivariate scatterplot with straight-line fit: positive affect.
ggplot(data = d2, mapping = aes(x = posaff_mean, y = swls_mean)) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SWLS vs Positive Affect",
    x = "Positive Affect (mean)",
    y = "Life Satisfaction (SWLS)"
  )
## `geom_smooth()` using formula = 'y ~ x'
# Bivariate scatterplot with straight-line fit: negative affect.
ggplot(data = d2, mapping = aes(x = negaff_mean, y = swls_mean)) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SWLS vs Negative Affect",
    x = "Negative Affect (mean)",
    y = "Life Satisfaction (SWLS)"
  )
## `geom_smooth()` using formula = 'y ~ x'
**TL;DR:** More positive affect is associated with higher life satisfaction;
more negative affect is associated with lower life satisfaction.
**RQ3.** Is greater loneliness associated with lower life satisfaction? Uses Loneliness (Q20) → short, validated scale for social well-being.
# Rows with both RQ3 variables present.
d3 <- tidyr::drop_na(
  dplyr::select(an, swls_mean, lonely_mean)
)
# Standardized regression of life satisfaction on loneliness.
m_rq3 <- lm(scale(swls_mean) ~ scale(lonely_mean), data = d3)
broom::tidy(m_rq3)
# Scatterplot of the raw scale means with a straight-line fit and its band.
ggplot(data = d3, mapping = aes(x = lonely_mean, y = swls_mean)) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SWLS vs Loneliness",
    x = "Loneliness (mean)",
    y = "Life Satisfaction (SWLS)"
  )
## `geom_smooth()` using formula = 'y ~ x'
**TL;DR:** Greater loneliness is associated with lower life satisfaction.