1 About this template

This is a reusable exploration + modeling workflow for HRS 2010 Psychosocial (LB) data, focused on life satisfaction.

It explicitly:

Loads & cleans composites (Life Satisfaction, Mastery, Constraints, Affect, Loneliness) based on the HRS LB codebook.
Applies ≥50% completion rules to compute robust scale means.
Reverse-codes items where required to ensure intuitive directions.
Summarizes distributions for quick data checks.
Runs simple standardized regressions for each research question.
Provides clear visuals (scatterplots + trend lines) for interpretation.
Translates outputs into TL;DR insights in plain language.

Resources:

PDF codebook (2006–2010 LB) — reference for items, scales, and response options.

Online 2010 LB codebook (Section LB index):

https://hrs.isr.umich.edu/sites/default/files/meta/2010/core/codebook/h10lb_ri.htm

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

2 Data & Keys

2.1 Load data

# Read the SPSS export; edit the file name if your copy lives elsewhere
dat_raw <- haven::read_sav(file = "HRS2010_psych.sav")

# Lowercase every column name so the name patterns used later match
colnames(dat_raw) <- tolower(colnames(dat_raw))

# Report the number of rows, then the number of columns
nrow(dat_raw)
ncol(dat_raw)

## [1] 8325

## [1] 327

2.2 Define item sets (from HRS LB codebook)

# HRS LB variables are usually named "mlb" + question number + item letter.
# Each set is requested both by name prefix and by label text.
# NOTE(review): in the peek printed below, label matches come back together
# with the name matches (e.g. mlb021d under loneliness; md116, mlb033l, ...
# under affect), so the label patterns do not behave as a pure fallback here.
# Confirm in grab_by_name_or_label() and filter by name before scoring.

# Life Satisfaction (SWLS) — Q03 a–e (1–7 agree)
swls_items <- grab_by_name_or_label(
  dat_raw,
  patterns_name = paste0("^mlb003", letters[1:5]),
  patterns_label = c(
    "life is close to ideal",
    "conditions of my life are excellent",
    "satisfied with my life",
    "important things i want",
    "change almost nothing"
  )
)

# Mastery — Q23 a–e (1–6 agree; higher = more mastery)
mastery_items <- grab_by_name_or_label(
  dat_raw,
  patterns_name = paste0("^mlb023", letters[1:5]),
  patterns_label = c(
    "i can do just about anything",
    "find a way to succeed",
    "in my own hands",
    "depends on me",
    "i can do the things"
  )
)

# Constraints — Q22 a–e (1–6 agree; higher = more perceived constraints)
constraints_items <- grab_by_name_or_label(
  dat_raw,
  patterns_name = paste0("^mlb022", letters[1:5]),
  patterns_label = c(
    "feel helpless",
    "other people determine",
    "beyond my control",
    "little control",
    "no way i can solve"
  )
)

# Loneliness — Q20 a–k (1=Often, 2=Some, 3=Hardly ever/never);
# the items mix positive and negative wording
lonely_items <- grab_by_name_or_label(
  dat_raw,
  patterns_name = paste0("^mlb020", letters[1:11]),
  patterns_label = c(
    "lack companionship", "left out", "isolated", "in tune", "alone",
    "people you can talk to", "people you can turn to",
    "really understand you", "feel close to", "part of a group of friends",
    "a lot in common"
  )
)

# Positive/Negative Affect — Q27 a–y (1=Very much ... 5=Not at all)
affect_items <- grab_by_name_or_label(
  dat_raw,
  patterns_name = c("^mlb027"),
  patterns_label = c(
    "afraid", "upset", "determined", "enthusiastic", "guilty", "active",
    "proud", "interested", "scared", "frustrated", "happy", "bored",
    "hostile", "jittery", "ashamed", "attentive", "content", "nervous",
    "sad", "inspired", "hopeful", "alert", "distressed", "calm", "excited"
  )
)

# Show which columns each selection actually returned
lapply(
  list(
    swls = swls_items,
    mastery = mastery_items,
    constraints = constraints_items,
    loneliness = lonely_items,
    affect = affect_items
  ),
  names
)

## $swls
## [1] "mlb003a" "mlb003b" "mlb003c" "mlb003d" "mlb003e"
## 
## $mastery
## [1] "mlb023a" "mlb023b" "mlb023c" "mlb023d" "mlb023e"
## 
## $constraints
## [1] "mlb022a" "mlb022b" "mlb022c" "mlb022d" "mlb022e"
## 
## $loneliness
##  [1] "mlb020a" "mlb020b" "mlb020c" "mlb020d" "mlb020e" "mlb020f" "mlb020g"
##  [8] "mlb020h" "mlb020i" "mlb020j" "mlb020k" "mlb021d"
## 
## $affect
##  [1] "md116"    "mlb021d"  "mlb027a"  "mlb027b"  "mlb027c"  "mlb027d" 
##  [7] "mlb027e"  "mlb027f"  "mlb027g"  "mlb027h"  "mlb027i"  "mlb027j" 
## [13] "mlb027k"  "mlb027l"  "mlb027m"  "mlb027n"  "mlb027o"  "mlb027p" 
## [19] "mlb027q"  "mlb027r"  "mlb027s"  "mlb027t"  "mlb027u"  "mlb027v" 
## [25] "mlb027w"  "mlb027x"  "mlb027y"  "mlb029b4" "mlb030d"  "mlb033l" 
## [31] "mlb033q"  "mlb033u"  "mlb035c"  "mlb041b"  "mlb050m"  "md113"

2.3 Build clean composites (≥50% items present)

### Build clean composites (≥50% items present) — FIXED
# Work on a copy named `dat`; the composite score columns computed below are
# added to this copy rather than to dat_raw.
dat <- dat_raw

# Convert every column of an item frame to plain numeric.
#   df: a data frame (or anything as.data.frame() accepts) of item columns.
# Returns a base data.frame with the same columns. Each column is passed
# through as.character() and then as.numeric(); values that cannot be parsed
# become NA and the coercion warning is suppressed. A frame with zero columns
# is returned as is.
clean_numeric_df <- function(df) {
  out <- as.data.frame(df)
  if (ncol(out) > 0) {
    to_number <- function(col) suppressWarnings(as.numeric(as.character(col)))
    out[] <- lapply(out, to_number)
  }
  out
}

# The three composites below share one recipe: if at least 3 item columns
# were found, coerce them to numeric and store the row mean from
# row_mean_half() on `dat`. None of these scales is reverse-coded here.

# --- SWLS (higher = more satisfied) ---
if (ncol(swls_items) >= 3) {
  swls_num <- clean_numeric_df(swls_items)
  dat[["swls_mean"]] <- row_mean_half(swls_num)
}

# --- Mastery (higher = more mastery) ---
if (ncol(mastery_items) >= 3) {
  mastery_num <- clean_numeric_df(mastery_items)
  dat[["mastery_mean"]] <- row_mean_half(mastery_num)
}

# --- Constraints (higher = more constraints) ---
if (ncol(constraints_items) >= 3) {
  constraints_num <- clean_numeric_df(constraints_items)
  dat[["constraints_mean"]] <- row_mean_half(constraints_num)
}

# --- Loneliness: mean of the 11 Q20 items (a–k) on the 1..3 response scale ---
# Only columns named mlb020a..mlb020k are scored. The item selection can also
# return columns matched by label text alone (the peek above lists mlb021d
# under loneliness); averaging such a column in would contaminate the scale
# and can push the mean above 3.
# Items a, b, c, e are flipped with 4 - x before averaging. With the coding
# 1 = Often ... 3 = Hardly ever/never, this aligns them with the remaining
# items so that a higher mean indicates more loneliness.
lonely_q20 <- grepl("^mlb020[a-k]", names(lonely_items))
if (sum(lonely_q20) >= 7) {
  lon <- clean_numeric_df(lonely_items)[, lonely_q20, drop = FALSE]
  # Reverse-code items a, b, c, e; NA stays NA under subtraction
  rev_idx <- grepl("^mlb020[abce]", names(lon))
  lon[rev_idx] <- lapply(lon[rev_idx], function(x) 4 - x)
  dat$lonely_mean <- row_mean_half(lon)
} else {
  warning(
    "lonely_mean not built: fewer than 7 columns named mlb020a-k were found",
    call. = FALSE
  )
}

# --- Affect: positive and negative composites from Q27 ---
# Responses run 1 = Very much .. 5 = Not at all, so each column is flipped
# (6 - x) to make larger values mean more of the feeling. Columns are then
# assigned to a composite by the item letter in their name; columns matching
# neither key list are left out of both composites.
if (ncol(affect_items) >= 10) {
  af <- clean_numeric_df(affect_items)
  af_rev <- af
  af_rev[] <- lapply(af, function(x) ifelse(is.na(x), NA_real_, 6 - x))

  # Positive affect: Q27 c,d,f,g,h,k,p,q,t,u,v,x,y
  pos_keys <- stringr::str_detect(
    names(af_rev),
    "(027c|027d|027f|027g|027h|027k|027p|027q|027t|027u|027v|027x|027y)"
  )
  # Negative affect: Q27 a,b,e,i,j,l,m,n,o,r,s,w
  neg_keys <- stringr::str_detect(
    names(af_rev),
    "(027a|027b|027e|027i|027j|027l|027m|027n|027o|027r|027s|027w)"
  )

  # Each composite is only built when at least 6 of its items are present
  if (sum(pos_keys) >= 6) {
    dat$posaff_mean <- row_mean_half(af_rev[pos_keys])
  }
  if (sum(neg_keys) >= 6) {
    dat$negaff_mean <- row_mean_half(af_rev[neg_keys])
  }
}

# Analysis frame: keep the six composite scores and nothing else.
# select() errors if any composite was not created above.
an <- dplyr::select(
  dat,
  swls_mean, mastery_mean, constraints_mean,
  lonely_mean, posaff_mean, negaff_mean
)

# Five-number summary, mean, and NA count per composite
summary(an)

##    swls_mean      mastery_mean  constraints_mean  lonely_mean   
##  Min.   :1.000   Min.   :1.00   Min.   :1.000    Min.   :1.000  
##  1st Qu.:3.800   1st Qu.:4.20   1st Qu.:1.200    1st Qu.:1.250  
##  Median :5.200   Median :5.00   Median :2.000    Median :1.583  
##  Mean   :4.839   Mean   :4.75   Mean   :2.227    Mean   :1.626  
##  3rd Qu.:6.000   3rd Qu.:5.60   3rd Qu.:3.000    3rd Qu.:1.917  
##  Max.   :7.000   Max.   :6.00   Max.   :6.000    Max.   :3.333  
##  NA's   :71      NA's   :109    NA's   :120      NA's   :115    
##   posaff_mean     negaff_mean   
##  Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.:1.333  
##  Median :3.615   Median :1.667  
##  Mean   :3.535   Mean   :1.774  
##  3rd Qu.:4.154   3rd Qu.:2.083  
##  Max.   :5.000   Max.   :5.000  
##  NA's   :147     NA's   :144

3 Descriptives & Quick Checks

3.1 Distributions

# Stack all composites into long format: one row per (measure, value) pair
desc_long <- pivot_longer(
  an,
  cols = everything(),
  names_to = "measure",
  values_to = "value"
)

# One 30-bin histogram per composite, each panel with its own axes.
# Missing values are dropped by stat_bin() with a warning.
ggplot(data = desc_long, mapping = aes(x = value)) +
  geom_histogram(bins = 30) +
  facet_wrap(facets = ~ measure, scales = "free") +
  labs(title = "Distributions of composites", x = NULL, y = "Count")

## Warning: Removed 706 rows containing non-finite outside the scale range
## (`stat_bin()`).

3.2 Pairwise correlations (complete cases)

# Listwise deletion: keep only respondents with all six composites present
complete_an <- drop_na(an)

# Correlation matrix of the composites, rounded to two decimals
cor_mat <- round(cor(complete_an), digits = 2)
print(cor_mat)

##                  swls_mean mastery_mean constraints_mean lonely_mean
## swls_mean             1.00         0.38            -0.36       -0.45
## mastery_mean          0.38         1.00            -0.43       -0.34
## constraints_mean     -0.36        -0.43             1.00        0.48
## lonely_mean          -0.45        -0.34             0.48        1.00
## posaff_mean           0.45         0.43            -0.51       -0.53
## negaff_mean          -0.43        -0.33             0.44        0.48
##                  posaff_mean negaff_mean
## swls_mean               0.45       -0.43
## mastery_mean            0.43       -0.33
## constraints_mean       -0.51        0.44
## lonely_mean            -0.53        0.48
## posaff_mean             1.00       -0.43
## negaff_mean            -0.43        1.00

4 RQ1: Mastery & Constraints → Life Satisfaction

RQ1. Are people with higher mastery and fewer perceived constraints more satisfied with life? Uses the Mastery (Q23) and Constraints (Q22) composites, both robust 5-item scales. Simple to interpret for the boss: “Build mastery, reduce barriers.”

4.1 Model & Betas

# RQ1 data: the outcome plus both predictors, complete cases only
d1 <- tidyr::drop_na(
  dplyr::select(an, swls_mean, mastery_mean, constraints_mean)
)

# All variables are z-scored inside the formula, so the slopes are
# standardized betas and can be compared directly
m_rq1 <- lm(scale(swls_mean) ~ scale(mastery_mean) + scale(constraints_mean), data = d1)
broom::tidy(m_rq1)

4.2 Simple Visuals

# Two bivariate scatterplots, each with its own straight-line fit and
# confidence band. Each line is fitted to one predictor at a time, so these
# are not the partial effects from m_rq1.
p1 <- ggplot(data = d1, mapping = aes(x = mastery_mean, y = swls_mean)) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SWLS vs Mastery",
    x = "Mastery (mean)",
    y = "Life Satisfaction (SWLS)"
  )

p2 <- ggplot(data = d1, mapping = aes(x = constraints_mean, y = swls_mean)) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SWLS vs Constraints",
    x = "Constraints (mean)",
    y = "Life Satisfaction (SWLS)"
  )

p1

## `geom_smooth()` using formula = 'y ~ x'

p2

## `geom_smooth()` using formula = 'y ~ x'

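TL;DR: Higher mastery → higher life satisfaction; more constraints → lower life satisfaction. This matches the correlations with SWLS printed in Section 3.2 (r = 0.38 for mastery, r = −0.36 for constraints).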

5 RQ2: Affect → Life Satisfaction

RQ2. Do people with more positive affect (and less negative affect) report higher life satisfaction? Uses the Positive and Negative Affect composites (Q27) as clear emotional predictors.

5.1 Model & Betas

# RQ2 data: the outcome plus both affect composites, complete cases only
d2 <- tidyr::drop_na(
  dplyr::select(an, swls_mean, posaff_mean, negaff_mean)
)

# Variables are z-scored inside the formula, so slopes are standardized betas
m_rq2 <- lm(scale(swls_mean) ~ scale(posaff_mean) + scale(negaff_mean), data = d2)
broom::tidy(m_rq2)

5.2 Simple Visuals

# Life satisfaction against positive affect, with a straight-line fit
ggplot(data = d2, mapping = aes(x = posaff_mean, y = swls_mean)) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SWLS vs Positive Affect",
    x = "Positive Affect (mean)",
    y = "Life Satisfaction (SWLS)"
  )

## `geom_smooth()` using formula = 'y ~ x'

# Life satisfaction against negative affect, with a straight-line fit
ggplot(data = d2, mapping = aes(x = negaff_mean, y = swls_mean)) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SWLS vs Negative Affect",
    x = "Negative Affect (mean)",
    y = "Life Satisfaction (SWLS)"
  )

## `geom_smooth()` using formula = 'y ~ x'

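TL;DR: More positive affect → higher life satisfaction; more negative affect → lower life satisfaction. This matches the correlations with SWLS printed in Section 3.2 (r = 0.45 for positive affect, r = −0.43 for negative affect).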

6 RQ3: Loneliness → Life Satisfaction

RQ3. Is greater loneliness associated with lower life satisfaction? Uses the Loneliness composite (Q20), a short, validated scale for social well-being.

6.1 Model & Beta

# RQ3 data: the outcome and the loneliness composite, complete cases only
d3 <- tidyr::drop_na(dplyr::select(an, swls_mean, lonely_mean))

# Single-predictor model on z-scored variables
m_rq3 <- lm(scale(swls_mean) ~ scale(lonely_mean), data = d3)
broom::tidy(m_rq3)

6.2 Simple Visuals

# Life satisfaction against loneliness, with a straight-line fit
ggplot(data = d3, mapping = aes(x = lonely_mean, y = swls_mean)) +
  geom_point(alpha = .3) +
  geom_smooth(method = "lm", se = TRUE) +
  labs(
    title = "SWLS vs Loneliness",
    x = "Loneliness (mean)",
    y = "Life Satisfaction (SWLS)"
  )

## `geom_smooth()` using formula = 'y ~ x'

TL;DR: Greater loneliness → lower life satisfaction. This matches the correlation with SWLS printed in Section 3.2 (r = −0.45).

Mid-term - Life Satisfaction

Wesley Gibbs

October 07, 2025