# Global knitr chunk option: echo = TRUE is set as the default for all chunks,
# so chunk source is shown in the knitted output unless a chunk overrides it.
knitr::opts_chunk$set(echo = TRUE)
#Loading necessary libraries
library(reshape2) #For melt of the corr matrix
library(ggplot2)
library(cobalt) #For split command
## cobalt (Version 4.4.0, Build Date: 2022-08-13)
library(haven)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(lmtest)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(sandwich)
library(AER)
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## Loading required package: survival
library(OneSampleMR) #Two Stage Residual Inclusion Package
## Registered S3 methods overwritten by 'ivreg':
## method from
## anova.ivreg AER
## hatvalues.ivreg AER
## model.matrix.ivreg AER
## predict.ivreg AER
## print.ivreg AER
## print.summary.ivreg AER
## summary.ivreg AER
## terms.ivreg AER
## update.ivreg AER
## vcov.ivreg AER
library(modelsummary) #Way to show results of models
library(margins)
library(MASS) #Stepwise selection
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(leaps) #Moreeee stepwise selection
library(caret) #Stepwise selection train function
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
##
## cluster
library(msm)
library(ggeffects)
library(broom)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
library(GLMMadaptive)
##
## Attaching package: 'GLMMadaptive'
## The following object is masked from 'package:MASS':
##
## negative.binomial
library(aod)
##
## Attaching package: 'aod'
## The following object is masked from 'package:survival':
##
## rats
library(funModeling)
## Loading required package: Hmisc
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following object is masked from 'package:modelsummary':
##
## Mean
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
## funModeling v.1.9.4 :)
## Examples and tutorials at livebook.datascienceheroes.com
## / Now in Spanish: librovivodecienciadedatos.ai
# ---- Feature engineering: expand coded survey items into 0/1 indicators ----
# Each ifelse() yields 1 when the coded column equals the given level, 0 when
# it does not, and NA when the coded value itself is NA.

# Physical activity frequency (levels 0-3 are the ones tested below).
df$ACTIVE_ONCE_WEEKLY <- ifelse(df$ACTIVITY_FREQUENCY == 1, 1, 0)
df$ACTIVE_MORE_THAN_ONCE_WEEKLY <- ifelse(df$ACTIVITY_FREQUENCY == 2, 1, 0)
df$ACTIVE_DAILY <- ifelse(df$ACTIVITY_FREQUENCY == 3, 1, 0)
df$NOT_ACTIVE <- ifelse(df$ACTIVITY_FREQUENCY == 0, 1, 0)

# Self-rated health (levels 1-5).
df$EXCELLENT_HEALTH <- ifelse(df$RATE_HEALTH == 1, 1, 0)
df$VERY_GOOD_HEALTH <- ifelse(df$RATE_HEALTH == 2, 1, 0)
df$GOOD_HEALTH <- ifelse(df$RATE_HEALTH == 3, 1, 0)
df$FAIR_HEALTH <- ifelse(df$RATE_HEALTH == 4, 1, 0)
df$POOR_HEALTH <- ifelse(df$RATE_HEALTH == 5, 1, 0)

# Employment status. LAID_OFF is derived from JOB_STATUS like WORKING and
# UNEMPLOYED; it was previously computed from MARITAL_STATUS, which mislabelled
# rows and made any later LAID_OFF-based filtering act on marital status.
# NOTE(review): assumes JOB_STATUS code 3 means "laid off" - confirm against
# the codebook.
df$WORKING <- ifelse(df$JOB_STATUS == 1, 1, 0)
df$UNEMPLOYED <- ifelse(df$JOB_STATUS == 2, 1, 0)
df$LAID_OFF <- ifelse(df$JOB_STATUS == 3, 1, 0)

# Work enjoyment (levels 1-4).
df$REALLY_LIKE_WORKING <- ifelse(df$WORK_ENJOYMENT == 1, 1, 0)
df$LIKE_WORKING <- ifelse(df$WORK_ENJOYMENT == 2, 1, 0)
df$DISLIKE_WORKING <- ifelse(df$WORK_ENJOYMENT == 3, 1, 0)
df$REALLY_DISLIKE_WORKING <- ifelse(df$WORK_ENJOYMENT == 4, 1, 0)

# Marital status.
df$SINGLE <- ifelse(df$MARITAL_STATUS == 2, 1, 0)

# Waist sizes of 13 or less are treated as missing. The flag must be set
# BEFORE the measurement is zeroed, because the same condition selects the rows.
# NOTE(review): presumably MISSING_WAIST_SIZE already exists in df - confirm.
df$MISSING_WAIST_SIZE[df$WAIST_SIZE <= 13] <- 1
df$WAIST_SIZE[df$WAIST_SIZE <= 13] <- 0

# Recode vacation weeks of 14 or more down to 12.
# NOTE(review): a value of 13 is left untouched, so the recode is not a
# monotone cap - confirm whether the threshold or the target is intended.
df$WEEKS_PAID_VACATION[df$WEEKS_PAID_VACATION >= 14] <- 12

# The raw coded columns are fully represented by the indicators above.
df <- subset(
  df,
  select = -c(RATE_HEALTH, ACTIVITY_FREQUENCY, MARITAL_STATUS,
              WORK_ENJOYMENT, JOB_STATUS)
)
## WEEKLY_WORK_HOURS AGE HEART_CONDITION ANY_DEPENDENTS
## Min. : 0.00 Min. :50.00 Min. :0.0000 Min. :0.0000
## 1st Qu.:30.00 1st Qu.:55.00 1st Qu.:0.0000 1st Qu.:0.0000
## Median :40.00 Median :59.00 Median :0.0000 Median :0.0000
## Mean :36.78 Mean :60.72 Mean :0.1305 Mean :0.1737
## 3rd Qu.:45.00 3rd Qu.:64.00 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :98.00 Max. :90.00 Max. :1.0000 Max. :1.0000
## WORKING_SPOUSE WEEKS_PAID_VACATION REDUCE_PAID_WORK_HOURS MEDICARE
## Min. :0.0000 Min. : 0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median : 1.000 Median :0.0000 Median :0.0000
## Mean :0.4442 Mean : 1.618 Mean :0.2755 Mean :0.2299
## 3rd Qu.:1.0000 3rd Qu.: 3.000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :13.000 Max. :1.0000 Max. :1.0000
## MEDICAID HOSPITAL_EXPENSES RETIRED VOLUNTEER
## Min. :0.0000 Min. : 0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.: 0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median : 0.0000 Median :0.0000 Median :0.0000
## Mean :0.0863 Mean : 0.2063 Mean :0.2096 Mean :0.4149
## 3rd Qu.:0.0000 3rd Qu.: 0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :30.0000 Max. :1.0000 Max. :1.0000
## HOME_VALUE WAIST_SIZE MALE YEARS_EDUCATED
## Min. : 0.0 Min. : 0.000 Min. :0.0000 Min. : 0.00
## 1st Qu.: 0.0 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:12.00
## Median : 0.0 Median : 0.000 Median :0.0000 Median :14.00
## Mean : 170.4 Mean : 9.976 Mean :0.4722 Mean :13.22
## 3rd Qu.: 156.0 3rd Qu.: 0.000 3rd Qu.:1.0000 3rd Qu.:16.00
## Max. :6000.0 Max. :68.250 Max. :1.0000 Max. :17.00
## WORK_LIMITING_CONDITION TOTAL_WEALTH DEBTS
## Min. :0.0000 Min. : -463000 Min. : 0.00
## 1st Qu.:0.0000 1st Qu.: 0 1st Qu.: 0.00
## Median :0.0000 Median : 0 Median : 0.00
## Mean :0.1227 Mean : 141995 Mean : 27.86
## 3rd Qu.:0.0000 3rd Qu.: 15500 3rd Qu.: 1.25
## Max. :1.0000 Max. :30900000 Max. :2000.00
## MISSING_RATE_HEALTH MISSING_HEART_CONDITION MISSING_ACTIVITY_FREQUENCY
## Min. :0.0000000 Min. :0.0000000 Min. :0.000000
## 1st Qu.:0.0000000 1st Qu.:0.0000000 1st Qu.:0.000000
## Median :0.0000000 Median :0.0000000 Median :0.000000
## Mean :0.0004785 Mean :0.0004785 Mean :0.001117
## 3rd Qu.:0.0000000 3rd Qu.:0.0000000 3rd Qu.:0.000000
## Max. :1.0000000 Max. :1.0000000 Max. :1.000000
## MISSING_WORK_ENJOYMENT MISSING_ANY_DEPENDENTS MISSING_WORKING_SPOUSE
## Min. :0.00000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.00000 Median :0.0000
## Mean :0.03956 Mean :0.04738 Mean :0.4513
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.00000 Max. :1.0000
## MISSING_WEEKS_PAID_VACATION MISSING_REDUCE_PAID_WORK_HOURS MISSING_JOB_STATUS
## Min. :0.000 Min. :0.0000 Min. :0.000000
## 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.000000
## Median :0.000 Median :0.0000 Median :0.000000
## Mean :0.211 Mean :0.2098 Mean :0.004147
## 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.000000
## Max. :1.000 Max. :1.0000 Max. :1.000000
## MISSING_MEDICARE MISSING_MEDICAID MISSING_RETIRED MISSING_VOLUNTEER
## Min. :0.000000 Min. :0.0000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.000000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.000000 Median :0.0000 Median :0.00000 Median :0.000000
## Mean :0.004307 Mean :0.0067 Mean :0.02249 Mean :0.000319
## 3rd Qu.:0.000000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.000000 Max. :1.0000 Max. :1.00000 Max. :1.000000
## MISSING_TOTAL_WEALTH MISSING_HOME_VALUE MISSING_WAIST_SIZE
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :1.0000 Median :0.0000 Median :1.0000
## Mean :0.6719 Mean :0.3835 Mean :0.7531
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## MISSING_YEARS_EDUCATED MISSING_DEBTS ACTIVE_ONCE_WEEKLY
## Min. :0.00000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.0000 Median :0.0000
## Mean :0.02393 Mean :0.3835 Mean :0.1764
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.0000 Max. :1.0000
## ACTIVE_MORE_THAN_ONCE_WEEKLY ACTIVE_DAILY NOT_ACTIVE
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.4586 Mean :0.1238 Mean :0.2412
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## EXCELLENT_HEALTH VERY_GOOD_HEALTH GOOD_HEALTH FAIR_HEALTH
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.1117 Mean :0.3591 Mean :0.3623 Mean :0.1468
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## POOR_HEALTH WORKING UNEMPLOYED LAID_OFF
## Min. :0.00000 Min. :0.0000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:1.0000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.00000 Median :1.0000 Median :0.000000 Median :0.000000
## Mean :0.01978 Mean :0.8968 Mean :0.006062 Mean :0.004945
## 3rd Qu.:0.00000 3rd Qu.:1.0000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :1.00000 Max. :1.0000 Max. :1.000000 Max. :1.000000
## REALLY_LIKE_WORKING LIKE_WORKING DISLIKE_WORKING REALLY_DISLIKE_WORKING
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :1.0000 Median :0.00000 Median :0.00000
## Mean :0.2744 Mean :0.5889 Mean :0.07944 Mean :0.01771
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.00000
## SINGLE
## Min. :0.000000
## 1st Qu.:0.000000
## Median :0.000000
## Mean :0.002552
## 3rd Qu.:0.000000
## Max. :1.000000
# ---- Sample restriction ----
# Drop every respondent for whom any of the listed flags equals 1. Rows where
# the condition evaluates to NA are dropped as well (subset() semantics).
df <- subset(
  df,
  !(MISSING_ACTIVITY_FREQUENCY == 1 |
      MISSING_ANY_DEPENDENTS == 1 |
      MISSING_HEART_CONDITION == 1 |
      MISSING_JOB_STATUS == 1 |
      MISSING_MEDICAID == 1 |
      MISSING_MEDICARE == 1 |
      MISSING_RATE_HEALTH == 1 |
      MISSING_RETIRED == 1 |
      MISSING_VOLUNTEER == 1 |
      MISSING_WORK_ENJOYMENT == 1 |
      MISSING_YEARS_EDUCATED == 1 |
      LAID_OFF == 1 |
      UNEMPLOYED == 1 |
      SINGLE == 1)
)

# ---- Column clean-up ----
# Remove the flags used for filtering above, the wealth columns and
# MISSING_WORKING_SPOUSE, together with one indicator from each of the
# health, work-enjoyment and activity dummy sets (GOOD_HEALTH, LIKE_WORKING,
# ACTIVE_MORE_THAN_ONCE_WEEKLY).
df <- subset(
  df,
  select = -c(
    MISSING_ACTIVITY_FREQUENCY,
    MISSING_ANY_DEPENDENTS,
    MISSING_HEART_CONDITION,
    MISSING_JOB_STATUS,
    MISSING_MEDICAID,
    MISSING_MEDICARE,
    MISSING_RATE_HEALTH,
    MISSING_RETIRED,
    MISSING_VOLUNTEER,
    MISSING_WORK_ENJOYMENT,
    MISSING_YEARS_EDUCATED,
    MISSING_TOTAL_WEALTH,
    MISSING_WORKING_SPOUSE,
    TOTAL_WEALTH,
    LAID_OFF,
    UNEMPLOYED,
    SINGLE,
    GOOD_HEALTH,
    LIKE_WORKING,
    ACTIVE_MORE_THAN_ONCE_WEEKLY
  )
)

# Linear model of weekly work hours on every remaining column.
lm1 <- lm(formula = WEEKLY_WORK_HOURS ~ ., data = df)

# Descriptive statistics of the analysis sample.
summary(df)
## WEEKLY_WORK_HOURS AGE HEART_CONDITION ANY_DEPENDENTS
## Min. : 0.00 Min. :50.00 Min. :0.0000 Min. :0.0000
## 1st Qu.:30.00 1st Qu.:55.00 1st Qu.:0.0000 1st Qu.:0.0000
## Median :40.00 Median :59.00 Median :0.0000 Median :0.0000
## Mean :37.09 Mean :60.74 Mean :0.1271 Mean :0.1834
## 3rd Qu.:43.75 3rd Qu.:64.00 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :98.00 Max. :90.00 Max. :1.0000 Max. :1.0000
## WORKING_SPOUSE WEEKS_PAID_VACATION REDUCE_PAID_WORK_HOURS MEDICARE
## Min. :0.000 Min. : 0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.000 1st Qu.: 0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.000 Median : 1.000 Median :0.0000 Median :0.0000
## Mean :0.433 Mean : 1.653 Mean :0.2835 Mean :0.2308
## 3rd Qu.:1.000 3rd Qu.: 3.000 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :1.000 Max. :13.000 Max. :1.0000 Max. :1.0000
## MEDICAID HOSPITAL_EXPENSES RETIRED VOLUNTEER
## Min. :0.00000 Min. : 0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.: 0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median : 0.0000 Median :0.0000 Median :0.0000
## Mean :0.08887 Mean : 0.1995 Mean :0.2062 Mean :0.4146
## 3rd Qu.:0.00000 3rd Qu.: 0.0000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.00000 Max. :23.0000 Max. :1.0000 Max. :1.0000
## HOME_VALUE WAIST_SIZE MALE YEARS_EDUCATED
## Min. : 0.0 Min. : 0.00 Min. :0.0000 Min. : 0.00
## 1st Qu.: 0.0 1st Qu.: 0.00 1st Qu.:0.0000 1st Qu.:12.00
## Median : 0.0 Median : 0.00 Median :0.0000 Median :14.00
## Mean : 177.4 Mean :10.33 Mean :0.4596 Mean :13.57
## 3rd Qu.: 160.0 3rd Qu.:29.00 3rd Qu.:1.0000 3rd Qu.:16.00
## Max. :6000.0 Max. :68.25 Max. :1.0000 Max. :17.00
## WORK_LIMITING_CONDITION DEBTS MISSING_WEEKS_PAID_VACATION
## Min. :0.0000 Min. : 0.00 Min. :0.000
## 1st Qu.:0.0000 1st Qu.: 0.00 1st Qu.:0.000
## Median :0.0000 Median : 0.00 Median :0.000
## Mean :0.1223 Mean : 26.99 Mean :0.199
## 3rd Qu.:0.0000 3rd Qu.: 1.70 3rd Qu.:0.000
## Max. :1.0000 Max. :2000.00 Max. :1.000
## MISSING_REDUCE_PAID_WORK_HOURS MISSING_HOME_VALUE MISSING_WAIST_SIZE
## Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :1.0000
## Mean :0.1902 Mean :0.3592 Mean :0.7451
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000
## MISSING_DEBTS ACTIVE_ONCE_WEEKLY ACTIVE_DAILY NOT_ACTIVE
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.0000
## Mean :0.3592 Mean :0.1761 Mean :0.1241 Mean :0.2429
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.0000
## EXCELLENT_HEALTH VERY_GOOD_HEALTH FAIR_HEALTH POOR_HEALTH
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.0000 Median :0.00000
## Mean :0.1105 Mean :0.3599 Mean :0.1467 Mean :0.02001
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
## WORKING REALLY_LIKE_WORKING DISLIKE_WORKING REALLY_DISLIKE_WORKING
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:1.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :1.0000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.9146 Mean :0.2852 Mean :0.08465 Mean :0.01855
## 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.00000
# ---- Headline counts for the analysis sample ----
# col_count holds the number of ROWS (observations) in df.
col_count <- nrow(df)

# Each num_* value is the column total of a 0/1 indicator, i.e. the number of
# observations for which the indicator is 1.
num_heart_condition <- sum(df[["HEART_CONDITION"]])
num_working_spouse <- sum(df[["WORKING_SPOUSE"]])
num_reduce_paid_work_hours <- sum(df[["REDUCE_PAID_WORK_HOURS"]])
num_medicare <- sum(df[["MEDICARE"]])
num_medicaid <- sum(df[["MEDICAID"]])
num_retired <- sum(df[["RETIRED"]])
num_volunteer <- sum(df[["VOLUNTEER"]])
num_male <- sum(df[["MALE"]])
num_work_limiting_condition <- sum(df[["WORK_LIMITING_CONDITION"]])

# m_* values total the MISSING_* flags that were kept in the data.
m_weeks_paid_vacation <- sum(df[["MISSING_WEEKS_PAID_VACATION"]])
m_reduce_paid_work_hours <- sum(df[["MISSING_REDUCE_PAID_WORK_HOURS"]])
m_home_value <- sum(df[["MISSING_HOME_VALUE"]])
m_waist_size <- sum(df[["MISSING_WAIST_SIZE"]])
m_debts <- sum(df[["MISSING_DEBTS"]])

# Activity indicators.
num_active_weekly <- sum(df[["ACTIVE_ONCE_WEEKLY"]])
num_active_daily <- sum(df[["ACTIVE_DAILY"]])
num_inactive <- sum(df[["NOT_ACTIVE"]])

# Self-rated health indicators.
num_excellent_health <- sum(df[["EXCELLENT_HEALTH"]])
num_very_good_health <- sum(df[["VERY_GOOD_HEALTH"]])
num_fair_health <- sum(df[["FAIR_HEALTH"]])
num_poor_health <- sum(df[["POOR_HEALTH"]])

# Employment and work-enjoyment indicators.
num_working <- sum(df[["WORKING"]])
num_really_like_working <- sum(df[["REALLY_LIKE_WORKING"]])
num_dislike_working <- sum(df[["DISLIKE_WORKING"]])
num_really_dislike_working <- sum(df[["REALLY_DISLIKE_WORKING"]])

# Save the analysis sample to disk.
write.csv(df, file = "HSV_Final.csv")
library(vtable)
## Loading required package: kableExtra
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Summary-statistics table for every column of df (st() is vtable's shorthand
# for sumtable()).
sumtable(df)
| Variable | N | Mean | Std. Dev. | Min | Pctl. 25 | Pctl. 75 | Max |
|---|---|---|---|---|---|---|---|
| WEEKLY_WORK_HOURS | 5446 | 37.086 | 13.848 | 0 | 30 | 43.75 | 98 |
| AGE | 5446 | 60.737 | 7.034 | 50 | 55 | 64 | 90 |
| HEART_CONDITION | 5446 | 0.127 | 0.333 | 0 | 0 | 0 | 1 |
| ANY_DEPENDENTS | 5446 | 0.183 | 0.387 | 0 | 0 | 0 | 1 |
| WORKING_SPOUSE | 5446 | 0.433 | 0.496 | 0 | 0 | 1 | 1 |
| WEEKS_PAID_VACATION | 5446 | 1.653 | 2.132 | 0 | 0 | 3 | 13 |
| REDUCE_PAID_WORK_HOURS | 5446 | 0.284 | 0.451 | 0 | 0 | 1 | 1 |
| MEDICARE | 5446 | 0.231 | 0.421 | 0 | 0 | 0 | 1 |
| MEDICAID | 5446 | 0.089 | 0.285 | 0 | 0 | 0 | 1 |
| HOSPITAL_EXPENSES | 5446 | 0.2 | 1.186 | 0 | 0 | 0 | 23 |
| RETIRED | 5446 | 0.206 | 0.405 | 0 | 0 | 0 | 1 |
| VOLUNTEER | 5446 | 0.415 | 0.493 | 0 | 0 | 1 | 1 |
| HOME_VALUE | 5446 | 177.418 | 646.095 | 0 | 0 | 160 | 6000 |
| WAIST_SIZE | 5446 | 10.334 | 17.939 | 0 | 0 | 29 | 68.25 |
| MALE | 5446 | 0.46 | 0.498 | 0 | 0 | 1 | 1 |
| YEARS_EDUCATED | 5446 | 13.566 | 2.926 | 0 | 12 | 16 | 17 |
| WORK_LIMITING_CONDITION | 5446 | 0.122 | 0.328 | 0 | 0 | 0 | 1 |
| DEBTS | 5446 | 26.992 | 209.239 | 0 | 0 | 1.7 | 2000 |
| MISSING_WEEKS_PAID_VACATION | 5446 | 0.199 | 0.399 | 0 | 0 | 0 | 1 |
| MISSING_REDUCE_PAID_WORK_HOURS | 5446 | 0.19 | 0.393 | 0 | 0 | 0 | 1 |
| MISSING_HOME_VALUE | 5446 | 0.359 | 0.48 | 0 | 0 | 1 | 1 |
| MISSING_WAIST_SIZE | 5446 | 0.745 | 0.436 | 0 | 0 | 1 | 1 |
| MISSING_DEBTS | 5446 | 0.359 | 0.48 | 0 | 0 | 1 | 1 |
| ACTIVE_ONCE_WEEKLY | 5446 | 0.176 | 0.381 | 0 | 0 | 0 | 1 |
| ACTIVE_DAILY | 5446 | 0.124 | 0.33 | 0 | 0 | 0 | 1 |
| NOT_ACTIVE | 5446 | 0.243 | 0.429 | 0 | 0 | 0 | 1 |
| EXCELLENT_HEALTH | 5446 | 0.111 | 0.314 | 0 | 0 | 0 | 1 |
| VERY_GOOD_HEALTH | 5446 | 0.36 | 0.48 | 0 | 0 | 1 | 1 |
| FAIR_HEALTH | 5446 | 0.147 | 0.354 | 0 | 0 | 0 | 1 |
| POOR_HEALTH | 5446 | 0.02 | 0.14 | 0 | 0 | 0 | 1 |
| WORKING | 5446 | 0.915 | 0.279 | 0 | 1 | 1 | 1 |
| REALLY_LIKE_WORKING | 5446 | 0.285 | 0.452 | 0 | 0 | 1 | 1 |
| DISLIKE_WORKING | 5446 | 0.085 | 0.278 | 0 | 0 | 0 | 1 |
| REALLY_DISLIKE_WORKING | 5446 | 0.019 | 0.135 | 0 | 0 | 0 | 1 |
```{r, echo=FALSE}
df <- subset(df, df$WEEKLY_WORK_HOURS > 0)

col_count = nrow(df)
num_heart_condition = sum(df$HEART_CONDITION)
num_working_spouse = sum(df$WORKING_SPOUSE)
num_reduce_paid_work_hours = sum(df$REDUCE_PAID_WORK_HOURS)
num_medicare = sum(df$MEDICARE)
num_medicaid = sum(df$MEDICAID)
num_retired = sum(df$RETIRED)
num_volunteer = sum(df$VOLUNTEER)
num_male = sum(df$MALE)
num_work_limiting_condition = sum(df$WORK_LIMITING_CONDITION)

m_weeks_paid_vacation = sum(df$MISSING_WEEKS_PAID_VACATION)
m_reduce_paid_work_hours = sum(df$MISSING_REDUCE_PAID_WORK_HOURS)
m_home_value = sum(df$MISSING_HOME_VALUE)
m_waist_size = sum(df$MISSING_WAIST_SIZE)
m_debts = sum(df$MISSING_DEBTS)

num_active_weekly = sum(df$ACTIVE_ONCE_WEEKLY)
num_active_daily = sum(df$ACTIVE_DAILY)
num_inactive = sum(df$NOT_ACTIVE)

num_excellent_health = sum(df$EXCELLENT_HEALTH)
num_very_good_health = sum(df$VERY_GOOD_HEALTH)
num_fair_health = sum(df$FAIR_HEALTH)
num_poor_health = sum(df$POOR_HEALTH)

num_working = sum(df$WORKING)

num_really_like_working = sum(df$REALLY_LIKE_WORKING)
num_dislike_working = sum(df$DISLIKE_WORKING)
num_really_dislike_working = sum(df$REALLY_DISLIKE_WORKING)

summary(df)
write.csv(df, "HSV_Final_Nonzero.csv")
hist(df$WEEKS_PAID_VACATION)
```
* Total number of observations: 5446
* Number of males: 2503
* Number with a working spouse: 2358
* Number with a heart condition: 692
* Number with a work limiting condition: 666
* Number able to reduce working hours: 1544
* Number with medicare: 1257
* Number with medicaid: 484
* Number retired: 1123
* Number volunteering: 2258
* Number working: 4981
* Number active weekly: 959
* Number active more than once weekly: 2488
* Number active daily: 676
* Number inactive: 1323
* Number with excellent health: 602
* Number with very good health: 1960
* Number with good health: 1976
* Number with fair health: 799
* Number with poor health: 109
* Number who really like working: 1553
* Number who like working: 3331
* Number who dislike working: 461
* Number who really dislike working: 101
### Missing Variables ###
* Number of missing weeks paid vacation: 1084
* Number of missing reduce paid work hours: 1036
* Number of missing home value: 1956
* Number of missing waist size: 4058
* Number of missing debts: 1956
```r
# Summary-statistics table for every column of df (st() is vtable's shorthand
# for sumtable()).
sumtable(df)
| Variable | N | Mean | Std. Dev. | Min | Pctl. 25 | Pctl. 75 | Max |
|---|---|---|---|---|---|---|---|
| WEEKLY_WORK_HOURS | 5446 | 37.086 | 13.848 | 0 | 30 | 43.75 | 98 |
| AGE | 5446 | 60.737 | 7.034 | 50 | 55 | 64 | 90 |
| HEART_CONDITION | 5446 | 0.127 | 0.333 | 0 | 0 | 0 | 1 |
| ANY_DEPENDENTS | 5446 | 0.183 | 0.387 | 0 | 0 | 0 | 1 |
| WORKING_SPOUSE | 5446 | 0.433 | 0.496 | 0 | 0 | 1 | 1 |
| WEEKS_PAID_VACATION | 5446 | 1.653 | 2.132 | 0 | 0 | 3 | 13 |
| REDUCE_PAID_WORK_HOURS | 5446 | 0.284 | 0.451 | 0 | 0 | 1 | 1 |
| MEDICARE | 5446 | 0.231 | 0.421 | 0 | 0 | 0 | 1 |
| MEDICAID | 5446 | 0.089 | 0.285 | 0 | 0 | 0 | 1 |
| HOSPITAL_EXPENSES | 5446 | 0.2 | 1.186 | 0 | 0 | 0 | 23 |
| RETIRED | 5446 | 0.206 | 0.405 | 0 | 0 | 0 | 1 |
| VOLUNTEER | 5446 | 0.415 | 0.493 | 0 | 0 | 1 | 1 |
| HOME_VALUE | 5446 | 177.418 | 646.095 | 0 | 0 | 160 | 6000 |
| WAIST_SIZE | 5446 | 10.334 | 17.939 | 0 | 0 | 29 | 68.25 |
| MALE | 5446 | 0.46 | 0.498 | 0 | 0 | 1 | 1 |
| YEARS_EDUCATED | 5446 | 13.566 | 2.926 | 0 | 12 | 16 | 17 |
| WORK_LIMITING_CONDITION | 5446 | 0.122 | 0.328 | 0 | 0 | 0 | 1 |
| DEBTS | 5446 | 26.992 | 209.239 | 0 | 0 | 1.7 | 2000 |
| MISSING_WEEKS_PAID_VACATION | 5446 | 0.199 | 0.399 | 0 | 0 | 0 | 1 |
| MISSING_REDUCE_PAID_WORK_HOURS | 5446 | 0.19 | 0.393 | 0 | 0 | 0 | 1 |
| MISSING_HOME_VALUE | 5446 | 0.359 | 0.48 | 0 | 0 | 1 | 1 |
| MISSING_WAIST_SIZE | 5446 | 0.745 | 0.436 | 0 | 0 | 1 | 1 |
| MISSING_DEBTS | 5446 | 0.359 | 0.48 | 0 | 0 | 1 | 1 |
| ACTIVE_ONCE_WEEKLY | 5446 | 0.176 | 0.381 | 0 | 0 | 0 | 1 |
| ACTIVE_DAILY | 5446 | 0.124 | 0.33 | 0 | 0 | 0 | 1 |
| NOT_ACTIVE | 5446 | 0.243 | 0.429 | 0 | 0 | 0 | 1 |
| EXCELLENT_HEALTH | 5446 | 0.111 | 0.314 | 0 | 0 | 0 | 1 |
| VERY_GOOD_HEALTH | 5446 | 0.36 | 0.48 | 0 | 0 | 1 | 1 |
| FAIR_HEALTH | 5446 | 0.147 | 0.354 | 0 | 0 | 0 | 1 |
| POOR_HEALTH | 5446 | 0.02 | 0.14 | 0 | 0 | 0 | 1 |
| WORKING | 5446 | 0.915 | 0.279 | 0 | 1 | 1 | 1 |
| REALLY_LIKE_WORKING | 5446 | 0.285 | 0.452 | 0 | 0 | 1 | 1 |
| DISLIKE_WORKING | 5446 | 0.085 | 0.278 | 0 | 0 | 0 | 1 |
| REALLY_DISLIKE_WORKING | 5446 | 0.019 | 0.135 | 0 | 0 | 0 | 1 |