# Show each chunk's source code in the knitted output unless a chunk overrides `echo`.
knitr::opts_chunk$set(echo = TRUE)
#Loading necessary libraries
library(reshape2) #For melt of the corr matrix
library(ggplot2)
library(cobalt) #For split command
##  cobalt (Version 4.4.0, Build Date: 2022-08-13)
library(haven)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(sandwich)
library(AER)
## Loading required package: car
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
## Loading required package: survival
library(OneSampleMR) #Two Stage Residual Inclusion Package
## Registered S3 methods overwritten by 'ivreg':
##   method              from
##   anova.ivreg         AER 
##   hatvalues.ivreg     AER 
##   model.matrix.ivreg  AER 
##   predict.ivreg       AER 
##   print.ivreg         AER 
##   print.summary.ivreg AER 
##   summary.ivreg       AER 
##   terms.ivreg         AER 
##   update.ivreg        AER 
##   vcov.ivreg          AER
library(modelsummary) #Way to show results of models
library(margins)
library(MASS) #Stepwise selection
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(leaps) #Moreeee stepwise selection
library(caret) #Stepwise selection train function
## Loading required package: lattice
## 
## Attaching package: 'caret'
## The following object is masked from 'package:survival':
## 
##     cluster
library(msm)
library(ggeffects)
library(broom)
library(data.table)
## 
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
## 
##     between, first, last
## The following objects are masked from 'package:reshape2':
## 
##     dcast, melt
library(GLMMadaptive)
## 
## Attaching package: 'GLMMadaptive'
## The following object is masked from 'package:MASS':
## 
##     negative.binomial
library(aod)
## 
## Attaching package: 'aod'
## The following object is masked from 'package:survival':
## 
##     rats
library(funModeling)
## Loading required package: Hmisc
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following object is masked from 'package:modelsummary':
## 
##     Mean
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
## funModeling v.1.9.4 :)
## Examples and tutorials at livebook.datascienceheroes.com
##  / Now in Spanish: librovivodecienciadedatos.ai

Splitting Categorical

# Expand each coded categorical column of `df` into 0/1 indicator columns,
# recode two numeric columns, then drop the original coded columns.
# ifelse() propagates NA, so rows with an NA code get NA in every indicator.

# ACTIVITY_FREQUENCY: 0 = not active, 1 = once weekly, 2 = more than once
# weekly, 3 = daily.
df$ACTIVE_ONCE_WEEKLY <- ifelse(df$ACTIVITY_FREQUENCY == 1, 1, 0)
df$ACTIVE_MORE_THAN_ONCE_WEEKLY <- ifelse(df$ACTIVITY_FREQUENCY == 2, 1, 0)
df$ACTIVE_DAILY <- ifelse(df$ACTIVITY_FREQUENCY == 3, 1, 0)
df$NOT_ACTIVE <- ifelse(df$ACTIVITY_FREQUENCY == 0, 1, 0)

# RATE_HEALTH: 1 = excellent ... 5 = poor.
df$EXCELLENT_HEALTH <- ifelse(df$RATE_HEALTH == 1, 1, 0)
df$VERY_GOOD_HEALTH <- ifelse(df$RATE_HEALTH == 2, 1, 0)
df$GOOD_HEALTH <- ifelse(df$RATE_HEALTH == 3, 1, 0)
df$FAIR_HEALTH <- ifelse(df$RATE_HEALTH == 4, 1, 0)
df$POOR_HEALTH <- ifelse(df$RATE_HEALTH == 5, 1, 0)

# JOB_STATUS: 1 = working, 2 = unemployed, 3 = laid off.
df$WORKING <- ifelse(df$JOB_STATUS == 1, 1, 0)
df$UNEMPLOYED <- ifelse(df$JOB_STATUS == 2, 1, 0)
# Fixed: this indicator was previously derived from MARITAL_STATUS == 3,
# a copy-paste slip; it belongs to the JOB_STATUS group like its neighbours.
df$LAID_OFF <- ifelse(df$JOB_STATUS == 3, 1, 0)

# WORK_ENJOYMENT: 1 = really like ... 4 = really dislike.
df$REALLY_LIKE_WORKING <- ifelse(df$WORK_ENJOYMENT == 1, 1, 0)
df$LIKE_WORKING <- ifelse(df$WORK_ENJOYMENT == 2, 1, 0)
df$DISLIKE_WORKING <- ifelse(df$WORK_ENJOYMENT == 3, 1, 0)
df$REALLY_DISLIKE_WORKING <- ifelse(df$WORK_ENJOYMENT == 4, 1, 0)

# MARITAL_STATUS: only code 2 (single) gets an indicator.
df$SINGLE <- ifelse(df$MARITAL_STATUS == 2, 1, 0)

# Waist sizes of 13 or less are treated as missing: set the flag first, then
# zero the value (the flag assignment must see the original WAIST_SIZE).
df$MISSING_WAIST_SIZE[df$WAIST_SIZE <= 13] <- 1
df$WAIST_SIZE[df$WAIST_SIZE <= 13] <- 0

# NOTE(review): values >= 14 are recoded to 12 while 13 is left untouched, so
# the column maximum stays 13 -- confirm whether the cap should be 12 or 13.
df$WEEKS_PAID_VACATION[df$WEEKS_PAID_VACATION >= 14] <- 12

# The coded source columns are now redundant with the indicators above.
df <- subset(df, select = -c(RATE_HEALTH, ACTIVITY_FREQUENCY, MARITAL_STATUS,
                             WORK_ENJOYMENT, JOB_STATUS))

Count Summary

##  WEEKLY_WORK_HOURS      AGE        HEART_CONDITION  ANY_DEPENDENTS  
##  Min.   : 0.00     Min.   :50.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:30.00     1st Qu.:55.00   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :40.00     Median :59.00   Median :0.0000   Median :0.0000  
##  Mean   :36.78     Mean   :60.72   Mean   :0.1305   Mean   :0.1737  
##  3rd Qu.:45.00     3rd Qu.:64.00   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :98.00     Max.   :90.00   Max.   :1.0000   Max.   :1.0000  
##  WORKING_SPOUSE   WEEKS_PAID_VACATION REDUCE_PAID_WORK_HOURS    MEDICARE     
##  Min.   :0.0000   Min.   : 0.000      Min.   :0.0000         Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.: 0.000      1st Qu.:0.0000         1st Qu.:0.0000  
##  Median :0.0000   Median : 1.000      Median :0.0000         Median :0.0000  
##  Mean   :0.4442   Mean   : 1.618      Mean   :0.2755         Mean   :0.2299  
##  3rd Qu.:1.0000   3rd Qu.: 3.000      3rd Qu.:1.0000         3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :13.000      Max.   :1.0000         Max.   :1.0000  
##     MEDICAID      HOSPITAL_EXPENSES    RETIRED         VOLUNTEER     
##  Min.   :0.0000   Min.   : 0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.: 0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median : 0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.0863   Mean   : 0.2063   Mean   :0.2096   Mean   :0.4149  
##  3rd Qu.:0.0000   3rd Qu.: 0.0000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :30.0000   Max.   :1.0000   Max.   :1.0000  
##    HOME_VALUE       WAIST_SIZE          MALE        YEARS_EDUCATED 
##  Min.   :   0.0   Min.   : 0.000   Min.   :0.0000   Min.   : 0.00  
##  1st Qu.:   0.0   1st Qu.: 0.000   1st Qu.:0.0000   1st Qu.:12.00  
##  Median :   0.0   Median : 0.000   Median :0.0000   Median :14.00  
##  Mean   : 170.4   Mean   : 9.976   Mean   :0.4722   Mean   :13.22  
##  3rd Qu.: 156.0   3rd Qu.: 0.000   3rd Qu.:1.0000   3rd Qu.:16.00  
##  Max.   :6000.0   Max.   :68.250   Max.   :1.0000   Max.   :17.00  
##  WORK_LIMITING_CONDITION  TOTAL_WEALTH          DEBTS        
##  Min.   :0.0000          Min.   : -463000   Min.   :   0.00  
##  1st Qu.:0.0000          1st Qu.:       0   1st Qu.:   0.00  
##  Median :0.0000          Median :       0   Median :   0.00  
##  Mean   :0.1227          Mean   :  141995   Mean   :  27.86  
##  3rd Qu.:0.0000          3rd Qu.:   15500   3rd Qu.:   1.25  
##  Max.   :1.0000          Max.   :30900000   Max.   :2000.00  
##  MISSING_RATE_HEALTH MISSING_HEART_CONDITION MISSING_ACTIVITY_FREQUENCY
##  Min.   :0.0000000   Min.   :0.0000000       Min.   :0.000000          
##  1st Qu.:0.0000000   1st Qu.:0.0000000       1st Qu.:0.000000          
##  Median :0.0000000   Median :0.0000000       Median :0.000000          
##  Mean   :0.0004785   Mean   :0.0004785       Mean   :0.001117          
##  3rd Qu.:0.0000000   3rd Qu.:0.0000000       3rd Qu.:0.000000          
##  Max.   :1.0000000   Max.   :1.0000000       Max.   :1.000000          
##  MISSING_WORK_ENJOYMENT MISSING_ANY_DEPENDENTS MISSING_WORKING_SPOUSE
##  Min.   :0.00000        Min.   :0.00000        Min.   :0.0000        
##  1st Qu.:0.00000        1st Qu.:0.00000        1st Qu.:0.0000        
##  Median :0.00000        Median :0.00000        Median :0.0000        
##  Mean   :0.03956        Mean   :0.04738        Mean   :0.4513        
##  3rd Qu.:0.00000        3rd Qu.:0.00000        3rd Qu.:1.0000        
##  Max.   :1.00000        Max.   :1.00000        Max.   :1.0000        
##  MISSING_WEEKS_PAID_VACATION MISSING_REDUCE_PAID_WORK_HOURS MISSING_JOB_STATUS
##  Min.   :0.000               Min.   :0.0000                 Min.   :0.000000  
##  1st Qu.:0.000               1st Qu.:0.0000                 1st Qu.:0.000000  
##  Median :0.000               Median :0.0000                 Median :0.000000  
##  Mean   :0.211               Mean   :0.2098                 Mean   :0.004147  
##  3rd Qu.:0.000               3rd Qu.:0.0000                 3rd Qu.:0.000000  
##  Max.   :1.000               Max.   :1.0000                 Max.   :1.000000  
##  MISSING_MEDICARE   MISSING_MEDICAID MISSING_RETIRED   MISSING_VOLUNTEER 
##  Min.   :0.000000   Min.   :0.0000   Min.   :0.00000   Min.   :0.000000  
##  1st Qu.:0.000000   1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.000000  
##  Median :0.000000   Median :0.0000   Median :0.00000   Median :0.000000  
##  Mean   :0.004307   Mean   :0.0067   Mean   :0.02249   Mean   :0.000319  
##  3rd Qu.:0.000000   3rd Qu.:0.0000   3rd Qu.:0.00000   3rd Qu.:0.000000  
##  Max.   :1.000000   Max.   :1.0000   Max.   :1.00000   Max.   :1.000000  
##  MISSING_TOTAL_WEALTH MISSING_HOME_VALUE MISSING_WAIST_SIZE
##  Min.   :0.0000       Min.   :0.0000     Min.   :0.0000    
##  1st Qu.:0.0000       1st Qu.:0.0000     1st Qu.:1.0000    
##  Median :1.0000       Median :0.0000     Median :1.0000    
##  Mean   :0.6719       Mean   :0.3835     Mean   :0.7531    
##  3rd Qu.:1.0000       3rd Qu.:1.0000     3rd Qu.:1.0000    
##  Max.   :1.0000       Max.   :1.0000     Max.   :1.0000    
##  MISSING_YEARS_EDUCATED MISSING_DEBTS    ACTIVE_ONCE_WEEKLY
##  Min.   :0.00000        Min.   :0.0000   Min.   :0.0000    
##  1st Qu.:0.00000        1st Qu.:0.0000   1st Qu.:0.0000    
##  Median :0.00000        Median :0.0000   Median :0.0000    
##  Mean   :0.02393        Mean   :0.3835   Mean   :0.1764    
##  3rd Qu.:0.00000        3rd Qu.:1.0000   3rd Qu.:0.0000    
##  Max.   :1.00000        Max.   :1.0000   Max.   :1.0000    
##  ACTIVE_MORE_THAN_ONCE_WEEKLY  ACTIVE_DAILY      NOT_ACTIVE    
##  Min.   :0.0000               Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000               1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000               Median :0.0000   Median :0.0000  
##  Mean   :0.4586               Mean   :0.1238   Mean   :0.2412  
##  3rd Qu.:1.0000               3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.0000               Max.   :1.0000   Max.   :1.0000  
##  EXCELLENT_HEALTH VERY_GOOD_HEALTH  GOOD_HEALTH      FAIR_HEALTH    
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.1117   Mean   :0.3591   Mean   :0.3623   Mean   :0.1468  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:1.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.0000  
##   POOR_HEALTH         WORKING         UNEMPLOYED          LAID_OFF       
##  Min.   :0.00000   Min.   :0.0000   Min.   :0.000000   Min.   :0.000000  
##  1st Qu.:0.00000   1st Qu.:1.0000   1st Qu.:0.000000   1st Qu.:0.000000  
##  Median :0.00000   Median :1.0000   Median :0.000000   Median :0.000000  
##  Mean   :0.01978   Mean   :0.8968   Mean   :0.006062   Mean   :0.004945  
##  3rd Qu.:0.00000   3rd Qu.:1.0000   3rd Qu.:0.000000   3rd Qu.:0.000000  
##  Max.   :1.00000   Max.   :1.0000   Max.   :1.000000   Max.   :1.000000  
##  REALLY_LIKE_WORKING  LIKE_WORKING    DISLIKE_WORKING   REALLY_DISLIKE_WORKING
##  Min.   :0.0000      Min.   :0.0000   Min.   :0.00000   Min.   :0.00000       
##  1st Qu.:0.0000      1st Qu.:0.0000   1st Qu.:0.00000   1st Qu.:0.00000       
##  Median :0.0000      Median :1.0000   Median :0.00000   Median :0.00000       
##  Mean   :0.2744      Mean   :0.5889   Mean   :0.07944   Mean   :0.01771       
##  3rd Qu.:1.0000      3rd Qu.:1.0000   3rd Qu.:0.00000   3rd Qu.:0.00000       
##  Max.   :1.0000      Max.   :1.0000   Max.   :1.00000   Max.   :1.00000       
##      SINGLE        
##  Min.   :0.000000  
##  1st Qu.:0.000000  
##  Median :0.000000  
##  Mean   :0.002552  
##  3rd Qu.:0.000000  
##  Max.   :1.000000

Missing Variables

  • Number of missing health rating: 3
  • Number of missing heart condition: 3
  • Number of missing activity frequency: 7
  • Number of missing work enjoyment: 248
  • Number of missing dependent reply: 297
  • Number of missing weeks paid vacation: 1323
  • Number of missing reduce paid work hours: 1315
  • Number of missing job status: 26
  • Number of missing medicare: 27
  • Number of missing medicaid: 42
  • Number of missing retired: 141
  • Number of missing volunteer: 2
  • Number of missing total wealth: 4214
  • Number of missing home value: 2405
  • Number of missing debts: 2405
  • Number of missing waist size: 4723
  • Number of missing years of education: 150

Removed rows belonging to small-percentage groups

# One pass over `df`:
#   * rows: discard any row flagged (== 1) as missing on one of the listed
#     variables, or flagged LAID_OFF, UNEMPLOYED or SINGLE. subset() also
#     discards rows where the condition evaluates to NA.
#   * columns: drop the flags used for that filter (now constant), plus
#     TOTAL_WEALTH, MISSING_TOTAL_WEALTH and MISSING_WORKING_SPOUSE.
# subset() evaluates both arguments against the incoming data frame, so the
# row condition may reference columns that `select` removes.
df <- subset(
  df,
  subset = !(
    MISSING_ACTIVITY_FREQUENCY == 1 |
      MISSING_ANY_DEPENDENTS == 1 |
      MISSING_HEART_CONDITION == 1 |
      MISSING_JOB_STATUS == 1 |
      MISSING_MEDICAID == 1 |
      MISSING_MEDICARE == 1 |
      MISSING_RATE_HEALTH == 1 |
      MISSING_RETIRED == 1 |
      MISSING_VOLUNTEER == 1 |
      MISSING_WORK_ENJOYMENT == 1 |
      MISSING_YEARS_EDUCATED == 1 |
      LAID_OFF == 1 |
      UNEMPLOYED == 1 |
      SINGLE == 1
  ),
  select = -c(
    MISSING_ACTIVITY_FREQUENCY, MISSING_ANY_DEPENDENTS,
    MISSING_HEART_CONDITION, MISSING_JOB_STATUS,
    MISSING_MEDICAID, MISSING_MEDICARE,
    MISSING_RATE_HEALTH, MISSING_RETIRED,
    MISSING_VOLUNTEER, MISSING_WORK_ENJOYMENT,
    LAID_OFF, UNEMPLOYED,
    TOTAL_WEALTH, MISSING_TOTAL_WEALTH,
    MISSING_YEARS_EDUCATED, SINGLE,
    MISSING_WORKING_SPOUSE
  )
)

Removed implicit (reference) categories to avoid perfect multicollinearity

# Remove one indicator from each of the health, work-enjoyment and activity
# groups; the removed level becomes the implicit baseline of its group.
df <- subset(
  df,
  select = -c(ACTIVE_MORE_THAN_ONCE_WEEKLY, LIKE_WORKING, GOOD_HEALTH)
)

# Regress weekly work hours on every remaining column of `df`.
lm1 <- lm(formula = WEEKLY_WORK_HOURS ~ ., data = df)

Final Summary/Counts

# Per-column summary of the filtered data set (printed below).
summary(df)
##  WEEKLY_WORK_HOURS      AGE        HEART_CONDITION  ANY_DEPENDENTS  
##  Min.   : 0.00     Min.   :50.00   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:30.00     1st Qu.:55.00   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :40.00     Median :59.00   Median :0.0000   Median :0.0000  
##  Mean   :37.09     Mean   :60.74   Mean   :0.1271   Mean   :0.1834  
##  3rd Qu.:43.75     3rd Qu.:64.00   3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :98.00     Max.   :90.00   Max.   :1.0000   Max.   :1.0000  
##  WORKING_SPOUSE  WEEKS_PAID_VACATION REDUCE_PAID_WORK_HOURS    MEDICARE     
##  Min.   :0.000   Min.   : 0.000      Min.   :0.0000         Min.   :0.0000  
##  1st Qu.:0.000   1st Qu.: 0.000      1st Qu.:0.0000         1st Qu.:0.0000  
##  Median :0.000   Median : 1.000      Median :0.0000         Median :0.0000  
##  Mean   :0.433   Mean   : 1.653      Mean   :0.2835         Mean   :0.2308  
##  3rd Qu.:1.000   3rd Qu.: 3.000      3rd Qu.:1.0000         3rd Qu.:0.0000  
##  Max.   :1.000   Max.   :13.000      Max.   :1.0000         Max.   :1.0000  
##     MEDICAID       HOSPITAL_EXPENSES    RETIRED         VOLUNTEER     
##  Min.   :0.00000   Min.   : 0.0000   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.00000   1st Qu.: 0.0000   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.00000   Median : 0.0000   Median :0.0000   Median :0.0000  
##  Mean   :0.08887   Mean   : 0.1995   Mean   :0.2062   Mean   :0.4146  
##  3rd Qu.:0.00000   3rd Qu.: 0.0000   3rd Qu.:0.0000   3rd Qu.:1.0000  
##  Max.   :1.00000   Max.   :23.0000   Max.   :1.0000   Max.   :1.0000  
##    HOME_VALUE       WAIST_SIZE         MALE        YEARS_EDUCATED 
##  Min.   :   0.0   Min.   : 0.00   Min.   :0.0000   Min.   : 0.00  
##  1st Qu.:   0.0   1st Qu.: 0.00   1st Qu.:0.0000   1st Qu.:12.00  
##  Median :   0.0   Median : 0.00   Median :0.0000   Median :14.00  
##  Mean   : 177.4   Mean   :10.33   Mean   :0.4596   Mean   :13.57  
##  3rd Qu.: 160.0   3rd Qu.:29.00   3rd Qu.:1.0000   3rd Qu.:16.00  
##  Max.   :6000.0   Max.   :68.25   Max.   :1.0000   Max.   :17.00  
##  WORK_LIMITING_CONDITION     DEBTS         MISSING_WEEKS_PAID_VACATION
##  Min.   :0.0000          Min.   :   0.00   Min.   :0.000              
##  1st Qu.:0.0000          1st Qu.:   0.00   1st Qu.:0.000              
##  Median :0.0000          Median :   0.00   Median :0.000              
##  Mean   :0.1223          Mean   :  26.99   Mean   :0.199              
##  3rd Qu.:0.0000          3rd Qu.:   1.70   3rd Qu.:0.000              
##  Max.   :1.0000          Max.   :2000.00   Max.   :1.000              
##  MISSING_REDUCE_PAID_WORK_HOURS MISSING_HOME_VALUE MISSING_WAIST_SIZE
##  Min.   :0.0000                 Min.   :0.0000     Min.   :0.0000    
##  1st Qu.:0.0000                 1st Qu.:0.0000     1st Qu.:0.0000    
##  Median :0.0000                 Median :0.0000     Median :1.0000    
##  Mean   :0.1902                 Mean   :0.3592     Mean   :0.7451    
##  3rd Qu.:0.0000                 3rd Qu.:1.0000     3rd Qu.:1.0000    
##  Max.   :1.0000                 Max.   :1.0000     Max.   :1.0000    
##  MISSING_DEBTS    ACTIVE_ONCE_WEEKLY  ACTIVE_DAILY      NOT_ACTIVE    
##  Min.   :0.0000   Min.   :0.0000     Min.   :0.0000   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000     1st Qu.:0.0000   1st Qu.:0.0000  
##  Median :0.0000   Median :0.0000     Median :0.0000   Median :0.0000  
##  Mean   :0.3592   Mean   :0.1761     Mean   :0.1241   Mean   :0.2429  
##  3rd Qu.:1.0000   3rd Qu.:0.0000     3rd Qu.:0.0000   3rd Qu.:0.0000  
##  Max.   :1.0000   Max.   :1.0000     Max.   :1.0000   Max.   :1.0000  
##  EXCELLENT_HEALTH VERY_GOOD_HEALTH  FAIR_HEALTH      POOR_HEALTH     
##  Min.   :0.0000   Min.   :0.0000   Min.   :0.0000   Min.   :0.00000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:0.00000  
##  Median :0.0000   Median :0.0000   Median :0.0000   Median :0.00000  
##  Mean   :0.1105   Mean   :0.3599   Mean   :0.1467   Mean   :0.02001  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:0.0000   3rd Qu.:0.00000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :1.0000   Max.   :1.00000  
##     WORKING       REALLY_LIKE_WORKING DISLIKE_WORKING   REALLY_DISLIKE_WORKING
##  Min.   :0.0000   Min.   :0.0000      Min.   :0.00000   Min.   :0.00000       
##  1st Qu.:1.0000   1st Qu.:0.0000      1st Qu.:0.00000   1st Qu.:0.00000       
##  Median :1.0000   Median :0.0000      Median :0.00000   Median :0.00000       
##  Mean   :0.9146   Mean   :0.2852      Mean   :0.08465   Mean   :0.01855       
##  3rd Qu.:1.0000   3rd Qu.:1.0000      3rd Qu.:0.00000   3rd Qu.:0.00000       
##  Max.   :1.0000   Max.   :1.0000      Max.   :1.00000   Max.   :1.00000
# Headline counts for the filtered data set. Every column summed here is a
# 0/1 indicator, so each sum is the number of rows with that attribute.

# Despite its name, col_count holds the number of rows (observations).
col_count <- nrow(df)

# Respondent characteristics.
num_heart_condition <- sum(df$HEART_CONDITION)
num_working_spouse <- sum(df$WORKING_SPOUSE)
num_reduce_paid_work_hours <- sum(df$REDUCE_PAID_WORK_HOURS)
num_medicare <- sum(df$MEDICARE)
num_medicaid <- sum(df$MEDICAID)
num_retired <- sum(df$RETIRED)
num_volunteer <- sum(df$VOLUNTEER)
num_male <- sum(df$MALE)
num_work_limiting_condition <- sum(df$WORK_LIMITING_CONDITION)

# Rows still flagged as missing on each retained variable.
m_weeks_paid_vacation <- sum(df$MISSING_WEEKS_PAID_VACATION)
m_reduce_paid_work_hours <- sum(df$MISSING_REDUCE_PAID_WORK_HOURS)
m_home_value <- sum(df$MISSING_HOME_VALUE)
m_waist_size <- sum(df$MISSING_WAIST_SIZE)
m_debts <- sum(df$MISSING_DEBTS)

# Activity frequency.
num_active_weekly <- sum(df$ACTIVE_ONCE_WEEKLY)
num_active_daily <- sum(df$ACTIVE_DAILY)
num_inactive <- sum(df$NOT_ACTIVE)

# Self-rated health.
num_excellent_health <- sum(df$EXCELLENT_HEALTH)
num_very_good_health <- sum(df$VERY_GOOD_HEALTH)
num_fair_health <- sum(df$FAIR_HEALTH)
num_poor_health <- sum(df$POOR_HEALTH)

# Job status.
num_working <- sum(df$WORKING)

# Work enjoyment.
num_really_like_working <- sum(df$REALLY_LIKE_WORKING)
num_dislike_working <- sum(df$DISLIKE_WORKING)
num_really_dislike_working <- sum(df$REALLY_DISLIKE_WORKING)

# Persist the cleaned data set.
write.csv(df, "HSV_Final.csv")

Missing Variables

  • Number of missing weeks paid vacation: 1084
  • Number of missing reduce paid work hours: 1036
  • Number of missing home value: 1956
  • Number of missing waist size: 4058
  • Number of missing debts: 1956
library(vtable)
## Loading required package: kableExtra
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
# Summary-statistics table for every column of df (st() comes from vtable).
st(df)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 75 Max
WEEKLY_WORK_HOURS 5446 37.086 13.848 0 30 43.75 98
AGE 5446 60.737 7.034 50 55 64 90
HEART_CONDITION 5446 0.127 0.333 0 0 0 1
ANY_DEPENDENTS 5446 0.183 0.387 0 0 0 1
WORKING_SPOUSE 5446 0.433 0.496 0 0 1 1
WEEKS_PAID_VACATION 5446 1.653 2.132 0 0 3 13
REDUCE_PAID_WORK_HOURS 5446 0.284 0.451 0 0 1 1
MEDICARE 5446 0.231 0.421 0 0 0 1
MEDICAID 5446 0.089 0.285 0 0 0 1
HOSPITAL_EXPENSES 5446 0.2 1.186 0 0 0 23
RETIRED 5446 0.206 0.405 0 0 0 1
VOLUNTEER 5446 0.415 0.493 0 0 1 1
HOME_VALUE 5446 177.418 646.095 0 0 160 6000
WAIST_SIZE 5446 10.334 17.939 0 0 29 68.25
MALE 5446 0.46 0.498 0 0 1 1
YEARS_EDUCATED 5446 13.566 2.926 0 12 16 17
WORK_LIMITING_CONDITION 5446 0.122 0.328 0 0 0 1
DEBTS 5446 26.992 209.239 0 0 1.7 2000
MISSING_WEEKS_PAID_VACATION 5446 0.199 0.399 0 0 0 1
MISSING_REDUCE_PAID_WORK_HOURS 5446 0.19 0.393 0 0 0 1
MISSING_HOME_VALUE 5446 0.359 0.48 0 0 1 1
MISSING_WAIST_SIZE 5446 0.745 0.436 0 0 1 1
MISSING_DEBTS 5446 0.359 0.48 0 0 1 1
ACTIVE_ONCE_WEEKLY 5446 0.176 0.381 0 0 0 1
ACTIVE_DAILY 5446 0.124 0.33 0 0 0 1
NOT_ACTIVE 5446 0.243 0.429 0 0 0 1
EXCELLENT_HEALTH 5446 0.111 0.314 0 0 0 1
VERY_GOOD_HEALTH 5446 0.36 0.48 0 0 1 1
FAIR_HEALTH 5446 0.147 0.354 0 0 0 1
POOR_HEALTH 5446 0.02 0.14 0 0 0 1
WORKING 5446 0.915 0.279 0 1 1 1
REALLY_LIKE_WORKING 5446 0.285 0.452 0 0 1 1
DISLIKE_WORKING 5446 0.085 0.278 0 0 0 1
REALLY_DISLIKE_WORKING 5446 0.019 0.135 0 0 0 1

Non-zero Weekly Work Hours

```{r, echo=FALSE}
# Fixed chunk header: it was `{r. echo=false}` (period instead of comma,
# lowercase `false`), so knitr did not recognise the chunk and the code was
# rendered as text instead of being executed. The `$` operators, the straight
# quotes in write.csv() and the one-statement-per-line layout are restored.

# Keep only respondents who report positive weekly work hours.
df <- subset(df, df$WEEKLY_WORK_HOURS > 0)

# Recompute the headline counts on the restricted sample. Every column summed
# here is a 0/1 indicator. Despite its name, col_count is the number of rows.
col_count <- nrow(df)
num_heart_condition <- sum(df$HEART_CONDITION)
num_working_spouse <- sum(df$WORKING_SPOUSE)
num_reduce_paid_work_hours <- sum(df$REDUCE_PAID_WORK_HOURS)
num_medicare <- sum(df$MEDICARE)
num_medicaid <- sum(df$MEDICAID)
num_retired <- sum(df$RETIRED)
num_volunteer <- sum(df$VOLUNTEER)
num_male <- sum(df$MALE)
num_work_limiting_condition <- sum(df$WORK_LIMITING_CONDITION)

m_weeks_paid_vacation <- sum(df$MISSING_WEEKS_PAID_VACATION)
m_reduce_paid_work_hours <- sum(df$MISSING_REDUCE_PAID_WORK_HOURS)
m_home_value <- sum(df$MISSING_HOME_VALUE)
m_waist_size <- sum(df$MISSING_WAIST_SIZE)
m_debts <- sum(df$MISSING_DEBTS)

num_active_weekly <- sum(df$ACTIVE_ONCE_WEEKLY)
num_active_daily <- sum(df$ACTIVE_DAILY)
num_inactive <- sum(df$NOT_ACTIVE)

num_excellent_health <- sum(df$EXCELLENT_HEALTH)
num_very_good_health <- sum(df$VERY_GOOD_HEALTH)
num_fair_health <- sum(df$FAIR_HEALTH)
num_poor_health <- sum(df$POOR_HEALTH)

num_working <- sum(df$WORKING)

num_really_like_working <- sum(df$REALLY_LIKE_WORKING)
num_dislike_working <- sum(df$DISLIKE_WORKING)
num_really_dislike_working <- sum(df$REALLY_DISLIKE_WORKING)

summary(df)
write.csv(df, "HSV_Final_Nonzero.csv")
hist(df$WEEKS_PAID_VACATION)
```


* Total number of observations: 5446
* Number of males: 2503
* Number with a working spouse: 2358
* Number with a heart condition: 692 
* Number with a work limiting condition: 666
* Number able to reduce working hours: 1544 
* Number with medicare: 1257
* Number with medicaid: 484
* Number retired: 1123
* Number volunteering: 2258
* Number working: 4981
* Number active weekly: 959
* Number active more than once weekly: 2488
* Number active daily: 676
* Number inactive: 1323
* Number with excellent health: 602
* Number with very good health: 1960
* Number with good health: 1976
* Number with fair health: 799
* Number with poor health: 109
* Number who really like working: 1553
* Number who like working: 3331
* Number who dislike working: 461
* Number who really dislike working: 101

### Missing Variables ###
* Number of missing weeks paid vacation: 1084
* Number of missing reduce paid work hours: 1036
* Number of missing home value: 1956
* Number of missing waist size: 4058
* Number of missing debts: 1956


```r
# Summary-statistics table for every column of df (st() comes from vtable).
st(df)
Summary Statistics
Variable N Mean Std. Dev. Min Pctl. 25 Pctl. 75 Max
WEEKLY_WORK_HOURS 5446 37.086 13.848 0 30 43.75 98
AGE 5446 60.737 7.034 50 55 64 90
HEART_CONDITION 5446 0.127 0.333 0 0 0 1
ANY_DEPENDENTS 5446 0.183 0.387 0 0 0 1
WORKING_SPOUSE 5446 0.433 0.496 0 0 1 1
WEEKS_PAID_VACATION 5446 1.653 2.132 0 0 3 13
REDUCE_PAID_WORK_HOURS 5446 0.284 0.451 0 0 1 1
MEDICARE 5446 0.231 0.421 0 0 0 1
MEDICAID 5446 0.089 0.285 0 0 0 1
HOSPITAL_EXPENSES 5446 0.2 1.186 0 0 0 23
RETIRED 5446 0.206 0.405 0 0 0 1
VOLUNTEER 5446 0.415 0.493 0 0 1 1
HOME_VALUE 5446 177.418 646.095 0 0 160 6000
WAIST_SIZE 5446 10.334 17.939 0 0 29 68.25
MALE 5446 0.46 0.498 0 0 1 1
YEARS_EDUCATED 5446 13.566 2.926 0 12 16 17
WORK_LIMITING_CONDITION 5446 0.122 0.328 0 0 0 1
DEBTS 5446 26.992 209.239 0 0 1.7 2000
MISSING_WEEKS_PAID_VACATION 5446 0.199 0.399 0 0 0 1
MISSING_REDUCE_PAID_WORK_HOURS 5446 0.19 0.393 0 0 0 1
MISSING_HOME_VALUE 5446 0.359 0.48 0 0 1 1
MISSING_WAIST_SIZE 5446 0.745 0.436 0 0 1 1
MISSING_DEBTS 5446 0.359 0.48 0 0 1 1
ACTIVE_ONCE_WEEKLY 5446 0.176 0.381 0 0 0 1
ACTIVE_DAILY 5446 0.124 0.33 0 0 0 1
NOT_ACTIVE 5446 0.243 0.429 0 0 0 1
EXCELLENT_HEALTH 5446 0.111 0.314 0 0 0 1
VERY_GOOD_HEALTH 5446 0.36 0.48 0 0 1 1
FAIR_HEALTH 5446 0.147 0.354 0 0 0 1
POOR_HEALTH 5446 0.02 0.14 0 0 0 1
WORKING 5446 0.915 0.279 0 1 1 1
REALLY_LIKE_WORKING 5446 0.285 0.452 0 0 1 1
DISLIKE_WORKING 5446 0.085 0.278 0 0 0 1
REALLY_DISLIKE_WORKING 5446 0.019 0.135 0 0 0 1