# ~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 245| Homework 3 ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~
# CHIS Data Set: Relationship between weight and food stamp
# program participation. In this problem, you will be working
# with data from one of the public use files (PUF) from the
# 2007 California Health Information Survey (CHIS). CHIS is
# the nation's largest state health survey. A random-dial
# telephone survey conducted every two years on a wide range
# of health topics, CHIS data gives a detailed picture of the
# health and health care needs of California's large and
# diverse population.
#
# Data Dictionary:
# 1. stamp (0= not currently receiving food stamps; 1=currently
# receiving food stamps)
# 2. female (0=male, 1=female)
# 3. foodinsec (0= food secure, 1=food insecure (with or without hunger))
# 4. white (0=nonwhite, 1=white)
# 5. bmi (continuous)
# 6. weightkg (continuous in KG)
# Load the assignment dataset.
# Keep a handle on the URL connection so it can be closed explicitly once
# load() has finished reading from it (the pattern shown in ?load).
chis_con <- url("http://www.duke.edu/~sgrambow/crp241data/CHIS_Data.RData")
load(chis_con)
close(chis_con)
# Check the structure of the loaded data frame (column names and types).
str(CHIS_Data)
## 'data.frame': 1407 obs. of 6 variables:
## $ stamp : int 0 0 0 0 0 0 0 0 0 1 ...
## $ female : int 0 1 0 1 0 0 1 1 0 1 ...
## $ bmi : num 18.8 21.4 23.5 23.1 25.3 ...
## $ foodinsec: int 0 0 1 0 1 0 1 0 0 0 ...
## $ weightkg : int 61 62 68 59 73 64 53 59 81 67 ...
## $ white : int 0 0 0 1 1 1 0 1 0 0 ...
# Descriptive summary: per-column minimum, quartiles, mean, and maximum.
# For the 0/1 indicator columns (stamp, female, foodinsec, white) the mean
# is the proportion of observations coded 1.
summary(CHIS_Data)
## stamp female bmi foodinsec
## Min. :0.0000 Min. :0.0000 Min. :13.30 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:21.09 1st Qu.:0.0000
## Median :0.0000 Median :1.0000 Median :23.88 Median :0.0000
## Mean :0.1308 Mean :0.5665 Mean :25.08 Mean :0.2701
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:27.77 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000 Max. :53.13 Max. :1.0000
## weightkg white
## Min. : 41.00 Min. :0.0000
## 1st Qu.: 59.00 1st Qu.:0.0000
## Median : 68.00 Median :0.0000
## Mean : 70.64 Mean :0.3916
## 3rd Qu.: 80.00 3rd Qu.:1.0000
## Max. :150.00 Max. :1.0000
# Unadjusted model: simple linear regression examining the association
# between weight (kg) and food stamp status, with no other covariates.
fit.unadjusted <- lm(formula = weightkg ~ stamp, data = CHIS_Data)
# Coefficient table, residual summary, and overall fit statistics.
summary(fit.unadjusted)
##
## Call:
## lm(formula = weightkg ~ stamp, data = CHIS_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.788 -12.161 -2.161 8.839 79.839
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 70.1611 0.5063 138.574 < 2e-16 ***
## stamp 3.6270 1.4001 2.591 0.00968 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 17.71 on 1405 degrees of freedom
## Multiple R-squared: 0.004754, Adjusted R-squared: 0.004045
## F-statistic: 6.711 on 1 and 1405 DF, p-value: 0.009682
# 95% confidence intervals for the unadjusted model coefficients.
confint(fit.unadjusted, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 69.1678786 71.154280
## stamp 0.8804921 6.373436
# Multivariable model: association between weight (kg) and food stamp
# status, controlling for food security status, race, and sex.
fit.adjusted <- lm(
  formula = weightkg ~ stamp + foodinsec + white + female,
  data = CHIS_Data
)
# Coefficient table, residual summary, and overall fit statistics.
summary(fit.adjusted)
##
## Call:
## lm(formula = weightkg ~ stamp + foodinsec + white + female, data = CHIS_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.003 -10.858 -3.365 6.997 84.090
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 76.9511 0.7745 99.361 < 2e-16 ***
## stamp 5.7755 1.2989 4.446 9.42e-06 ***
## foodinsec 1.5447 0.9815 1.574 0.1158
## white 2.0517 0.8864 2.315 0.0208 *
## female -14.6377 0.8749 -16.731 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.16 on 1402 degrees of freedom
## Multiple R-squared: 0.1728, Adjusted R-squared: 0.1705
## F-statistic: 73.24 on 4 and 1402 DF, p-value: < 2.2e-16
# 95% confidence intervals for the adjusted model coefficients.
confint(fit.adjusted, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 75.4318470 78.470281
## stamp 3.2274657 8.323493
## foodinsec -0.3806941 3.470011
## white 0.3128606 3.790452
## female -16.3539843 -12.921468
# Basic regression diagnostics discussed in class for the multivariable
# linear regression model fitted above, drawn into a 2 x 2 grid of panels.
# par() returns the previous settings, so save them and restore them after
# plotting instead of hard-coding a reset to a 1 x 1 layout.
old_par <- par(mfrow = c(2, 2))
plot(fit.adjusted)

par(old_par)
# Interaction model: tests whether the association between food stamp
# status and weight differs by sex, controlling for food security status
# and race.
# Note: `stamp * female` already expands to both main effects plus the
# `stamp:female` interaction, so listing the main effects as well is
# redundant but harmless; the term order is kept as-is so the coefficient
# table matches the recorded output.
fit.interaction <- lm(
  formula = weightkg ~ stamp + foodinsec + white + female + stamp * female,
  data = CHIS_Data
)
# Coefficient table, residual summary, and overall fit statistics.
summary(fit.interaction)
##
## Call:
## lm(formula = weightkg ~ stamp + foodinsec + white + female +
## stamp * female, data = CHIS_Data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -38.165 -10.738 -3.244 6.835 84.246
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 77.1153 0.7952 96.981 <2e-16 ***
## stamp 4.0575 2.2883 1.773 0.0764 .
## foodinsec 1.5104 0.9823 1.538 0.1244
## white 2.0500 0.8864 2.313 0.0209 *
## female -14.9217 0.9287 -16.067 <2e-16 ***
## stamp:female 2.5217 2.7651 0.912 0.3619
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 16.16 on 1401 degrees of freedom
## Multiple R-squared: 0.1733, Adjusted R-squared: 0.1704
## F-statistic: 58.75 on 5 and 1401 DF, p-value: < 2.2e-16
# 95% confidence intervals for the interaction model coefficients.
confint(fit.interaction, level = 0.95)
## 2.5 % 97.5 %
## (Intercept) 75.5554572 78.675130
## stamp -0.4313035 8.546257
## foodinsec -0.4164540 3.437300
## white 0.3110697 3.788880
## female -16.7434968 -13.099884
## stamp:female -2.9024219 7.945784
# End of Program