# ~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 245| Homework 3 ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~
# CHIS Data Set: Relationship between weight and food stamp 
# program participation. In this problem, you will be working 
# with data from one of the public use files (PUF) from the 
# 2007 California Health Interview Survey (CHIS). CHIS is 
# the nation's largest state health survey. A random-dial 
# telephone survey conducted every two years on a wide range 
# of health topics, CHIS data gives a detailed picture of the 
# health and health care needs of California's large and 
# diverse population.
# 
# Data Dictionary: 
#  1.   stamp      (0= not currently receiving food stamps; 1=currently 
#                    receiving food stamps)
# 2.    female     (0=male, 1=female)
# 3.    foodinsec  (0= food secure, 1=food insecure (with or without hunger))
# 4.    white      (0=nonwhite, 1=white)
# 5.    bmi        (continuous)
# 6.    weightkg   (continuous in KG)

# Load the assignment dataset.
# load() restores the objects saved in the .RData file into the current
# environment; the rest of this script expects a data frame named CHIS_Data.
# The connection is bound to a name so it can be closed explicitly once the
# data has been read, instead of being left for the garbage collector to
# reclaim (which emits a "closing unused connection" warning).
chis_con <- url("http://www.duke.edu/~sgrambow/crp241data/CHIS_Data.RData")
load(chis_con)
close(chis_con)

# Inspect the data frame: number of observations, column names and types
str(object = CHIS_Data)
## 'data.frame':    1407 obs. of  6 variables:
##  $ stamp    : int  0 0 0 0 0 0 0 0 0 1 ...
##  $ female   : int  0 1 0 1 0 0 1 1 0 1 ...
##  $ bmi      : num  18.8 21.4 23.5 23.1 25.3 ...
##  $ foodinsec: int  0 0 1 0 1 0 1 0 0 0 ...
##  $ weightkg : int  61 62 68 59 73 64 53 59 81 67 ...
##  $ white    : int  0 0 0 1 1 1 0 1 0 0 ...
# Descriptive summary (min, quartiles, mean, max) of every column
summary(object = CHIS_Data)
##      stamp            female            bmi          foodinsec     
##  Min.   :0.0000   Min.   :0.0000   Min.   :13.30   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:0.0000   1st Qu.:21.09   1st Qu.:0.0000  
##  Median :0.0000   Median :1.0000   Median :23.88   Median :0.0000  
##  Mean   :0.1308   Mean   :0.5665   Mean   :25.08   Mean   :0.2701  
##  3rd Qu.:0.0000   3rd Qu.:1.0000   3rd Qu.:27.77   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :1.0000   Max.   :53.13   Max.   :1.0000  
##     weightkg          white       
##  Min.   : 41.00   Min.   :0.0000  
##  1st Qu.: 59.00   1st Qu.:0.0000  
##  Median : 68.00   Median :0.0000  
##  Mean   : 70.64   Mean   :0.3916  
##  3rd Qu.: 80.00   3rd Qu.:1.0000  
##  Max.   :150.00   Max.   :1.0000
# Unadjusted model: simple linear regression examining the association
# between weight (kg) and food stamp status, with no other covariates.

fit.unadjusted <- lm(
  formula = weightkg ~ stamp,
  data = CHIS_Data
)
summary(object = fit.unadjusted)
## 
## Call:
## lm(formula = weightkg ~ stamp, data = CHIS_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -30.788 -12.161  -2.161   8.839  79.839 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  70.1611     0.5063 138.574  < 2e-16 ***
## stamp         3.6270     1.4001   2.591  0.00968 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.71 on 1405 degrees of freedom
## Multiple R-squared:  0.004754,   Adjusted R-squared:  0.004045 
## F-statistic: 6.711 on 1 and 1405 DF,  p-value: 0.009682
# 95% confidence intervals for the unadjusted model coefficients
confint(object = fit.unadjusted, level = 0.95)
##                  2.5 %    97.5 %
## (Intercept) 69.1678786 71.154280
## stamp        0.8804921  6.373436
# Adjusted (multivariable) model: association between weight (kg) and
# food stamp status, controlling for food security status, race, and sex.

fit.adjusted <- lm(
  formula = weightkg ~ stamp + foodinsec + white + female,
  data = CHIS_Data
)
summary(object = fit.adjusted)
## 
## Call:
## lm(formula = weightkg ~ stamp + foodinsec + white + female, data = CHIS_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.003 -10.858  -3.365   6.997  84.090 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  76.9511     0.7745  99.361  < 2e-16 ***
## stamp         5.7755     1.2989   4.446 9.42e-06 ***
## foodinsec     1.5447     0.9815   1.574   0.1158    
## white         2.0517     0.8864   2.315   0.0208 *  
## female      -14.6377     0.8749 -16.731  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.16 on 1402 degrees of freedom
## Multiple R-squared:  0.1728, Adjusted R-squared:  0.1705 
## F-statistic: 73.24 on 4 and 1402 DF,  p-value: < 2.2e-16
# 95% confidence intervals for the adjusted model coefficients
confint(object = fit.adjusted, level = 0.95)
##                   2.5 %     97.5 %
## (Intercept)  75.4318470  78.470281
## stamp         3.2274657   8.323493
## foodinsec    -0.3806941   3.470011
## white         0.3128606   3.790452
## female      -16.3539843 -12.921468
# Basic regression diagnostics discussed in class for the multivariable
# linear regression model (fit.adjusted) fitted above.
# The plotting region is split into a 2 x 2 grid so the diagnostic plots
# share one page. par() returns the previous settings when called with new
# values, so they are captured here and restored afterwards rather than
# assuming the prior layout was a single panel.
old_par <- par(mfrow = c(2, 2))
plot(fit.adjusted)

par(old_par)
# Interaction model: tests whether the association between food stamp
# status and weight (kg) differs by sex, controlling for food security
# status and race.
# In an R formula, stamp*female expands to stamp + female + stamp:female,
# so the main effects listed explicitly are repeated (harmlessly) and the
# only term added over the adjusted model is stamp:female.

fit.interaction <- lm(
  formula = weightkg ~ stamp + foodinsec + white + female + stamp * female,
  data = CHIS_Data
)
summary(object = fit.interaction)
## 
## Call:
## lm(formula = weightkg ~ stamp + foodinsec + white + female + 
##     stamp * female, data = CHIS_Data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -38.165 -10.738  -3.244   6.835  84.246 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   77.1153     0.7952  96.981   <2e-16 ***
## stamp          4.0575     2.2883   1.773   0.0764 .  
## foodinsec      1.5104     0.9823   1.538   0.1244    
## white          2.0500     0.8864   2.313   0.0209 *  
## female       -14.9217     0.9287 -16.067   <2e-16 ***
## stamp:female   2.5217     2.7651   0.912   0.3619    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 16.16 on 1401 degrees of freedom
## Multiple R-squared:  0.1733, Adjusted R-squared:  0.1704 
## F-statistic: 58.75 on 5 and 1401 DF,  p-value: < 2.2e-16
# 95% confidence intervals for the interaction model coefficients
confint(object = fit.interaction, level = 0.95)
##                    2.5 %     97.5 %
## (Intercept)   75.5554572  78.675130
## stamp         -0.4313035   8.546257
## foodinsec     -0.4164540   3.437300
## white          0.3110697   3.788880
## female       -16.7434968 -13.099884
## stamp:female  -2.9024219   7.945784
# End of Program