library(readr)
library(ggplot2)
# Import the NHANES extract as a tibble. Column types are guessed by
# read_csv(); the specification echoed below reports doubles by default and
# a logical `bmihead`.
NHANES_v1_1_1_ <- read_csv(file = "Downloads/NHANES_v1(1) (1).csv")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   bmihead = col_logical()
## )
## ℹ Use `spec()` for the full column specifications.

Problem 1: Choose one categorical independent variable (IV) and theorize how it may affect the DV (i.e., body mass index). Estimate a simple linear regression model of body mass index using the categorical IV as the predictor.

# List the column names of the imported data set (73 columns).
names(NHANES_v1_1_1_)
##  [1] "seqn"     "sddsrvyr" "ridstatr" "riagendr" "ridageyr" "ridagemn"
##  [7] "ridreth1" "ridreth3" "ridexmon" "ridexagy" "ridexagm" "dmqmiliz"
## [13] "dmqadfc"  "dmdborn4" "dmdcitzn" "dmdyrsus" "dmdeduc3" "dmdeduc2"
## [19] "dmdmartl" "ridexprg" "sialang"  "siaproxy" "siaintrp" "fialang" 
## [25] "fiaproxy" "fiaintrp" "mialang"  "miaproxy" "miaintrp" "aialanga"
## [31] "wtint2yr" "wtmec2yr" "sdmvpsu"  "sdmvstra" "indhhin2" "indfmin2"
## [37] "indfmpir" "dmdhhsiz" "dmdfmsiz" "dmdhhsza" "dmdhhszb" "dmdhhsze"
## [43] "dmdhrgnd" "dmdhrage" "dmdhrbr4" "dmdhredu" "dmdhrmar" "dmdhsedu"
## [49] "bmdstats" "bmxwt"    "bmiwt"    "bmxrecum" "bmirecum" "bmxhead" 
## [55] "bmihead"  "bmxht"    "bmiht"    "bmxbmi"   "bmdbmic"  "bmxleg"  
## [61] "bmileg"   "bmxarml"  "bmiarml"  "bmxarmc"  "bmiarmc"  "bmxwaist"
## [67] "bmiwaist" "bmxsad1"  "bmxsad2"  "bmxsad3"  "bmxsad4"  "bmdavsad"
## [73] "bmdsadcm"
# Preview the first six rows of the data set.
head(NHANES_v1_1_1_)
## # A tibble: 6 x 73
##    seqn sddsrvyr ridstatr riagendr ridageyr ridagemn ridreth1 ridreth3 ridexmon
##   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1 62161        7        2        1       22       NA        3        3        2
## 2 62162        7        2        2        3       NA        1        1        1
## 3 62163        7        2        1       14       NA        5        6        2
## 4 62164        7        2        2       44       NA        3        3        1
## 5 62165        7        2        2       14       NA        4        4        2
## 6 62166        7        2        1        9       NA        3        3        2
## # … with 64 more variables: ridexagy <dbl>, ridexagm <dbl>, dmqmiliz <dbl>,
## #   dmqadfc <dbl>, dmdborn4 <dbl>, dmdcitzn <dbl>, dmdyrsus <dbl>,
## #   dmdeduc3 <dbl>, dmdeduc2 <dbl>, dmdmartl <dbl>, ridexprg <dbl>,
## #   sialang <dbl>, siaproxy <dbl>, siaintrp <dbl>, fialang <dbl>,
## #   fiaproxy <dbl>, fiaintrp <dbl>, mialang <dbl>, miaproxy <dbl>,
## #   miaintrp <dbl>, aialanga <dbl>, wtint2yr <dbl>, wtmec2yr <dbl>,
## #   sdmvpsu <dbl>, sdmvstra <dbl>, indhhin2 <dbl>, indfmin2 <dbl>,
## #   indfmpir <dbl>, dmdhhsiz <dbl>, dmdfmsiz <dbl>, dmdhhsza <dbl>,
## #   dmdhhszb <dbl>, dmdhhsze <dbl>, dmdhrgnd <dbl>, dmdhrage <dbl>,
## #   dmdhrbr4 <dbl>, dmdhredu <dbl>, dmdhrmar <dbl>, dmdhsedu <dbl>,
## #   bmdstats <dbl>, bmxwt <dbl>, bmiwt <dbl>, bmxrecum <dbl>, bmirecum <dbl>,
## #   bmxhead <dbl>, bmihead <lgl>, bmxht <dbl>, bmiht <dbl>, bmxbmi <dbl>,
## #   bmdbmic <dbl>, bmxleg <dbl>, bmileg <dbl>, bmxarml <dbl>, bmiarml <dbl>,
## #   bmxarmc <dbl>, bmiarmc <dbl>, bmxwaist <dbl>, bmiwaist <dbl>,
## #   bmxsad1 <dbl>, bmxsad2 <dbl>, bmxsad3 <dbl>, bmxsad4 <dbl>, bmdavsad <dbl>,
## #   bmdsadcm <dbl>
# Preview the last six rows of the data set.
tail(NHANES_v1_1_1_)
## # A tibble: 6 x 73
##    seqn sddsrvyr ridstatr riagendr ridageyr ridagemn ridreth1 ridreth3 ridexmon
##   <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>    <dbl>
## 1 71911        7        2        1       27       NA        1        1        1
## 2 71912        7        2        1       40       NA        3        3        2
## 3 71913        7        2        2       18       NA        5        6        1
## 4 71914        7        2        2       10       NA        3        3        2
## 5 71915        7        2        1       60       NA        3        3        2
## 6 71916        7        2        1       16       NA        3        3        1
## # … with 64 more variables: ridexagy <dbl>, ridexagm <dbl>, dmqmiliz <dbl>,
## #   dmqadfc <dbl>, dmdborn4 <dbl>, dmdcitzn <dbl>, dmdyrsus <dbl>,
## #   dmdeduc3 <dbl>, dmdeduc2 <dbl>, dmdmartl <dbl>, ridexprg <dbl>,
## #   sialang <dbl>, siaproxy <dbl>, siaintrp <dbl>, fialang <dbl>,
## #   fiaproxy <dbl>, fiaintrp <dbl>, mialang <dbl>, miaproxy <dbl>,
## #   miaintrp <dbl>, aialanga <dbl>, wtint2yr <dbl>, wtmec2yr <dbl>,
## #   sdmvpsu <dbl>, sdmvstra <dbl>, indhhin2 <dbl>, indfmin2 <dbl>,
## #   indfmpir <dbl>, dmdhhsiz <dbl>, dmdfmsiz <dbl>, dmdhhsza <dbl>,
## #   dmdhhszb <dbl>, dmdhhsze <dbl>, dmdhrgnd <dbl>, dmdhrage <dbl>,
## #   dmdhrbr4 <dbl>, dmdhredu <dbl>, dmdhrmar <dbl>, dmdhsedu <dbl>,
## #   bmdstats <dbl>, bmxwt <dbl>, bmiwt <dbl>, bmxrecum <dbl>, bmirecum <dbl>,
## #   bmxhead <dbl>, bmihead <lgl>, bmxht <dbl>, bmiht <dbl>, bmxbmi <dbl>,
## #   bmdbmic <dbl>, bmxleg <dbl>, bmileg <dbl>, bmxarml <dbl>, bmiarml <dbl>,
## #   bmxarmc <dbl>, bmiarmc <dbl>, bmxwaist <dbl>, bmiwaist <dbl>,
## #   bmxsad1 <dbl>, bmxsad2 <dbl>, bmxsad3 <dbl>, bmxsad4 <dbl>, bmdavsad <dbl>,
## #   bmdsadcm <dbl>
# Per-column distribution summary, including the NA count of each variable.
summary(NHANES_v1_1_1_)
##       seqn          sddsrvyr    ridstatr        riagendr        ridageyr   
##  Min.   :62161   Min.   :7   Min.   :1.000   Min.   :1.000   Min.   : 0.0  
##  1st Qu.:64600   1st Qu.:7   1st Qu.:2.000   1st Qu.:1.000   1st Qu.: 9.0  
##  Median :67038   Median :7   Median :2.000   Median :2.000   Median :26.0  
##  Mean   :67038   Mean   :7   Mean   :1.957   Mean   :1.502   Mean   :31.4  
##  3rd Qu.:69477   3rd Qu.:7   3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:52.0  
##  Max.   :71916   Max.   :7   Max.   :2.000   Max.   :2.000   Max.   :80.0  
##                                                                            
##     ridagemn        ridreth1        ridreth3       ridexmon    
##  Min.   : 0.00   Min.   :1.000   Min.   :1.00   Min.   :1.000  
##  1st Qu.: 4.00   1st Qu.:3.000   1st Qu.:3.00   1st Qu.:1.000  
##  Median : 9.00   Median :3.000   Median :3.00   Median :2.000  
##  Mean   :10.03   Mean   :3.229   Mean   :3.44   Mean   :1.516  
##  3rd Qu.:16.00   3rd Qu.:4.000   3rd Qu.:4.00   3rd Qu.:2.000  
##  Max.   :24.00   Max.   :5.000   Max.   :7.00   Max.   :2.000  
##  NA's   :9130                                   NA's   :418    
##     ridexagy         ridexagm        dmqmiliz        dmqadfc     
##  Min.   : 2.000   Min.   :  0.0   Min.   :1.000   Min.   :1.000  
##  1st Qu.: 5.000   1st Qu.: 42.0   1st Qu.:2.000   1st Qu.:1.000  
##  Median : 9.000   Median : 99.0   Median :2.000   Median :1.000  
##  Mean   : 9.641   Mean   :104.2   Mean   :1.908   Mean   :1.501  
##  3rd Qu.:14.000   3rd Qu.:160.0   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :20.000   Max.   :239.0   Max.   :2.000   Max.   :9.000  
##  NA's   :6338     NA's   :5747    NA's   :3749    NA's   :9205   
##     dmdborn4         dmdcitzn        dmdyrsus         dmdeduc3     
##  Min.   : 1.000   Min.   :1.000   Min.   : 1.000   Min.   : 0.000  
##  1st Qu.: 1.000   1st Qu.:1.000   1st Qu.: 3.000   1st Qu.: 2.000  
##  Median : 1.000   Median :1.000   Median : 5.000   Median : 5.000  
##  Mean   : 1.259   Mean   :1.123   Mean   : 7.481   Mean   : 6.038  
##  3rd Qu.: 1.000   3rd Qu.:1.000   3rd Qu.: 6.000   3rd Qu.: 9.000  
##  Max.   :99.000   Max.   :7.000   Max.   :99.000   Max.   :66.000  
##                   NA's   :5       NA's   :7683     NA's   :7157    
##     dmdeduc2        dmdmartl         ridexprg        sialang     
##  Min.   :1.000   Min.   : 1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:3.000   1st Qu.: 1.000   1st Qu.:2.000   1st Qu.:1.000  
##  Median :4.000   Median : 2.000   Median :2.000   Median :1.000  
##  Mean   :3.467   Mean   : 2.749   Mean   :2.023   Mean   :1.124  
##  3rd Qu.:5.000   3rd Qu.: 5.000   3rd Qu.:2.000   3rd Qu.:1.000  
##  Max.   :9.000   Max.   :99.000   Max.   :3.000   Max.   :2.000  
##  NA's   :4196    NA's   :4196     NA's   :8548                   
##     siaproxy        siaintrp        fialang         fiaproxy    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.:2.000   1st Qu.:1.000   1st Qu.:2.000  
##  Median :2.000   Median :2.000   Median :1.000   Median :2.000  
##  Mean   :1.627   Mean   :1.965   Mean   :1.081   Mean   :1.998  
##  3rd Qu.:2.000   3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:2.000  
##  Max.   :2.000   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##  NA's   :6                       NA's   :105     NA's   :105    
##     fiaintrp        mialang         miaproxy        miaintrp    
##  Min.   :1.000   Min.   :1.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:2.000   1st Qu.:1.000   1st Qu.:2.000   1st Qu.:2.000  
##  Median :2.000   Median :1.000   Median :2.000   Median :2.000  
##  Mean   :1.969   Mean   :1.053   Mean   :1.994   Mean   :1.969  
##  3rd Qu.:2.000   3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :2.000   Max.   :2.000   Max.   :2.000   Max.   :2.000  
##  NA's   :105     NA's   :3043    NA's   :3043    NA's   :3043   
##     aialanga        wtint2yr         wtmec2yr         sdmvpsu     
##  Min.   :1.000   Min.   :  3321   Min.   :     0   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.: 11352   1st Qu.: 11174   1st Qu.:1.000  
##  Median :1.000   Median : 18098   Median : 18090   Median :2.000  
##  Mean   :1.114   Mean   : 31426   Mean   : 31426   Mean   :1.643  
##  3rd Qu.:1.000   3rd Qu.: 34887   3rd Qu.: 34792   3rd Qu.:2.000  
##  Max.   :3.000   Max.   :220233   Max.   :222580   Max.   :3.000  
##  NA's   :4002                                                     
##     sdmvstra         indhhin2       indfmin2        indfmpir    
##  Min.   : 90.00   Min.   : 1.0   Min.   : 1.00   Min.   :0.000  
##  1st Qu.: 92.00   1st Qu.: 5.0   1st Qu.: 4.00   1st Qu.:0.860  
##  Median : 96.00   Median : 7.0   Median : 7.00   Median :1.630  
##  Mean   : 95.87   Mean   :11.5   Mean   :11.08   Mean   :2.205  
##  3rd Qu.: 99.00   3rd Qu.:14.0   3rd Qu.:14.00   3rd Qu.:3.580  
##  Max.   :103.00   Max.   :99.0   Max.   :99.00   Max.   :5.000  
##                   NA's   :81     NA's   :51      NA's   :840    
##     dmdhhsiz        dmdfmsiz        dmdhhsza        dmdhhszb     
##  Min.   :1.000   Min.   :1.000   Min.   :0.000   Min.   :0.0000  
##  1st Qu.:2.000   1st Qu.:2.000   1st Qu.:0.000   1st Qu.:0.0000  
##  Median :4.000   Median :4.000   Median :0.000   Median :1.0000  
##  Mean   :3.761   Mean   :3.591   Mean   :0.531   Mean   :0.9318  
##  3rd Qu.:5.000   3rd Qu.:5.000   3rd Qu.:1.000   3rd Qu.:2.0000  
##  Max.   :7.000   Max.   :7.000   Max.   :3.000   Max.   :4.0000  
##                                                                  
##     dmdhhsze        dmdhrgnd        dmdhrage        dmdhrbr4    
##  Min.   :0.000   Min.   :1.000   Min.   :18.00   Min.   : 1.00  
##  1st Qu.:0.000   1st Qu.:1.000   1st Qu.:33.00   1st Qu.: 1.00  
##  Median :0.000   Median :1.000   Median :43.00   Median : 1.00  
##  Mean   :0.395   Mean   :1.496   Mean   :45.39   Mean   : 1.43  
##  3rd Qu.:1.000   3rd Qu.:2.000   3rd Qu.:56.00   3rd Qu.: 2.00  
##  Max.   :3.000   Max.   :2.000   Max.   :80.00   Max.   :99.00  
##                                                  NA's   :365    
##     dmdhredu        dmdhrmar         dmdhsedu        bmdstats   
##  Min.   :1.000   Min.   : 1.000   Min.   :1.000   Min.   :1.00  
##  1st Qu.:2.000   1st Qu.: 1.000   1st Qu.:3.000   1st Qu.:1.00  
##  Median :4.000   Median : 1.000   Median :4.000   Median :1.00  
##  Mean   :3.437   Mean   : 3.191   Mean   :3.608   Mean   :1.15  
##  3rd Qu.:4.000   3rd Qu.: 5.000   3rd Qu.:5.000   3rd Qu.:1.00  
##  Max.   :9.000   Max.   :99.000   Max.   :9.000   Max.   :4.00  
##  NA's   :362     NA's   :136      NA's   :4881    NA's   :418   
##      bmxwt            bmiwt          bmxrecum         bmirecum   
##  Min.   :  3.60   Min.   :1.000   Min.   : 48.30   Min.   :1     
##  1st Qu.: 36.00   1st Qu.:3.000   1st Qu.: 70.60   1st Qu.:1     
##  Median : 64.50   Median :3.000   Median : 85.20   Median :1     
##  Mean   : 61.53   Mean   :2.963   Mean   : 82.63   Mean   :1     
##  3rd Qu.: 82.70   3rd Qu.:3.000   3rd Qu.: 94.75   3rd Qu.:1     
##  Max.   :216.10   Max.   :4.000   Max.   :115.60   Max.   :1     
##  NA's   :513      NA's   :9377    NA's   :8677     NA's   :9725  
##     bmxhead      bmihead            bmxht           bmiht           bmxbmi     
##  Min.   :34.60   Mode:logical   Min.   : 82.0   Min.   :1.000   Min.   :12.40  
##  1st Qu.:39.40   NA's:9756      1st Qu.:148.9   1st Qu.:3.000   1st Qu.:19.30  
##  Median :41.45                  Median :162.1   Median :3.000   Median :24.50  
##  Mean   :41.29                  Mean   :155.4   Mean   :2.582   Mean   :25.34  
##  3rd Qu.:43.02                  3rd Qu.:171.3   3rd Qu.:3.000   3rd Qu.:29.80  
##  Max.   :48.40                  Max.   :204.5   Max.   :3.000   Max.   :82.10  
##  NA's   :9520                   NA's   :1141    NA's   :9488    NA's   :1154   
##     bmdbmic          bmxleg          bmileg        bmxarml         bmiarml    
##  Min.   :1.000   Min.   :24.80   Min.   :1      Min.   :10.00   Min.   :1     
##  1st Qu.:2.000   1st Qu.:35.50   1st Qu.:1      1st Qu.:29.90   1st Qu.:1     
##  Median :2.000   Median :38.30   Median :1      Median :35.30   Median :1     
##  Mean   :2.459   Mean   :38.19   Mean   :1      Mean   :32.88   Mean   :1     
##  3rd Qu.:3.000   3rd Qu.:41.00   3rd Qu.:1      3rd Qu.:38.00   3rd Qu.:1     
##  Max.   :4.000   Max.   :52.80   Max.   :1      Max.   :48.10   Max.   :1     
##  NA's   :6401    NA's   :2801    NA's   :9402   NA's   :930     NA's   :9387  
##     bmxarmc         bmiarmc        bmxwaist         bmiwaist       bmxsad1     
##  Min.   :10.50   Min.   :1      Min.   : 38.70   Min.   :1      Min.   :10.00  
##  1st Qu.:22.20   1st Qu.:1      1st Qu.: 70.28   1st Qu.:1      1st Qu.:17.40  
##  Median :29.25   Median :1      Median : 86.90   Median :1      Median :20.60  
##  Mean   :28.33   Mean   :1      Mean   : 86.22   Mean   :1      Mean   :21.06  
##  3rd Qu.:34.00   3rd Qu.:1      3rd Qu.:101.50   3rd Qu.:1      3rd Qu.:24.30  
##  Max.   :58.10   Max.   :1      Max.   :176.00   Max.   :1      Max.   :40.40  
##  NA's   :930     NA's   :9383   NA's   :1552     NA's   :9300   NA's   :2961   
##     bmxsad2         bmxsad3         bmxsad4         bmdavsad    
##  Min.   : 9.90   Min.   :11.00   Min.   :11.10   Min.   :10.00  
##  1st Qu.:17.40   1st Qu.:18.82   1st Qu.:18.80   1st Qu.:17.40  
##  Median :20.60   Median :22.45   Median :22.30   Median :20.60  
##  Mean   :21.05   Mean   :22.36   Mean   :22.37   Mean   :21.07  
##  3rd Qu.:24.30   3rd Qu.:25.50   3rd Qu.:25.50   3rd Qu.:24.30  
##  Max.   :40.80   Max.   :36.40   Max.   :36.40   Max.   :40.60  
##  NA's   :2961    NA's   :9358    NA's   :9358    NA's   :2961   
##     bmdsadcm    
##  Min.   :1.000  
##  1st Qu.:1.000  
##  Median :1.000  
##  Mean   :1.287  
##  3rd Qu.:1.000  
##  Max.   :5.000  
##  NA's   :9271
# Recode gender (riagendr) from numeric codes into a labelled factor.
# The distribution and class are re-checked after every step so that the
# counts (4856 / 4900) can be seen to stay attached to the same group.

# Step 0: inspect the raw numeric coding (values 1 and 2, no NAs reported).
summary(NHANES_v1_1_1_$riagendr)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   1.502   2.000   2.000
table(NHANES_v1_1_1_$riagendr)
## 
##    1    2 
## 4856 4900
class(NHANES_v1_1_1_$riagendr)
## [1] "numeric"
# Step 1: convert to a factor; the levels are still "1" and "2".
NHANES_v1_1_1_$riagendr <-factor(NHANES_v1_1_1_$riagendr)
summary(NHANES_v1_1_1_$riagendr)
##    1    2 
## 4856 4900
table(NHANES_v1_1_1_$riagendr)
## 
##    1    2 
## 4856 4900
class(NHANES_v1_1_1_$riagendr)
## [1] "factor"
# Step 2: relabel the levels positionally, i.e. "1" -> "male", "2" -> "female".
# The assignment relies on the level order shown above.
levels(NHANES_v1_1_1_$riagendr) <- c("male", "female")
summary(NHANES_v1_1_1_$riagendr)
##   male female 
##   4856   4900
table(NHANES_v1_1_1_$riagendr)
## 
##   male female 
##   4856   4900
class(NHANES_v1_1_1_$riagendr)
## [1] "factor"
# Build the analysis subset from four columns of the full data set:
# bmxbmi (outcome), riagendr (gender factor), ridageyr and dmdyrsus.
# data.frame() derives the column names from the `$` expressions, hence the
# long auto-generated names printed below.
NHANES_v1_1_1_.sub <- data.frame(NHANES_v1_1_1_$bmxbmi,NHANES_v1_1_1_$riagendr, NHANES_v1_1_1_$ridageyr, NHANES_v1_1_1_$dmdyrsus)
names(NHANES_v1_1_1_.sub)
## [1] "NHANES_v1_1_1_.bmxbmi"   "NHANES_v1_1_1_.riagendr"
## [3] "NHANES_v1_1_1_.ridageyr" "NHANES_v1_1_1_.dmdyrsus"
summary(NHANES_v1_1_1_.sub)
##  NHANES_v1_1_1_.bmxbmi NHANES_v1_1_1_.riagendr NHANES_v1_1_1_.ridageyr
##  Min.   :12.40         male  :4856             Min.   : 0.0           
##  1st Qu.:19.30         female:4900             1st Qu.: 9.0           
##  Median :24.50                                 Median :26.0           
##  Mean   :25.34                                 Mean   :31.4           
##  3rd Qu.:29.80                                 3rd Qu.:52.0           
##  Max.   :82.10                                 Max.   :80.0           
##  NA's   :1154                                                         
##  NHANES_v1_1_1_.dmdyrsus
##  Min.   : 1.000         
##  1st Qu.: 3.000         
##  Median : 5.000         
##  Mean   : 7.481         
##  3rd Qu.: 6.000         
##  Max.   :99.000         
##  NA's   :7683
dim(NHANES_v1_1_1_.sub)
## [1] 9756    4
# Replace the auto-generated names with short analysis names (same column
# order as in the data.frame() call above).
colnames(NHANES_v1_1_1_.sub) <-c("bmi", "gender", "age", "time_us")
dim(NHANES_v1_1_1_.sub)
## [1] 9756    4
summary(NHANES_v1_1_1_.sub)
##       bmi           gender          age          time_us      
##  Min.   :12.40   male  :4856   Min.   : 0.0   Min.   : 1.000  
##  1st Qu.:19.30   female:4900   1st Qu.: 9.0   1st Qu.: 3.000  
##  Median :24.50                 Median :26.0   Median : 5.000  
##  Mean   :25.34                 Mean   :31.4   Mean   : 7.481  
##  3rd Qu.:29.80                 3rd Qu.:52.0   3rd Qu.: 6.000  
##  Max.   :82.10                 Max.   :80.0   Max.   :99.000  
##  NA's   :1154                                 NA's   :7683
# Listwise deletion: keep only rows that are complete on all four variables.
# time_us alone has 7683 NAs, so the sample shrinks from 9756 to 1948 rows,
# and every model fitted on this subset (including those that do not use
# time_us) is estimated on that reduced sample.
# NOTE(review): time_us still has a maximum of 99 after filtering while its
# third quartile is 6 -- this looks like a special response code (e.g. "don't
# know") rather than a real value. Confirm against the NHANES codebook and
# recode such codes to NA before treating time_us as continuous.
NHANES_v1_1_1_.sub <- na.omit(NHANES_v1_1_1_.sub)
summary(NHANES_v1_1_1_.sub)
##       bmi           gender         age           time_us      
##  Min.   :13.20   male  :972   Min.   : 2.00   Min.   : 1.000  
##  1st Qu.:22.30   female:976   1st Qu.:27.00   1st Qu.: 3.000  
##  Median :25.50                Median :42.00   Median : 5.000  
##  Mean   :26.16                Mean   :42.12   Mean   : 6.922  
##  3rd Qu.:29.60                3rd Qu.:58.00   3rd Qu.: 6.000  
##  Max.   :54.40                Max.   :80.00   Max.   :99.000
dim(NHANES_v1_1_1_.sub)
## [1] 1948    4
# Model 1: simple linear regression of BMI on the gender factor. "male" is
# the first level and therefore the reference group, so the `genderfemale`
# coefficient is the female-minus-male difference in mean BMI.
M1 <- lm(formula = bmi ~ gender, data = NHANES_v1_1_1_.sub)
summary(object = M1)
## 
## Call:
## lm(formula = bmi ~ gender, data = NHANES_v1_1_1_.sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.9418  -3.8418  -0.6418   3.4317  28.0582 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   25.9683     0.1892 137.256   <2e-16 ***
## genderfemale   0.3735     0.2673   1.397    0.162    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.899 on 1946 degrees of freedom
## Multiple R-squared:  0.001002,   Adjusted R-squared:  0.000489 
## F-statistic: 1.953 on 1 and 1946 DF,  p-value: 0.1625
# Standard lm diagnostic panels for Model 1.
plot(M1)

# abline(M1) would overlay the intercept/slope on whichever diagnostic panel
# was drawn last, whose axes have nothing to do with bmi ~ gender. With a
# categorical predictor the fitted values are just the two group means, so
# show the raw BMI distribution per gender and mark the fitted means in blue.
boxplot(bmi ~ gender, data = NHANES_v1_1_1_.sub,
        xlab = "Gender", ylab = "BMI")
m1_gender_levels <- levels(NHANES_v1_1_1_.sub$gender)
m1_group_means <- predict(
  M1,
  newdata = data.frame(
    gender = factor(m1_gender_levels, levels = m1_gender_levels)
  )
)
# boxplot() places the groups at x = 1, 2, ... in factor-level order.
points(seq_along(m1_group_means), m1_group_means,
       col = "blue", pch = 19, cex = 1.5)

# Overlay the observed BMI distributions of the two gender groups.
# The figure is built from the analysis sample itself; drawing rnorm() values
# (as an earlier version did) shows simulated numbers, not the NHANES data,
# and leaves stray global `bmi` / `gender` vectors behind.
bmi_by_gender <- split(NHANES_v1_1_1_.sub$bmi, NHANES_v1_1_1_.sub$gender)

# Shared breaks covering the full BMI range keep the two histograms comparable.
bmi_breaks <- pretty(range(NHANES_v1_1_1_.sub$bmi), n = 20)
hist_male <- hist(bmi_by_gender[["male"]], breaks = bmi_breaks, plot = FALSE)
hist_female <- hist(bmi_by_gender[["female"]], breaks = bmi_breaks,
                    plot = FALSE)

# Plot two histograms in the same graph; ylim is set from both so the second
# histogram is never clipped by the axis chosen for the first.
plot(hist_male, col = rgb(0, 0, 1, 0.2),
     ylim = c(0, max(hist_male$counts, hist_female$counts)),
     xlab = "BMI", ylab = "Frequency", main = "Histogram for BMI & Gender")
plot(hist_female, col = rgb(1, 0, 0, 0.2), add = TRUE)
legend("topright", legend = c("male", "female"),
       fill = c(rgb(0, 0, 1, 0.2), rgb(1, 0, 0, 0.2)))

Problem 2: Choose one continuous IV and theorize how it may affect the dependent variable. Add the continuous IV to Model 1 to create Model 2.

# Model 2: Model 1 plus age as a continuous predictor, fitted on the same
# complete-case sample so that it can be compared with M1.
M2 <- lm(formula = bmi ~ gender + age, data = NHANES_v1_1_1_.sub)
summary(object = M2)
## 
## Call:
## lm(formula = bmi ~ gender + age, data = NHANES_v1_1_1_.sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.3497  -4.0016  -0.8718   2.8945  30.3810 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  21.908596   0.325692  67.268   <2e-16 ***
## genderfemale  0.376011   0.253242   1.485    0.138    
## age           0.096353   0.006454  14.929   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.589 on 1945 degrees of freedom
## Multiple R-squared:  0.1037, Adjusted R-squared:  0.1028 
## F-statistic: 112.5 on 2 and 1945 DF,  p-value: < 2.2e-16

ANOVA test for M1 and M2

# Nested-model F-test: does adding age reduce the residual sum of squares
# relative to the gender-only model? (Reduced model first, fuller model second.)
anova(M1, M2)
## Analysis of Variance Table
## 
## Model 1: bmi ~ gender
## Model 2: bmi ~ gender + age
##   Res.Df   RSS Df Sum of Sq      F    Pr(>F)    
## 1   1946 67707                                  
## 2   1945 60746  1    6961.2 222.89 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Problem 3: Add one more continuous independent variable to Model 2 to create Model 3

# Model 3: Model 2 plus time_us as a second continuous predictor.
M3 <- lm(formula = bmi ~ gender + age + time_us, data = NHANES_v1_1_1_.sub)
summary(object = M3)
## 
## Call:
## lm(formula = bmi ~ gender + age + time_us, data = NHANES_v1_1_1_.sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.1805  -4.0047  -0.8823   2.9057  30.3974 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  21.785381   0.328176  66.383  < 2e-16 ***
## genderfemale  0.377041   0.252810   1.491  0.13602    
## age           0.094970   0.006462  14.696  < 2e-16 ***
## time_us       0.026145   0.009455   2.765  0.00574 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.579 on 1944 degrees of freedom
## Multiple R-squared:  0.1072, Adjusted R-squared:  0.1058 
## F-statistic: 77.83 on 3 and 1944 DF,  p-value: < 2.2e-16

ANOVA test of M2 and M3

# Nested-model F-test: does adding time_us improve on Model 2?
# (Reduced model first, fuller model second.)
anova(M2, M3)
## Analysis of Variance Table
## 
## Model 1: bmi ~ gender + age
## Model 2: bmi ~ gender + age + time_us
##   Res.Df   RSS Df Sum of Sq      F   Pr(>F)   
## 1   1945 60746                                
## 2   1944 60508  1       238 7.6466 0.005742 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Problem 4: Continuous by Continuous Interaction

# Model 4: continuous-by-continuous interaction (age x time_us), with gender
# kept as an additive term so that M3 stays nested inside M4.
# The earlier formula, `age + gender * time_us`, interacted the gender factor
# with time_us; that is a categorical-by-continuous interaction and is the
# same model as M5 (identical coefficients and residuals in the output).
# NOTE: the knitted output shown below was produced by the earlier formula
# and has to be regenerated after this change.
M4 <- lm(bmi ~ gender + age * time_us, data = NHANES_v1_1_1_.sub)
summary(M4)
## 
## Call:
## lm(formula = bmi ~ age + gender * time_us, data = NHANES_v1_1_1_.sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.2085  -4.0182  -0.8984   2.9025  30.4082 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          21.823131   0.334560  65.229   <2e-16 ***
## age                   0.094989   0.006463  14.696   <2e-16 ***
## genderfemale          0.300953   0.284560   1.058    0.290    
## time_us               0.020588   0.013429   1.533    0.125    
## genderfemale:time_us  0.010992   0.018858   0.583    0.560    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.58 on 1943 degrees of freedom
## Multiple R-squared:  0.1074, Adjusted R-squared:  0.1055 
## F-statistic: 58.44 on 4 and 1943 DF,  p-value: < 2.2e-16

ANOVA test of M4 and M3

# Nested-model F-test of the interaction term. The reduced model (M3) is
# listed first, matching the earlier comparisons, so Df and Sum of Sq are
# reported as positive numbers; the F statistic and p-value do not depend on
# the order. (The table printed below predates this reordering.)
anova(M3, M4)
## Analysis of Variance Table
## 
## Model 1: bmi ~ age + gender * time_us
## Model 2: bmi ~ gender + age + time_us
##   Res.Df   RSS Df Sum of Sq      F Pr(>F)
## 1   1943 60497                           
## 2   1944 60508 -1   -10.579 0.3398   0.56

Problem 5: Continuous by Categorical Interaction

# Model 5: continuous-by-categorical interaction. `time_us * gender` expands
# to both main effects plus their product, so the time_us slope may differ
# between males (reference group) and females; age enters additively.
M5 <- lm(formula = bmi ~ age + time_us * gender, data = NHANES_v1_1_1_.sub)
summary(object = M5)
## 
## Call:
## lm(formula = bmi ~ age + time_us * gender, data = NHANES_v1_1_1_.sub)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -13.2085  -4.0182  -0.8984   2.9025  30.4082 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          21.823131   0.334560  65.229   <2e-16 ***
## age                   0.094989   0.006463  14.696   <2e-16 ***
## time_us               0.020588   0.013429   1.533    0.125    
## genderfemale          0.300953   0.284560   1.058    0.290    
## time_us:genderfemale  0.010992   0.018858   0.583    0.560    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.58 on 1943 degrees of freedom
## Multiple R-squared:  0.1074, Adjusted R-squared:  0.1055 
## F-statistic: 58.44 on 4 and 1943 DF,  p-value: < 2.2e-16

ANOVA test of M5 and M3

# Nested-model F-test of the time_us-by-gender interaction. The reduced model
# (M3) is listed first, matching the earlier comparisons, so Df and Sum of Sq
# are reported as positive numbers; the F statistic and p-value do not depend
# on the order. (The table printed below predates this reordering.)
anova(M3, M5)
## Analysis of Variance Table
## 
## Model 1: bmi ~ age + time_us * gender
## Model 2: bmi ~ gender + age + time_us
##   Res.Df   RSS Df Sum of Sq      F Pr(>F)
## 1   1943 60497                           
## 2   1944 60508 -1   -10.579 0.3398   0.56

Problem 6: Conclusions regarding Models 1-5