Setup

# Point the session at the project folder so that relative file paths
# resolve against it.
# NOTE(review): this is a hard-coded, machine-specific Windows path; setwd()
# errors on any machine where C:/StatQuiz does not exist.
setwd("C:/StatQuiz")
# Echo the working directory to confirm the change took effect.
getwd()
## [1] "C:/StatQuiz"

Load Packages

library(ggplot2)
library(dplyr)

Load Data

# Restore the objects saved in brfss2013.RData into the current environment;
# the path is relative, so it is resolved against the working directory.
# NOTE(review): the analysis below reads a data frame named `brfss2013`,
# presumably restored by this call — confirm the file contains it.
load("brfss2013.RData")

Research Questions

  1. Using the variables “sleptim1” and “marital”, determine the following:

1.1 the number of observations that are “NA” in the variable “sleptim1”; 1.2 the number of observations having at most 5 hours of sleep; 1.3 the number of observations having more than 5 hours of sleep but less than 11 hours of sleep; 1.4 the number of observations having at least 11 hours of sleep; and 1.5 the number of observations having at most 5 hours of sleep that are married.

**Question 1.1 Number of observations that are “NA” in the variable “sleptim1”**

# Inspect the structure of the sleep-duration column before counting.
brfss2013 %>%
  select(sleptim1) %>%
  str()
## 'data.frame':    491775 obs. of  1 variable:
##  $ sleptim1: int  NA 6 9 8 6 8 7 6 8 8 ...
# Count the rows whose sleptim1 is missing. Grouping on the column keeps the
# (NA) value in the result next to its count.
brfss2013 %>%
  group_by(sleptim1) %>%
  filter(is.na(sleptim1)) %>%
  summarise(count = n())
## # A tibble: 1 × 2
##   sleptim1 count
##      <int> <int>
## 1       NA  7387

**Question 1.2 Number of observations having at most 5 hours of sleep.**

str(select(brfss2013, sleptim1))
## 'data.frame':    491775 obs. of  1 variable:
##  $ sleptim1: int  NA 6 9 8 6 8 7 6 8 8 ...
# Per-hour breakdown of respondents reporting at most 5 hours of sleep.
# filter() drops rows where the comparison evaluates to NA, so missing
# sleptim1 values are excluded without an explicit is.na() check.
brfss2013 %>%
  filter(sleptim1 <= 5) %>%
  group_by(sleptim1) %>%
  summarise(frequency = n())
## # A tibble: 6 × 2
##   sleptim1 frequency
##      <int>     <int>
## 1        0         1
## 2        1       228
## 3        2      1076
## 4        3      3496
## 5        4     14261
## 6        5     33436
# Single-number answer to the question: the total number of rows with
# sleptim1 <= 5, which equals the sum of the frequency column above.
brfss2013 %>%
  filter(sleptim1 <= 5) %>%
  nrow()
## [1] 52498

**Question 1.3 Number of observations having more than 5 hours of sleep but less than 11 hours of sleep.**

# Inspect the structure of the sleep-duration column.
brfss2013 %>%
  select(sleptim1) %>%
  str()
## 'data.frame':    491775 obs. of  1 variable:
##  $ sleptim1: int  NA 6 9 8 6 8 7 6 8 8 ...
# Frequency table for respondents sleeping more than 5 but fewer than 11
# hours (both bounds exclusive).
A <- brfss2013 %>%
  filter(sleptim1 > 5 & sleptim1 < 11) %>%
  group_by(sleptim1) %>%
  summarise(frequency = n())
# B holds the same table as A; the total is taken from B.
B <- A
sum(B[["frequency"]])
## [1] 425670

**Question 1.4 Number of observations having at least 11 hours of sleep.**

str(select(brfss2013, sleptim1))
## 'data.frame':    491775 obs. of  1 variable:
##  $ sleptim1: int  NA 6 9 8 6 8 7 6 8 8 ...
# Frequency table for respondents reporting at least 11 hours of sleep.
# "At least 11" is inclusive, so the comparison must be >= 11; the earlier
# `> 11` silently dropped everyone who reported exactly 11 hours.
# The table is computed once and reused for both the printout and the total.
C <- brfss2013 %>%
  filter(sleptim1 >= 11) %>%
  group_by(sleptim1) %>%
  summarise(frequency = n())
C
# NOTE(review): the 11-hour row and the total below were derived by
# subtraction from the other recorded counts (491775 rows - 7387 NA
# - 52498 with <= 5 h - 425670 with 6-10 h - 5387 with > 11 h = 833);
# re-knit to confirm.
## # A tibble: 16 × 2
##    sleptim1 frequency
##       <int>     <int>
##  1       11       833
##  2       12      3675
##  3       13       199
##  4       14       447
##  5       15       367
##  6       16       369
##  7       17        35
##  8       18       164
##  9       19        13
## 10       20        64
## 11       21         3
## 12       22        10
## 13       23         4
## 14       24        35
## 15      103         1
## 16      450         1
sum(C$frequency)
## [1] 6220

**Question 1.5 Number of observations having at most 5 hours of sleep that are married.**

str(select(brfss2013, sleptim1, marital))
## 'data.frame':    491775 obs. of  2 variables:
##  $ sleptim1: int  NA 6 9 8 6 8 7 6 8 8 ...
##  $ marital : Factor w/ 6 levels "Married","Divorced",..: 2 1 1 1 1 2 1 3 1 1 ...
# Frequency table for married respondents reporting at most 5 hours of sleep.
# The question asks for "at most 5 hours", i.e. sleptim1 <= 5; the earlier
# `sleptim1 > 5` filter answered the opposite question.
# filter() drops rows where either condition is NA, so respondents with a
# missing sleptim1 or marital value are excluded.
D <- brfss2013 %>%
  filter(sleptim1 <= 5, marital == "Married") %>%
  group_by(sleptim1) %>%
  summarise(frequency = n())
D
# NOTE(review): the output previously recorded here (an 18-row table for
# 6-24 hours, total 229660) came from the `> 5` filter and no longer applies;
# re-knit to capture the corrected table and total.
sum(D$frequency)