#Introduction In this skills drill, you will be asked to practice the programming skills you have learned so far in order to investigate changes in smoking behavior over time in the general adult population.

The data you are analyzing come from the National Health Interview Survey (NHIS), an annual household survey conducted by the National Center for Health Statistics (part of the CDC). This extract covers survey years 1997 through 2016.

#Step 1: Load Packages Load the packages necessary to (1) import, (2) manipulate, and (3) visualize data.

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning in file(con, "r"): cannot open file '/var/db/timezone/zoneinfo/
## +VERSION': No such file or directory
library(readr)

#Step 2: Import Data Import your data into R

# Read the practice-drill CSV into `data` using readr's read_csv().
# NOTE(review): the absolute path below is specific to one machine; anyone
# else running this must point it at their own copy of the file.
data <- read_csv(
  file = "/Users/chelsyrodriguez/Downloads/Practice Skills Drill 1 Data.csv"
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   year = col_double(),
##   Behav_EverSmokeCigs_B = col_double(),
##   Behav_CigsPerDay_N = col_double(),
##   MentalHealth_MentalIllnessK6_C = col_character()
## )

#Step 3: Preview Data Preview the first 6 rows of your data

# Print the first six rows to check column names and types after import.
head(data, n = 6)
## # A tibble: 6 x 4
##    year Behav_EverSmokeCigs_B Behav_CigsPerDay_N MentalHealth_MentalIllnessK6_C
##   <dbl>                 <dbl>              <dbl> <chr>                         
## 1  1997                     0                  0 Low Risk                      
## 2  1997                     0                  0 <NA>                          
## 3  1997                     1                  5 Low Risk                      
## 4  1997                     0                  0 Low Risk                      
## 5  1997                     0                  0 Low Risk                      
## 6  1997                     1                  0 MMD

#Step 4: Avg Daily Cigarettes Select the Behav_CigsPerDay_N and year variables from the data. Rename the Behav_CigsPerDay_N variable to NumCigs. Filter to keep only those observations where year is greater than 1997. Calculate the mean of NumCigs.

# Overall mean cigarettes per day across all survey years after 1997.
# Steps: keep the two needed columns, give the count a short name, drop the
# 1997 rows, then collapse everything to a single mean.
# na.rm = TRUE keeps the mean defined if any count is missing; without it a
# single NA in NumCigs would turn the whole result into NA.
data %>%
  select(Behav_CigsPerDay_N, year) %>%
  rename(NumCigs = Behav_CigsPerDay_N) %>%
  filter(year > 1997) %>%
  summarize(AvgCigs = mean(NumCigs, na.rm = TRUE))
## # A tibble: 1 x 1
##   AvgCigs
##     <dbl>
## 1    2.71

#Step 5: Avg Daily Cigarettes by Year Select the year and Behav_CigsPerDay_N variables from the data. Rename the Behav_CigsPerDay_N variable to NumCigs. Filter to keep only those observations where year is greater than 1997. Calculate the mean of NumCigs by year.

# Mean cigarettes per day for each survey year after 1997.
# Same preparation as the overall mean, but group_by(year) makes summarize()
# return one row per year instead of a single value.
# na.rm = TRUE keeps each yearly mean defined if a count is missing; without
# it one NA would make that year's average NA.
data %>%
  select(Behav_CigsPerDay_N, year) %>%
  rename(NumCigs = Behav_CigsPerDay_N) %>%
  filter(year > 1997) %>%
  group_by(year) %>%
  summarize(AvgCigs = mean(NumCigs, na.rm = TRUE))
## # A tibble: 19 x 2
##     year AvgCigs
##  * <dbl>   <dbl>
##  1  1998    3.83
##  2  1999    3.57
##  3  2000    3.48
##  4  2001    3.43
##  5  2002    3.29
##  6  2003    3.09
##  7  2004    2.92
##  8  2005    2.88
##  9  2006    2.74
## 10  2007    2.47
## 11  2008    2.67
## 12  2009    2.49
## 13  2010    2.32
## 14  2011    2.32
## 15  2012    2.23
## 16  2013    2.08
## 17  2014    1.99
## 18  2015    1.87
## 19  2016    1.95

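#Step 6: Interpretation Across all respondents (those who do not smoke appear with 0 cigarettes per day), the average number of cigarettes smoked per day fell from 3.83 in 1998 to 1.95 in 2016, a drop of roughly half. The decline was steady: the yearly average decreased or held level in every year except for small increases in 2008 and 2016. This output alone cannot tell us whether the drop reflects fewer people smoking, smokers smoking less, or both.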

#Step 7: Visualization Copy the code from step 5, and paste it into this code chunk. Add onto the code to create a visualization with the following features:

  • line chart
  • year on the x-axis
  • average cigarettes per day on the y-axis
  • line color varying according to the average cigarettes per day
# Line chart of mean cigarettes per day by survey year (years after 1997).
# The first part repeats the yearly-mean pipeline; its summary table is then
# piped into ggplot() as the plot data.
# na.rm = TRUE keeps each yearly mean defined if a count is missing; an NA
# mean would otherwise leave a gap in the line.
data %>%
  select(Behav_CigsPerDay_N, year) %>%
  rename(NumCigs = Behav_CigsPerDay_N) %>%
  filter(year > 1997) %>%
  group_by(year) %>%
  summarize(AvgCigs = mean(NumCigs, na.rm = TRUE)) %>%
  ggplot() +
  # Mapping color to AvgCigs shades the line by the value it is plotting.
  geom_line(aes(x = year, y = AvgCigs, color = AvgCigs))

#Step 8: Post to RPubs Publish this document to RPubs and post the RPubs URL on Blackboard.