# Introduction
# In this skills drill, you will practice the programming skills you have
# learned so far in order to investigate changes in smoking behavior over time
# in the general adult population.
# The data you are analyzing are from the National Health Interview Survey
# (NHIS), a survey conducted annually since 1997 by the National Institutes of
# Health.
# Step 1: Load Packages
# Load the packages necessary to (1) import, (2) manipulate, and (3) visualize data.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning in file(con, "r"): cannot open file '/var/db/timezone/zoneinfo/
## +VERSION': No such file or directory
library(readr)
# Step 2: Import Data
# Import your data into R.
# Read the practice CSV into a tibble named `data`, which every later step uses.
# NOTE(review): the path is absolute and specific to one machine -- confirm it
# exists before running this script elsewhere.
data <- read_csv(
  file = "/Users/chelsyrodriguez/Downloads/Practice Skills Drill 1 Data.csv"
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## year = col_double(),
## Behav_EverSmokeCigs_B = col_double(),
## Behav_CigsPerDay_N = col_double(),
## MentalHealth_MentalIllnessK6_C = col_character()
## )
# Step 3: Preview Data
# Preview the first 6 rows of your data.
# Print the first six rows of `data` to check the imported columns and types.
head(data, n = 6)
## # A tibble: 6 x 4
## year Behav_EverSmokeCigs_B Behav_CigsPerDay_N MentalHealth_MentalIllnessK6_C
## <dbl> <dbl> <dbl> <chr>
## 1 1997 0 0 Low Risk
## 2 1997 0 0 <NA>
## 3 1997 1 5 Low Risk
## 4 1997 0 0 Low Risk
## 5 1997 0 0 Low Risk
## 6 1997 1 0 MMD
# Step 4: Avg Daily Cigarettes
# 1. Select the Behav_CigsPerDay_N and year variables from the data.
# 2. Rename the Behav_CigsPerDay_N variable to NumCigs.
# 3. Filter to keep only those observations where year is greater than 1997.
# 4. Calculate the mean of NumCigs.
# Overall mean of cigarettes per day across all observations after 1997.
# The rename is done inside select() (new_name = old_name), so the columns
# reaching summarise() are NumCigs and year, as in the step description.
data %>%
  filter(year > 1997) %>%
  select(NumCigs = Behav_CigsPerDay_N, year) %>%
  summarise(AvgCigs = mean(NumCigs))
## # A tibble: 1 x 1
## AvgCigs
## <dbl>
## 1 2.71
# Step 5: Avg Daily Cigarettes by Year
# 1. Select the year and Behav_CigsPerDay_N variables from the data.
# 2. Rename the Behav_CigsPerDay_N variable to NumCigs.
# 3. Filter to keep only those observations where year is greater than 1997.
# 4. Calculate the mean of NumCigs by year.
# Mean cigarettes per day for each survey year after 1997 (one row per year).
# The rename is done inside select() (new_name = old_name); grouping by year
# makes summarise() return one AvgCigs value per year.
data %>%
  filter(year > 1997) %>%
  select(NumCigs = Behav_CigsPerDay_N, year) %>%
  group_by(year) %>%
  summarise(AvgCigs = mean(NumCigs))
## # A tibble: 19 x 2
## year AvgCigs
## * <dbl> <dbl>
## 1 1998 3.83
## 2 1999 3.57
## 3 2000 3.48
## 4 2001 3.43
## 5 2002 3.29
## 6 2003 3.09
## 7 2004 2.92
## 8 2005 2.88
## 9 2006 2.74
## 10 2007 2.47
## 11 2008 2.67
## 12 2009 2.49
## 13 2010 2.32
## 14 2011 2.32
## 15 2012 2.23
## 16 2013 2.08
## 17 2014 1.99
## 18 2015 1.87
## 19 2016 1.95
# Step 6: Interpretation
# [Write your interpretation of the above output here.]
# Step 7: Visualization
# Copy the code from step 5 and paste it into this code chunk. Add onto the
# code to create a visualization which shows a line graph of the average
# number of cigarettes smoked per day (AvgCigs) by year.
# Line chart of the yearly mean of cigarettes per day for years after 1997.
# The summary table is piped straight into ggplot(); the aesthetics are
# declared once there and inherited by geom_line(). Line colour is mapped to
# AvgCigs as well, so the colour scale mirrors the y-axis value.
data %>%
  filter(year > 1997) %>%
  select(NumCigs = Behav_CigsPerDay_N, year) %>%
  group_by(year) %>%
  summarise(AvgCigs = mean(NumCigs)) %>%
  ggplot(mapping = aes(x = year, y = AvgCigs, color = AvgCigs)) +
  geom_line()
# Step 8: Post to RPubs
# Post this to RPubs and post the RPubs URL on Blackboard.