Skills Drill 1
Steps 1-3
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
# Load the practice survey extract into a tibble.
# NOTE(review): the path is absolute and machine-specific; this line will only
# run where the CSV sits in that exact Downloads folder.
data <- read_csv(
  file = "/Users/rebeccagibble/Downloads/Practice Skills Drill 1 Data.csv"
)
## Parsed with column specification:
## cols(
## year = col_double(),
## Behav_EverSmokeCigs_B = col_double(),
## Behav_CigsPerDay_N = col_double(),
## MentalHealth_MentalIllnessK6_C = col_character()
## )
# Preview the first six rows to sanity-check column names and types.
head(data, n = 6L)
## # A tibble: 6 x 4
## year Behav_EverSmokeCigs_B Behav_CigsPerDay_N MentalHealth_MentalIllnessK6_C
## <dbl> <dbl> <dbl> <chr>
## 1 1997 0 0 Low Risk
## 2 1997 0 0 <NA>
## 3 1997 1 5 Low Risk
## 4 1997 0 0 Low Risk
## 5 1997 0 0 Low Risk
## 6 1997 1 0 MMD
Step 4
# Overall mean of cigarettes per day, pooling every row with year after 1997.
# The cigarette column is renamed to NumCigs as part of the select() step.
data %>%
  filter(year > 1997) %>%
  select(year, NumCigs = Behav_CigsPerDay_N) %>%
  summarise(DailyAvgCigs = mean(NumCigs))
## # A tibble: 1 x 1
## DailyAvgCigs
## <dbl>
## 1 2.71
Step 5
# Mean of cigarettes per day within each year after 1997 (one row per year).
# Rows are filtered before grouping, so 1997 never forms a group.
data %>%
  filter(year > 1997) %>%
  select(year, NumCigs = Behav_CigsPerDay_N) %>%
  group_by(year) %>%
  summarise(DailyAvgCigsByYear = mean(NumCigs))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 19 x 2
## year DailyAvgCigsByYear
## <dbl> <dbl>
## 1 1998 3.83
## 2 1999 3.57
## 3 2000 3.48
## 4 2001 3.43
## 5 2002 3.29
## 6 2003 3.09
## 7 2004 2.92
## 8 2005 2.88
## 9 2006 2.74
## 10 2007 2.47
## 11 2008 2.67
## 12 2009 2.49
## 13 2010 2.32
## 14 2011 2.32
## 15 2012 2.23
## 16 2013 2.08
## 17 2014 1.99
## 18 2015 1.87
## 19 2016 1.95
Step 6
# Across 1998-2016, the average number of cigarettes smoked per day is about 2.7. The yearly average generally declines as the years become more recent (from 3.83 in 1998 to 1.95 in 2016), which means that people tend to smoke fewer cigarettes in recent years than they did in the late 1990s.
Step 7
# Line chart of the yearly mean of cigarettes per day for years after 1997.
# The aesthetics are declared once in ggplot() and inherited by geom_line().
# Colour is mapped to the yearly mean itself, so the line is shaded by the
# same value that is plotted on the y axis.
data %>%
  filter(year > 1997) %>%
  select(year, NumCigs = Behav_CigsPerDay_N) %>%
  group_by(year) %>%
  summarise(DailyAvgCigsByYear = mean(NumCigs)) %>%
  ggplot(
    mapping = aes(
      x = year,
      y = DailyAvgCigsByYear,
      color = DailyAvgCigsByYear
    )
  ) +
  geom_line()
## `summarise()` ungrouping output (override with `.groups` argument)
