library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(RColorBrewer)
library("dslabs")
# Yearly case totals per disease, shown as a scatter plot coloured by disease.
# Any row containing an NA (in any column) is discarded before aggregating.
us_contagious_diseases %>%
  na.omit() %>%
  group_by(disease, year) %>%
  summarise(cases = sum(count)) %>%
  ggplot(mapping = aes(x = year, y = cases, color = disease)) +
  geom_point()
## `summarise()` has grouped output by 'disease'. You can override using the
## `.groups` argument.
# State-name lookups used to assign each state to a region.
# Spellings (e.g. "District Of Columbia") are kept exactly as written so they
# keep matching the `state` values they are compared against.
Northeast <- c(
  "Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut",
  "Rhode Island", "New York", "Pennsylvania", "New Jersey"
)
South <- c(
  "Delaware", "Maryland", "District Of Columbia", "West Virginia", "Virginia",
  "North Carolina", "South Carolina", "Georgia", "Florida", "Kentucky",
  "Tennessee", "Alabama", "Mississippi", "Arkansas", "Louisiana"
)
Midwest <- c(
  "Ohio", "Michigan", "North Dakota", "South Dakota", "Nebraska", "Kansas",
  "Minnesota", "Iowa", "Missouri", "Wisconsin", "Illinois", "Indiana"
)
Southwest <- c("Arizona", "New Mexico", "Texas", "Oklahoma")
West <- c(
  "Washington", "Oregon", "California", "Nevada", "Idaho", "Montana",
  "Wyoming", "Utah", "Colorado"
)
# Measles cases per million residents, by region and year, for 1955-1975.
# Alaska and Hawaii are excluded; every remaining state is mapped to a region
# via the Northeast/South/Midwest/Southwest/West lookup vectors.
us_contagious_diseases %>%
  filter(
    disease == "Measles",
    !state %in% c("Hawaii", "Alaska"),
    year %in% 1955:1975
  ) %>%
  na.omit() %>%
  mutate(
    region = case_when(
      state %in% Northeast ~ "Northeast",
      state %in% South ~ "South",
      state %in% Midwest ~ "Midwest",
      state %in% Southwest ~ "Southwest",
      state %in% West ~ "West",
      # Any state missing from the lookups keeps its own name as its "region".
      TRUE ~ as.character(state)
    )
  ) %>%
  group_by(region, year) %>%
  summarise(
    region_population = sum(population),
    region_count = sum(count),
    # Cases per one million residents of the region.
    infected_population = region_count / region_population * 1e6
  ) %>%
  ggplot(mapping = aes(x = year, y = infected_population, color = region)) +
  geom_line(size = 1) +
  # Reference line at 1963, the year the write-up treats as vaccine acceptance.
  geom_vline(xintercept = 1963, color = "navyblue") +
  scale_color_brewer(palette = "Accent") +
  # Smoothed trend per region, drawn on top of the raw yearly lines.
  geom_smooth(method = "loess", se = FALSE, size = 1) +
  labs(
    title = "Measles Vaccine Acceptance by Region",
    x = "Year",
    y = "Infected Population (per million)"
  )
## `summarise()` has grouped output by 'region'. You can override using the
## `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'
The Southwest region appears to have had the highest measles infection rate
(per million people) since the vaccine was accepted in 1963.