Loading the needed libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(RColorBrewer)
library("dslabs")

A quick look at U.S. contagious diseases

# Quick overview: total reported cases per disease and year, drawn as one
# point per (disease, year) pair and coloured by disease.
us_contagious_diseases %>%
  # Only `count` is summed below, so drop rows where `count` itself is
  # missing. A blanket na.omit() also discards rows whose only NA sits in a
  # column this plot never uses (such as `population`), which would
  # understate the yearly totals.
  drop_na(count) %>%
  group_by(disease, year) %>%
  # .groups = "drop" returns an ungrouped tibble, so summarise() no longer
  # emits its "grouped output" message.
  summarize(cases = sum(count), .groups = "drop") %>%
  ggplot(aes(x = year, y = cases, color = disease)) +
  geom_point()
## `summarise()` has grouped output by 'disease'. You can override using the
## `.groups` argument.

Grouping by Region

# State names belonging to each region. These vectors are matched against the
# `state` column further down, so the spelling of every entry (including
# "District Of Columbia") must stay exactly as written.
Northeast <- c(
  "Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut",
  "Rhode Island", "New York", "Pennsylvania", "New Jersey"
)

South <- c(
  "Delaware", "Maryland", "District Of Columbia", "West Virginia",
  "Virginia", "North Carolina", "South Carolina", "Georgia", "Florida",
  "Kentucky", "Tennessee", "Alabama", "Mississippi", "Arkansas", "Louisiana"
)

Midwest <- c(
  "Ohio", "Michigan", "North Dakota", "South Dakota", "Nebraska", "Kansas",
  "Minnesota", "Iowa", "Missouri", "Wisconsin", "Illinois", "Indiana"
)

Southwest <- c("Arizona", "New Mexico", "Texas", "Oklahoma")

West <- c(
  "Washington", "Oregon", "California", "Nevada", "Idaho", "Montana",
  "Wyoming", "Utah", "Colorado"
)

Mutations and Graph

# Measles cases per million inhabitants, aggregated by region and year for
# 1955-1975 with Hawaii and Alaska excluded. A vertical line marks 1963.
us_contagious_diseases %>%
  filter(
    disease == "Measles",
    state != "Hawaii",
    state != "Alaska",
    year %in% 1955:1975
  ) %>%
  na.omit() %>%
  mutate(
    # Map each state onto its region; a state found in none of the region
    # vectors keeps its own name as the region label.
    region = case_when(
      state %in% Northeast ~ "Northeast",
      state %in% South ~ "South",
      state %in% Midwest ~ "Midwest",
      state %in% Southwest ~ "Southwest",
      state %in% West ~ "West",
      TRUE ~ as.character(state)
    )
  ) %>%
  group_by(region, year) %>%
  # Regional totals first, then the rate derived from those totals.
  summarize(
    region_population = sum(population),
    region_count = sum(count)
  ) %>%
  mutate(
    infected_population = (region_count / region_population) * 1000000
  ) %>%
  ggplot(aes(x = year, y = infected_population, color = region)) +
  geom_line(size = 1) +
  geom_vline(xintercept = 1963, color = "navyblue") +
  scale_color_brewer(palette = "Accent") +
  # LOESS trend per region, drawn on top of the raw yearly lines.
  geom_smooth(method = "loess", se = FALSE, size = 1) +
  labs(
    title = "Measles Vaccine Acceptance by Region",
    x = "Year",
    y = "Infected Population (per million)"
  )
## `summarise()` has grouped output by 'region'. You can override using the
## `.groups` argument.
## `geom_smooth()` using formula 'y ~ x'

The Southwest region appears to have had the highest measles infection rate (per million people) since the vaccine was accepted in 1963.