Loading the needed libraries

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.7     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(RColorBrewer)
library("dslabs")

A quick look at U.S. contagious diseases

# Overview plot: total reported cases per disease and year.
# Rows containing any NA are dropped first, then `count` is summed within
# each disease/year group and drawn as points, one colour per disease.
us_contagious_diseases %>%
  na.omit() %>%
  group_by(disease, year) %>%
  summarise(cases = sum(count)) %>%
  ggplot(mapping = aes(x = year, y = cases, colour = disease)) +
  geom_point()
## `summarise()` has grouped output by 'disease'. You can override using the
## `.groups` argument.

Grouping by Region

# Region lookup vectors: each holds the state names belonging to one region
# and is meant for membership tests with `%in%`.
# Alaska and Hawaii are not assigned to any region.
Northeast <- c(
  "Maine", "Vermont", "New Hampshire", "Massachusetts", "Connecticut",
  "Rhode Island", "New York", "Pennsylvania", "New Jersey"
)

# NOTE(review): "District Of Columbia" is spelled with a capital "Of";
# presumably this matches the dataset's state label -- confirm before editing.
South <- c(
  "Delaware", "Maryland", "District Of Columbia", "West Virginia", "Virginia",
  "North Carolina", "South Carolina", "Georgia", "Florida", "Kentucky",
  "Tennessee", "Alabama", "Mississippi", "Arkansas", "Louisiana"
)

Midwest <- c(
  "Ohio", "Michigan", "North Dakota", "South Dakota", "Nebraska", "Kansas",
  "Minnesota", "Iowa", "Missouri", "Wisconsin", "Illinois", "Indiana"
)

Southwest <- c("Arizona", "New Mexico", "Texas", "Oklahoma")

West <- c(
  "Washington", "Oregon", "California", "Nevada", "Idaho", "Montana",
  "Wyoming", "Utah", "Colorado"
)

Mutations and Graph

# Measles infections per million residents by region, 1955-1975.
# Steps: keep measles rows for every state except Hawaii and Alaska, drop
# rows containing any NA, label each state with its region, total the
# population and case counts per region and year, then plot the rate as one
# line per region.
us_contagious_diseases %>%
  filter(
    disease == "Measles",
    !state %in% c("Hawaii", "Alaska"),
    year %in% 1955:1975
  ) %>%
  na.omit() %>%
  mutate(
    region = case_when(
      state %in% Northeast ~ "Northeast",
      state %in% South ~ "South",
      state %in% Midwest ~ "Midwest",
      state %in% Southwest ~ "Southwest",
      state %in% West ~ "West",
      # Fallback: a state found in none of the lists keeps its own name.
      TRUE ~ as.character(state)
    )
  ) %>%
  group_by(region, year) %>%
  summarise(
    region_population = sum(population),
    region_count = sum(count),
    # Cases per one million residents of the region.
    infected_population = region_count / region_population * 1000000
  ) %>%
  ggplot(mapping = aes(x = year, y = infected_population, colour = region)) +
  geom_line(size = 1) +
  # Vertical marker at 1963, the year the accompanying text ties to the
  # measles vaccine.
  geom_vline(xintercept = 1963, colour = "navyblue") +
  scale_colour_brewer(palette = "Accent") +
  labs(
    title = "Measles Vaccine Acceptance by Region",
    x = "Year",
    y = "Infected Population (per million)"
  )
## `summarise()` has grouped output by 'region'. You can override using the
## `.groups` argument.

The Southwest region appears to have had the highest number of infections (per million people) before the vaccine was accepted in 1963. Since then, the number of infections (per million people) has declined drastically in all regions.