Intro Stuff

Add Population in Units of 100,000 to the Contagious Diseases Data

# Add `pop_100000`: each row's population expressed in units of 100,000 people.
us_contagious_diseases <- us_contagious_diseases %>%
  mutate(pop_100000 = population / 1e5)
# Save the augmented table to disk, then print it for inspection.
write.csv(x = us_contagious_diseases, file = "us_contagious_diseases")
us_contagious_diseases
## # A tibble: 16,065 x 8
##    number disease     state    year weeks_reporting count population pop_100000
##     <dbl> <chr>       <chr>   <dbl>           <dbl> <dbl>      <dbl>      <dbl>
##  1      1 Hepatitis A Alabama  1966              50   321    3345787       33.5
##  2      2 Hepatitis A Alabama  1967              49   291    3364130       33.6
##  3      3 Hepatitis A Alabama  1968              52   314    3386068       33.9
##  4      4 Hepatitis A Alabama  1969              49   380    3412450       34.1
##  5      5 Hepatitis A Alabama  1970              51   413    3444165       34.4
##  6      6 Hepatitis A Alabama  1971              51   378    3481798       34.8
##  7      7 Hepatitis A Alabama  1972              45   342    3524543       35.2
##  8      8 Hepatitis A Alabama  1973              45   467    3571209       35.7
##  9      9 Hepatitis A Alabama  1974              45   244    3620548       36.2
## 10     10 Hepatitis A Alabama  1975              46   286    3671246       36.7
## # ... with 16,055 more rows

Cases Per 100,000

# Add `cases_per_hundred_thou`: the case count divided by the population that
# was already scaled to units of 100,000 (`pop_100000`).
us_contagious_diseases <- us_contagious_diseases %>%
  mutate(cases_per_hundred_thou = count / pop_100000)
us_contagious_diseases
## # A tibble: 16,065 x 9
##    number disease state  year weeks_reporting count population pop_100000
##     <dbl> <chr>   <chr> <dbl>           <dbl> <dbl>      <dbl>      <dbl>
##  1      1 Hepati~ Alab~  1966              50   321    3345787       33.5
##  2      2 Hepati~ Alab~  1967              49   291    3364130       33.6
##  3      3 Hepati~ Alab~  1968              52   314    3386068       33.9
##  4      4 Hepati~ Alab~  1969              49   380    3412450       34.1
##  5      5 Hepati~ Alab~  1970              51   413    3444165       34.4
##  6      6 Hepati~ Alab~  1971              51   378    3481798       34.8
##  7      7 Hepati~ Alab~  1972              45   342    3524543       35.2
##  8      8 Hepati~ Alab~  1973              45   467    3571209       35.7
##  9      9 Hepati~ Alab~  1974              45   244    3620548       36.2
## 10     10 Hepati~ Alab~  1975              46   286    3671246       36.7
## # ... with 16,055 more rows, and 1 more variable: cases_per_hundred_thou <dbl>
# Write the table again so the file on disk also contains the new rate column
# (same path as the earlier export, so that file is overwritten), then print.
write.csv(x = us_contagious_diseases, file = "us_contagious_diseases")
us_contagious_diseases
## # A tibble: 16,065 x 9
##    number disease state  year weeks_reporting count population pop_100000
##     <dbl> <chr>   <chr> <dbl>           <dbl> <dbl>      <dbl>      <dbl>
##  1      1 Hepati~ Alab~  1966              50   321    3345787       33.5
##  2      2 Hepati~ Alab~  1967              49   291    3364130       33.6
##  3      3 Hepati~ Alab~  1968              52   314    3386068       33.9
##  4      4 Hepati~ Alab~  1969              49   380    3412450       34.1
##  5      5 Hepati~ Alab~  1970              51   413    3444165       34.4
##  6      6 Hepati~ Alab~  1971              51   378    3481798       34.8
##  7      7 Hepati~ Alab~  1972              45   342    3524543       35.2
##  8      8 Hepati~ Alab~  1973              45   467    3571209       35.7
##  9      9 Hepati~ Alab~  1974              45   244    3620548       36.2
## 10     10 Hepati~ Alab~  1975              46   286    3671246       36.7
## # ... with 16,055 more rows, and 1 more variable: cases_per_hundred_thou <dbl>

Filter to Keep Only Hepatitis A Cases in DC, MD, and VA

# `x`: every Hepatitis A row; `y`: the subset of `x` for Maryland, Virginia and
# the District of Columbia (state names spelled exactly as stored in the data).
x <- us_contagious_diseases %>%
  filter(disease == "Hepatitis A")
y <- x %>%
  filter(state %in% c("Maryland", "Virginia", "District Of Columbia"))
# Check which years the filtered data covers.
summary(y$year)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1966    1977    1988    1988    2000    2011
# Save the filtered (Hepatitis A; MD, VA, DC) table to its own file.
write.csv(x = y, file = "us_contagious_diseases_filtered")

Plot Hepatitis A Cases in MD, VA, and DC

# Basic scatterplot: Hepatitis A rate per 100,000 by year, one colour per state.
plot1 <- ggplot(y, aes(x = year, y = cases_per_hundred_thou, color = state)) +
  geom_point() +
  labs(
    x = "Year",
    y = "Hepatitis A Cases/100,000 Population",
    title = "Scatterplot of Hepatitis A Cases in MD VA DC 1966 - 2011"
  )
plot1

# Styled version of the scatterplot (replaces the previous `plot1`): adds a
# per-state linear trend line without a confidence band, the "Accent" Brewer
# palette, and the dark theme.
plot1 <- ggplot(y, aes(x = year, y = cases_per_hundred_thou, color = state)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_colour_brewer(palette = "Accent") +
  labs(
    x = "Years",
    y = "Hepatitis A Cases/100,000 Population",
    title = "Scatterplot of Hepatitis A Cases in MD VA DC 1966 - 2011"
  ) +
  theme_dark()

plot1
## `geom_smooth()` using formula 'y ~ x'