Intro Stuff
Add Population in Units of 100,000 to the Contagious Diseases File
# Add pop_100000: each row's population expressed in units of 100,000 people.
us_contagious_diseases <- us_contagious_diseases %>%
  mutate(pop_100000 = population / 1e5)

# Save the augmented table to disk.
# NOTE(review): write.csv() writes row names as an extra leading column by
# default, and the target name has no ".csv" extension - confirm both are
# intended.
write.csv(x = us_contagious_diseases, file = "us_contagious_diseases")

# Print the table to check the new column.
us_contagious_diseases
## # A tibble: 16,065 x 8
## number disease state year weeks_reporting count population pop_100000
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Hepatitis A Alabama 1966 50 321 3345787 33.5
## 2 2 Hepatitis A Alabama 1967 49 291 3364130 33.6
## 3 3 Hepatitis A Alabama 1968 52 314 3386068 33.9
## 4 4 Hepatitis A Alabama 1969 49 380 3412450 34.1
## 5 5 Hepatitis A Alabama 1970 51 413 3444165 34.4
## 6 6 Hepatitis A Alabama 1971 51 378 3481798 34.8
## 7 7 Hepatitis A Alabama 1972 45 342 3524543 35.2
## 8 8 Hepatitis A Alabama 1973 45 467 3571209 35.7
## 9 9 Hepatitis A Alabama 1974 45 244 3620548 36.2
## 10 10 Hepatitis A Alabama 1975 46 286 3671246 36.7
## # ... with 16,055 more rows
Cases Per 100,000
# Add cases_per_hundred_thou: reported count divided by the population in
# units of 100,000 (pop_100000).
# NOTE(review): the rate is not adjusted for weeks_reporting - confirm that
# partial-year counts are acceptable for this analysis.
us_contagious_diseases <- us_contagious_diseases %>%
  mutate(cases_per_hundred_thou = count / pop_100000)

# Print the table to check the new column.
us_contagious_diseases
## # A tibble: 16,065 x 9
## number disease state year weeks_reporting count population pop_100000
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Hepati~ Alab~ 1966 50 321 3345787 33.5
## 2 2 Hepati~ Alab~ 1967 49 291 3364130 33.6
## 3 3 Hepati~ Alab~ 1968 52 314 3386068 33.9
## 4 4 Hepati~ Alab~ 1969 49 380 3412450 34.1
## 5 5 Hepati~ Alab~ 1970 51 413 3444165 34.4
## 6 6 Hepati~ Alab~ 1971 51 378 3481798 34.8
## 7 7 Hepati~ Alab~ 1972 45 342 3524543 35.2
## 8 8 Hepati~ Alab~ 1973 45 467 3571209 35.7
## 9 9 Hepati~ Alab~ 1974 45 244 3620548 36.2
## 10 10 Hepati~ Alab~ 1975 46 286 3671246 36.7
## # ... with 16,055 more rows, and 1 more variable: cases_per_hundred_thou <dbl>
# Save the table again now that it carries cases_per_hundred_thou. The target
# is the same relative path that was written after pop_100000 was added, so
# that earlier file is replaced.
# NOTE(review): write.csv() writes row names by default and the file name has
# no ".csv" extension - confirm both are intended.
write.csv(x = us_contagious_diseases, file = "us_contagious_diseases")

# Print the table that was just written.
us_contagious_diseases
## # A tibble: 16,065 x 9
## number disease state year weeks_reporting count population pop_100000
## <dbl> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Hepati~ Alab~ 1966 50 321 3345787 33.5
## 2 2 Hepati~ Alab~ 1967 49 291 3364130 33.6
## 3 3 Hepati~ Alab~ 1968 52 314 3386068 33.9
## 4 4 Hepati~ Alab~ 1969 49 380 3412450 34.1
## 5 5 Hepati~ Alab~ 1970 51 413 3444165 34.4
## 6 6 Hepati~ Alab~ 1971 51 378 3481798 34.8
## 7 7 Hepati~ Alab~ 1972 45 342 3524543 35.2
## 8 8 Hepati~ Alab~ 1973 45 467 3571209 35.7
## 9 9 Hepati~ Alab~ 1974 45 244 3620548 36.2
## 10 10 Hepati~ Alab~ 1975 46 286 3671246 36.7
## # ... with 16,055 more rows, and 1 more variable: cases_per_hundred_thou <dbl>
Filter and Save Only the Hepatitis A Cases for DC, MD, and VA
# x: every Hepatitis A record, all states.
x <- us_contagious_diseases %>%
  filter(disease == "Hepatitis A")

# y: the Hepatitis A records for Maryland, Virginia and the District of
# Columbia only. The state labels are matched exactly, including the
# capital "O" in "District Of Columbia".
y <- x %>%
  filter(state %in% c("Maryland", "Virginia", "District Of Columbia"))

# Five-number summary (plus mean) of the years present in the subset.
summary(y$year)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1966 1977 1988 1988 2000 2011
# Save the three-jurisdiction Hepatitis A subset to disk.
# NOTE(review): write.csv() writes row names by default and the file name has
# no ".csv" extension - confirm both are intended.
write.csv(x = y, file = "us_contagious_diseases_filtered")
Plot Hepatitis A Cases in MD, VA, and DC
# Basic scatterplot: one point per row of y, with year on the x axis, cases
# per 100,000 on the y axis, and point colour mapped to state.
plot1 <- ggplot(
  data = y,
  mapping = aes(x = year, y = cases_per_hundred_thou, color = state)
) +
  geom_point() +
  labs(
    title = "Scatterplot of Hepatitis A Cases in MD VA DC 1966 - 2011",
    x = "Year",
    y = "Hepatitis A Cases/100,000 Population"
  )

# Render the plot.
plot1

# Restyled version of the scatterplot (replaces the previous plot1): same
# mappings, plus a straight-line (lm) trend per state without confidence
# bands, the "Accent" Brewer palette for the state colours, and a dark theme.
plot1 <- ggplot(
  data = y,
  mapping = aes(x = year, y = cases_per_hundred_thou, color = state)
) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_brewer(palette = "Accent") +
  labs(
    title = "Scatterplot of Hepatitis A Cases in MD VA DC 1966 - 2011",
    x = "Years",
    y = "Hepatitis A Cases/100,000 Population"
  ) +
  theme_dark()

# Render the plot.
plot1
## `geom_smooth()` using formula 'y ~ x'
