Loading library
# install.packages("dslabs") # these are data science labs
library("dslabs")
## Warning: package 'dslabs' was built under R version 4.1.3
# Show the catalogue of datasets that ship with the dslabs package.
data(package = "dslabs")
# Show the files found in the package's installed "script" directory.
list.files(path = system.file("script", package = "dslabs"))
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-death_prob.R"
## [5] "make-divorce_margarine.R"
## [6] "make-gapminder-rdas.R"
## [7] "make-greenhouse_gases.R"
## [8] "make-historic_co2.R"
## [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
Loading packages
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.1.3
## Warning in register(): Can't find generic `scale_type` in package ggplot2 to
## register S3 method.
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.1.3
## Loading required package: ggplot2
library(RColorBrewer)
library(highcharter)
## Warning: package 'highcharter' was built under R version 4.1.3
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
##
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
##
## stars
library(ggplot2)
library(viridis)
## Warning: package 'viridis' was built under R version 4.1.3
## Loading required package: viridisLite
Loading and viewing data
data("us_contagious_diseases")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.6 v dplyr 1.0.7
## v tidyr 1.1.4 v stringr 1.4.0
## v readr 2.1.1 v forcats 0.5.1
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Open the raw dataset in the data viewer for a quick visual check.
view(us_contagious_diseases)
# write_csv() hands its input back invisibly, so this statement both saves a
# copy to disk (missing values written as empty strings) and binds the data
# to `Disease`. Note that the output file name carries no extension.
Disease <- us_contagious_diseases %>%
  write_csv("us_contagious_diseases", na = "")
Look at column names
colnames(Disease)
## [1] "disease" "state" "year" "weeks_reporting"
## [5] "count" "population"
Filter data (filtering data for only years 1928 and 2011, then filtering for 4 specific states).
# Subset to the years 1928 and 2011 for Maryland, Virginia, Pennsylvania and
# the District of Columbia. The "District Of Columbia" spelling (capital "Of")
# is kept exactly as in the original filter; presumably it matches the labels
# used in the dataset -- verify with levels(Disease$state).
ECD <- Disease %>%
  filter(
    year %in% c(1928, 2011),
    state %in% c(
      "Maryland", "Virginia", "Pennsylvania", "District Of Columbia"
    )
  )
Filter data (Filtering data for years greater than 1980 and for 4 specific states).
# Subset to years strictly after 1980 for Maryland, Virginia, Pennsylvania
# and the District of Columbia (label spelled as in the original filter).
UCD <- Disease %>%
  filter(
    year > 1980,
    state %in% c(
      "Maryland", "Virginia", "Pennsylvania", "District Of Columbia"
    )
  )
Filter data (filtering data for years greater than 1980 and for the state of Maryland)
# Maryland records for years strictly after 1980; this is the input of the
# faceted ggplot scatter plot further down.
DMD <- Disease %>%
  filter(year > 1980, state == "Maryland")
Filter data (filtering data for years greater than 1980 and for the state of Maryland, then filtering all states for the years 1980, 1985, 1990, 1995, 2000, and 2005)
# Maryland records for years strictly after 1980 (the same rows as `DMD`);
# this copy feeds the highcharter line chart.
diseases_md <- Disease %>%
  filter(state == "Maryland" & year > 1980)
# Five-year snapshots from 1980 through 2005; no state filter is applied,
# so every state in `Disease` is kept.
disease_years <- Disease %>%
  filter(year %in% seq(1980, 2005, by = 5))
Graph of weeks reporting for each disease in Maryland for the years after 1980
# Faceted scatter plot of the Maryland subset (`DMD`): weeks reporting on the
# x-axis, calendar year on the y-axis, one panel and one colour per disease,
# and point size scaled by population.
DMD %>%
  ggplot(
    aes(x = weeks_reporting, y = year, color = disease, size = population)
  ) +
  # Semi-transparent points so overlapping observations stay visible.
  geom_point(alpha = 0.5) +
  xlab("Weekly Reporting") +
  # The original label was missing its closing parenthesis.
  ylab("Year (1980 - 2011)") +
  facet_grid(~ disease) +
  ggtitle("Weeks Reporting for Maryland from 1980-2011")

Line graph of disease counts in Maryland, followed by a bar chart of disease counts across all states for selected years
# Interactive line chart of the Maryland subset (`diseases_md`): reported
# count per year, one line per disease.
highchart() %>%
  hc_add_series(
    data = diseases_md,
    type = "line",
    # The original call ended with a stray trailing comma, which passed an
    # empty extra argument to hcaes(); it is dropped here.
    hcaes(x = year, y = count, group = disease)
  ) %>%
  hc_title(text = "Disease Counts in Maryland since 1980") %>%
  hc_xAxis(title = list(text = "Years Since 1980")) %>%
  hc_yAxis(title = list(text = "Disease Totals")) %>%
  hc_legend(align = "right", verticalAlign = "top")
# `disease_years` holds one row per state, disease and year, but the chart is
# titled and labelled as US-wide totals. Sum the state-level counts first so
# that each bar is the national total for one disease in one year.
us_disease_totals <- disease_years %>%
  group_by(year, disease) %>%
  # na.rm = TRUE so a single missing state count does not blank out a total.
  summarise(count = sum(count, na.rm = TRUE), .groups = "drop")

# Interactive bar chart of the national totals, one series per disease.
highchart() %>%
  hc_add_series(
    data = us_disease_totals,
    type = "bar",
    # The original call ended with a stray trailing comma, which passed an
    # empty extra argument to hcaes(); it is dropped here.
    hcaes(x = year, y = count, group = disease)
  ) %>%
  hc_title(text = "Disease Counts in the US Since 1980") %>%
  hc_xAxis(title = list(text = "Years Since 1980")) %>%
  hc_yAxis(title = list(text = "Disease Totals")) %>%
  hc_legend(align = "right", verticalAlign = "top")
I used the US contagious diseases dataset from dslabs. The dataset includes contagious disease data for US states. Before creating the graph, I cleaned the data. I filtered the data to include only the years after 1980, because the wide range of years in the data makes the graph very crowded. So, I looked at more recent years to see how diseases in Maryland compared to the last recorded years in the dataset. For the graph, I mapped “weeks reporting” to the x-axis and year to the y-axis. Each disease in the dataset has a distinct color, and population is mapped to point size. As time proceeds, the population grows and weeks reporting goes up for most of the diseases. Three diseases (measles, mumps, and rubella) all stop being reported around the year 2005. Hepatitis A and pertussis continue to grow past 2010. I used alpha to make the points slightly transparent and used “facet grid” on disease to show all six diseases on separate graphs. The next graph I created is a line graph of disease counts in Maryland. As you can see, there’s a spike in the disease counts around 1990, which then dies down and remains quite stagnant as time goes on. Mumps, measles, and rubella die out in the early 2000s, while hepatitis A and pertussis continue on, surpassing the other diseases.