Loading library

# One-time setup: install.packages("dslabs")  # these are data science labs
library(dslabs)
## Warning: package 'dslabs' was built under R version 4.1.3
# Show the index of datasets available in the dslabs package.
data(package = "dslabs")
# List the files in the package's installed "script" directory.
list.files(
  system.file("script", package = "dslabs")
)
##  [1] "make-admissions.R"                   
##  [2] "make-brca.R"                         
##  [3] "make-brexit_polls.R"                 
##  [4] "make-death_prob.R"                   
##  [5] "make-divorce_margarine.R"            
##  [6] "make-gapminder-rdas.R"               
##  [7] "make-greenhouse_gases.R"             
##  [8] "make-historic_co2.R"                 
##  [9] "make-mnist_27.R"                     
## [10] "make-movielens.R"                    
## [11] "make-murders-rda.R"                  
## [12] "make-na_example-rda.R"               
## [13] "make-nyc_regents_scores.R"           
## [14] "make-olive.R"                        
## [15] "make-outlier_example.R"              
## [16] "make-polls_2008.R"                   
## [17] "make-polls_us_election_2016.R"       
## [18] "make-reported_heights-rda.R"         
## [19] "make-research_funding_rates.R"       
## [20] "make-stars.R"                        
## [21] "make-temp_carbon.R"                  
## [22] "make-tissue-gene-expression.R"       
## [23] "make-trump_tweets.R"                 
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"

Loading packages

# Attach the plotting packages. The order is kept exactly as written because
# attach order decides which package wins a name clash (see the `stars`
# masking message emitted by highcharter below).
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 4.1.3
## Warning in register(): Can't find generic `scale_type` in package ggplot2 to
## register S3 method.
# ggrepel pulls in ggplot2 as a required package (see the message below).
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.1.3
## Loading required package: ggplot2
library(RColorBrewer)
# highcharter is needed for the highchart() line chart further down.
# Attaching it masks `stars` from dslabs.
library(highcharter)
## Warning: package 'highcharter' was built under R version 4.1.3
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
## 
##     stars
# ggplot2 was already attached by ggrepel above; this call states the
# dependency explicitly.
library(ggplot2)
# NOTE(review): ggthemes, ggrepel, RColorBrewer and viridis do not appear to be
# referenced by the code visible in this file — confirm before removing.
library(viridis)
## Warning: package 'viridis' was built under R version 4.1.3
## Loading required package: viridisLite

Loading and viewing data

# Load the dataset into the workspace, then attach the tidyverse (dplyr's
# filter() and readr's write_csv() are used below).
data(us_contagious_diseases)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.6     v dplyr   1.0.7
## v tidyr   1.1.4     v stringr 1.4.0
## v readr   2.1.1     v forcats 0.5.1
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
view(us_contagious_diseases)
# Write the data to disk first, then keep a working copy named `Disease`.
# write_csv() returns its input invisibly, so the copy equals the dataset.
# NOTE(review): the output path has no ".csv" extension — confirm whether
# that is intentional before renaming the file.
write_csv(us_contagious_diseases, "us_contagious_diseases", na = "")
Disease <- us_contagious_diseases

Look at column names

# Print the column names of the working copy.
names(Disease)
## [1] "disease"         "state"           "year"            "weeks_reporting"
## [5] "count"           "population"

Filter data (filtering data for only years 1928 and 2011, then filtering for 4 specific states).

# Keep only the 1928 and 2011 records for Maryland, Virginia, Pennsylvania,
# and the District of Columbia.
ECD <- filter(
  Disease,
  year %in% c(1928, 2011),
  state %in% c(
    "Maryland", "Virginia", "Pennsylvania", "District Of Columbia"
  )
)

Filter data (Filtering data for years greater than 1980 and for 4 specific states).

# Records strictly after 1980 for Maryland, Virginia, Pennsylvania, and the
# District of Columbia.
UCD <- filter(
  Disease,
  year > 1980,
  state %in% c(
    "Maryland", "Virginia", "Pennsylvania", "District Of Columbia"
  )
)

View UCD

# Inspect the filtered UCD table (intended for interactive use; presumably has
# no visible effect when the document is knitted — confirm).
view(UCD)

Filter data (filtering data for years greater than 1980 and for the state of Maryland)

# Maryland-only subset of the records strictly after 1980; this is the data
# behind the faceted "weeks reporting" plot.
DMD <- filter(Disease, year > 1980, state == "Maryland")

Filter data (Filtering data for years greater than 1980 and for 4 specific states)

 # Records strictly after 1980 for California, New York, Pennsylvania, and
 # Texas; this is the data behind the highcharter line chart.
 diseases_4 <- filter(
   Disease,
   year > 1980,
   state %in% c("California", "New York", "Pennsylvania", "Texas")
 )

Graph of weeks of reporting for each disease in Maryland, for the years after 1980

# Scatter plot of Maryland's reporting weeks (x) against year (y), one facet
# per disease. Colour encodes the disease and point size the population.
# Fix: the y-axis label was missing its closing parenthesis.
# NOTE(review): DMD is filtered with `year > 1980`, so the plotted data starts
# after 1980 although the labels say "1980" — confirm which one is intended.
ggplot(
  DMD,
  aes(x = weeks_reporting, y = year, color = disease, size = population)
) +
  geom_point(alpha = 0.5) + # semi-transparent so overlapping points stay visible
  xlab("Weekly Reporting") +
  ylab("Year (1980 - 2011)") +
  facet_grid(~ disease) +
  ggtitle("Weeks Reporting for Maryland from 1980-2011")

Line graph of Diseases in 4 states

# Line chart of disease counts for the four states held in `diseases_4`:
# one series per disease, with state on the x-axis and count on the y-axis.
# Fix: removed the stray trailing comma inside hcaes(), which passed an empty
# argument to the mapping.
highchart() %>%
  hc_add_series(
    data = diseases_4,
    type = "line",
    hcaes(x = state, y = count, group = disease)
  ) %>%
  hc_title(text = "Disease Counts of 4 Major States") %>%
  hc_xAxis(title = list(text = "State")) %>%
  hc_yAxis(title = list(text = "Count")) %>%
  hc_legend(align = "right", verticalAlign = "top")

I used the US contagious diseases dataset from dslabs. The dataset includes contagious disease data for US states. Before creating the graphs, I cleaned the data. I filtered the data to include only the years after 1980, because the wide range of years included in the data makes the graph very crowded. So, I looked at more recent years to see how diseases in Maryland compared with the last recorded years in the dataset. For the first graph, I mapped “weeks reporting” to the x-axis and year to the y-axis. Each disease described in the dataset has a distinct color, and the population is mapped to point size. As time proceeds, the population grows and weeks reporting goes up for most of the diseases. Three diseases (measles, mumps, and rubella) all cease to exist around the year 2005. Hepatitis A and Pertussis continue to grow past the year 2010. I used alpha to make the points slightly transparent and used “facet grid” by disease to see all six diseases on separate graphs. The next graph I created is a line graph of four of the most populous states (California, New York, Pennsylvania, and Texas), which shows the count of each state’s contagious diseases. Each line is a specific disease, and each line shows the disease counts of all four states. As you can see, the highest disease counts occur at the start of the data and die down as time goes on, with the exception of Hepatitis A.