## Contagious disease data for US states

List the datasets available in the dslabs package and the scripts used to build them

# Show the datasets bundled with dslabs, then list the files in the
# package's "script" directory.
data(package = "dslabs")
list.files(path = system.file("script", package = "dslabs"))
##  [1] "make-admissions.R"                   
##  [2] "make-brca.R"                         
##  [3] "make-brexit_polls.R"                 
##  [4] "make-death_prob.R"                   
##  [5] "make-divorce_margarine.R"            
##  [6] "make-gapminder-rdas.R"               
##  [7] "make-greenhouse_gases.R"             
##  [8] "make-historic_co2.R"                 
##  [9] "make-mnist_27.R"                     
## [10] "make-movielens.R"                    
## [11] "make-murders-rda.R"                  
## [12] "make-na_example-rda.R"               
## [13] "make-nyc_regents_scores.R"           
## [14] "make-olive.R"                        
## [15] "make-outlier_example.R"              
## [16] "make-polls_2008.R"                   
## [17] "make-polls_us_election_2016.R"       
## [18] "make-reported_heights-rda.R"         
## [19] "make-research_funding_rates.R"       
## [20] "make-stars.R"                        
## [21] "make-temp_carbon.R"                  
## [22] "make-tissue-gene-expression.R"       
## [23] "make-trump_tweets.R"                 
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"

Loading dataset from package dslabs

# Attach dslabs and load us_contagious_diseases into the workspace.
library(dslabs)
data(us_contagious_diseases)

Loading required packages

library(RColorBrewer)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)

Create a base chart using the US contagious diseases dataset

# Base plot object: maps year to x and count to y, sets the axis labels and
# title, and applies a minimal serif theme. No geoms are attached here, so
# layers are added to this object afterwards.
us_contagious_diseases_chart <- ggplot(
  data = us_contagious_diseases,
  mapping = aes(x = year, y = count)
) +
  labs(
    x = "Year",
    y = "Count",
    title = "Contagious Diseases in the United States from 1928 to 2011"
  ) +
  theme_minimal(base_size = 14, base_family = "serif")

Add a layer with points

# Render the base chart with a default point layer.
us_contagious_diseases_chart + geom_point()

Customize the two layers we have added to the chart

# Larger, semi-transparent points plus a red linear-model fit line drawn
# without its confidence band.
us_contagious_diseases_chart +
  geom_point(alpha = 0.5, size = 3) +
  geom_smooth(method = "lm", colour = "red", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

## Customize, coloring the points by type of disease

# Color the points by disease and overlay a thin, dashed, black linear trend
# line without a confidence band.
# `linewidth` is used for the line thickness because the `size` aesthetic for
# lines is deprecated since ggplot2 3.4.0.
us_contagious_diseases_chart +
  geom_point(aes(color = disease), size = 3, alpha = 0.5) +
  geom_smooth(
    method = lm,
    se = FALSE,
    color = "black",
    linetype = 2,
    linewidth = 0.4
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## `geom_smooth()` using formula = 'y ~ x'

## Set the axis ranges, and use a different color palette

# Points colored by disease with a solid black linear trend, a relabeled
# y-axis, restricted axis ranges, and the ColorBrewer "Set2" palette.
# `linewidth` is used for the line thickness because the `size` aesthetic for
# lines is deprecated since ggplot2 3.4.0.
us_contagious_diseases_chart +
  geom_point(aes(color = disease), size = 3, alpha = 0.8) +
  geom_smooth(
    method = lm,
    se = FALSE,
    color = "black",
    linetype = 1,
    linewidth = 0.4
  ) +
  ylab("Disease Reported Cases") +
  # NOTE: limits set on the scales discard rows outside the range (ggplot2
  # reports them as removed), so the trend line is fit only to the rows kept.
  scale_x_continuous(limits = c(1940, 2010)) +
  scale_y_continuous(limits = c(10^3, 10^5)) +
  scale_color_brewer(name = "Disease", palette = "Set2")
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 13727 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 13727 rows containing missing values (`geom_point()`).
## Warning: Removed 16 rows containing missing values (`geom_smooth()`).

Load the highcharter package (install it first if it is not already installed)

library(highcharter)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## 
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
## 
##     stars

Create a chart with default settings and add meaningful x-axis and y-axis labels and a title

# Interactive scatter chart of count against year, one series per state, with
# axis titles and a chart title.
# The title range is 1928 to 2011 so that it agrees with the other charts in
# this document (it previously read "1930 to 2010").
highchart() %>%
  hc_add_series(
    data = us_contagious_diseases,
    type = "scatter",
    mapping = hcaes(x = year, y = count, group = state)
  ) %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "Disease Reported Cases")) %>%
  hc_title(text = "Contagious Diseases in the United States from 1928 to 2011")

Customize the symbol

# Same scatter chart (count against year, grouped by state), with every
# series forced to use a circle marker via the series plot options.
highchart() %>%
  hc_add_series(
    data = us_contagious_diseases,
    type = "scatter",
    mapping = hcaes(x = year, y = count, group = state)
  ) %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "Disease Reported Cases")) %>%
  hc_title(
    text = "Contagious Diseases in the United States from 1928 to 2011"
  ) %>%
  hc_plotOptions(
    series = list(
      marker = list(symbol = "circle")
    )
  )

Customize the tooltips and change the default theme.

Customize the tooltips with more information about the year, type of disease, and number of cases. I use `<br>`
to break the information in the tooltip onto separate lines. Also, I changed the theme to hc_theme_economist for a better visualization.

# Scatter chart of count against year grouped by state, with circle markers,
# the Economist theme, and a tooltip that puts year, disease and case count
# on separate lines (the <br> tags are the line breaks).
highchart() %>%
  hc_add_series(
    data = us_contagious_diseases,
    type = "scatter",
    mapping = hcaes(x = year, y = count, group = state)
  ) %>%
  hc_xAxis(title = list(text = "Year")) %>%
  hc_yAxis(title = list(text = "Disease Reported Cases")) %>%
  hc_title(
    text = "Contagious Diseases in the United States from 1928 to 2011"
  ) %>%
  hc_plotOptions(
    series = list(
      marker = list(symbol = "circle")
    )
  ) %>%
  hc_add_theme(hc_theme_economist()) %>%
  hc_tooltip(
    shared = TRUE,
    pointFormat = "Year: {point.year}<br>Disease: {point.disease}<br>Cases: {point.count}"
  )

The "Contagious Diseases in the United States from 1928 to 2011" visualization is a scatterplot built with dslabs and highcharter. The variables represented in the scatterplot are year, count, and state. Year is the year reported, count is the number of reported cases, and the legend lists all of the states in the U.S. When I mouse over a symbol, I get the key information for that point: the state, the specific year, the type of disease, and the number of reported cases. I used these variables because I wanted to see the evolution of contagious diseases in the U.S. over time. The number of reported cases has decreased drastically over time; both the number of contagious disease cases and the types of diseases are substantially reduced. The reduction is most likely due to advances in medical care and vaccination. Since there are numerous states, it could be difficult for readers to view the chart in bright colors. That is why I decided to change the theme for a better visualization. What I found fascinating is the tooltip in highcharter. I customized it with the year, type of disease, and number of cases in each state. It was all on one line at first, which made it unclear for chart readers. I figured using `<br>`
could help to break the information into different lines. Another idea I would like to explore uses the weeks_reporting variable. I suspect diseases are more likely to spread in cold months. A histogram or bar graph could reflect this well (but a bar graph is not encouraged in this assignment). There should be a relationship between the seasons and when disease cases are reported: there could be more diseases reported in the later weeks of the year than at the beginning of the year.