#install.packages("dslabs")
#install.packages("tidyverse")
#install.packages("ggplot2")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.5.3
## -- Attaching packages --------------------------------------- tidyverse 1.2.1 --
## v tibble 2.1.3 v purrr 0.3.3
## v tidyr 1.0.0 v dplyr 0.8.3
## v readr 1.3.1 v stringr 1.4.0
## v tibble 2.1.3 v forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.5.3
## Warning: package 'tidyr' was built under R version 3.5.3
## Warning: package 'readr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.3
## Warning: package 'stringr' was built under R version 3.5.3
## Warning: package 'forcats' was built under R version 3.5.3
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 3.5.3
library(dslabs)
## Warning: package 'dslabs' was built under R version 3.5.3
data(package="dslabs")
list.files(system.file("script", package="dslabs"))
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-death_prob.R"
## [5] "make-divorce_margarine.R"
## [6] "make-gapminder-rdas.R"
## [7] "make-greenhouse_gases.R"
## [8] "make-historic_co2.R"
## [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
data("murders")
str(murders)
## 'data.frame': 51 obs. of 5 variables:
## $ state : chr "Alabama" "Alaska" "Arizona" "Arkansas" ...
## $ abb : chr "AL" "AK" "AZ" "AR" ...
## $ region : Factor w/ 4 levels "Northeast","South",..: 2 4 4 2 4 4 1 2 2 2 ...
## $ population: num 4779736 710231 6392017 2915918 37253956 ...
## $ total : num 135 19 232 93 1257 ...
write_csv(murders, "murders.csv", na="")
r <- murders %>%
summarize(rate=sum(total)/sum(population)*10^6) %>%
pull(rate)
ds_theme_set()
murders %>%
ggplot(aes(x=population/10^6, y=total, label=abb)) +
geom_abline(intercept=log10(r), lty=2, col="darkgrey") +
geom_point(aes(color=region), size=3) +
geom_text_repel(nudge_x = 0.005) +
scale_x_log10("Populations in millions (log scale)") +
scale_y_log10("Total number of murders (log scale)") +
ggtitle("US Gun Murders in 2010") +
scale_color_discrete(name="Region") +
# Remove legend title
theme(legend.title=element_blank())
data("gapminder")
west <- c("Western Europe", "Northern Europe", "Southern Europe", "Northern America", "Australia and New Zealand")
gapminder <- gapminder %>%
mutate(group=case_when(
region %in% west ~ "The West",
region %in% c("Eastern Asia", "South-Eastern Asia") ~ "East Asia",
region %in% c("Caribbean", "Central America", "South America") ~ "Latin America",
continent =="Africa" & region != "Northern Africa" ~ "Sub-Saharan Africa",
TRUE ~ "Others"))
gapminder <- gapminder %>%
mutate(group = factor(group, levels = rev(c("Others", "Latin America", "East Asia", "Sub-Saharan Africa", "The West"))))
gapminder %>%
filter(year %in% c(1962, 2013) & !is.na(group) &
!is.na(fertility) & !is.na(life_expectancy)) %>%
mutate(population_in_millions = population/10^6) %>%
ggplot(aes(fertility, y=life_expectancy, col=group, size=population_in_millions)) +
geom_point(alpha = 0.8) +
guides(size=FALSE) +
theme(plot.title = element_blank(), legend.title = element_blank()) +
coord_cartesian(ylim=c(30,85)) +
xlab("Fertility rate (births per woman)") +
ylab("Life Expectancy") +
geom_text(aes(x=7,y=82,label=year), cex=12, color="grey") +
facet_grid(.~year) +
theme(strip.background = element_blank(),
strip.text.x=element_blank(),
strip.text.y=element_blank(),
legend.position="top")
library(RColorBrewer)
## Warning: package 'RColorBrewer' was built under R version 3.5.2
data("us_contagious_diseases")
the_disease <- "Measles"
us_contagious_diseases %>%
filter(!state%in%c("Hawaii","Alaska") & disease == the_disease) %>%
mutate(rate=count/population*10000*52/weeks_reporting) %>%
mutate(state=reorder(state,rate)) %>%
ggplot(aes(year,state,fill=rate)) +
geom_tile(color="grey50") +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors=brewer.pal(9,"Reds"),trans="sqrt") +
geom_vline(xintercept=1963,col="blue") +
theme_minimal() + theme(panel.grid = element_blank()) +
ggtitle(the_disease) +
ylab("") +
xlab("")
data(polls_us_election_2016)
polls_us_election_2016 %>%
filter(state == "U.S." & enddate >= "2016-07-01") %>%
select(enddate, pollster, rawpoll_clinton, rawpoll_trump) %>%
rename(Clinton = rawpoll_clinton, Trump = rawpoll_trump) %>%
gather(candidate, percentage, -enddate, -pollster) %>%
mutate(candidate = factor(candidate, levels = c("Trump", "Clinton"))) %>%
group_by(pollster) %>%
filter(n() >=10) %>%
ungroup() %>%
ggplot(aes(enddate, percentage, color = candidate)) +
geom_point(show.legend = FALSE, alpha = 0.4) +
geom_smooth(method = "loess", span = 0.15) +
scale_y_continuous(limits = c(30,50))
## Warning: Removed 22 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
library(readr)
library(ggplot2)
library(scales)
## Warning: package 'scales' was built under R version 3.5.3
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(dplyr)
#install.packages("highcharter")
library(highcharter)
## Warning: package 'highcharter' was built under R version 3.5.3
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
##
## Attaching package: 'highcharter'
## The following object is masked from 'package:dslabs':
##
## stars
nations <- read_csv("nations.csv") %>%
mutate(gdp_tn = gdp_percap*population/1000000000000)
## Parsed with column specification:
## cols(
## iso2c = col_character(),
## iso3c = col_character(),
## country = col_character(),
## year = col_double(),
## gdp_percap = col_double(),
## population = col_double(),
## birth_rate = col_double(),
## neonat_mortal_rate = col_double(),
## region = col_character(),
## income = col_character()
## )
big4 <- nations %>%
filter(iso3c =="CHN" | iso3c == "DEU" | iso3c == "JPN" | iso3c == "USA") %>%
arrange(year)
highchart() %>%
hc_add_series(data = big4, type = "line", hcaes(x=year, y=gdp_tn, group=country))
## Warning: `parse_quosure()` is deprecated as of rlang 0.2.0.
## Please use `parse_quo()` instead.
## This warning is displayed once per session.
cols <- brewer.pal(4, "Set1")
highchart() %>%
hc_add_series(data = big4, type = "line", hcaes(x=year, y= gdp_tn, group = country)) %>%
hc_colors(cols)
highchart() %>%
hc_add_series(data = big4, type = "line", hcaes(x=year, y=gdp_tn, group=country)) %>%
hc_colors(cols) %>%
hc_xAxis(title=list(text="Year")) %>%
hc_yAxis(title = list(text = "GDP ($trillion)"))
highchart() %>%
hc_add_series(data = big4, type = "line", hcaes(x=year, y=gdp_tn, group=country)) %>%
hc_colors(cols) %>%
hc_xAxis(title = list(text = "Year")) %>%
hc_yAxis(title = list(text = "GDP ($ trillion)")) %>%
hc_plotOptions(series = list(marker = list(symbol = "circle"))) %>%
hc_legend(align = "right", verticalAlign = "top")
big4_chart <- highchart() %>%
hc_add_series(data = big4, type = "line", hcaes(x=year, y=gdp_tn, group=country)) %>%
hc_colors(cols) %>%
hc_xAxis(title = list(text = "Year")) %>%
hc_yAxis(title = list(text = "GDP ($ trillion)")) %>%
hc_plotOptions(series = list(marker = list(symbol = "circle"))) %>%
hc_legend(align = "right", verticalAlign = "top") %>%
hc_tooltip(shared = TRUE, borderColor = "black", pointFormat = "{point.country}: {point.gdp_tn:.2f}<br>")
big4_chart
regions <- nations %>%
group_by(year,region) %>%
summarize(gdp_tn = sum(gdp_tn, na.rm = TRUE)) %>%
arrange(year, region)
highchart() %>%
hc_add_series(data = regions, type = "area", hcaes(x=year, y=gdp_tn, group=region))
cols <- brewer.pal(7, "Set2")
highchart () %>%
hc_add_series(data = regions, type = "area", hcaes(x = year, y = gdp_tn, group = region)) %>%
hc_colors(cols) %>%
hc_chart(style = list(fontFamily = "Georgia", fontWeight = "bold")) %>%
hc_plotOptions(series = list(stacking = "normal", marker = list(enabled = FALSE, states = list(hover = list(enabled = FALSE))), lineWidth = 0.5, lineColor = "white")) %>%
hc_xAxis(title = list(text = "Year")) %>%
hc_yAxis(title = list(text = "GDP ($ trillion)")) %>%
hc_legend(align = "right", verticalAlign = "top", layout = "vertical") %>%
hc_tooltip(enabled = FALSE)
cols <- brewer.pal(7, "Set2")
highchart () %>%
hc_add_series(data = regions, type = "area", hcaes(x = year, y = gdp_tn, group = region)) %>%
hc_colors(cols) %>%
hc_chart(style = list(fontFamily = "Georgia", fontWeight = "bold")) %>%
hc_plotOptions(series = list(stacking = "percent", marker = list(enabled = FALSE, states = list(hover = list(enabled = FALSE))), lineWidth = 0.5, lineColor = "white")) %>%
hc_xAxis(title = list(text = "Year")) %>%
hc_yAxis(title = list(text = "GDP ($ trillion)")) %>%
hc_legend(align = "right", verticalAlign = "top", layout = "vertical") %>%
hc_tooltip(enabled = FALSE)
cols <- c("red", "black")
food_stamps <-read_csv("food_stamps.csv")
## Parsed with column specification:
## cols(
## year = col_double(),
## participants = col_double(),
## costs = col_double()
## )
highchart() %>%
hc_yAxis_multiples(list(title = list(text = "Participants (millions)")),list(title = list(text = "Costs ($ billions)"), opposite = TRUE)) %>%
hc_add_series(data = food_stamps$participants, name = "Participants (millions)", type = "column", yAxis = 0) %>%
hc_add_series(data = food_stamps$costs, name = "Costs ($ billions)", type = "line", yAxis = 1) %>%
hc_xAxis(categories = food_stamps$year, tickInterval = 5) %>%
hc_colors(cols) %>%
hc_chart(style = list(fontFamily = "Georgia", fontWeight = "bold"))
library(dplyr)
library(highcharter)
sickly <- us_contagious_diseases %>%
filter(!state%in%c("Hawaii","Alaska")) %>%
filter(year>=1965 & year<=1975) %>%
group_by(disease,year) %>%
summarise(diseasetotal=sum(count))
head(sickly)
## # A tibble: 6 x 3
## # Groups: disease [1]
## disease year diseasetotal
## <fct> <dbl> <dbl>
## 1 Hepatitis A 1966 32270
## 2 Hepatitis A 1967 38196
## 3 Hepatitis A 1968 44998
## 4 Hepatitis A 1969 45834
## 5 Hepatitis A 1970 54838
## 6 Hepatitis A 1971 58527
sicklychart <- highchart() %>%
hc_add_series(data = sickly, type = "line", hcaes(x=year, y=diseasetotal, group = disease)) %>%
hc_xAxis(title = list(text="Year")) %>%
hc_yAxis(title = list(text="Disease Count")) %>%
hc_plotOptions(series = list(marker = list(symbol = "circle"))) %>%
hc_legend(align = "right", verticalAlign = "top") %>%
hc_tooltip(shared = TRUE, borderColor = "black", pointFormat = "{point.disease}: {point.diseasetotal:.2f}<br>")
sicklychart