library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
getwd()
## [1] "C:/Users/Jerome/Documents/Data_Science_110/Datasets"
library(dslabs)
data(packages="dslabs")
## Warning in data(packages = "dslabs"): data set 'dslabs' not found
list.files(system.file("script", package = "dslabs"))
## [1] "make-admissions.R"
## [2] "make-brca.R"
## [3] "make-brexit_polls.R"
## [4] "make-death_prob.R"
## [5] "make-divorce_margarine.R"
## [6] "make-gapminder-rdas.R"
## [7] "make-greenhouse_gases.R"
## [8] "make-historic_co2.R"
## [9] "make-mnist_27.R"
## [10] "make-movielens.R"
## [11] "make-murders-rda.R"
## [12] "make-na_example-rda.R"
## [13] "make-nyc_regents_scores.R"
## [14] "make-olive.R"
## [15] "make-outlier_example.R"
## [16] "make-polls_2008.R"
## [17] "make-polls_us_election_2016.R"
## [18] "make-reported_heights-rda.R"
## [19] "make-research_funding_rates.R"
## [20] "make-stars.R"
## [21] "make-temp_carbon.R"
## [22] "make-tissue-gene-expression.R"
## [23] "make-trump_tweets.R"
## [24] "make-weekly_us_contagious_diseases.R"
## [25] "save-gapminder-example-csv.R"
data("gapminder")
west <- c("Western Europe","Northern Europe","Southern Europe",
"Northern America","Australia and New Zealand")
gapminder <- gapminder %>%
mutate(group = case_when(
region %in% west ~ "The West",
region %in% c("Eastern Asia", "South-Eastern Asia") ~ "East Asia",
region %in% c("Caribbean", "Central America", "South America") ~ "Latin America",
continent == "Africa" & region != "Northern Africa" ~ "Sub-Saharan Africa",
TRUE ~ "Others"))
gapminder <- gapminder %>%
mutate(group = factor(group, levels = rev(c("Others", "Latin America", "East Asia","Sub-Saharan
Africa", "The West"))))
filter(gapminder, year%in%c(1962, 2013) & !is.na(group) &
!is.na(fertility) & !is.na(life_expectancy)) %>%
mutate(population_in_millions = population/10^6) %>%
ggplot( aes(fertility, y=life_expectancy, col = group, size = population_in_millions)) +
geom_point(alpha = 0.8) +
guides(size=FALSE) +
theme(plot.title = element_blank(), legend.title = element_blank()) +
coord_cartesian(ylim = c(30, 85)) +
xlab("Fertility rate (births per woman)") +
ylab("Life Expectancy") +
geom_text(aes(x=7, y=82, label=year), cex=12, color="grey") +
facet_grid(. ~ year) +
theme(strip.background = element_blank(),
strip.text.x = element_blank(),
strip.text.y = element_blank(),
legend.position = "top")
## Measles in the Lower 48
library(RColorBrewer)
data("us_contagious_diseases")
the_disease <- "Measles"
us_contagious_diseases %>%
filter(!state%in%c("Hawaii","Alaska") & disease == the_disease) %>%
mutate(rate = count / population * 10000 * 52 / weeks_reporting) %>%
mutate(state = reorder(state, rate)) %>%
ggplot(aes(year, state, fill = rate)) +
geom_tile(color = "grey50") +
scale_x_continuous(expand=c(0,0)) +
scale_fill_gradientn(colors = brewer.pal(9, "Reds"), trans = "sqrt") +
geom_vline(xintercept=1963, col = "blue") +
theme_minimal() + theme(panel.grid = element_blank()) +
ggtitle(the_disease) +
ylab("") +
xlab("")