Useful Packages
library(ggplot2)
library(dplyr)
library(tidyr)
library(tidyverse)
library(esquisse)
library(scales)
library(janitor)
library(lubridate)
library(stringr)
Board Games
# Download the board games CSV (TidyTuesday repo, 2019-03-12 folder) as a tibble.
board_games <- readr::read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-03-12/board_games.csv"
)
## Rows: 10532 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): description, image, name, thumbnail, artist, category, compilation...
## dbl (10): game_id, max_players, max_playtime, min_age, min_players, min_play...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#board_games$name[str_detect(board_games$name, "Catan")]
# Rows for the five titles that are highlighted on the chart, reduced to the
# columns needed to place and label them.
top5 <- board_games %>%
  filter(
    name %in% c(
      "Catan", "Acquire", "Twilight Struggle", "Connect Four", "Mouse Trap"
    )
  ) %>%
  select(name, year_published, average_rating)
#install.packages("ggthemes")
library(ggthemes)
## Warning: package 'ggthemes' was built under R version 3.6.2
# Scatter of every game's average rating against its publication year, with a
# smoothed trend line and the `top5` titles highlighted and labelled.
#
# - `year_published` is numeric, so the x axis uses a continuous scale with
#   explicit decade breaks (a discrete scale with numeric limits triggers a
#   ggplot2 "Continuous limits supplied to discrete scale" warning).
# - Labels are derived from `top5$name` instead of from row positions, so the
#   "Catan" -> "The Settlers of Catan" relabel always lands on the right point
#   regardless of the row order of `top5`.
plot <- ggplot(board_games, aes(x = year_published, y = average_rating)) +
  geom_point(alpha = 0.05) +
  geom_smooth(color = "red", se = FALSE) +
  geom_point(data = top5, color = "black") +
  geom_text(
    data = mutate(
      top5,
      label = if_else(name == "Catan", "The Settlers of Catan", name)
    ),
    aes(label = label),
    vjust = -1,
    fontface = "bold"
  ) +
  ggtitle(
    "A Golden Age Of Board Games?",
    subtitle = "Average user ratings for board games by original year of production"
  ) +
  scale_x_continuous(
    name = "",
    breaks = seq(1950, 2010, by = 10),
    labels = c("1950", "'60", "'70", "'80", "'90", "'00", "'10")
  ) +
  ylim(NA, 10.0) +
  labs(y = "Average user rating") +
  theme_fivethirtyeight() +
  # Applied after theme_fivethirtyeight() so these settings take precedence.
  theme(
    plot.title = element_text(vjust = 1),
    plot.subtitle = element_text(vjust = 2),
    axis.title.y = element_text(face = "bold")
  )
plot
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

Measles
library(colorspace)
## Warning: package 'colorspace' was built under R version 3.6.2
library(dslabs)
## Warning: package 'dslabs' was built under R version 3.6.2
# Measles records for every state except Hawaii and Alaska, with `rate` =
# cases per 100,000 population scaled by 52 / weeks_reporting.
measles <- us_contagious_diseases %>%
  filter(
    disease == "Measles",
    !state %in% c("Hawaii", "Alaska")
  ) %>%
  mutate(rate = count / population * 100000 * 52 / weeks_reporting)
# Tile heatmap of the measles rate: one column per year, one row per state.
# The y aesthetic reverses the factor level order of `state`.
# NOTE(review): `size` is used for line widths throughout; ggplot2 >= 3.4
# deprecates that in favour of `linewidth` -- switch once the installed
# version is confirmed.
ggplot(
  data = measles,
  mapping = aes(
    x = year,
    y = factor(state, levels = rev(levels(factor(state))))
  )
) +
  geom_tile(mapping = aes(fill = rate), color = "white", size = 0.25) +
  # Multi-stop palette; missing rates are drawn white.
  scale_fill_gradientn(
    colours = c(
      "aliceblue", "deepskyblue", "springgreen4",
      "yellow", "gold", "darkgoldenrod1",
      "orange", "red", "firebrick1", "firebrick2",
      "firebrick3", "firebrick4"
    ),
    na.value = "white"
  ) +
  # Vertical marker at 1963; the subtitle text acts as its label.
  geom_vline(xintercept = 1963, color = "black", size = 1.2) +
  labs(
    title = "Measles",
    subtitle = "Vaccine introduced",
    caption = "Note: CDC data from 2003-2012 comes from its Summary of Notifiable Diseases, which\npublishes yearly rather than weekly and counts confirmed cases as opposed\nto provisional ones.",
    x = "",
    y = ""
  ) +
  theme_classic() +
  theme(
    # Plot-level text and spacing
    plot.title = element_text(size = 12, face = "bold"),
    # hjust nudges the subtitle horizontally (presumably towards the 1963 line)
    plot.subtitle = element_text(size = 7, hjust = 0.55),
    plot.caption = element_text(hjust = 0, size = 7),
    plot.caption.position = "panel",
    plot.background = element_blank(),
    plot.margin = margin(0.7, 0.4, 0.1, 0.2, "cm"),
    # Axes and panel
    axis.line = element_blank(),
    axis.ticks = element_line(size = 0.4),
    panel.grid = element_blank(),
    # Thin, wide colour bar underneath the panel
    legend.position = "bottom",
    legend.title = element_blank(),
    legend.margin = margin(grid::unit(0, "cm")),
    legend.key.width = grid::unit(0.8, "cm"),
    legend.key.height = grid::unit(0.2, "cm")
  )

California Wildfires
# Download the fire records and add `plot_date`: the alarm date's month and
# day re-stamped onto the year 2017, so every fire shares one calendar axis.
# NOTE(review): 2017 is not a leap year, so Feb 29 alarm dates presumably
# become NA here -- confirm whether that matters.
wildfires <- readr::read_csv(
  file = "https://raw.githubusercontent.com/BuzzFeedNews/2018-07-wildfire-trends/master/data/calfire_frap.csv"
) %>%
  mutate(
    plot_date = as.Date(format(alarm_date, "2017-%m-%d"))
  )
## Rows: 14847 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): state, agency, unit_id, fire_name, inc_num, comments, fire_num
## dbl (9): objectid, year_, cause, report_ac, gis_acres, c_method, objective,...
## date (2): alarm_date, cont_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(lubridate)
library(scales)
# Bubble chart of wildfires on a black background: calendar date (x) against
# year (y, reversed so later years sit lower), point size scaled by
# `shape_area`.
ggplot(wildfires, aes(x = plot_date, y = year_, size = shape_area)) +
  geom_point(alpha = 0.4, color = "orange") +
  # Explicit breaks keep tick positions and their labels in sync, instead of
  # pairing hand-written labels with whatever breaks the scale computes.
  scale_y_reverse(breaks = c(1950, 1970, 1990, 2010)) +
  # `date_labels` expects a strftime() format string; "%b" prints each
  # break's own abbreviated month name, independent of break count or order.
  scale_x_date(date_breaks = "1 month", date_labels = "%b") +
  scale_size(range = c(0.1, 9)) +
  ggtitle("Big fires have gotten more common.") +
  theme(
    plot.background = element_rect(fill = "black"),
    panel.background = element_rect(fill = "black"),
    # Blank all grid lines, then re-enable only the horizontal ones.
    panel.grid = element_blank(),
    panel.grid.major.y = element_line(color = "grey"),
    panel.grid.minor.y = element_line(color = "grey"),
    axis.title = element_blank(),
    axis.text = element_text(color = "lightgrey", face = "bold"),
    legend.position = "none",
    title = element_text(color = "lightgrey")
  )
## Warning: Removed 1617 rows containing missing values (geom_point).

Foul Balls
# Download the foul-ball records and bucket each one by exit velocity:
# below 90 -> "< 90 mph", otherwise (including exactly 90) -> "> 90 mph",
# and a missing velocity -> "Unknown".
foul_balls <- readr::read_csv(
  file = "https://raw.githubusercontent.com/fivethirtyeight/data/master/foul-balls/foul-balls.csv"
) %>%
  mutate(
    velocity_mag = if_else(
      condition = exit_velocity < 90,
      true = "< 90 mph",
      false = "> 90 mph",
      missing = "Unknown"
    )
  )
## Rows: 906 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): matchup, type_of_hit
## dbl (4): exit_velocity, predicted_zone, camera_zone, used_zone
## date (1): game_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Horizontal stacked bar chart: count of foul balls per predicted zone,
# filled by exit-velocity bucket, with the zone axis reversed.
ggplot(
  data = foul_balls,
  mapping = aes(x = predicted_zone, fill = velocity_mag)
) +
  geom_bar() +
  coord_flip() +
  # Unnamed colours are assigned to the fill values in the scale's own order.
  # NOTE(review): presumably "< 90 mph", "> 90 mph", "Unknown" -- confirm.
  scale_fill_manual(values = c("#A0BF77", "#73A9AF", "#D3D3D3")) +
  scale_x_continuous(trans = "reverse", breaks = 1:7) +
  labs(
    title = "The hardest-hit fouls seem to land in unprotected areas",
    x = "Zone"
  ) +
  theme(
    plot.title = element_text(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.line = element_blank(),
    axis.ticks = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    # Upright "Zone" label
    axis.title.y = element_text(angle = 0, hjust = 0.1)
  )
