October 15, 2016
Datasets for today are here: https://www.dropbox.com/s/zhmn02ti0ggxdj7/rladies_ggplot2_datasets.rda?dl=1
You can download them from R:
# Download the workshop datasets and load them into the session.
# mode = 'wb' forces a binary transfer of the .rda file: on Windows R only
# switches to binary automatically when the URL itself ends in .rda, and this
# URL ends in '?dl=1'.
download.file(
  'https://www.dropbox.com/s/zhmn02ti0ggxdj7/rladies_ggplot2_datasets.rda?dl=1',
  'rladies_ggplot2_datasets.rda',
  mode = 'wb')
# load() restores the saved objects into the workspace and can be rerun
# without stacking another entry on the search path.
load('rladies_ggplot2_datasets.rda')
hourly_df: hourly summary of rides
durham_voters_df: one row per voter
# Fetch the workshop datasets and load them into the session.
# mode = 'wb' requests a binary transfer: the URL ends in '?dl=1' rather than
# '.rda', so on Windows R would not choose binary mode by itself and the
# downloaded file could be corrupted.
download.file(
  'https://www.dropbox.com/s/zhmn02ti0ggxdj7/rladies_ggplot2_datasets.rda?dl=1',
  'rladies_ggplot2_datasets.rda',
  mode = 'wb')
# load() puts the saved objects in the workspace; unlike attach() it does not
# add a new search-path entry every time the script is rerun.
load('rladies_ggplot2_datasets.rda')
library(tidyverse)
# Equivalent ways of writing the same scatter plot of n_rides against
# ride_date.

# Named arguments, with the aesthetic mapping given to the geom.
ggplot(data = daily_df) +
  geom_point(mapping = aes(x = ride_date, y = n_rides))

# Named arguments, with the mapping given to ggplot() so layers inherit it.
ggplot(data = daily_df, mapping = aes(x = ride_date, y = n_rides)) +
  geom_point()

# Positional arguments: data first, mapping second.
ggplot(daily_df, aes(x = ride_date, y = n_rides)) +
  geom_point()

# Data piped into ggplot(); layers are still added with `+`.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point()

daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point()
# Map a third column to point color.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point()

# Map a third column to point size.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, size = n_riders)) +
  geom_point()
# An aesthetic can be mapped to an expression: here color follows the
# TRUE/FALSE result of the weekend test.
daily_df %>%
  ggplot(aes(x = ride_date,
             y = n_rides,
             color = day_of_week %in% c('Sat', 'Sun'))) +
  geom_point()

# Same idea, but the weekend test is first stored as a labelled column so the
# legend shows 'Weekend' / 'Weekday'.
daily_df %>%
  mutate(
    day_type = if_else(day_of_week %in% c('Sat', 'Sun'), 'Weekend', 'Weekday')
  ) %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_type)) +
  geom_point()
# Inside aes(), 'blue' is treated as data: a constant value mapped through the
# default color scale, so it gets a legend and is not drawn in literal blue.
daily_df %>%
  ggplot() +
  geom_point(aes(x = ride_date, y = n_rides, color = 'blue'))

# Outside aes(), color = 'blue' is a fixed setting applied to the whole layer.
daily_df %>%
  ggplot() +
  geom_point(aes(x = ride_date, y = n_rides), color = 'blue')
# Weekend/weekday scatter plot written as one chain.
# The dplyr steps are joined with %>%, but ggplot2 layers must be added with
# `+`: piping the plot into geom_point() would pass the plot object as the
# geom's `mapping` argument and raise an error.
daily_df %>%
  mutate(day_type = if_else(day_of_week %in% c('Sat', 'Sun'),
                            'Weekend',
                            'Weekday')) %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_type)) +
  geom_point()
Plot the number of unique routes per day over time, colored by day of week. (n_unique_routes)
# Exercise answer: n_unique_routes against ride_date, colored by day_of_week.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_unique_routes, color = day_of_week)) +
  geom_point()
# Points only.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point()

# Points joined by a line.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point() +
  geom_line()

# Points with a smoothed trend on top.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point() +
  geom_smooth(span = .1) # try changing span

# With color mapped to day_of_week, each day gets its own points and line.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  geom_line()

# One smoothed curve per day_of_week, without the confidence band.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_smooth(span = .2, se = FALSE)
# A layer can use its own data: large gray points are drawn first for the
# non-weekend days with fewer than 200 rides, then the regular colored points
# for every day are drawn on top.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point(
    data = daily_df %>%
      filter(!(day_of_week %in% c('Sat', 'Sun')), n_rides < 200),
    size = 5,
    color = 'gray'
  ) +
  geom_point()
# Non-weekend days with fewer than 100 rides, kept as a separate data frame
# so that several layers can share it.
low_weekdays_df <- filter(
  daily_df,
  !(day_of_week %in% c('Sat', 'Sun')),
  n_rides < 100
)

# Gray halo plus a date label for each low day, with all points on top.
daily_df %>%
  ggplot(aes(x = ride_date,
             y = n_rides,
             color = day_of_week,
             label = ride_date)) +
  geom_point(data = low_weekdays_df, size = 5, color = 'gray') +
  # y is shifted by 15 so the label sits above its point.
  geom_text(data = low_weekdays_df,
            aes(y = n_rides + 15),
            size = 2,
            color = 'black') +
  geom_point()
# Small multiples: one panel per value of day_of_week.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides)) +
  geom_point() +
  geom_line() +
  facet_wrap(~ day_of_week)
# Count voters (and rows with party == 'REP') per race/gender/age group, keep
# a subset of the codes, and draw one bar chart per race (rows) and gender
# (columns).
durham_voters_df %>%
  group_by(race_code, gender_code, age) %>%
  summarize(
    n_voters = n(),
    n_rep = sum(party == 'REP')
  ) %>%
  filter(
    gender_code %in% c('F','M'),
    race_code %in% c('W', 'B', 'A'),
    age != 'Age < 18 Or Invalid Birth Date'
  ) %>%
  ggplot(aes(x = age, y = n_voters)) +
  # stat = 'identity' plots the precomputed counts as bar heights.
  geom_bar(stat = 'identity') +
  facet_grid(race_code ~ gender_code)
# Bar chart of voter counts by age, faceted by race (rows) and gender
# (columns), with a separate y-axis range for each row of panels.
durham_voters_df %>%
  group_by(race_code, gender_code, age) %>%
  summarize(
    n_voters = n(),
    n_rep = sum(party == 'REP')
  ) %>%
  filter(
    gender_code %in% c('F','M'),
    race_code %in% c('W', 'B', 'A'),
    age != 'Age < 18 Or Invalid Birth Date'
  ) %>%
  ggplot(aes(x = age, y = n_voters)) +
  geom_bar(stat = 'identity') +
  # scales = 'free_y' stops rows with large counts from flattening the others.
  facet_grid(race_code ~ gender_code, scales = 'free_y')
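Note: it would be better to reshape the data to long format with gather (pivot_longer in current versions of tidyr) instead of adding separate layers for each column, as the next example does.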
# Voter counts (default color) and counts of rows with party == 'REP' (red)
# by age, as points joined by lines, one panel per race (rows) and gender
# (columns).
#
# age is a discrete axis, so each line layer sets group = 1 to connect the
# points within a panel. This replaces recoding age to integers, which is
# computed inside the groups left over after summarize() and could misalign
# with the axis if a group were missing an age band.
durham_voters_df %>%
  group_by(race_code, gender_code, age) %>%
  summarize(n_voters = n(),
            n_rep = sum(party == 'REP')) %>%
  filter(gender_code %in% c('F','M') &
           race_code %in% c('W', 'B', 'A') &
           age != 'Age < 18 Or Invalid Birth Date') %>%
  ggplot(aes(x = age, y = n_voters)) +
  geom_point() +
  geom_line(aes(group = 1)) +
  geom_line(aes(y = n_rep, group = 1), color = 'red') +
  geom_point(aes(y = n_rep), color = 'red') +
  facet_grid(race_code ~ gender_code, scales = 'free_y') +
  # Keep zero on every y axis even though the scales are free.
  expand_limits(y = 0)
# Smoothed rides per day_of_week with custom labels: blank x-axis label,
# y-axis label, plot title, and a legend title for the color scale.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_smooth(span = .2, se = FALSE) +
  xlab('') +
  ylab('# of Transit Rides') +
  ggtitle('Transit Rides over time by Day of Week') +
  scale_color_discrete('Day of Week')
# Reverse the direction of the y axis.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  scale_y_reverse()

# Square-root transformed y axis.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  scale_y_sqrt()

# Choose the y-axis breaks by hand.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  scale_y_continuous(breaks = c(0, 200, 500))
# Built-in complete theme: theme_bw().
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  theme_bw()

# Built-in complete theme: theme_dark().
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  theme_dark()

# Adjust a single theme element: rotate the x-axis tick labels by 90 degrees.
daily_df %>%
  ggplot(aes(x = ride_date, y = n_rides, color = day_of_week)) +
  geom_point() +
  theme(axis.text.x = element_text(angle = 90))
To really master themes:
Better than the regular documentation:
Check it out!
Monday, November 14th, 6pm
Location:
Topic:
Topic/format suggestions?