David Ranzolin
November 17, 2016


install.packages("tidyverse")
library(tidyverse) #loads readr, dplyr, tidyr, purrr, ggplot2, tibble
readr: a fast and friendly way to read rectangular data.
| To… | Use… |
|---|---|
| Read delimited files | read_delim(), read_csv(), read_tsv() |
| Read lines | read_lines() |
| Read fixed width files | read_fwf(), read_table() |
# Build a small demo tibble; later columns may refer to earlier ones (y uses x).
# tibble() replaces the deprecated data_frame() and matches the tibble() call
# used further down in this file.
df <- tibble(x = 1:3, y = x^2, z = sample(letters, 3, replace = TRUE))
# Round-trip the data through a CSV file in the working directory.
write_csv(df, "my_csv.csv")
read_csv("my_csv.csv")
# A tibble: 3 × 3
x y z
<int> <dbl> <chr>
1 1 1 v
2 2 4 z
3 3 9 i
dplyr: manipulate data frames by filtering, selecting, mutating, etc.
| To… | Use… |
|---|---|
| Select columns | select() |
| Subset rows | filter() |
| Create additional columns | mutate() |
| Calculate summary statistics | summarize() |
| Order rows | arrange() |
| Perform joins | inner_join(), left_join(), anti_join(), etc. |
| Group | group_by() |
# Keep only the three named columns of iris.
select(iris, Sepal.Length, Petal.Length, Species)
# Keep only the rows where Sepal.Length is greater than 7.
filter(iris, Sepal.Length > 7)
# Add a column `sepal` holding Sepal.Length + Sepal.Width.
mutate(iris, sepal = Sepal.Length + Sepal.Width)
# Collapse iris to a single row holding the mean Sepal.Length as `avg`.
summarize(iris, avg = mean(Sepal.Length))
The pipe (%>%) chains tidyverse functions together:
# For each species, average Sepal.Width over the rows with Sepal.Length > 4,
# then list the species from the highest to the lowest average.
iris %>%
  filter(Sepal.Length > 4) %>%
  group_by(Species) %>%
  summarize(avg = mean(Sepal.Width)) %>%
  arrange(desc(avg))
# A tibble: 3 × 2
Species avg
<fctr> <dbl>
1 setosa 3.428
2 virginica 2.974
3 versicolor 2.770
tidyr: a package to tidy and reshape data.
| To… | Use… |
|---|---|
| Make wide data long | gather() |
| Make long data wide | spread() |
# Gather the per-year columns into key/value pairs: `year` takes the old
# column names and `cases` the cell values; `country` is left untouched.
# tidyr ships this example data as table4a (cases) and table4b (population);
# it has no dataset named table4.
table4a %>% gather(year, cases, -country)

purrr: work with lists and facilitate iteration.
| To… | Use… |
|---|---|
| Apply a function to each element | map(), map_*() |
| Transpose a list | transpose() |
| Flatten a list | flatten() |
| Control error handling | safely(), possibly() |
# Apply log() to 1, 2 and 3 and return a double vector; the extra `base = 2`
# argument is passed on to every log() call.
map_dbl(1:3, log, base = 2)
[1] 0.000000 1.000000 1.584963
Using purrr to calculate Ed-Data's Ethnic Diversity Index (EDI)
# Ethnic Diversity Index (EDI) for a table of students.
#
# The index is 100 when the students with a reported ethnicity are spread
# evenly over all `n_groups` groups, and 0 when they all belong to one group.
#
# Args:
#   df:       data frame with one row per student and an `ethnicity` column.
#   n_groups: number of ethnic groups the index is defined over. Defaults to
#             13, the number of reported ethnicities.
#
# Returns: a single number; NA_real_ when no student has a reported ethnicity.
edi <- function(df, n_groups = 13) {
  if (!is.data.frame(df)) stop("df must be a data frame", call. = FALSE)
  if (!"ethnicity" %in% names(df)) {
    stop("ethnicity must be a column", call. = FALSE)
  }

  # Values that count as "unreported"; a missing ethnicity is treated the same.
  ur <- c("Decline/Don't Know", "Other", "")
  is_ur <- is.na(df$ethnicity) | df$ethnicity %in% ur
  if (all(is_ur)) return(NA_real_)

  # Unreported share of ALL students, so that dividing a group's share of all
  # students by (1 - ur_fraction) yields its share of the reported students.
  ur_fraction <- sum(is_ur) / nrow(df)

  squared_gaps <- df[!is_ur, , drop = FALSE] %>%
    # Split on character values so unused factor levels create no empty groups.
    split(as.character(.$ethnicity)) %>%
    map_dbl(~ nrow(.) / nrow(df) / (1 - ur_fraction)) %>%
    map_dbl(~ (. - (1 / n_groups))^2)

  n_present <- length(squared_gaps)
  if (n_present > n_groups) {
    warning("more than n_groups distinct ethnicities were found", call. = FALSE)
  }
  # A group with no students has a share of 0 and still contributes (1/n)^2;
  # leaving these out would keep a single-group population from scoring 0.
  n_absent <- max(n_groups - n_present, 0)
  diversity_rating <- sqrt(sum(squared_gaps) + n_absent * (1 / n_groups)^2)

  c2 <- -100 * sqrt(n_groups * (n_groups - 1)) / (n_groups - 1)
  100 + (c2 * diversity_rating)
}
tibble: data frames with nicer behavior around printing and subsetting.
# Build a tibble: y1 = 1 is recycled to every row, and z1 is computed from the
# x1 and y1 columns defined just before it.
df1 <- tibble(x1 = 1:3, y1 = 1, z1 = x1 ^ 2 + y1)
# Print it.
df1
# A tibble: 3 × 3
x1 y1 z1
<int> <dbl> <dbl>
1 1 1 2
2 2 1 5
3 3 1 10
# There is no column named `x` (only `x1`), so this warns and returns NULL
# instead of partially matching `x1`.
df1$x
Warning: Unknown column 'x'
NULL
Task: email each student earning less than 80%, as well as their parents and school counselors.
Tools: R, tidyverse, rcanvas, and gmailr.
library(rcanvas)
# Fetch the course list and keep the courses whose name contains "Premium".
premium_courses <- get_course_list() %>%
  filter(grepl("Premium", name))
# Fetch the users of one course together with their active-enrollment scores.
#
# Args:
#   id: course id passed to get_course_items().
#
# Returns: the "users" rows (name, sis_user_id, sis_login_id, email)
#   left-joined with the selected columns of the active enrollments, matched
#   on the SIS user id.
get_emails_and_grades <- function(id) {
  emails <- get_course_items(id, "users", include = "email") %>%
    select(name, sis_user_id, sis_login_id, email)
  grades <- get_course_items(id, "enrollments") %>%
    filter(enrollment_state == "active") %>%
    select(id, user.name, user.sis_user_id, user.sis_login_id,
           grades.current_score)
  # The join is its own statement: piping the enrollment table into it would
  # make that table the left-hand side and reference `grades` before it exists.
  # NOTE(review): the downstream pipeline selects `course_id.x`, which neither
  # select() above keeps — confirm which columns get_course_items() returns.
  left_join(emails, grades, by = c("sis_user_id" = "user.sis_user_id"))
}
# safely() makes each call return list(result = , error = ) instead of
# stopping, so one failing course does not abort the whole run.
safe_function <- safely(get_emails_and_grades)
student_data <- premium_courses$id %>%
  map(safe_function) %>%
  # Keep only the data frames: bind_rows() cannot stack the result/error pairs
  # themselves. Failed courses have a NULL result and are dropped.
  map("result") %>%
  compact() %>%
  bind_rows() %>%
  left_join(premium_courses, by = c("course_id.x" = "id")) %>%
  select(name, sis_user_id, email, course_id.x, grades.current_score,
         sis_course_id)
# Read the contact export, rename the id and e-mail columns, then turn each
# distinct Question into its own column holding the matching Answer.
student_contact <- read_csv("student_contact.csv") %>%
  select(
    sis_user_id = `Student ID`,
    Student,
    Question,
    Answer,
    email = Email
  ) %>%
  spread(Question, Answer)
# A tibble: 3 × 5
sis_user_id Student email `Parent Email`
<chr> <chr> <chr> <chr>
1 A0004325 David Ranzolin dranzolin@ucscout.org info@ucscout.org
2 A0004375 Sajira Awang sawang@ucscout.org info@ucscout.org
3 A0004925 Lisa Dominguez ldominguez@ucscout.org info@ucscout.org
# ... with 1 more variables: `Counselor Email` <chr>
# Attach course and grade data to each contact row, keep the students scoring
# below 80, and give the contact columns syntactic names.
# NOTE(review): student_contact and student_data both carry an `email` column
# and the join key is sis_user_id only, so the result likely holds email.x /
# email.y rather than `email` — confirm before `email` is used downstream.
email_df <- student_contact %>%
  left_join(student_data, by = "sis_user_id") %>%
  filter(grades.current_score < 80) %>%
  select(-Sections, -course_id.x) %>%
  rename(
    counselor_email = `Counselor Email`,
    parent_email = `Parent Email`
  )
# Fixed parts of every message.
email_sender <- "UC Scout <info@ucscout.org>"
subject <- "UC Scout Weekly Grade Update"
# sprintf() template; the three %s slots are filled, in order, with the
# student's name, the course and the current grade.
body <- "Dear %s,
We're writing to inform you that your current grade in %s is %s. You can view your course progress in your Online Classroom (classroom.ucscout.org).
Please let us know if you need any assistance. You can also contact your teacher with any further questions or concerns.
Best wishes,
The Scout Team"
# Build one row per message holding the four fields handed to mime() below.
email_df2 <- email_df %>%
  mutate(
    To = sprintf(
      "%s <%s>, <%s>, <%s>",
      name, email, counselor_email, parent_email
    ),
    From = email_sender,
    Subject = subject,
    Body = sprintf(body, name, sis_course_id, grades.current_score)
  ) %>%
  select(To, From, Subject, Body)
library(gmailr)
# Turn every row of email_df2 into a message: pmap() passes the columns of a
# row to mime() as arguments named after the columns.
# NOTE(review): mime() appears to take the message text as `body` (lower
# case) — confirm that the capitalised `Body` column ends up as the body.
emails <- pmap(email_df2, mime)
# Register the client-secret JSON file with gmailr before sending.
use_secret_file("client_secret_PROJ-NAME.json")
# Send each message; safely() captures a failure instead of stopping the run.
safe_send_message <- safely(send_message)
sent_mail <- map(emails, safe_send_message)
Task: produce a reproducible report with plots, tables, and prose commentary.
Tools: R, rmarkdown, and knitr.
Task: create and share an interactive dashboard.
Tools: R, shiny, and flexdashboard.
gganimate, bookdown, tidytext