Apply the following dplyr verbs to your data
Filter rows
# Rows where season equals 1
data %>%
  filter(season == 1)

# Rows where season equals 1 AND episode equals 1
data %>%
  filter(season == 1 & episode == 1)

# Rows where season equals 1 OR episode equals 1
data %>%
  filter(season == 1 | episode == 1)

# Rows whose season is one of 11 or 12
data %>%
  filter(season %in% c(11, 12))
Arrange rows
# Sort by season in descending order, breaking ties by descending episode
data %>%
  arrange(desc(season), desc(episode))
Select columns
# Every column from season through painting_title (a positional range)
data %>%
  select(season:painting_title)

# Columns picked one by one
data %>%
  select(season, episode, painting_title, colors)

data %>%
  select(season, episode, painting_title, colors, color_hex)

# Columns whose names start with "color"
data %>%
  select(season, episode, painting_title, starts_with("color"))

# Columns whose names contain "painting"
data %>%
  select(season, episode, contains("painting"))

# Columns whose names end with "src"
data %>%
  select(season, episode, ends_with("src"))

# Same selection first, then all remaining columns after it
data %>%
  select(season, episode, ends_with("src"), everything())
Add columns
# Add a column: painting_index divided by the episode number
data %>%
  mutate(index_per_episode = painting_index / episode) %>%
  # Keep the columns from season through painting_title, plus the new column
  select(season:painting_title, index_per_episode)

# The same pipeline again, without the step-by-step comments
data %>%
  mutate(index_per_episode = painting_index / episode) %>%
  select(season:painting_title, index_per_episode)

# transmute() returns only the column(s) it creates
data %>%
  transmute(index_per_episode = painting_index / episode)

# Shift painting_index down by one row with lag()
data %>%
  select(painting_index) %>%
  mutate(painting_index_lag1 = lag(painting_index))

# Running total of painting_index
data %>%
  select(painting_index) %>%
  mutate(painting_index_cumsum = cumsum(painting_index))
Summarize by groups
# Show the data as-is
data

# Overall mean of painting_index, ignoring missing values
data %>%
  summarize(avg_painting_index = mean(painting_index, na.rm = TRUE))

# Mean painting_index per season, ordered from lowest to highest mean
data %>%
  group_by(season) %>%
  summarize(avg_painting_index = mean(painting_index, na.rm = TRUE)) %>%
  arrange(avg_painting_index)