## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 1.0.1
## ✔ tibble 3.2.1 ✔ dplyr 1.1.1
## ✔ tidyr 1.2.0 ✔ stringr 1.5.0
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
##
## Attaching package: 'lubridate'
##
##
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
##
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
We will be using data from the Tidy Tuesday project. Tidy Tuesday is a weekly social data project available for free online. Its goal is “making learning to work with data easier,” and it is a great resource for interesting data sets. To learn more about this project and explore the many data sets it has available, go to https://github.com/rfordatascience/tidytuesday.
For these practice problems, we will be using the “bob_ross” data set, which is a data set of all of the paintings painted on Bob Ross’s TV show, “The Joy of Painting.” A full data dictionary is available at the source website (https://github.com/rfordatascience/tidytuesday/tree/master/data/2023/2023-02-21). For our purposes, we are interested in the following variables:
| Variable | Description |
|---|---|
| painting index | Painting number as enumerated in collection. |
| painting title | Title of the painting. |
| Season | Season of ‘The Joy of Painting’ in which the painting was featured. |
| Episode | Episode of ‘The Joy of Painting’ in which the painting was featured. |
| num_colors | Number of unique colors used in the painting. |
| colors | List of colors used in the painting. |
| Cadmium_Yellow | Whether Cadmium Yellow was used in the painting (TRUE/FALSE). |
# Read in the file.
# Data comes from Tidy Tuesday
# (https://github.com/rfordatascience/tidytuesday/tree/master/data/2023/2023-02-21).
#YOUR CODE HERE
#ANSWERS
# The path is relative, so "bob_ross.csv" must sit in the working directory.
bob_ross <- read_csv(file = "bob_ross.csv")
## Rows: 403 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): img src, painting title, youtube_src, colors, color_hex
## dbl (4): painting index, Season, Episode, num_colors
## lgl (18): Black_Gesso, Bright_Red, Burnt_Umber, Cadmium_Yellow, Dark_Sienna,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## [1] 403 27
## spc_tbl_ [403 × 27] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ painting index : num [1:403] 282 283 284 285 286 287 288 289 290 291 ...
## $ img src : chr [1:403] "https://www.twoinchbrush.com/images/painting282.png" "https://www.twoinchbrush.com/images/painting283.png" "https://www.twoinchbrush.com/images/painting284.png" "https://www.twoinchbrush.com/images/painting285.png" ...
## $ painting title : chr [1:403] "A Walk in the Woods" "Mt. McKinley" "Ebony Sunset" "Winter Mist" ...
## $ Season : num [1:403] 1 1 1 1 1 1 1 1 1 1 ...
## $ Episode : num [1:403] 1 2 3 4 5 6 7 8 9 10 ...
## $ num_colors : num [1:403] 8 8 9 3 8 4 8 8 8 8 ...
## $ youtube_src : chr [1:403] "https://www.youtube.com/embed/oh5p5f5_-7A" "https://www.youtube.com/embed/RInDWhYceLU" "https://www.youtube.com/embed/UOziR7PoVco" "https://www.youtube.com/embed/0pwoixRikn4" ...
## $ colors : chr [1:403] "['Alizarin Crimson', 'Bright Red', 'Cadmium Yellow', 'Phthalo Green\\r\\n', 'Prussian Blue', 'Sap Green', 'Tita"| __truncated__ "['Alizarin Crimson', 'Bright Red', 'Cadmium Yellow', 'Phthalo Green\\r\\n', 'Prussian Blue', 'Sap Green', 'Tita"| __truncated__ "['Alizarin Crimson', 'Black Gesso', 'Bright Red', 'Cadmium Yellow', 'Phthalo Green\\r\\n', 'Prussian Blue', 'Sa"| __truncated__ "['Prussian Blue', 'Titanium White', 'Van Dyke Brown']" ...
## $ color_hex : chr [1:403] "['#4E1500', '#DB0000', '#FFEC00', '#102E3C', '#021E44', '#0A3410', '#FFFFFF', '#221B15']" "['#4E1500', '#DB0000', '#FFEC00', '#102E3C', '#021E44', '#0A3410', '#FFFFFF', '#221B15']" "['#4E1500', '#000000', '#DB0000', '#FFEC00', '#102E3C', '#021E44', '#0A3410', '#FFFFFF', '#221B15']" "['#021E44', '#FFFFFF', '#221B15']" ...
## $ Black_Gesso : logi [1:403] FALSE FALSE TRUE FALSE FALSE TRUE ...
## $ Bright_Red : logi [1:403] TRUE TRUE TRUE FALSE TRUE FALSE ...
## $ Burnt_Umber : logi [1:403] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ Cadmium_Yellow : logi [1:403] TRUE TRUE TRUE FALSE TRUE FALSE ...
## $ Dark_Sienna : logi [1:403] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ Indian_Red : logi [1:403] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ Indian_Yellow : logi [1:403] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ Liquid_Black : logi [1:403] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ Liquid_Clear : logi [1:403] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ Midnight_Black : logi [1:403] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ Phthalo_Blue : logi [1:403] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ Phthalo_Green : logi [1:403] TRUE TRUE TRUE FALSE TRUE FALSE ...
## $ Prussian_Blue : logi [1:403] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ Sap_Green : logi [1:403] TRUE TRUE TRUE FALSE TRUE FALSE ...
## $ Titanium_White : logi [1:403] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ Van_Dyke_Brown : logi [1:403] TRUE TRUE TRUE TRUE TRUE TRUE ...
## $ Yellow_Ochre : logi [1:403] FALSE FALSE FALSE FALSE FALSE FALSE ...
## $ Alizarin_Crimson: logi [1:403] TRUE TRUE TRUE FALSE TRUE FALSE ...
## - attr(*, "spec")=
## .. cols(
## .. `painting index` = col_double(),
## .. `img src` = col_character(),
## .. `painting title` = col_character(),
## .. Season = col_double(),
## .. Episode = col_double(),
## .. num_colors = col_double(),
## .. youtube_src = col_character(),
## .. colors = col_character(),
## .. color_hex = col_character(),
## .. Black_Gesso = col_logical(),
## .. Bright_Red = col_logical(),
## .. Burnt_Umber = col_logical(),
## .. Cadmium_Yellow = col_logical(),
## .. Dark_Sienna = col_logical(),
## .. Indian_Red = col_logical(),
## .. Indian_Yellow = col_logical(),
## .. Liquid_Black = col_logical(),
## .. Liquid_Clear = col_logical(),
## .. Midnight_Black = col_logical(),
## .. Phthalo_Blue = col_logical(),
## .. Phthalo_Green = col_logical(),
## .. Prussian_Blue = col_logical(),
## .. Sap_Green = col_logical(),
## .. Titanium_White = col_logical(),
## .. Van_Dyke_Brown = col_logical(),
## .. Yellow_Ochre = col_logical(),
## .. Alizarin_Crimson = col_logical()
## .. )
## - attr(*, "problems")=<externalptr>
## Rows: 403
## Columns: 27
## $ `painting index` <dbl> 282, 283, 284, 285, 286, 287, 288, 289, 290, 291, 292…
## $ `img src` <chr> "https://www.twoinchbrush.com/images/painting282.png"…
## $ `painting title` <chr> "A Walk in the Woods", "Mt. McKinley", "Ebony Sunset"…
## $ Season <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2,…
## $ Episode <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 1, 2, 3, 4…
## $ num_colors <dbl> 8, 8, 9, 3, 8, 4, 8, 8, 8, 8, 8, 4, 8, 12, 12, 13, 3,…
## $ youtube_src <chr> "https://www.youtube.com/embed/oh5p5f5_-7A", "https:/…
## $ colors <chr> "['Alizarin Crimson', 'Bright Red', 'Cadmium Yellow',…
## $ color_hex <chr> "['#4E1500', '#DB0000', '#FFEC00', '#102E3C', '#021E4…
## $ Black_Gesso <lgl> FALSE, FALSE, TRUE, FALSE, FALSE, TRUE, FALSE, FALSE,…
## $ Bright_Red <lgl> TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRU…
## $ Burnt_Umber <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ Cadmium_Yellow <lgl> TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRU…
## $ Dark_Sienna <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ Indian_Red <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ Indian_Yellow <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ Liquid_Black <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ Liquid_Clear <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ Midnight_Black <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ Phthalo_Blue <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ Phthalo_Green <lgl> TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRU…
## $ Prussian_Blue <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,…
## $ Sap_Green <lgl> TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRU…
## $ Titanium_White <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,…
## $ Van_Dyke_Brown <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,…
## $ Yellow_Ochre <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ Alizarin_Crimson <lgl> TRUE, TRUE, TRUE, FALSE, TRUE, FALSE, TRUE, TRUE, TRU…
#YOUR CODE HERE
#ANSWERS
# Normalise every column name: spaces become underscores, then everything is
# lower-cased (e.g. "painting index" -> "painting_index").
bob_ross <- bob_ross %>%
  rename_with(function(nm) tolower(gsub(" ", "_", nm, fixed = TRUE)))
names(bob_ross)
## [1] "painting_index" "img_src" "painting_title" "season"
## [5] "episode" "num_colors" "youtube_src" "colors"
## [9] "color_hex" "black_gesso" "bright_red" "burnt_umber"
## [13] "cadmium_yellow" "dark_sienna" "indian_red" "indian_yellow"
## [17] "liquid_black" "liquid_clear" "midnight_black" "phthalo_blue"
## [21] "phthalo_green" "prussian_blue" "sap_green" "titanium_white"
## [25] "van_dyke_brown" "yellow_ochre" "alizarin_crimson"
There are a lot of variables here and we just want to focus on a few of them.
#YOUR CODE HERE
#ANSWERS
# Narrow the data down to the columns of interest.
bob_ross_abbr <- bob_ross %>%
  select(
    painting_title,
    season,
    episode,
    num_colors,
    colors
  )
as_tibble(bob_ross_abbr)
## # A tibble: 403 × 5
## painting_title season episode num_colors colors
## <chr> <dbl> <dbl> <dbl> <chr>
## 1 A Walk in the Woods 1 1 8 "['Alizarin Crimson', 'Bright …
## 2 Mt. McKinley 1 2 8 "['Alizarin Crimson', 'Bright …
## 3 Ebony Sunset 1 3 9 "['Alizarin Crimson', 'Black G…
## 4 Winter Mist 1 4 3 "['Prussian Blue', 'Titanium W…
## 5 Quiet Stream 1 5 8 "['Alizarin Crimson', 'Bright …
## 6 Winter Moon 1 6 4 "['Black Gesso', 'Prussian Blu…
## 7 Autumn Mountain 1 7 8 "['Alizarin Crimson', 'Bright …
## 8 Peaceful Valley 1 8 8 "['Alizarin Crimson', 'Bright …
## 9 Seascape 1 9 8 "['Alizarin Crimson', 'Bright …
## 10 Mountain Lake 1 10 8 "['Alizarin Crimson', 'Bright …
## # ℹ 393 more rows
You want to categorize the number of colors used for each painting in the bob_ross_abbr data frame.
#YOUR CODE HERE
#ANSWERS
# Label each painting by how many colors it used, then sort ascending by the
# color count so the smallest palettes are listed first.
#   at least 2 and fewer than 6 -> "low"
#   at least 6 and fewer than 9 -> "medium"
#   9 or more                   -> "high"
#   anything else (fewer than 2, or a missing count) -> "outlier"
# `.default` (dplyr >= 1.1.0) is the dedicated catch-all argument and replaces
# the older `TRUE ~ ...` final clause.
bob_ross_abbr <- bob_ross_abbr %>%
  mutate(
    color_cat = case_when(
      num_colors >= 2 & num_colors < 6 ~ "low",
      num_colors >= 6 & num_colors < 9 ~ "medium",
      num_colors >= 9 ~ "high",
      .default = "outlier"
    )
  ) %>%
  arrange(num_colors)
head(bob_ross_abbr)
## # A tibble: 6 × 6
## painting_title season episode num_colors colors color_cat
## <chr> <dbl> <dbl> <dbl> <chr> <chr>
## 1 Contemplative Lady 16 6 1 "['Van Dyke Brown']" outlier
## 2 Winter Mist 1 4 3 "['Prussian Blue', 'Ti… low
## 3 Shades of Grey 2 4 3 "['Prussian Blue', 'Ti… low
## 4 Grey Winter 7 11 3 "['Alizarin Crimson', … low
## 5 Blue Winter 21 10 3 "['Midnight Black\\r\\… low
## 6 Winter Moon 1 6 4 "['Black Gesso', 'Prus… low
#ANSWERS Yes, you could have used nested if_else() statements to accomplish this. Using case_when() vs. using if_else() is a matter of your preference. However, using if_else() can be more cumbersome and more difficult to debug.
#YOUR CODE HERE
#ANSWERS
# One row per season holding the mean number of colors across its paintings.
# group_by() is kept (rather than `.by`) so the result stays sorted by season.
bob_ross_colors <- bob_ross_abbr %>%
  group_by(season) %>%
  summarise(avg_num_colors = mean(num_colors))
head(bob_ross_colors)
## # A tibble: 6 × 2
## season avg_num_colors
## <dbl> <dbl>
## 1 1 7.08
## 2 2 11.2
## 3 3 9.85
## 4 4 10
## 5 5 8.54
## 6 6 10.5
Challenge: How would you use base R code to find and print the value for this object?
#YOUR CODE HERE
#ANSWERS
# Answer entered by hand.
most_colorful_season <- 14
# with base R
# Pick the season value(s) whose average equals the largest average; ties
# would return more than one season.
most_colorful_season <- with(
  bob_ross_colors,
  season[avg_num_colors == max(avg_num_colors)]
)
most_colorful_season
## [1] 14
#YOUR CODE HERE
#ANSWERS
# Line chart of the average number of colors used in each season.
ggplot(
  data = bob_ross_colors,
  mapping = aes(x = season, y = avg_num_colors)
) +
  geom_line() +
  labs(
    title = "Bob Ross Paintings",
    subtitle = "Average Number of Colors Used per Season",
    x = "Season",
    y = "Average number of colors"
  ) +
  theme_classic()
#YOUR CODE HERE
#ANSWERS
library(kableExtra)

# Season 1 paintings only, reduced to title and episode, ordered by episode.
season_1 <- bob_ross_abbr %>%
  filter(season == 1) %>%
  select(painting_title, episode) %>%
  arrange(episode)

# Render the listing as a captioned table with reader-friendly headers.
knitr::kable(
  season_1,
  format = "simple",
  caption = "Paintings from Season 1",
  col.names = c("Painting Title", "Episode #"),
  booktabs = TRUE
)
| Painting Title | Episode # |
|---|---|
| A Walk in the Woods | 1 |
| Mt. McKinley | 2 |
| Ebony Sunset | 3 |
| Winter Mist | 4 |
| Quiet Stream | 5 |
| Winter Moon | 6 |
| Autumn Mountain | 7 |
| Peaceful Valley | 8 |
| Seascape | 9 |
| Mountain Lake | 10 |
| Winter Glow | 11 |
| Snow Fall | 12 |
| Final Reflections | 13 |