library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 2.0.1 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(dplyr)
library(readr)
library(viridis)
## Loading required package: viridisLite
library(viridisLite)
# Read the useThanks.csv data set from the course repository into `thanks`,
# then print the structure (column names and types) of the result.
thanks <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/FA2020_DataViz/main/data/useThanks.csv"
)
str(object = thanks)
## 'data.frame': 1058 obs. of 83 variables:
## $ id : num 4.34e+09 4.34e+09 4.34e+09 4.34e+09 4.34e+09 ...
## $ celebrate : chr "Yes" "Yes" "Yes" "Yes" ...
## $ main : chr "Turkey" "Turkey" "Turkey" "Turkey" ...
## $ cooked : chr "Baked" "Baked" "Roasted" "Baked" ...
## $ stuffing : chr "Bread-based" "Bread-based" "Rice-based" "Bread-based" ...
## $ cranberry : chr "None" "Other (please specify)" "Homemade" "Homemade" ...
## $ gravy : chr "Yes" "Yes" "Yes" "Yes" ...
## $ brussel.sprouts : chr "" "" "Brussel sprouts" "Brussel sprouts" ...
## $ carrots : chr "Carrots" "" "Carrots" "" ...
## $ cauliflower : chr "" "" "Cauliflower" "" ...
## $ corn : chr "" "Corn" "Corn" "" ...
## $ cornbread : chr "" "" "Cornbread" "Cornbread" ...
## $ fruit.salad : chr "" "" "" "" ...
## $ green.beans : chr "Green beans/green bean casserole" "Green beans/green bean casserole" "" "" ...
## $ mac.n.cheese : chr "Macaroni and cheese" "Macaroni and cheese" "" "" ...
## $ mashed.potatoes : chr "Mashed potatoes" "Mashed potatoes" "Mashed potatoes" "Mashed potatoes" ...
## $ rolls : chr "" "Rolls/biscuits" "Rolls/biscuits" "Rolls/biscuits" ...
## $ squash : chr "" "" "" "" ...
## $ salad : chr "" "Vegetable salad" "Vegetable salad" "Vegetable salad" ...
## $ yams.sweet.potato : chr "Yams/sweet potato casserole" "Yams/sweet potato casserole" "" "Yams/sweet potato casserole" ...
## $ apple.pie : chr "Apple" "Apple" "Apple" "" ...
## $ buttermilk.pie : chr "" "" "" "" ...
## $ cherry.pie : chr "" "" "Cherry" "" ...
## $ chocolate.pie : chr "" "Chocolate" "" "" ...
## $ coconut.pie : chr "" "" "" "" ...
## $ keylime.pie : chr "" "" "" "" ...
## $ peach.pie : chr "" "" "Peach" "" ...
## $ pecan.pie : chr "" "" "Pecan" "Pecan" ...
## $ pumpkin.pie : chr "" "Pumpkin" "Pumpkin" "Pumpkin" ...
## $ sweet.potato.pie : chr "" "" "Sweet Potato" "" ...
## $ apple.cobbler : chr "" "" "" "" ...
## $ blondies : chr "" "" "" "" ...
## $ brownies : chr "" "" "Brownies" "" ...
## $ carrot.cake : chr "" "" "Carrot cake" "" ...
## $ cheesecake : chr "Cheesecake" "Cheesecake" "" "" ...
## $ cookies : chr "Cookies" "Cookies" "Cookies" "" ...
## $ fudge : chr "" "" "Fudge" "" ...
## $ ice.cream : chr "Ice cream" "" "Ice cream" "" ...
## $ peach.cobbler : chr "" "" "" "" ...
## $ pray : chr "Yes" "Yes" "Yes" "No" ...
## $ friendsgiving : chr "No" "No" "Yes" "No" ...
## $ black.friday : chr "No" "Yes" "Yes" "No" ...
## $ area.live : chr "Suburban" "Rural" "Suburban" "Urban" ...
## $ age : chr "18 - 29" "18 - 29" "18 - 29" "30 - 44" ...
## $ gender : chr "Male" "Female" "Male" "Male" ...
## $ income : chr "$75,000 to $99,999" "$50,000 to $74,999" "$0 to $9,999" "$200,000 and up" ...
## $ DivName : chr "Middle Atlantic" "East South Central" "Mountain" "Pacific" ...
## $ celebrate01 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ gravy01 : int 1 1 1 1 1 1 1 1 1 1 ...
## $ friendsgiving01 : int 0 0 1 0 0 1 0 1 0 0 ...
## $ black.friday01 : int 0 1 1 0 0 1 1 1 0 0 ...
## $ brussel.sprouts01 : int 0 0 1 1 1 1 0 0 1 1 ...
## $ carrots01 : int 1 0 1 0 0 1 0 1 0 1 ...
## $ cauliflower01 : int 0 0 1 0 0 1 0 0 0 0 ...
## $ corn01 : int 0 1 1 0 0 1 0 0 1 0 ...
## $ cornbread01 : int 0 0 1 1 1 1 0 0 1 0 ...
## $ fruit.salad01 : int 0 0 0 0 0 1 1 0 0 0 ...
## $ green.beans01 : int 1 1 0 0 0 1 1 0 1 1 ...
## $ mac.n.cheese01 : int 1 1 0 0 0 1 0 0 0 0 ...
## $ mashed.potatoes01 : int 1 1 1 1 1 1 1 0 1 1 ...
## $ rolls01 : int 0 1 1 1 1 1 1 0 1 1 ...
## $ squash01 : int 0 0 0 0 1 1 0 0 1 0 ...
## $ salad01 : int 0 1 1 1 1 1 0 0 0 0 ...
## $ yams.sweet.potato01: int 1 1 0 1 1 1 1 0 0 1 ...
## $ apple.pie01 : int 1 1 1 0 1 0 1 0 1 0 ...
## $ buttermilk.pie01 : int 0 0 0 0 0 0 0 0 1 1 ...
## $ cherry.pie01 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ chocolate.pie01 : int 0 1 0 0 0 0 0 1 0 0 ...
## $ coconut.pie01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ keylime.pie01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ peach.pie01 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ pecan.pie01 : int 0 0 1 1 0 0 0 0 0 0 ...
## $ pumpkin.pie01 : int 0 1 1 1 1 0 1 0 1 1 ...
## $ sweet.potato.pie01 : int 0 0 1 0 0 1 0 0 1 1 ...
## $ apple.cobbler01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ blondies01 : int 0 0 0 0 0 0 0 0 0 0 ...
## $ brownies01 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ carrot.cake01 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ cheesecake01 : int 1 1 0 0 0 1 0 0 0 0 ...
## $ cookies01 : int 1 1 1 0 0 0 1 1 1 0 ...
## $ fudge01 : int 0 0 1 0 0 0 0 0 0 0 ...
## $ ice.cream01 : int 1 0 1 0 0 0 0 0 0 0 ...
## $ peach.cobbler01 : int 0 0 0 0 0 0 0 0 0 0 ...
# Distinct values recorded in the `main` column (includes the empty string).
thanks$main %>%
  as.factor() %>%
  levels()
## [1] "" "Chicken" "Ham/Pork"
## [4] "I don't know" "Other (please specify)" "Roast beef"
## [7] "Tofurkey" "Turducken" "Turkey"
# Distinct values recorded in the `age` column (includes the empty string).
thanks$age %>%
  as.factor() %>%
  levels()
## [1] "" "18 - 29" "30 - 44" "45 - 59" "60+"
# Tile plot of main dish (y) against age bracket (x), filled by main dish.
#
# Rows with an empty-string answer in any of the listed columns are dropped,
# together with rows whose main dish is "I don't know". The negated `%in%`
# test is kept (instead of `!=`) so that NA values are not dropped.
thanks %>%
  filter(
    !(main %in% c("", "I don't know")),
    !(cooked %in% ""),
    !(stuffing %in% ""),
    !(cranberry %in% ""),
    !(gravy %in% ""),
    !(pray %in% ""),
    !(friendsgiving %in% ""),
    !(black.friday %in% ""),
    !(area.live %in% ""),
    !(age %in% ""),
    !(gender %in% ""),
    !(income %in% ""),
    !(DivName %in% "")
  ) %>%
  ggplot(aes(x = age, y = main, fill = main)) +
  geom_tile(size = 1.2) +
  # Exactly one fill scale: adding a second scale for the same aesthetic
  # replaces the first and makes ggplot2 emit a "Scale for 'fill' is already
  # present" message. The default discrete scale is the one that was in effect.
  scale_fill_discrete() +
  theme_minimal()
# Number of rows for each main dish within each age bracket, skipping rows
# where either answer is the empty string. The result stays grouped by `age`.
thanks %>%
  filter(age != "" & main != "") %>%
  group_by(age) %>%
  count(main)
## # A tibble: 28 × 3
## # Groups: age [4]
## age main n
## <chr> <chr> <int>
## 1 18 - 29 Chicken 3
## 2 18 - 29 Ham/Pork 6
## 3 18 - 29 I don't know 3
## 4 18 - 29 Other (please specify) 2
## 5 18 - 29 Roast beef 3
## 6 18 - 29 Tofurkey 6
## 7 18 - 29 Turkey 162
## 8 30 - 44 Chicken 3
## 9 30 - 44 Ham/Pork 12
## 10 30 - 44 I don't know 1
## # … with 18 more rows
# One row per (age, main) combination with the number of rows in `count`.
#
# `summarise()` is used instead of `mutate(count = n())`: mutate would keep
# every input row and repeat the group count on each of them, so any plot that
# sums or stacks `count` would add a group's size once per member. Dropping the
# grouping (`.groups = "drop"`) keeps later verbs from silently operating
# per group. Rows come back ordered by age, then main.
thanks_main_all <- thanks %>%
  filter(age != "", main != "") %>%
  group_by(age, main) %>%
  summarise(count = n(), .groups = "drop")
thanks_main_all
## # A tibble: 28 × 3
## age main count
## <chr> <chr> <int>
## 1 18 - 29 Chicken 3
## 2 18 - 29 Ham/Pork 6
## 3 18 - 29 I don't know 3
## 4 18 - 29 Other (please specify) 2
## 5 18 - 29 Roast beef 3
## 6 18 - 29 Tofurkey 6
## 7 18 - 29 Turkey 162
## 8 30 - 44 Chicken 3
## 9 30 - 44 Ham/Pork 12
## 10 30 - 44 I don't know 1
## # … with 18 more rows
# One row per (age, main) combination with the number of rows in `count`.
#
# `summarise()` replaces `mutate(count = n())` so the table holds a single row
# per group rather than one row per input row with the count repeated; this
# avoids overplotting identical marks and double counting when `count` is
# stacked. The result is returned ungrouped and ordered by age, then main.
thanks_main <- thanks %>%
  filter(main != "", age != "") %>%
  group_by(age, main) %>%
  summarise(count = n(), .groups = "drop")
thanks_main
## # A tibble: 28 × 3
## age main count
## <chr> <chr> <int>
## 1 18 - 29 Chicken 3
## 2 18 - 29 Ham/Pork 6
## 3 18 - 29 I don't know 3
## 4 18 - 29 Other (please specify) 2
## 5 18 - 29 Roast beef 3
## 6 18 - 29 Tofurkey 6
## 7 18 - 29 Turkey 162
## 8 30 - 44 Chicken 3
## 9 30 - 44 Ham/Pork 12
## 10 30 - 44 I don't know 1
## # … with 18 more rows
# Dodged bar chart: bar height is `count`, one bar per main dish within each
# age bracket, using the values stored in `thanks_main`.
ggplot(
  data = thanks_main,
  mapping = aes(x = age, y = count, fill = main)
) +
  labs(
    title = "Turkey is Still a Clear Winner at Thanksgiving",
    subtitle = "Popularity of Main Protein by Age"
  ) +
  geom_col(position = "dodge")
# Dodged bar chart of row counts per main dish, one facet per age bracket.
thanks %>%
  filter(main != "", age != "") %>%
  ggplot() +
  geom_bar(mapping = aes(x = age, fill = main), position = "dodge") +
  # Argument spelled out in full (`scales`) instead of relying on partial
  # matching of `scale`.
  facet_wrap(~age, scales = "free") +
  scale_x_discrete(expand = c(0, 0)) +
  # The y axis carries the computed count, which is continuous; a discrete
  # scale here has no levels to label, so the continuous scale is used to
  # remove the padding while keeping the axis breaks.
  scale_y_continuous(expand = c(0, 0)) +
  labs(
    title = "Turkey is Still a Clear Winner at Thanksgiving",
    subtitle = "Popularity of Main Protein by Age"
  )
# Proportional (filled) bar of main dishes, one facet per age bracket.
thanks %>%
  filter(main != "", age != "") %>%
  ggplot() +
  geom_bar(mapping = aes(x = age, fill = main), position = "fill") +
  # Argument spelled out in full (`scales`) instead of relying on partial
  # matching of `scale`.
  facet_wrap(~age, scales = "free") +
  scale_x_discrete(expand = c(0, 0)) +
  # The y axis is the stacked proportion (continuous), so the continuous
  # position scale is the one that must carry the zero expansion.
  scale_y_continuous(expand = c(0, 0)) +
  theme(
    plot.background = element_rect(fill = "gray94"),
    legend.background = element_rect(fill = "gray94"),
    # The facet strip already names the age bracket, so x tick text is hidden.
    axis.text.x = element_blank(),
    axis.title = element_text(family = "Courier")
  ) +
  labs(
    title = "Turkey is Still a Clear Winner at Thanksgiving",
    subtitle = "Popularity of Main Protein by Age"
  )
# Pie chart (stacked bar in polar coordinates) of main dishes, one pie per
# age bracket.
#
# `count` is a per-(age, main) total. If `thanks_main_all` holds that total
# repeated on several rows of the same group, stacking with
# `stat = "identity"` would add it once per row and weight each slice by the
# square of the group size. Reducing to one row per (age, main) first makes
# every slice proportional to its count; it is a no-op when the table is
# already one row per group.
thanks_main_all %>%
  ungroup() %>%
  distinct(age, main, count) %>%
  ggplot(aes(x = "", y = count, fill = main)) +
  geom_bar(stat = "identity", width = 1, position = "fill") +
  coord_polar("y", start = 0) +
  facet_wrap(~age) +
  theme(
    plot.background = element_rect(fill = "gray94"),
    legend.background = element_rect(fill = "gray94"),
    axis.text.x = element_blank(),
    axis.title = element_blank(),
    axis.ticks = element_blank(),
    plot.title = element_text(family = "Courier"),
    plot.subtitle = element_text(family = "Courier")
  ) +
  labs(
    title = "Turkey is Still a Clear Winner at Thanksgiving",
    subtitle = "Popularity of Main Protein by Age"
  )
# Count plot of main dish (x) against age bracket (y).
#
# `geom_count()` tallies the rows at each (main, age) position and maps that
# number to point size, so size reflects popularity. Mapping `size` to the
# discrete `main` column instead triggers ggplot2's "Using size for a discrete
# variable is not advised" warning and draws every row on top of the others.
thanks %>%
  filter(main != "", age != "") %>%
  ggplot(aes(x = main, y = age, color = main)) +
  geom_count(alpha = 0.5) +
  labs(
    title = "Turkey is Still a Clear Winner at Thanksgiving",
    subtitle = "Popularity of Main Protein by Age"
  )
# Jittered scatter: one point per row, age bracket on x and main dish on y.
# Rows with an empty age or main dish, or an "I don't know" main dish, are
# left out; rows are sorted by main dish before plotting.
thanks %>%
  filter(main != "" & age != "" & main != "I don't know") %>%
  arrange(main) %>%
  ggplot(mapping = aes(x = age, y = main, color = main)) +
  geom_jitter(alpha = 0.5, size = 2, stat = "identity") +
  labs(
    title = "Turkey is Still the Clear Winner at Thanksgiving",
    subtitle = "popularity of main dishes by age groups"
  ) +
  theme(
    # Uniform light-grey backgrounds, no major grid lines.
    plot.background = element_rect(fill = "gray94"),
    panel.background = element_rect(fill = "gray94"),
    legend.background = element_rect(fill = "gray94"),
    panel.grid.major = element_blank(),
    # Courier for every text element.
    plot.title = element_text(family = "Courier"),
    plot.subtitle = element_text(family = "Courier"),
    axis.title = element_text(family = "Courier"),
    axis.text = element_text(family = "Courier"),
    legend.title = element_text(family = "Courier"),
    legend.text = element_text(family = "Courier")
  )
# Bubble chart: one bubble per main dish in each age-bracket facet, with both
# position along the count axis and bubble size given by `count`.
#
# The table is reduced to one row per (age, main) before plotting. If
# `thanks_main` repeats a group's count on several rows, the identical points
# would be drawn on top of each other and `alpha = 0.7` would compound
# unevenly (large groups opaque, small groups lighter). The reduction is a
# no-op when the table already has one row per group.
thanks_main %>%
  ungroup() %>%
  filter(main != "I don't know") %>%
  distinct(age, main, count) %>%
  ggplot(aes(x = main, y = count, size = count, color = main)) +
  geom_point(alpha = 0.7) +
  # Colour duplicates the axis labels, so its legend is hidden.
  guides(col = "none") +
  scale_size(
    range = c(2, 18),
    breaks = c(200, 150, 50, 10),
    labels = c(200, 150, 50, 10),
    name = "Count"
  ) +
  coord_flip() +
  theme(
    legend.text = element_text(family = "Courier"),
    plot.background = element_rect(fill = "grey80"),
    panel.background = element_rect(fill = "grey80"),
    axis.title = element_blank(),
    plot.title = element_text(family = "Courier", face = "bold"),
    plot.subtitle = element_text(family = "Courier"),
    axis.text = element_text(family = "Courier"),
    legend.background = element_rect(fill = "grey80"),
    legend.box.background = element_rect(fill = "grey80"),
    legend.key = element_rect(fill = "grey80"),
    legend.title = element_text(family = "Courier"),
    panel.grid.major.y = element_line(
      color = "grey50", size = 0.5, linetype = "dotted"
    )
  ) +
  labs(
    title = "Turkey is Still the Clear Winner at Thanksgiving",
    subtitle = "popularity of main dishes by age group"
  ) +
  # Argument spelled out in full (`scales`) instead of relying on partial
  # matching of `scale`.
  facet_wrap(~age, scales = "free_y") +
  # NOTE(review): `count` is continuous but is given a discrete position
  # scale here; this appears to leave the count axis without breaks or
  # labels (the size legend carries the values). Kept as-is to preserve the
  # chart's look -- confirm this is intended.
  scale_y_discrete(expand = c(0.5, 3.5)) +
  scale_x_discrete(expand = c(0, 3.5)) +
  scale_color_viridis(option = "viridis", discrete = TRUE)