library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.4     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   2.0.1     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(dplyr)
library(readr)
library(viridis)
## Loading required package: viridisLite
library(viridisLite)
# Read the survey responses (one row per respondent) from the course repo.
thanks <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/FA2020_DataViz/main/data/useThanks.csv"
)

# Show every column with its type and first few values.
str(object = thanks)
## 'data.frame':    1058 obs. of  83 variables:
##  $ id                 : num  4.34e+09 4.34e+09 4.34e+09 4.34e+09 4.34e+09 ...
##  $ celebrate          : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ main               : chr  "Turkey" "Turkey" "Turkey" "Turkey" ...
##  $ cooked             : chr  "Baked" "Baked" "Roasted" "Baked" ...
##  $ stuffing           : chr  "Bread-based" "Bread-based" "Rice-based" "Bread-based" ...
##  $ cranberry          : chr  "None" "Other (please specify)" "Homemade" "Homemade" ...
##  $ gravy              : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ brussel.sprouts    : chr  "" "" "Brussel sprouts" "Brussel sprouts" ...
##  $ carrots            : chr  "Carrots" "" "Carrots" "" ...
##  $ cauliflower        : chr  "" "" "Cauliflower" "" ...
##  $ corn               : chr  "" "Corn" "Corn" "" ...
##  $ cornbread          : chr  "" "" "Cornbread" "Cornbread" ...
##  $ fruit.salad        : chr  "" "" "" "" ...
##  $ green.beans        : chr  "Green beans/green bean casserole" "Green beans/green bean casserole" "" "" ...
##  $ mac.n.cheese       : chr  "Macaroni and cheese" "Macaroni and cheese" "" "" ...
##  $ mashed.potatoes    : chr  "Mashed potatoes" "Mashed potatoes" "Mashed potatoes" "Mashed potatoes" ...
##  $ rolls              : chr  "" "Rolls/biscuits" "Rolls/biscuits" "Rolls/biscuits" ...
##  $ squash             : chr  "" "" "" "" ...
##  $ salad              : chr  "" "Vegetable salad" "Vegetable salad" "Vegetable salad" ...
##  $ yams.sweet.potato  : chr  "Yams/sweet potato casserole" "Yams/sweet potato casserole" "" "Yams/sweet potato casserole" ...
##  $ apple.pie          : chr  "Apple" "Apple" "Apple" "" ...
##  $ buttermilk.pie     : chr  "" "" "" "" ...
##  $ cherry.pie         : chr  "" "" "Cherry" "" ...
##  $ chocolate.pie      : chr  "" "Chocolate" "" "" ...
##  $ coconut.pie        : chr  "" "" "" "" ...
##  $ keylime.pie        : chr  "" "" "" "" ...
##  $ peach.pie          : chr  "" "" "Peach" "" ...
##  $ pecan.pie          : chr  "" "" "Pecan" "Pecan" ...
##  $ pumpkin.pie        : chr  "" "Pumpkin" "Pumpkin" "Pumpkin" ...
##  $ sweet.potato.pie   : chr  "" "" "Sweet Potato" "" ...
##  $ apple.cobbler      : chr  "" "" "" "" ...
##  $ blondies           : chr  "" "" "" "" ...
##  $ brownies           : chr  "" "" "Brownies" "" ...
##  $ carrot.cake        : chr  "" "" "Carrot cake" "" ...
##  $ cheesecake         : chr  "Cheesecake" "Cheesecake" "" "" ...
##  $ cookies            : chr  "Cookies" "Cookies" "Cookies" "" ...
##  $ fudge              : chr  "" "" "Fudge" "" ...
##  $ ice.cream          : chr  "Ice cream" "" "Ice cream" "" ...
##  $ peach.cobbler      : chr  "" "" "" "" ...
##  $ pray               : chr  "Yes" "Yes" "Yes" "No" ...
##  $ friendsgiving      : chr  "No" "No" "Yes" "No" ...
##  $ black.friday       : chr  "No" "Yes" "Yes" "No" ...
##  $ area.live          : chr  "Suburban" "Rural" "Suburban" "Urban" ...
##  $ age                : chr  "18 - 29" "18 - 29" "18 - 29" "30 - 44" ...
##  $ gender             : chr  "Male" "Female" "Male" "Male" ...
##  $ income             : chr  "$75,000 to $99,999" "$50,000 to $74,999" "$0 to $9,999" "$200,000 and up" ...
##  $ DivName            : chr  "Middle Atlantic" "East South Central" "Mountain" "Pacific" ...
##  $ celebrate01        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ gravy01            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ friendsgiving01    : int  0 0 1 0 0 1 0 1 0 0 ...
##  $ black.friday01     : int  0 1 1 0 0 1 1 1 0 0 ...
##  $ brussel.sprouts01  : int  0 0 1 1 1 1 0 0 1 1 ...
##  $ carrots01          : int  1 0 1 0 0 1 0 1 0 1 ...
##  $ cauliflower01      : int  0 0 1 0 0 1 0 0 0 0 ...
##  $ corn01             : int  0 1 1 0 0 1 0 0 1 0 ...
##  $ cornbread01        : int  0 0 1 1 1 1 0 0 1 0 ...
##  $ fruit.salad01      : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ green.beans01      : int  1 1 0 0 0 1 1 0 1 1 ...
##  $ mac.n.cheese01     : int  1 1 0 0 0 1 0 0 0 0 ...
##  $ mashed.potatoes01  : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ rolls01            : int  0 1 1 1 1 1 1 0 1 1 ...
##  $ squash01           : int  0 0 0 0 1 1 0 0 1 0 ...
##  $ salad01            : int  0 1 1 1 1 1 0 0 0 0 ...
##  $ yams.sweet.potato01: int  1 1 0 1 1 1 1 0 0 1 ...
##  $ apple.pie01        : int  1 1 1 0 1 0 1 0 1 0 ...
##  $ buttermilk.pie01   : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ cherry.pie01       : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ chocolate.pie01    : int  0 1 0 0 0 0 0 1 0 0 ...
##  $ coconut.pie01      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ keylime.pie01      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ peach.pie01        : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ pecan.pie01        : int  0 0 1 1 0 0 0 0 0 0 ...
##  $ pumpkin.pie01      : int  0 1 1 1 1 0 1 0 1 1 ...
##  $ sweet.potato.pie01 : int  0 0 1 0 0 1 0 0 1 1 ...
##  $ apple.cobbler01    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blondies01         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ brownies01         : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ carrot.cake01      : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ cheesecake01       : int  1 1 0 0 0 1 0 0 0 0 ...
##  $ cookies01          : int  1 1 1 0 0 0 1 1 1 0 ...
##  $ fudge01            : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ ice.cream01        : int  1 0 1 0 0 0 0 0 0 0 ...
##  $ peach.cobbler01    : int  0 0 0 0 0 0 0 0 0 0 ...
# Distinct answers recorded for the main dish (blank answers included).
levels(factor(thanks[["main"]]))
## [1] ""                       "Chicken"                "Ham/Pork"              
## [4] "I don't know"           "Other (please specify)" "Roast beef"            
## [7] "Tofurkey"               "Turducken"              "Turkey"
# Distinct age brackets recorded in the survey (blank answers included).
levels(factor(thanks[["age"]]))
## [1] ""        "18 - 29" "30 - 44" "45 - 59" "60+"

What are the different age groups eating? Is there a clear difference between them?

# Tile plot showing which main dishes were reported in each age group.
# Respondents with a blank answer in any of the listed questions are dropped,
# as are those who answered "I don't know" for the main dish.
thanks %>%
  filter(
    # `%in%` (rather than `!=`) keeps the original treatment of NA values:
    # only literal empty strings are removed.
    if_all(
      c(
        main, cooked, stuffing, cranberry, gravy, pray, friendsgiving,
        black.friday, area.live, age, gender, income, DivName
      ),
      ~ !(.x %in% "")
    ),
    !(main %in% "I don't know")
  ) %>%
  ggplot(aes(x = age, y = main, fill = main)) +
  geom_tile(size = 1.2) +
  # Exactly one fill scale: a second scale_fill_*() call replaces the first
  # (ggplot2 warns "Scale for 'fill' is already present"), which previously
  # discarded this viridis palette.
  scale_fill_viridis_d(option = "viridis", direction = -1) +
  theme_minimal()

# Number of respondents per (age group, main dish), ignoring blank answers.
# The result remains grouped by age.
thanks %>%
  filter(!(main == "" | age == "")) %>%
  group_by(age, main) %>%
  summarise(n = n(), .groups = "drop_last")
## # A tibble: 28 × 3
## # Groups:   age [4]
##    age     main                       n
##    <chr>   <chr>                  <int>
##  1 18 - 29 Chicken                    3
##  2 18 - 29 Ham/Pork                   6
##  3 18 - 29 I don't know               3
##  4 18 - 29 Other (please specify)     2
##  5 18 - 29 Roast beef                 3
##  6 18 - 29 Tofurkey                   6
##  7 18 - 29 Turkey                   162
##  8 30 - 44 Chicken                    3
##  9 30 - 44 Ham/Pork                  12
## 10 30 - 44 I don't know               1
## # … with 18 more rows
# Age group and main dish for every respondent who answered both questions.
# `count` is the size of the respondent's (age, main) group, so the same
# value repeats on every row of a group. The result stays grouped by
# (age, main) and is ordered by age.
thanks_main_all <- thanks %>%
  filter(!(age == "" | main == "")) %>%
  select(age, main) %>%
  group_by(age, main) %>%
  mutate(count = n()) %>%
  arrange(age)

thanks_main_all
## # A tibble: 947 × 3
## # Groups:   age, main [28]
##    age     main     count
##    <chr>   <chr>    <int>
##  1 18 - 29 Turkey     162
##  2 18 - 29 Turkey     162
##  3 18 - 29 Turkey     162
##  4 18 - 29 Turkey     162
##  5 18 - 29 Turkey     162
##  6 18 - 29 Turkey     162
##  7 18 - 29 Ham/Pork     6
##  8 18 - 29 Turkey     162
##  9 18 - 29 Turkey     162
## 10 18 - 29 Turkey     162
## # … with 937 more rows
# `thanks_main` was built by a pipeline identical to `thanks_main_all`
# (same columns, same blank-answer filters, same grouping and ordering).
# Reuse that table instead of recomputing it so the two cannot drift apart.
thanks_main <- thanks_main_all

thanks_main
## # A tibble: 947 × 3
## # Groups:   age, main [28]
##    age     main     count
##    <chr>   <chr>    <int>
##  1 18 - 29 Turkey     162
##  2 18 - 29 Turkey     162
##  3 18 - 29 Turkey     162
##  4 18 - 29 Turkey     162
##  5 18 - 29 Turkey     162
##  6 18 - 29 Turkey     162
##  7 18 - 29 Ham/Pork     6
##  8 18 - 29 Turkey     162
##  9 18 - 29 Turkey     162
## 10 18 - 29 Turkey     162
## # … with 937 more rows
# Dodged bar chart of main-dish counts within each age group.
# `thanks_main` carries one row per respondent with the group count repeated,
# so collapse it to one row per (age, main) first; otherwise each bar is
# drawn once per respondent on top of itself.
thanks_main %>%
  ungroup() %>%
  distinct(age, main, count) %>%
  ggplot(aes(x = age, y = count, fill = main)) +
  geom_col(position = "dodge") +
  labs(
    title = "Turkey is Still a Clear Winner at Thanksgiving",
    subtitle = "Popularity of Main Protein by Age"
  )

# One panel per age group, with a dodged bar per main dish showing the number
# of respondents. Blank answers are excluded.
thanks %>%
  filter(main != "", age != "") %>%
  ggplot() +
  geom_bar(mapping = aes(x = age, fill = main), position = "dodge") +
  # Spell out `scales`; `scale =` only worked through partial matching.
  facet_wrap(~age, scales = "free") +
  scale_x_discrete(expand = c(0, 0)) +
  # The bar heights are counts, i.e. a continuous axis. A discrete scale
  # here suppresses the axis breaks and labels.
  scale_y_continuous(expand = c(0, 0)) +
  labs(
    title = "Turkey is Still a Clear Winner at Thanksgiving",
    subtitle = "Popularity of Main Protein by Age"
  )

# One panel per age group, with a single stacked bar showing each main dish's
# share of respondents (position = "fill"). Blank answers are excluded.
thanks %>%
  filter(main != "", age != "") %>%
  ggplot() +
  geom_bar(mapping = aes(x = age, fill = main), position = "fill") +
  # Spell out `scales`; `scale =` only worked through partial matching.
  facet_wrap(~age, scales = "free") +
  scale_x_discrete(expand = c(0, 0)) +
  # The stacked shares form a continuous 0-1 axis. A discrete scale here
  # suppresses the axis breaks and labels.
  scale_y_continuous(expand = c(0, 0)) +
  theme(
    plot.background = element_rect(fill = "gray94"),
    legend.background = element_rect(fill = "gray94"),
    axis.text.x = element_blank(),
    axis.title = element_text(family = "Courier")
  ) +
  labs(
    title = "Turkey is Still a Clear Winner at Thanksgiving",
    subtitle = "Popularity of Main Protein by Age"
  )

  # Pie chart per age group showing each main dish's share of respondents.
  # `thanks_main_all` has one row per respondent with the group count repeated
  # on each row. Stacking those rows would add `count` once per respondent,
  # making slices proportional to count squared, so reduce to one row per
  # (age, main) before plotting.
  thanks_main_all %>%
    ungroup() %>%
    distinct(age, main, count) %>%
    ggplot(aes(x = "", y = count, fill = main)) +
    geom_col(width = 1, position = "fill") +
    coord_polar(theta = "y", start = 0) +
    facet_wrap(~age) +
    theme(
      plot.background = element_rect(fill = "gray94"),
      legend.background = element_rect(fill = "gray94"),
      axis.text.x = element_blank(),
      axis.title = element_blank(),
      axis.ticks = element_blank(),
      plot.title = element_text(family = "Courier"),
      plot.subtitle = element_text(family = "Courier")
    ) +
    labs(
      title = "Turkey is Still a Clear Winner at Thanksgiving",
      subtitle = "Popularity of Main Protein by Age"
    )

# Point plot of main dish against age group. geom_count() sizes each point by
# the number of respondents at that (main, age) combination. Mapping `size`
# to the discrete `main` column drew every respondent on the same spot and
# triggered "Using size for a discrete variable is not advised".
thanks %>%
  filter(main != "", age != "") %>%
  ggplot(aes(x = main, y = age, color = main)) +
  geom_count(alpha = 0.5) +
  labs(
    title = "Turkey is Still a Clear Winner at Thanksgiving",
    subtitle = "Popularity of Main Protein by Age",
    size = "Count"
  )

# Jittered scatter of individual responses: age group on x, main dish on y.
# Blank answers and "I don't know" mains are excluded, and rows are ordered
# by main dish before plotting.
thanks %>%
  filter(main != "", age != "", main != "I don't know") %>%
  arrange(main) %>%
  ggplot(mapping = aes(x = age, y = main, color = main)) +
  geom_jitter(alpha = 0.5, size = 2) +
  labs(
    title = "Turkey is Still the Clear Winner at Thanksgiving",
    subtitle = "popularity of main dishes by age groups"
  ) +
  theme(
    # Uniform light-gray canvas with no major grid lines.
    plot.background = element_rect(fill = "gray94"),
    panel.background = element_rect(fill = "gray94"),
    legend.background = element_rect(fill = "gray94"),
    panel.grid.major = element_blank(),
    # Courier for the title, axis and legend text set here.
    plot.title = element_text(family = "Courier"),
    plot.subtitle = element_text(family = "Courier"),
    axis.title = element_text(family = "Courier"),
    axis.text = element_text(family = "Courier"),
    legend.title = element_text(family = "Courier"),
    legend.text = element_text(family = "Courier")
  )

# Bubble chart per age group: one bubble per main dish, placed and sized by
# the number of respondents. "I don't know" answers are left out.
thanks_main %>%
  ungroup() %>%
  filter(main != "I don't know") %>%
  # `thanks_main` repeats the group count on every respondent row. Keep one
  # row per (age, main) so each bubble is drawn once and alpha is uniform.
  distinct(age, main, count) %>%
  ggplot(aes(x = main, y = count, size = count, color = main)) +
  geom_point(alpha = 0.7) +
  guides(color = "none") +
  scale_size(
    range = c(2, 18),
    breaks = c(200, 150, 50, 10),
    labels = c(200, 150, 50, 10),
    name = "Count"
  ) +
  scale_x_discrete(expand = c(0, 3.5)) +
  # `count` is continuous, so use a continuous position scale (same expansion
  # values). A discrete scale here suppresses the count axis breaks/labels.
  scale_y_continuous(expand = c(0.5, 3.5)) +
  scale_color_viridis(option = "viridis", discrete = TRUE) +
  coord_flip() +
  # Spell out `scales`; `scale =` only worked through partial matching.
  facet_wrap(~age, scales = "free_y") +
  labs(
    title = "Turkey is Still the Clear Winner at Thanksgiving",
    subtitle = "popularity of main dishes by age group"
  ) +
  theme(
    legend.text = element_text(family = "Courier"),
    plot.background = element_rect(fill = "grey80"),
    panel.background = element_rect(fill = "grey80"),
    axis.title = element_blank(),
    plot.title = element_text(family = "Courier", face = "bold"),
    plot.subtitle = element_text(family = "Courier"),
    axis.text = element_text(family = "Courier"),
    legend.background = element_rect(fill = "grey80"),
    legend.box.background = element_rect(fill = "grey80"),
    legend.key = element_rect(fill = "grey80"),
    legend.title = element_text(family = "Courier"),
    panel.grid.major.y = element_line(
      color = "grey50", size = 0.5, linetype = "dotted"
    )
  )