Packages

library(tidyverse)
## -- Attaching packages ------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ---------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(usmap)
## Warning: package 'usmap' was built under R version 4.0.3
library(sf)
## Warning: package 'sf' was built under R version 4.0.3
## Linking to GEOS 3.8.0, GDAL 3.0.4, PROJ 6.3.1
library(tigris)
## Warning: package 'tigris' was built under R version 4.0.3
## To enable 
## caching of data, set `options(tigris_use_cache = TRUE)` in your R script or .Rprofile.
options(tigris_use_cache = TRUE)
library(tmap)
## Warning: package 'tmap' was built under R version 4.0.3
library(colorspace)

Read in Data

# Read the survey responses (useThanks.csv) from the GitHub-hosted raw file.
useThanks <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/FA2020_DataViz/main/data/useThanks.csv",
  header = TRUE
)

View Data

# Print a compact overview of each column: its type and its first few values.
str(object = useThanks)
## 'data.frame':    1058 obs. of  83 variables:
##  $ id                 : num  4.34e+09 4.34e+09 4.34e+09 4.34e+09 4.34e+09 ...
##  $ celebrate          : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ main               : chr  "Turkey" "Turkey" "Turkey" "Turkey" ...
##  $ cooked             : chr  "Baked" "Baked" "Roasted" "Baked" ...
##  $ stuffing           : chr  "Bread-based" "Bread-based" "Rice-based" "Bread-based" ...
##  $ cranberry          : chr  "None" "Other (please specify)" "Homemade" "Homemade" ...
##  $ gravy              : chr  "Yes" "Yes" "Yes" "Yes" ...
##  $ brussel.sprouts    : chr  "" "" "Brussel sprouts" "Brussel sprouts" ...
##  $ carrots            : chr  "Carrots" "" "Carrots" "" ...
##  $ cauliflower        : chr  "" "" "Cauliflower" "" ...
##  $ corn               : chr  "" "Corn" "Corn" "" ...
##  $ cornbread          : chr  "" "" "Cornbread" "Cornbread" ...
##  $ fruit.salad        : chr  "" "" "" "" ...
##  $ green.beans        : chr  "Green beans/green bean casserole" "Green beans/green bean casserole" "" "" ...
##  $ mac.n.cheese       : chr  "Macaroni and cheese" "Macaroni and cheese" "" "" ...
##  $ mashed.potatoes    : chr  "Mashed potatoes" "Mashed potatoes" "Mashed potatoes" "Mashed potatoes" ...
##  $ rolls              : chr  "" "Rolls/biscuits" "Rolls/biscuits" "Rolls/biscuits" ...
##  $ squash             : chr  "" "" "" "" ...
##  $ salad              : chr  "" "Vegetable salad" "Vegetable salad" "Vegetable salad" ...
##  $ yams.sweet.potato  : chr  "Yams/sweet potato casserole" "Yams/sweet potato casserole" "" "Yams/sweet potato casserole" ...
##  $ apple.pie          : chr  "Apple" "Apple" "Apple" "" ...
##  $ buttermilk.pie     : chr  "" "" "" "" ...
##  $ cherry.pie         : chr  "" "" "Cherry" "" ...
##  $ chocolate.pie      : chr  "" "Chocolate" "" "" ...
##  $ coconut.pie        : chr  "" "" "" "" ...
##  $ keylime.pie        : chr  "" "" "" "" ...
##  $ peach.pie          : chr  "" "" "Peach" "" ...
##  $ pecan.pie          : chr  "" "" "Pecan" "Pecan" ...
##  $ pumpkin.pie        : chr  "" "Pumpkin" "Pumpkin" "Pumpkin" ...
##  $ sweet.potato.pie   : chr  "" "" "Sweet Potato" "" ...
##  $ apple.cobbler      : chr  "" "" "" "" ...
##  $ blondies           : chr  "" "" "" "" ...
##  $ brownies           : chr  "" "" "Brownies" "" ...
##  $ carrot.cake        : chr  "" "" "Carrot cake" "" ...
##  $ cheesecake         : chr  "Cheesecake" "Cheesecake" "" "" ...
##  $ cookies            : chr  "Cookies" "Cookies" "Cookies" "" ...
##  $ fudge              : chr  "" "" "Fudge" "" ...
##  $ ice.cream          : chr  "Ice cream" "" "Ice cream" "" ...
##  $ peach.cobbler      : chr  "" "" "" "" ...
##  $ pray               : chr  "Yes" "Yes" "Yes" "No" ...
##  $ friendsgiving      : chr  "No" "No" "Yes" "No" ...
##  $ black.friday       : chr  "No" "Yes" "Yes" "No" ...
##  $ area.live          : chr  "Suburban" "Rural" "Suburban" "Urban" ...
##  $ age                : chr  "18 - 29" "18 - 29" "18 - 29" "30 - 44" ...
##  $ gender             : chr  "Male" "Female" "Male" "Male" ...
##  $ income             : chr  "$75,000 to $99,999" "$50,000 to $74,999" "$0 to $9,999" "$200,000 and up" ...
##  $ DivName            : chr  "Middle Atlantic" "East South Central" "Mountain" "Pacific" ...
##  $ celebrate01        : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ gravy01            : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ friendsgiving01    : int  0 0 1 0 0 1 0 1 0 0 ...
##  $ black.friday01     : int  0 1 1 0 0 1 1 1 0 0 ...
##  $ brussel.sprouts01  : int  0 0 1 1 1 1 0 0 1 1 ...
##  $ carrots01          : int  1 0 1 0 0 1 0 1 0 1 ...
##  $ cauliflower01      : int  0 0 1 0 0 1 0 0 0 0 ...
##  $ corn01             : int  0 1 1 0 0 1 0 0 1 0 ...
##  $ cornbread01        : int  0 0 1 1 1 1 0 0 1 0 ...
##  $ fruit.salad01      : int  0 0 0 0 0 1 1 0 0 0 ...
##  $ green.beans01      : int  1 1 0 0 0 1 1 0 1 1 ...
##  $ mac.n.cheese01     : int  1 1 0 0 0 1 0 0 0 0 ...
##  $ mashed.potatoes01  : int  1 1 1 1 1 1 1 0 1 1 ...
##  $ rolls01            : int  0 1 1 1 1 1 1 0 1 1 ...
##  $ squash01           : int  0 0 0 0 1 1 0 0 1 0 ...
##  $ salad01            : int  0 1 1 1 1 1 0 0 0 0 ...
##  $ yams.sweet.potato01: int  1 1 0 1 1 1 1 0 0 1 ...
##  $ apple.pie01        : int  1 1 1 0 1 0 1 0 1 0 ...
##  $ buttermilk.pie01   : int  0 0 0 0 0 0 0 0 1 1 ...
##  $ cherry.pie01       : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ chocolate.pie01    : int  0 1 0 0 0 0 0 1 0 0 ...
##  $ coconut.pie01      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ keylime.pie01      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ peach.pie01        : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ pecan.pie01        : int  0 0 1 1 0 0 0 0 0 0 ...
##  $ pumpkin.pie01      : int  0 1 1 1 1 0 1 0 1 1 ...
##  $ sweet.potato.pie01 : int  0 0 1 0 0 1 0 0 1 1 ...
##  $ apple.cobbler01    : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ blondies01         : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ brownies01         : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ carrot.cake01      : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ cheesecake01       : int  1 1 0 0 0 1 0 0 0 0 ...
##  $ cookies01          : int  1 1 1 0 0 0 1 1 1 0 ...
##  $ fudge01            : int  0 0 1 0 0 0 0 0 0 0 ...
##  $ ice.cream01        : int  1 0 1 0 0 0 0 0 0 0 ...
##  $ peach.cobbler01    : int  0 0 0 0 0 0 0 0 0 0 ...

Organizing Data

# Names of the 0/1 indicator columns for the side dishes: each survey column
# name with the "01" suffix appended.
sides <- paste0(
  c(
    "brussel.sprouts",
    "carrots",
    "cauliflower",
    "corn",
    "cornbread",
    "fruit.salad",
    "green.beans",
    "mac.n.cheese",
    "mashed.potatoes",
    "rolls",
    "salad",
    "yams.sweet.potato",
    "squash"
  ),
  "01"
)
# Per-division tallies: the number of respondents (n) and, for every side dish,
# how many respondents reported it (sum of the 0/1 indicator column).
regThanks <- useThanks %>%
  group_by(DivName) %>%
  summarise(
    n = n(),
    nCarrots = sum(carrots01),
    nSprouts = sum(brussel.sprouts01),
    nCauliflower = sum(cauliflower01),
    nCorn = sum(corn01),
    nCornBread = sum(cornbread01),
    nFruitSalad = sum(fruit.salad01),
    nGreenBeans = sum(green.beans01),
    nMacnCheese = sum(mac.n.cheese01),
    nMashedPotatoes = sum(mashed.potatoes01),
    nRolls = sum(rolls01),
    nSalad = sum(salad01),
    nYams = sum(yams.sweet.potato01),
    nSquash = sum(squash01)
  )
## `summarise()` ungrouping output (override with `.groups` argument)

Create for the national level

# The same tallies as the per-division summary, computed once over all
# respondents (no grouping), giving a single national row.
natThanks <- useThanks %>%
  summarise(
    n = n(),
    nCarrots = sum(carrots01),
    nSprouts = sum(brussel.sprouts01),
    nCauliflower = sum(cauliflower01),
    nCorn = sum(corn01),
    nCornBread = sum(cornbread01),
    nFruitSalad = sum(fruit.salad01),
    nGreenBeans = sum(green.beans01),
    nMacnCheese = sum(mac.n.cheese01),
    nMashedPotatoes = sum(mashed.potatoes01),
    nRolls = sum(rolls01),
    nSalad = sum(salad01),
    nYams = sum(yams.sweet.potato01),
    nSquash = sum(squash01)
  )

Add a DivName column so the national row has a label

# Label the national summary row so it can sit alongside the division rows.
natThanks[["DivName"]] <- "National"

Join Regional and National

# Stack the per-division rows on top of the single national row.
# NOTE(review): natThanks carries DivName as its last column while regThanks has
# it first; this relies on rbind() aligning data-frame columns by name — confirm.
combineThanks <- rbind(regThanks, natThanks)

Proportions

# Convert each side-dish count into a share of respondents (count / n) for
# every row of the combined division + national table.
propThanks <- combineThanks %>%
  mutate(
    propCarrots = nCarrots / n,
    propSprouts = nSprouts / n,
    propCauli = nCauliflower / n,
    propCorn = nCorn / n,
    propCBread = nCornBread / n,
    propFruitSalad = nFruitSalad / n,
    propBeans = nGreenBeans / n,
    propMac = nMacnCheese / n,
    propMashed = nMashedPotatoes / n,
    propRolls = nRolls / n,
    propSalad = nSalad / n,
    propYams = nYams / n,
    propSquash = nSquash / n
  )

Create the difference between National and Regional Proportions

# National proportions, looked up by label instead of by row/column position so
# the result does not depend on how many divisions exist or on column order.
natProps <- propThanks[propThanks$DivName %in% "National", ]
stopifnot(nrow(natProps) == 1L)

# Each row's proportion minus the national proportion for the same dish.
# `natProps$...` yields plain numbers, so every dif* column is an ordinary
# numeric vector. The DivName grouping is kept because the later select() and
# join steps rely on DivName travelling with the data.
difThanks <- propThanks %>%
  group_by(DivName) %>%
  mutate(
    difCarrots = propCarrots - natProps$propCarrots,
    difSprouts = propSprouts - natProps$propSprouts,
    difCauli = propCauli - natProps$propCauli,
    difCorn = propCorn - natProps$propCorn,
    difCBread = propCBread - natProps$propCBread,
    difFruitSalad = propFruitSalad - natProps$propFruitSalad,
    difBeans = propBeans - natProps$propBeans,
    difMac = propMac - natProps$propMac,
    difMashed = propMashed - natProps$propMashed,
    difRolls = propRolls - natProps$propRolls,
    difSalad = propSalad - natProps$propSalad,
    difYams = propYams - natProps$propYams,
    difSquash = propSquash - natProps$propSquash
  )

First select the columns you want the max from

# Keep only the difference columns (the grouping column DivName is carried
# along automatically by select()).
onlydifThanks <- difThanks %>%
  select(
    difCarrots,
    difSprouts,
    difCauli,
    difCorn,
    difCBread,
    difFruitSalad,
    difBeans,
    difMac,
    difMashed,
    difRolls,
    difSalad,
    difYams,
    difSquash
  )
## Adding missing grouping variables: `DivName`

Find the max

# For every row, record the name of the dif* column holding the largest value.
# Only the dif* columns go into the matrix: including the character DivName
# column would turn the whole matrix into text and force which.max() to coerce
# it back to numbers (the source of the "NAs introduced by coercion" warnings).
difCols <- setdiff(colnames(onlydifThanks), c("DivName", "Max"))
difMatrix <- as.matrix(as.data.frame(onlydifThanks)[difCols])
onlydifThanks$Max <- difCols[apply(difMatrix, 1, which.max)]

First Map attempt

following hints

# State polygon data from usmap, plus the state lookup table (stateFIPS.csv)
# read from the GitHub-hosted raw file.
states <- usmap::us_map()
fips <- read.csv(
  file = "https://raw.githubusercontent.com/kitadasmalley/FA2020_DataViz/main/data/stateFIPS.csv",
  header = TRUE
)

The DivNames don’t match, so further clean-up is needed

# Strip the " Division" suffix so the names line up with the survey's DivName.
fips$DivName <- gsub(
  pattern = " Division",
  replacement = "",
  x = fips$DivName
)

The state-name column came through with a garbled header, so it is dealt with in the join

# The state-name column is the first column of `fips`, but its header is read
# differently depending on platform/encoding (here it came through as
# "ï..Name"). Renaming it by position gives the join a stable key name.
geoThanks <- fips %>%
  rename(Name = 1) %>%
  left_join(onlydifThanks, by = "DivName")

# Attach each state's division-level result to the state polygon data.
thankStates <- states %>%
  mutate(Name = full) %>%
  left_join(geoThanks, by = c("full" = "Name"))

# First draft: fill every state by the winning side dish of its division.
ggplot(thankStates, aes(x, y, group = group, fill = Max)) +
  geom_polygon(colour = "grey") +
  coord_quickmap()

# Interesting. My graph is slightly different: beans appears in an area where it doesn’t in the polished map. I checked my calculations and the data at that point; mac is the second highest, but beans is the highest. According to 538, it should be mac. At this point, I am moving forward to polishing instead of troubleshooting what occurred. The data looks right.

Saving First draft

# Write the most recently drawn plot to a PDF (8 wide by 6 high).
ggsave(
  filename = "Midterm 2 - First Draft.pdf",
  width = 8,
  height = 6
)

Polishing Notes!!

These are the major differences between my first plot and the 538 plot that I see:

  1. Remove legend
  2. Add text next to region
  3. Remove Axis
  4. Add Title
  5. Add Subtitle
  6. Fix colors?

Polishing notes are recorded below as comments in the code

Second Graph Revision

ggplot(thankStates, aes(x, y, group = group, fill = Max)) +
  geom_polygon(colour = "grey") +
  # Only one coordinate system can be active: the earlier coord_quickmap() was
  # always replaced by coord_fixed(), so it has been dropped.
  coord_fixed() + # Not originally thought of, but added to fix map
  theme_minimal() + # using this theme to help remove things, but not enough yet
  theme(
    axis.line = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    legend.position = "none",
    panel.background = element_blank(),
    panel.border = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.background = element_blank()
  ) + # This fixes #1 and #3 removing the legend and removing the axis.
  labs(
    title = "Side Dishes Of America's Regions",
    subtitle = "Most disproportionately common Thanksgiving side dish by Region",
    caption = "Source: Surveymonkey Audience"
  ) + # Fixing #4 and #5. Cannot figure out how to add the survey date, and add the bar at the bottom for the caption
  # NOTE(review): these colours are unnamed, so they are assigned to the Max
  # categories in sorted order. If the categories are difBeans, difCBread,
  # difMac, difRolls, difSalad, difSquash, then the "Squash" and "Cornbread"
  # label colours below are swapped relative to their fills — confirm.
  scale_fill_manual(
    values = c("#dbd93d", "#ffa8e9", "#7d81e8", "#809c30", "#45bdbf", "#cb94e3")
  ) + # This addresses #6, and was a lot of guessing with hex codes. I'm not that great at color matching, but I tried to get close
  annotate(geom = "text", x = 2400000, y = 830000, label = "Squash", color = "#ffa8e9") +
  annotate(geom = "text", x = 2400000, y = -1000000, label = "Mac & \n Cheese", color = "#7d81e8") +
  annotate(geom = "text", x = 1250000, y = 650000, label = "Rolls & \n Biscuits", color = "#809c30") +
  annotate(geom = "text", x = -2100000, y = 350000, label = "Salad", color = "#45bdbf") +
  annotate(geom = "text", x = 150000, y = 680000, label = "Green Beans/ \n Casserole", color = "#dbd93d") +
  annotate(geom = "text", x = 700000, y = -2000000, label = "Cornbread", color = "#cb94e3") # This addresses #2, and was a lot of moving and guessing

ggsave("Midterm 2 - Final.pdf", height = 6, width = 8)

Polishing Notes final thoughts

I left all polishing notes in line with the code I wrote as I updated the graph. There is still a bunch I didn’t know how to do, such as adding a bar at the bottom. I probably could have explored changing or bolding the font. I could have gotten closer with my colors if I were better at color matching. Moving the annotations around involved a lot of second-guessing and trial and error to get them in the general area. But I think that I got close to the original graph.