Option 2: Recreate

Upload data (cleaned by Heather)

tg<- read_csv("CLEAN thanksgiving data.csv")
## Rows: 1058 Columns: 83
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (46): celebrate, main, cooked, stuffing, cranberry, gravy, brussel.sprou...
## dbl (37): id, celebrate01, gravy01, friendsgiving01, black.friday01, brussel...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(tg)
## # A tibble: 6 × 83
##       id celebrate main  cooked stuffing cranberry gravy brussel.sprouts carrots
##    <dbl> <chr>     <chr> <chr>  <chr>    <chr>     <chr> <chr>           <chr>  
## 1 4.34e9 Yes       Turk… Baked  Bread-b… None      Yes   <NA>            Carrots
## 2 4.34e9 Yes       Turk… Baked  Bread-b… Other (p… Yes   <NA>            <NA>   
## 3 4.34e9 Yes       Turk… Roast… Rice-ba… Homemade  Yes   Brussel sprouts Carrots
## 4 4.34e9 Yes       Turk… Baked  Bread-b… Homemade  Yes   Brussel sprouts <NA>   
## 5 4.34e9 Yes       Tofu… Baked  Bread-b… Canned    Yes   Brussel sprouts <NA>   
## 6 4.34e9 Yes       Turk… Roast… Rice-ba… Homemade  Yes   Brussel sprouts Carrots
## # … with 74 more variables: cauliflower <chr>, corn <chr>, cornbread <chr>,
## #   fruit.salad <chr>, green.beans <chr>, mac.n.cheese <chr>,
## #   mashed.potatoes <chr>, rolls <chr>, squash <chr>, salad <chr>,
## #   yams.sweet.potato <chr>, apple.pie <chr>, buttermilk.pie <chr>,
## #   cherry.pie <chr>, chocolate.pie <chr>, coconut.pie <chr>,
## #   keylime.pie <chr>, peach.pie <chr>, pecan.pie <chr>, pumpkin.pie <chr>,
## #   sweet.potato.pie <chr>, apple.cobbler <chr>, blondies <chr>, …

Wrangle data

Hint 1: group_by and summarize

According to the variable breakdown (https://github.com/fivethirtyeight/data/tree/master/thanksgiving-2015), the side dish options are brussel sprouts, carrots, cauliflower, corn, cornbread, fruit salad, green beans/green bean casserole, macaroni and cheese, mashed potatoes, rolls/biscuits, vegetable salad, yams/sweet potato casserole, and a category for “other”. We include these variables along with squash (from the “other” category presumably) in our attempt to recreate FiveThirtyEight’s side dish graphic.

#Number of households in each region
#Number of households who serve the side menu items, by region
tg%>%
  group_by(DivName)%>%
  summarize(n=n())
## # A tibble: 10 × 2
##    DivName                n
##    <chr>              <int>
##  1 East North Central   150
##  2 East South Central    60
##  3 Middle Atlantic      159
##  4 Mountain              47
##  5 New England           58
##  6 Pacific              146
##  7 South Atlantic       214
##  8 West North Central    74
##  9 West South Central    91
## 10 <NA>                  59
#Number of households who serve the side menu items, by region
tg%>%
  group_by(DivName)%>%
  summarize(sum(brussel.sprouts01), sum(carrots01), sum(cauliflower01),
            sum(corn01), sum(cornbread01), sum(fruit.salad01), 
            sum(green.beans01), sum(mac.n.cheese01), sum(mashed.potatoes01), 
            sum(rolls01), sum(squash01), sum(salad01), sum(yams.sweet.potato01))
## # A tibble: 10 × 14
##    DivName    `sum(brussel.spro… `sum(carrots01)` `sum(cauliflowe… `sum(corn01)`
##    <chr>                   <dbl>            <dbl>            <dbl>         <dbl>
##  1 East Nort…                 22               28               13            76
##  2 East Sout…                  7               15                5            31
##  3 Middle At…                 41               45               25            77
##  4 Mountain                    6               11                4            17
##  5 New Engla…                 12               25                4            22
##  6 Pacific                    29               30               18            55
##  7 South Atl…                 27               48               11            96
##  8 West Nort…                  3               12                3            36
##  9 West Sout…                  4               21                3            46
## 10 <NA>                        4                7                2             8
## # … with 9 more variables: sum(cornbread01) <dbl>, sum(fruit.salad01) <dbl>,
## #   sum(green.beans01) <dbl>, sum(mac.n.cheese01) <dbl>,
## #   sum(mashed.potatoes01) <dbl>, sum(rolls01) <dbl>, sum(squash01) <dbl>,
## #   sum(salad01) <dbl>, sum(yams.sweet.potato01) <dbl>
tgsum<- tg%>%
  group_by(DivName)%>%
  summarize(n=n(), sumBrussel=sum(brussel.sprouts01), sumCarrots=sum(carrots01), sumCauliflower=sum(cauliflower01),
            sumCorn=sum(corn01), sumCornbread=sum(cornbread01), sumFruitSalad=sum(fruit.salad01), 
            sumGreenBeans=sum(green.beans01), sumMac=sum(mac.n.cheese01), sumMashedPotatoes=sum(mashed.potatoes01), 
            sumRolls=sum(rolls01), sumSquash=sum(squash01), sumSalad=sum(salad01), sumYams=sum(yams.sweet.potato01))
tgsum%>%
  mutate(propBrussel=sumBrussel/n, propCarrots=sumCarrots/n,
         propCauliflower=sumCauliflower/n, propCorn=sumCorn/n,
         propCornbread=sumCornbread/n, propFruitSalad=sumFruitSalad/n,
         propGreenBeans=sumGreenBeans/n, propMac=sumMac/n,
         propMashedPotatoes=sumMashedPotatoes/n, propRolls=sumRolls/n,
         propSquash = sumSquash/n, propSalad=sumSalad/n, 
         propYams = sumYams/n)
## # A tibble: 10 × 28
##    DivName           n sumBrussel sumCarrots sumCauliflower sumCorn sumCornbread
##    <chr>         <int>      <dbl>      <dbl>          <dbl>   <dbl>        <dbl>
##  1 East North C…   150         22         28             13      76           23
##  2 East South C…    60          7         15              5      31           16
##  3 Middle Atlan…   159         41         45             25      77           33
##  4 Mountain         47          6         11              4      17           10
##  5 New England      58         12         25              4      22           10
##  6 Pacific         146         29         30             18      55           37
##  7 South Atlant…   214         27         48             11      96           53
##  8 West North C…    74          3         12              3      36           12
##  9 West South C…    91          4         21              3      46           34
## 10 <NA>             59          4          7              2       8            7
## # … with 21 more variables: sumFruitSalad <dbl>, sumGreenBeans <dbl>,
## #   sumMac <dbl>, sumMashedPotatoes <dbl>, sumRolls <dbl>, sumSquash <dbl>,
## #   sumSalad <dbl>, sumYams <dbl>, propBrussel <dbl>, propCarrots <dbl>,
## #   propCauliflower <dbl>, propCorn <dbl>, propCornbread <dbl>,
## #   propFruitSalad <dbl>, propGreenBeans <dbl>, propMac <dbl>,
## #   propMashedPotatoes <dbl>, propRolls <dbl>, propSquash <dbl>,
## #   propSalad <dbl>, propYams <dbl>
Hint 2: mutate
tgsum<- tgsum%>%
  mutate(propBrussel=sumBrussel/n, propCarrots=sumCarrots/n,
         propCauliflower=sumCauliflower/n, propCorn=sumCorn/n,
         propCornbread=sumCornbread/n, propFruitSalad=sumFruitSalad/n,
         propGreenBeans=sumGreenBeans/n, propMac=sumMac/n,
         propMashedPotatoes=sumMashedPotatoes/n, propRolls=sumRolls/n,
         propSquash = sumSquash/n, propSalad=sumSalad/n, 
         propYams = sumYams/n)
Hint 3: Census statistics
#Loading data for national population values
popDiv<-data.frame(DivName=c("East North Central",
                             "East South Central", 
                             "Middle Atlantic", 
                             "Mountain", 
                             "New England", 
                             "Pacific", 
                             "South Atlantic", 
                             "West North Central", 
                             "West South Central"), 
                   pop=c(46798649, 
                         18931477,
                         41601787,
                         23811346,
                         14757573,
                         52833604,
                         63991523,
                         21179519,
                         39500457))%>%
  mutate(popProp=pop/323405935)

popDiv
##              DivName      pop    popProp
## 1 East North Central 46798649 0.14470560
## 2 East South Central 18931477 0.05853782
## 3    Middle Atlantic 41601787 0.12863644
## 4           Mountain 23811346 0.07362681
## 5        New England 14757573 0.04563173
## 6            Pacific 52833604 0.16336622
## 7     South Atlantic 63991523 0.19786750
## 8 West North Central 21179519 0.06548896
## 9 West South Central 39500457 0.12213894
Hint 4: National trends/weighted averages
#Join national population values with summarized data set
tgpop<- left_join(tgsum, popDiv)
## Joining, by = "DivName"
tgpop
## # A tibble: 10 × 30
##    DivName           n sumBrussel sumCarrots sumCauliflower sumCorn sumCornbread
##    <chr>         <int>      <dbl>      <dbl>          <dbl>   <dbl>        <dbl>
##  1 East North C…   150         22         28             13      76           23
##  2 East South C…    60          7         15              5      31           16
##  3 Middle Atlan…   159         41         45             25      77           33
##  4 Mountain         47          6         11              4      17           10
##  5 New England      58         12         25              4      22           10
##  6 Pacific         146         29         30             18      55           37
##  7 South Atlant…   214         27         48             11      96           53
##  8 West North C…    74          3         12              3      36           12
##  9 West South C…    91          4         21              3      46           34
## 10 <NA>             59          4          7              2       8            7
## # … with 23 more variables: sumFruitSalad <dbl>, sumGreenBeans <dbl>,
## #   sumMac <dbl>, sumMashedPotatoes <dbl>, sumRolls <dbl>, sumSquash <dbl>,
## #   sumSalad <dbl>, sumYams <dbl>, propBrussel <dbl>, propCarrots <dbl>,
## #   propCauliflower <dbl>, propCorn <dbl>, propCornbread <dbl>,
## #   propFruitSalad <dbl>, propGreenBeans <dbl>, propMac <dbl>,
## #   propMashedPotatoes <dbl>, propRolls <dbl>, propSquash <dbl>,
## #   propSalad <dbl>, propYams <dbl>, pop <dbl>, popProp <dbl>
#Weight the proportions 
tgpop<- tgpop%>%
  mutate(wPropBrussel=propBrussel*popProp, wPropCarrots=propCarrots*popProp,
         wPropCauliflower=propCauliflower*popProp, wPropCorn=propCorn*popProp,
         wPropCornbread=propCornbread*popProp, wPropFruitSalad=propFruitSalad*popProp,
         wPropGreenBeans=propGreenBeans*popProp, wPropMac=propMac*popProp,
         wPropMashedPotatoes=propMashedPotatoes*popProp, wPropRolls=propRolls*popProp,
         wPropSquash=propSquash*popProp, wPropSalad=propSalad*popProp,
         wPropYams=propYams*popProp)

#Compute national level values
natPropBrussel<- sum(tgpop$wPropBrussel, na.rm=TRUE)
natPropBrussel
## [1] 0.1455013
natPropCarrots<- sum(tgpop$wPropCarrots, na.rm=TRUE)
natPropCarrots
## [1] 0.231709
natPropCauliflower<- sum(tgpop$wPropCauliflower, na.rm=TRUE)
natPropCauliflower
## [1] 0.0840516
natPropCorn<- sum(tgpop$wPropCorn, na.rm=TRUE)
natPropCorn
## [1] 0.4537023
natPropCornbread<- sum(tgpop$wPropCornbread, na.rm=TRUE)
natPropCornbread
## [1] 0.234689
natPropFruitSalad<- sum(tgpop$wPropFruitSalad, na.rm=TRUE)
natPropFruitSalad
## [1] 0.2124256
natPropGreenBeans<- sum(tgpop$wPropGreenBeans, na.rm=TRUE)
natPropGreenBeans
## [1] 0.6704939
natPropMac<- sum(tgpop$wPropMac, na.rm=TRUE)
natPropMac
## [1] 0.1969969
natPropMashedPotatoes<- sum(tgpop$wPropMashedPotatoes, na.rm=TRUE)
natPropMashedPotatoes
## [1] 0.7939981
natPropRolls<- sum(tgpop$wPropRolls, na.rm=TRUE)
natPropRolls
## [1] 0.745888
natPropSquash<- sum(tgpop$wPropSquash, na.rm=TRUE)
natPropSquash
## [1] 0.1541902
natPropSalad<- sum(tgpop$wPropSalad, na.rm=TRUE)
natPropSalad
## [1] 0.2001446
natPropYams<- sum(tgpop$wPropYams, na.rm=TRUE)
natPropYams
## [1] 0.619342

Now that we have the national proportions we can see which regions are disproportionately into certain side dishes!!!

Hint 5: Differences

To find difference, for each region we want to subtract the national proportion from the regional proportion

tgdiff<-tgpop%>%
  filter(!is.na(DivName))%>%
  mutate(diffBrussel = propBrussel - natPropBrussel,
         diffCarrots = propCarrots - natPropCarrots,
         diffCauliflower = propCauliflower - natPropCauliflower,
         diffCorn = propCorn - natPropCorn,
         diffCornbread = propCornbread - natPropCornbread,
         diffFruitSalad = propFruitSalad - natPropFruitSalad,
         diffGreenBeans = propGreenBeans - natPropGreenBeans,
         diffMac = propMac - natPropMac,
         diffMashedPotatoes = propMashedPotatoes - natPropMashedPotatoes,
         diffRolls = propRolls - natPropRolls,
         diffSquash = propSquash - natPropSquash,
         diffSalad = propSalad - natPropSalad,
         diffYams = propYams - natPropYams)%>%
  select(DivName, diffBrussel, diffCarrots, diffCauliflower, diffCorn,
         diffCornbread, diffFruitSalad, diffGreenBeans, diffMac, 
         diffMashedPotatoes, diffRolls, diffSquash, diffSalad,
         diffYams)
tgdiff
## # A tibble: 9 × 14
##   DivName         diffBrussel diffCarrots diffCauliflower diffCorn diffCornbread
##   <chr>                 <dbl>       <dbl>           <dbl>    <dbl>         <dbl>
## 1 East North Cen…     0.00117   -0.0450          0.00262   0.0530        -0.0814
## 2 East South Cen…    -0.0288     0.0183         -0.000718  0.0630         0.0320
## 3 Middle Atlantic     0.112      0.0513          0.0732    0.0306        -0.0271
## 4 Mountain           -0.0178     0.00233         0.00105  -0.0920        -0.0219
## 5 New England         0.0614     0.199          -0.0151   -0.0744        -0.0623
## 6 Pacific             0.0531    -0.0262          0.0392   -0.0770         0.0187
## 7 South Atlantic     -0.0193    -0.00741        -0.0326   -0.00510        0.0130
## 8 West North Cen…    -0.105     -0.0695         -0.0435    0.0328        -0.0725
## 9 West South Cen…    -0.102     -0.000940       -0.0511    0.0518         0.139 
## # … with 8 more variables: diffFruitSalad <dbl>, diffGreenBeans <dbl>,
## #   diffMac <dbl>, diffMashedPotatoes <dbl>, diffRolls <dbl>, diffSquash <dbl>,
## #   diffSalad <dbl>, diffYams <dbl>

I’ll try to return to this if I can do use R to find the greatest spreads but it’s giving me a huge headache (trying to use gather/spread functions to calculate the maximum across rows instead of doswn columns but it’s not giving me what I want) so I’m gonna do it the gold old-fashioned way and look at the table.

We observe the following side dishes are the most disproportionately popular for each region: East North Central: ROLLS! (At 0.074) East South Central: MAC N CHEESE! (At 0.153) Middle Atlantic: SQUASH! (At 0.1225) Mountain: SALAD! (At 0.055) New England: SQUASH! (At 0.380) Pacific: SALAD! (At 0.067) South Atlantic: MAC N CHEESE! (At 0.172) West North Central: GREEN BEANS! (At 0.140) West South Central: CORNBREAD! (At 0.139)

Hint 6: Summarize to region/division favorites
sides<- c("Rolls","Mac N Cheese","Squash","Salad","Squash","Salad","Mac N Cheese","Green Beans","Cornbread")
favorites<- tgdiff%>%
  select(DivName)%>%
  mutate(favSides=sides, DivName=paste(DivName, " Division", sep=""))
favorites
## # A tibble: 9 × 2
##   DivName                     favSides    
##   <chr>                       <chr>       
## 1 East North Central Division Rolls       
## 2 East South Central Division Mac N Cheese
## 3 Middle Atlantic Division    Squash      
## 4 Mountain Division           Salad       
## 5 New England Division        Squash      
## 6 Pacific Division            Salad       
## 7 South Atlantic Division     Mac N Cheese
## 8 West North Central Division Green Beans 
## 9 West South Central Division Cornbread
Hint 7: Import maps package
#usmap library called earlier with other libraries 
states <- usmap::us_map()
head(states)
##         x        y order  hole piece group fips abbr    full
## 1 1091779 -1380695     1 FALSE     1  01.1   01   AL Alabama
## 2 1091268 -1376372     2 FALSE     1  01.1   01   AL Alabama
## 3 1091140 -1362998     3 FALSE     1  01.1   01   AL Alabama
## 4 1090940 -1343517     4 FALSE     1  01.1   01   AL Alabama
## 5 1090913 -1341006     5 FALSE     1  01.1   01   AL Alabama
## 6 1090796 -1334480     6 FALSE     1  01.1   01   AL Alabama
Hint 8: Join to your data
fips<-read.csv("https://raw.githubusercontent.com/kitadasmalley/FA2020_DataViz/main/data/stateFIPS.csv", 
               header=TRUE)

geoPie<-fips%>%
  left_join(favorites)
## Joining, by = "DivName"
head(geoPie)
##            Name State Region       RegionName Division              DivName
## 1   Connecticut     9      1 Northeast Region        1 New England Division
## 2         Maine    23      1 Northeast Region        1 New England Division
## 3 Massachusetts    25      1 Northeast Region        1 New England Division
## 4 New Hampshire    33      1 Northeast Region        1 New England Division
## 5  Rhode Island    44      1 Northeast Region        1 New England Division
## 6       Vermont    50      1 Northeast Region        1 New England Division
##   favSides
## 1   Squash
## 2   Squash
## 3   Squash
## 4   Squash
## 5   Squash
## 6   Squash
foodStates<-states %>%
  mutate(Name=full)%>%
  left_join(geoPie)
## Joining, by = "Name"
head(foodStates)
##         x        y order  hole piece group fips abbr    full    Name State
## 1 1091779 -1380695     1 FALSE     1  01.1   01   AL Alabama Alabama     1
## 2 1091268 -1376372     2 FALSE     1  01.1   01   AL Alabama Alabama     1
## 3 1091140 -1362998     3 FALSE     1  01.1   01   AL Alabama Alabama     1
## 4 1090940 -1343517     4 FALSE     1  01.1   01   AL Alabama Alabama     1
## 5 1090913 -1341006     5 FALSE     1  01.1   01   AL Alabama Alabama     1
## 6 1090796 -1334480     6 FALSE     1  01.1   01   AL Alabama Alabama     1
##   Region   RegionName Division                     DivName     favSides
## 1      3 South Region        6 East South Central Division Mac N Cheese
## 2      3 South Region        6 East South Central Division Mac N Cheese
## 3      3 South Region        6 East South Central Division Mac N Cheese
## 4      3 South Region        6 East South Central Division Mac N Cheese
## 5      3 South Region        6 East South Central Division Mac N Cheese
## 6      3 South Region        6 East South Central Division Mac N Cheese

Step 2: Make first attempt at creating a graphic

Use geom_poly in ggplot

ggplot(foodStates, aes(x=x,y=y, group=group))+
  geom_polygon()

Step 3: Color palette

Bonus points for creating my own palette!

Brainstorming colors: This graph features a qualitative palette varying primarily in hue (as opposed to luminance or chroma) since these side dishes are unordered, as far as this graph is concerned (while some are MORE disproportionately popular than others, that is not the point of this graph). This website https://color.adobe.com/create/image will even allow me to identify the colors of the original graph (or close enough at least).

ggplot(foodStates, aes(x=x,y=y, group=group))+
  geom_polygon(aes(fill=favSides), color='grey')+
  scale_fill_manual(values=c("#F29979","#F2CA50","#8FAFD9","#BCD97E","#5ABFB5","#E39FC5"))

#ggsave("firstgraphic.pdf",height=5,width=7)

Step 4: Brainstorm/Plan

What polishing needs to be done?

  • Add graph title and subtitle

  • Add caption with source

  • Add labels to fills (matching in color)

  • Generally, let’s try applying the FiveThirtyEight theme

  • remove legend and axis grid/text

Step 5: Update your plot

ggplot(foodStates, aes(x=x,y=y, group=group))+
  geom_polygon(aes(fill=favSides), color='grey')+
  scale_fill_manual(values=c("#F29979","#F2CA50","#8FAFD9","#BCD97E","#5ABFB5","#E39FC5"))+
  theme_fivethirtyeight()+
  labs(title="Side Dishes of America's Regions",
       subtitle = "Most disproportionately common Thanksgiving side dish by region",
       caption = "SOURCE: SURVEYMONKEY AUDIENCE, VIA FiveThirtyEight")+
  theme(panel.grid.major = element_blank(),
        axis.text=element_blank(),
        legend.position='none')+
  coord_fixed()+
  annotate("text", x=-2.1e+06, y=.5e+06, label="SALAD",color='#5ABFB5', size=3, fontface='bold')+
  annotate("text", x=0,y=.7e+06, label="GREEN BEANS/ \n CASSEROLE", color='#F2CA50', size=3, fontface='bold')+
  annotate("text", x=1.3e+06,y=.5e+06, label="ROLLS/ \n BISCUITS", color='#BCD97E', size=3, fontface='bold')+
  annotate("text", x=2e+06,y=.8e+06, label="SQUASH", color='#E39FC5', size=3, fontface='bold')+
  annotate("text", x=.8e+06,y=-1.9e+06, label="CORNBREAD", color='#F29979', size=3, fontface='bold')+
  annotate("text", x=2.2e+06,y=-1.4e+06, label="MAC & \nCHEESE", color='#8FAFD9', size=3, fontface='bold')

Step 6: Final polished plot

For final polishing I want to make it as close as I am capable of! Going to try to add additional annotations and mess with text size, etc.

ggplot(foodStates, aes(x=x,y=y, group=group))+
  geom_polygon(aes(fill=favSides), color="#636E72", size=.2)+
  scale_fill_manual(values=c("#F29979","#F2CA50","#8FAFD9","#BCD97E","#5ABFB5","#E39FC5"))+
  theme_fivethirtyeight()+
  labs(title="Side Dishes of America's Regions",
       subtitle = "Most disproportionately common Thanksgiving side dish by region",
       caption = "SOURCE: SURVEYMONKEY AUDIENCE, VIA FiveThirtyEight")+
  theme(panel.grid.major = element_blank(),
        axis.text=element_blank(),
        legend.position='none',
        plot.caption=element_text(size=6, face='bold', color='grey'),
        plot.title = element_text(size=15),
        plot.subtitle = element_text(size=10.5))+
  coord_fixed()+
  annotate("text", x=-2.1e+06, y=.5e+06, label="SALAD",color='#5ABFB5', size=3, fontface='bold')+
  annotate("text", x=0,y=.7e+06, label="GREEN BEANS/ \n CASSEROLE", color='#F2CA50', size=3, fontface='bold')+
  annotate("text", x=1.3e+06,y=.5e+06, label="ROLLS/ \n BISCUITS", color='#BCD97E', size=3, fontface='bold')+
  annotate("text", x=2e+06,y=.8e+06, label="SQUASH", color='#E39FC5', size=3, fontface='bold')+
  annotate("text", x=.8e+06,y=-1.9e+06, label="CORNBREAD", color='#F29979', size=3, fontface='bold')+
  annotate("text", x=2.2e+06,y=-1.4e+06, label="MAC & \nCHEESE", color='#8FAFD9', size=3, fontface='bold')+
  geom_hline(yintercept=1.1e+06, color='grey')+
  geom_hline(yintercept=1.4e+06, color='grey')+
  annotate("text", x=-1.8e+06, y=1.3e+06, label="SURVERY DATES", size=2, fontface='bold', color="#636E72")+
  annotate("text", x=-1.9e+06, y=1.2e+06, label="11/17/2015", size=2, color="#636E72")+
  annotate("text", x=1e+06, y=1.3e+06, label="NO. OF RESPONDENTS", size=2, fontface='bold', color="#636E72")+
  annotate("text", x=0.6e+06, y=1.2e+06, label="931", size=2, color="#636E72")

#ggsave("finalgraphic.pdf",height=5,width=5)

Note: submit pdfs of first graphic AND final graphic along with notes and code in this RMarkdown.