Introduction

In a SurveyMonkey poll, 1,058 respondents were asked the following questions about their Thanksgiving:

DataSet: https://fivethirtyeight.com/features/heres-what-your-part-of-america-eats-on-thanksgiving/

Github Link: https://github.com/fivethirtyeight/data/blob/master/thanksgiving-2015/thanksgiving-2015-poll-data.csv

#loading required libraries
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3     v purrr   0.3.4
## v tibble  3.0.6     v dplyr   1.0.4
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(curl)
## 
## Attaching package: 'curl'
## The following object is masked from 'package:readr':
## 
##     parse_date
# Read the Thanksgiving 2015 poll responses straight from the hosted CSV.
poll_dataframe <- read.csv(
  file = "https://raw.githubusercontent.com/keshaws/CUNY_MSDS_2020/master/DATA607/Week1/data/thanksgiving-2015-poll-data.csv"
)

# Report the number of rows and columns of the raw data.
dim(poll_dataframe)
## [1] 1058   65
# List every column name of the raw data.
names(poll_dataframe)
##  [1] "RespondentID"                                                                                                                                
##  [2] "Do.you.celebrate.Thanksgiving."                                                                                                              
##  [3] "What.is.typically.the.main.dish.at.your.Thanksgiving.dinner."                                                                                
##  [4] "What.is.typically.the.main.dish.at.your.Thanksgiving.dinner....Other..please.specify."                                                       
##  [5] "How.is.the.main.dish.typically.cooked."                                                                                                      
##  [6] "How.is.the.main.dish.typically.cooked....Other..please.specify."                                                                             
##  [7] "What.kind.of.stuffing.dressing.do.you.typically.have."                                                                                       
##  [8] "What.kind.of.stuffing.dressing.do.you.typically.have....Other..please.specify."                                                              
##  [9] "What.type.of.cranberry.saucedo.you.typically.have."                                                                                          
## [10] "What.type.of.cranberry.saucedo.you.typically.have....Other..please.specify."                                                                 
## [11] "Do.you.typically.have.gravy."                                                                                                                
## [12] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Brussel.sprouts"                 
## [13] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Carrots"                         
## [14] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Cauliflower"                     
## [15] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Corn"                            
## [16] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Cornbread"                       
## [17] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Fruit.salad"                     
## [18] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Green.beans.green.bean.casserole"
## [19] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Macaroni.and.cheese"             
## [20] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Mashed.potatoes"                 
## [21] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Rolls.biscuits"                  
## [22] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Squash"                          
## [23] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Vegetable.salad"                 
## [24] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Yams.sweet.potato.casserole"     
## [25] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Other..please.specify."          
## [26] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Other..please.specify..1"        
## [27] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Apple"                                    
## [28] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Buttermilk"                               
## [29] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Cherry"                                   
## [30] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Chocolate"                                
## [31] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Coconut.cream"                            
## [32] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Key.lime"                                 
## [33] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Peach"                                    
## [34] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Pecan"                                    
## [35] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Pumpkin"                                  
## [36] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Sweet.Potato"                             
## [37] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....None"                                     
## [38] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Other..please.specify."                   
## [39] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Other..please.specify..1"                 
## [40] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Apple.cobbler"                       
## [41] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Blondies"                            
## [42] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Brownies"                            
## [43] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Carrot.cake"                         
## [44] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Cheesecake"                          
## [45] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Cookies"                             
## [46] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Fudge"                               
## [47] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Ice.cream"                           
## [48] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Peach.cobbler"                       
## [49] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......None"                                
## [50] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Other..please.specify."              
## [51] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Other..please.specify..1"            
## [52] "Do.you.typically.pray.before.or.after.the.Thanksgiving.meal."                                                                                
## [53] "How.far.will.you.travel.for.Thanksgiving."                                                                                                   
## [54] "Will.you.watch.any.of.the.following.programs.on.Thanksgiving..Please.select.all.that.apply....Macy.s.Parade"                                 
## [55] "What.s.the.age.cutoff.at.your..kids..table..at.Thanksgiving."                                                                                
## [56] "Have.you.ever.tried.to.meet.up.with.hometown.friends.on.Thanksgiving.night."                                                                 
## [57] "Have.you.ever.attended.a..Friendsgiving.."                                                                                                   
## [58] "Will.you.shop.any.Black.Friday.sales.on.Thanksgiving.Day."                                                                                   
## [59] "Do.you.work.in.retail."                                                                                                                      
## [60] "Will.you.employer.make.you.work.on.Black.Friday."                                                                                            
## [61] "How.would.you.describe.where.you.live."                                                                                                      
## [62] "Age"                                                                                                                                         
## [63] "What.is.your.gender."                                                                                                                        
## [64] "How.much.total.combined.money.did.all.members.of.your.HOUSEHOLD.earn.last.year."                                                             
## [65] "US.Region"
# Keep only the respondent ID, the celebration answer, and the demographic
# and location columns used in the rest of the analysis.
poll_dataframe_subset <- poll_dataframe %>%
  select(
    "RespondentID",
    "Do.you.celebrate.Thanksgiving.",
    "What.is.your.gender.",
    "Age",
    "How.would.you.describe.where.you.live.",
    "US.Region"
  )
# Preview the first ten rows of the subset.
head(poll_dataframe_subset, n = 10)
##    RespondentID Do.you.celebrate.Thanksgiving. What.is.your.gender.     Age
## 1    4337954960                            Yes                 Male 18 - 29
## 2    4337951949                            Yes               Female 18 - 29
## 3    4337935621                            Yes                 Male 18 - 29
## 4    4337933040                            Yes                 Male 30 - 44
## 5    4337931983                            Yes                 Male 30 - 44
## 6    4337929779                            Yes                 Male 18 - 29
## 7    4337924420                            Yes                 Male 18 - 29
## 8    4337916002                            Yes                 Male 18 - 29
## 9    4337914977                            Yes                 Male 30 - 44
## 10   4337899817                            Yes                 Male 30 - 44
##    How.would.you.describe.where.you.live.          US.Region
## 1                                Suburban    Middle Atlantic
## 2                                   Rural East South Central
## 3                                Suburban           Mountain
## 4                                   Urban            Pacific
## 5                                   Urban            Pacific
## 6                                   Urban            Pacific
## 7                                   Rural East North Central
## 8                                   Rural           Mountain
## 9                                   Urban    Middle Atlantic
## 10                               Suburban East South Central
# Replace the verbose survey-question column names with short identifiers.
poll_dataframe_data <- poll_dataframe_subset %>%
  rename(
    ID = "RespondentID",
    celebrate = "Do.you.celebrate.Thanksgiving.",
    gender = "What.is.your.gender.",
    age_range = "Age",
    living_region = "How.would.you.describe.where.you.live.",
    us_region = "US.Region"
  )
# Preview the first five rows with the new column names.
head(poll_dataframe_data, n = 5)
##           ID celebrate gender age_range living_region          us_region
## 1 4337954960       Yes   Male   18 - 29      Suburban    Middle Atlantic
## 2 4337951949       Yes Female   18 - 29         Rural East South Central
## 3 4337935621       Yes   Male   18 - 29      Suburban           Mountain
## 4 4337933040       Yes   Male   30 - 44         Urban            Pacific
## 5 4337931983       Yes   Male   30 - 44         Urban            Pacific
# Distinct living-region answers (note the empty-string value in the output).
unique(poll_dataframe_data[["living_region"]])
## [1] "Suburban" "Rural"    "Urban"    ""
# Number of respondents per living-region answer.
poll_dataframe_data %>%
  group_by(living_region) %>%
  summarise(count = n())
## # A tibble: 4 x 2
##   living_region count
## * <chr>         <int>
## 1 ""              110
## 2 "Rural"         216
## 3 "Suburban"      496
## 4 "Urban"         236
# Number of respondents per US region, before any cleaning.
poll_dataframe_data %>%
  group_by(us_region) %>%
  summarise(count = n())
## # A tibble: 10 x 2
##    us_region            count
##  * <chr>                <int>
##  1 ""                      59
##  2 "East North Central"   150
##  3 "East South Central"    60
##  4 "Middle Atlantic"      159
##  5 "Mountain"              47
##  6 "New England"           58
##  7 "Pacific"              146
##  8 "South Atlantic"       214
##  9 "West North Central"    74
## 10 "West South Central"    91
# Drop respondents whose US region is the empty string. The column is
# referenced by bare name: inside dplyr verbs, `data$col` bypasses data
# masking and would misalign if the piped data were grouped or already
# filtered.
poll_data_cleaned <- poll_dataframe_data %>%
  filter(us_region != "")

# Respondent counts per US region after removing the empty-string entries.
poll_data_cleaned %>%
  group_by(us_region) %>%
  summarise(count = n())
## # A tibble: 9 x 2
##   us_region          count
## * <chr>              <int>
## 1 East North Central   150
## 2 East South Central    60
## 3 Middle Atlantic      159
## 4 Mountain              47
## 5 New England           58
## 6 Pacific              146
## 7 South Atlantic       214
## 8 West North Central    74
## 9 West South Central    91
# Rebuild the cleaned subset (rows with a non-empty US region). The column is
# referenced by bare name rather than `data$col`, so the condition is always
# evaluated against the data actually flowing through the pipe.
poll_data_cleaned <- poll_dataframe_data %>%
  filter(us_region != "")

# Respondent counts for each combination of US region and celebration answer.
poll_data_cleaned %>%
  group_by(us_region, celebrate) %>%
  summarise(count = n())
## `summarise()` has grouped output by 'us_region'. You can override using the `.groups` argument.
## # A tibble: 18 x 3
## # Groups:   us_region [9]
##    us_region          celebrate count
##    <chr>              <chr>     <int>
##  1 East North Central No            5
##  2 East North Central Yes         145
##  3 East South Central No            4
##  4 East South Central Yes          56
##  5 Middle Atlantic    No           14
##  6 Middle Atlantic    Yes         145
##  7 Mountain           No            6
##  8 Mountain           Yes          41
##  9 New England        No            3
## 10 New England        Yes          55
## 11 Pacific            No           16
## 12 Pacific            Yes         130
## 13 South Atlantic     No           11
## 14 South Atlantic     Yes         203
## 15 West North Central No            3
## 16 West North Central Yes          71
## 17 West South Central No            6
## 18 West South Central Yes          85
# Stacked bar chart of respondents per living region, coloured by age range.
# Columns are mapped by bare name instead of `data$col`, and the legend title
# names the variable that is actually mapped to `fill`.
ggplot(poll_dataframe_data, mapping = aes(x = living_region)) +
  geom_bar(aes(fill = age_range)) +
  # No fill mapping on this layer, so each bar gets one label: its total count.
  geom_text(
    stat = "count",
    aes(label = after_stat(count), y = after_stat(count)),
    vjust = -0.2
  ) +
  xlab("Living Region") +
  labs(fill = "age_range") +
  ggtitle("Poll data")

# Stacked bar chart of respondents per US region, coloured by whether they
# celebrate Thanksgiving. Columns are mapped by bare name instead of
# `data$col`, and the legend title names the variable mapped to `fill`.
ggplot(poll_data_cleaned, mapping = aes(x = us_region)) +
  geom_bar(aes(fill = celebrate)) +
  # No fill mapping on this layer, so each bar gets one label: its total count.
  geom_text(
    stat = "count",
    aes(label = after_stat(count), y = after_stat(count)),
    vjust = -0.2
  ) +
  xlab("US Region") +
  labs(fill = "celebrate") +
  # Rotate the region names so the nine labels do not overlap.
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Poll data")

Conclusion

The main data set has 1,058 data points and 65 features. I created a subset and cleaned it by removing the respondents with no US region recorded. Of the remaining 999 respondents, 931 said yes and 68 said no when asked whether they celebrate Thanksgiving. From the first plot, suburban is the most common living region among respondents. In addition, by number of “Yes” responses, Thanksgiving celebration is least common in the “Mountain” region and most common in the “South Atlantic” region.