Using a SurveyMonkey poll, I asked 1,058 respondents the following questions about their Thanksgiving:
Source article: https://fivethirtyeight.com/features/heres-what-your-part-of-america-eats-on-thanksgiving/
Github Link: https://github.com/fivethirtyeight/data/blob/master/thanksgiving-2015/thanksgiving-2015-poll-data.csv
#loading required libraries
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.3 v purrr 0.3.4
## v tibble 3.0.6 v dplyr 1.0.4
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(curl)
##
## Attaching package: 'curl'
## The following object is masked from 'package:readr':
##
## parse_date
# Import the raw poll responses from the CSV hosted on GitHub.
poll_dataframe <- read.csv(
  file = "https://raw.githubusercontent.com/keshaws/CUNY_MSDS_2020/master/DATA607/Week1/data/thanksgiving-2015-poll-data.csv"
)
# Report how many rows and columns were read.
dim(poll_dataframe)
## [1] 1058 65
colnames(poll_dataframe)
## [1] "RespondentID"
## [2] "Do.you.celebrate.Thanksgiving."
## [3] "What.is.typically.the.main.dish.at.your.Thanksgiving.dinner."
## [4] "What.is.typically.the.main.dish.at.your.Thanksgiving.dinner....Other..please.specify."
## [5] "How.is.the.main.dish.typically.cooked."
## [6] "How.is.the.main.dish.typically.cooked....Other..please.specify."
## [7] "What.kind.of.stuffing.dressing.do.you.typically.have."
## [8] "What.kind.of.stuffing.dressing.do.you.typically.have....Other..please.specify."
## [9] "What.type.of.cranberry.saucedo.you.typically.have."
## [10] "What.type.of.cranberry.saucedo.you.typically.have....Other..please.specify."
## [11] "Do.you.typically.have.gravy."
## [12] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Brussel.sprouts"
## [13] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Carrots"
## [14] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Cauliflower"
## [15] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Corn"
## [16] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Cornbread"
## [17] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Fruit.salad"
## [18] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Green.beans.green.bean.casserole"
## [19] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Macaroni.and.cheese"
## [20] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Mashed.potatoes"
## [21] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Rolls.biscuits"
## [22] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Squash"
## [23] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Vegetable.salad"
## [24] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Yams.sweet.potato.casserole"
## [25] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Other..please.specify."
## [26] "Which.of.these.side.dishes.aretypically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Other..please.specify..1"
## [27] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Apple"
## [28] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Buttermilk"
## [29] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Cherry"
## [30] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Chocolate"
## [31] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Coconut.cream"
## [32] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Key.lime"
## [33] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Peach"
## [34] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Pecan"
## [35] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Pumpkin"
## [36] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Sweet.Potato"
## [37] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....None"
## [38] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Other..please.specify."
## [39] "Which.type.of.pie.is.typically.served.at.your.Thanksgiving.dinner..Please.select.all.that.apply....Other..please.specify..1"
## [40] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Apple.cobbler"
## [41] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Blondies"
## [42] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Brownies"
## [43] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Carrot.cake"
## [44] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Cheesecake"
## [45] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Cookies"
## [46] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Fudge"
## [47] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Ice.cream"
## [48] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Peach.cobbler"
## [49] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......None"
## [50] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Other..please.specify."
## [51] "Which.of.these.desserts.do.you.typically.have.at.Thanksgiving.dinner..Please.select.all.that.apply......Other..please.specify..1"
## [52] "Do.you.typically.pray.before.or.after.the.Thanksgiving.meal."
## [53] "How.far.will.you.travel.for.Thanksgiving."
## [54] "Will.you.watch.any.of.the.following.programs.on.Thanksgiving..Please.select.all.that.apply....Macy.s.Parade"
## [55] "What.s.the.age.cutoff.at.your..kids..table..at.Thanksgiving."
## [56] "Have.you.ever.tried.to.meet.up.with.hometown.friends.on.Thanksgiving.night."
## [57] "Have.you.ever.attended.a..Friendsgiving.."
## [58] "Will.you.shop.any.Black.Friday.sales.on.Thanksgiving.Day."
## [59] "Do.you.work.in.retail."
## [60] "Will.you.employer.make.you.work.on.Black.Friday."
## [61] "How.would.you.describe.where.you.live."
## [62] "Age"
## [63] "What.is.your.gender."
## [64] "How.much.total.combined.money.did.all.members.of.your.HOUSEHOLD.earn.last.year."
## [65] "US.Region"
# Keep only the respondent ID, the celebration answer and the demographic
# columns; the food-related questions are not needed below.
poll_dataframe_subset <- poll_dataframe %>%
  select(
    "RespondentID",
    "Do.you.celebrate.Thanksgiving.",
    "What.is.your.gender.",
    "Age",
    "How.would.you.describe.where.you.live.",
    "US.Region"
  )
# Preview the first ten rows of the subset.
head(poll_dataframe_subset, n = 10)
## RespondentID Do.you.celebrate.Thanksgiving. What.is.your.gender. Age
## 1 4337954960 Yes Male 18 - 29
## 2 4337951949 Yes Female 18 - 29
## 3 4337935621 Yes Male 18 - 29
## 4 4337933040 Yes Male 30 - 44
## 5 4337931983 Yes Male 30 - 44
## 6 4337929779 Yes Male 18 - 29
## 7 4337924420 Yes Male 18 - 29
## 8 4337916002 Yes Male 18 - 29
## 9 4337914977 Yes Male 30 - 44
## 10 4337899817 Yes Male 30 - 44
## How.would.you.describe.where.you.live. US.Region
## 1 Suburban Middle Atlantic
## 2 Rural East South Central
## 3 Suburban Mountain
## 4 Urban Pacific
## 5 Urban Pacific
## 6 Urban Pacific
## 7 Rural East North Central
## 8 Rural Mountain
## 9 Urban Middle Atlantic
## 10 Suburban East South Central
# Replace the long survey-question column names with short identifiers.
poll_dataframe_data <- poll_dataframe_subset %>%
  rename(
    ID = "RespondentID",
    celebrate = "Do.you.celebrate.Thanksgiving.",
    gender = "What.is.your.gender.",
    age_range = "Age",
    living_region = "How.would.you.describe.where.you.live.",
    us_region = "US.Region"
  )
# Preview the first five rows with the new names.
head(poll_dataframe_data, n = 5)
## ID celebrate gender age_range living_region us_region
## 1 4337954960 Yes Male 18 - 29 Suburban Middle Atlantic
## 2 4337951949 Yes Female 18 - 29 Rural East South Central
## 3 4337935621 Yes Male 18 - 29 Suburban Mountain
## 4 4337933040 Yes Male 30 - 44 Urban Pacific
## 5 4337931983 Yes Male 30 - 44 Urban Pacific
# List the distinct living-region answers (blank answers appear as "").
unique(poll_dataframe_data[["living_region"]])
## [1] "Suburban" "Rural" "Urban" ""
# Number of respondents per living region (blank answers included).
poll_dataframe_data %>%
  group_by(living_region) %>%
  summarise(count = n(), .groups = "drop")
## # A tibble: 4 x 2
## living_region count
## * <chr> <int>
## 1 "" 110
## 2 "Rural" 216
## 3 "Suburban" 496
## 4 "Urban" 236
# Number of respondents per US region (blank answers included).
poll_dataframe_data %>%
  group_by(us_region) %>%
  summarise(count = n(), .groups = "drop")
## # A tibble: 10 x 2
## us_region count
## * <chr> <int>
## 1 "" 59
## 2 "East North Central" 150
## 3 "East South Central" 60
## 4 "Middle Atlantic" 159
## 5 "Mountain" 47
## 6 "New England" 58
## 7 "Pacific" 146
## 8 "South Atlantic" 214
## 9 "West North Central" 74
## 10 "West South Central" 91
# Drop respondents whose US region was left blank. The predicate uses the
# data-masked column `us_region`, so it is evaluated against the data frame
# flowing through the pipe rather than against an outer object.
poll_data_cleaned <- poll_dataframe_data %>%
  filter(us_region != "")

# Number of respondents per US region after cleaning.
poll_data_cleaned %>%
  group_by(us_region) %>%
  summarize(count = n())
## # A tibble: 9 x 2
## us_region count
## * <chr> <int>
## 1 East North Central 150
## 2 East South Central 60
## 3 Middle Atlantic 159
## 4 Mountain 47
## 5 New England 58
## 6 Pacific 146
## 7 South Atlantic 214
## 8 West North Central 74
## 9 West South Central 91
# Yes/No celebration counts within each US region.
# poll_data_cleaned (rows with a non-blank us_region) was already built from
# poll_dataframe_data earlier in this script, so it is reused here instead of
# repeating the identical filter.
poll_data_cleaned %>%
  group_by(us_region, celebrate) %>%
  summarize(count = n())
## `summarise()` has grouped output by 'us_region'. You can override using the `.groups` argument.
## # A tibble: 18 x 3
## # Groups: us_region [9]
## us_region celebrate count
## <chr> <chr> <int>
## 1 East North Central No 5
## 2 East North Central Yes 145
## 3 East South Central No 4
## 4 East South Central Yes 56
## 5 Middle Atlantic No 14
## 6 Middle Atlantic Yes 145
## 7 Mountain No 6
## 8 Mountain Yes 41
## 9 New England No 3
## 10 New England Yes 55
## 11 Pacific No 16
## 12 Pacific Yes 130
## 13 South Atlantic No 11
## 14 South Atlantic Yes 203
## 15 West North Central No 3
## 16 West North Central Yes 71
## 17 West South Central No 6
## 18 West South Central Yes 85
# Stacked bar chart of respondents per living region, filled by age range.
# poll_dataframe_data still contains respondents with a blank living_region
# (""), so those appear as an unlabelled bar.
# Columns are referenced by bare name inside aes() (not `df$col`), and the
# legend title now matches the variable actually mapped to `fill`.
ggplot(poll_dataframe_data, mapping = aes(x = living_region)) +
  geom_bar(aes(fill = age_range)) +
  # Print the total respondent count just above each bar.
  geom_text(
    stat = "count",
    aes(label = after_stat(count), y = after_stat(count)),
    vjust = -0.2
  ) +
  xlab("Living Region") +
  labs(fill = "Age range") +
  ggtitle("Poll data")
# Stacked bar chart of respondents per US region, filled by whether they
# celebrate Thanksgiving. Uses the cleaned data (blank regions removed).
# Columns are referenced by bare name inside aes() (not `df$col`), and the
# legend title now matches the variable actually mapped to `fill`.
ggplot(poll_data_cleaned, mapping = aes(x = us_region)) +
  geom_bar(aes(fill = celebrate)) +
  # Print the total respondent count just above each bar.
  geom_text(
    stat = "count",
    aes(label = after_stat(count), y = after_stat(count)),
    vjust = -0.2
  ) +
  xlab("US Region") +
  labs(fill = "Celebrates Thanksgiving") +
  # Rotate the region names so they do not overlap.
  theme(axis.text.x = element_text(angle = 90)) +
  ggtitle("Poll data")
The main data set has 1,058 data points and 65 features. I created a subset of six columns and cleaned it by removing respondents with a blank US region, which leaves 999 respondents. Of these, 931 said yes (they celebrate Thanksgiving) and 68 said no. From the plot, Thanksgiving celebration is popular in the suburban living region. In addition, by number of respondents who celebrate, Thanksgiving is least popular in the “Mountain” region (41) and most popular in the “South Atlantic” region (203).