In this lesson students will learn to create survey weights from Census population data, build a survey design object with the survey and srvyr packages, estimate weighted counts and proportions, compare groups with design-adjusted chi-squared tests, and fit a survey-weighted logistic regression model.
We are using data from the fivethirtyeight article “Here’s What Your Part of America Eats on Thanksgiving”: https://fivethirtyeight.com/features/heres-what-your-part-of-america-eats-on-thanksgiving/
The original data set can be found here: https://raw.githubusercontent.com/fivethirtyeight/data/master/thanksgiving-2015/thanksgiving-2015-poll-data.csv
The variables for the survey are described here: https://github.com/fivethirtyeight/data/tree/master/thanksgiving-2015
I cleaned up the data, selected a subset of variables, and created binary variables. My dataset can be found here: https://raw.githubusercontent.com/kitadasmalley/FA2020_DataViz/main/data/useThanks.csv
### fivethirtyeight
### THANKSGIVING
useThanks<-read.csv("https://raw.githubusercontent.com/kitadasmalley/FA2020_DataViz/main/data/useThanks.csv",
header=TRUE)
names(useThanks)
## [1] "id" "celebrate" "main"
## [4] "cooked" "stuffing" "cranberry"
## [7] "gravy" "brussel.sprouts" "carrots"
## [10] "cauliflower" "corn" "cornbread"
## [13] "fruit.salad" "green.beans" "mac.n.cheese"
## [16] "mashed.potatoes" "rolls" "squash"
## [19] "salad" "yams.sweet.potato" "apple.pie"
## [22] "buttermilk.pie" "cherry.pie" "chocolate.pie"
## [25] "coconut.pie" "keylime.pie" "peach.pie"
## [28] "pecan.pie" "pumpkin.pie" "sweet.potato.pie"
## [31] "apple.cobbler" "blondies" "brownies"
## [34] "carrot.cake" "cheesecake" "cookies"
## [37] "fudge" "ice.cream" "peach.cobbler"
## [40] "pray" "friendsgiving" "black.friday"
## [43] "area.live" "age" "gender"
## [46] "income" "DivName" "celebrate01"
## [49] "gravy01" "friendsgiving01" "black.friday01"
## [52] "brussel.sprouts01" "carrots01" "cauliflower01"
## [55] "corn01" "cornbread01" "fruit.salad01"
## [58] "green.beans01" "mac.n.cheese01" "mashed.potatoes01"
## [61] "rolls01" "squash01" "salad01"
## [64] "yams.sweet.potato01" "apple.pie01" "buttermilk.pie01"
## [67] "cherry.pie01" "chocolate.pie01" "coconut.pie01"
## [70] "keylime.pie01" "peach.pie01" "pecan.pie01"
## [73] "pumpkin.pie01" "sweet.potato.pie01" "apple.cobbler01"
## [76] "blondies01" "brownies01" "carrot.cake01"
## [79] "cheesecake01" "cookies01" "fudge01"
## [82] "ice.cream01" "peach.cobbler01"
How many people in the sample are from each division?
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
### SAMPLE DIVISION
thisSamp<-useThanks%>%
group_by(DivName)%>%
summarise(samp=n())
thisSamp
## # A tibble: 10 × 2
## DivName samp
## <chr> <int>
## 1 "" 59
## 2 "East North Central" 150
## 3 "East South Central" 60
## 4 "Middle Atlantic" 159
## 5 "Mountain" 47
## 6 "New England" 58
## 7 "Pacific" 146
## 8 "South Atlantic" 214
## 9 "West North Central" 74
## 10 "West South Central" 91
We can use Census data to help re-balance the data.
### CENSUS DATA
popDiv<-data.frame(DivName=c("East North Central",
"East South Central",
"Middle Atlantic",
"Mountain",
"New England",
"Pacific",
"South Atlantic",
"West North Central",
"West South Central"),
pop=c(46798649,
18931477,
41601787,
23811346,
14757573,
52833604,
63991523,
21179519,
39500457))
Create weights:
### WEIGHTS
divJoin<-thisSamp%>%
left_join(popDiv)%>%
mutate(weight=pop/samp)%>%
mutate(FPC=sum(popDiv$pop))
## Joining with `by = join_by(DivName)`
head(divJoin)
## # A tibble: 6 × 5
## DivName samp pop weight FPC
## <chr> <int> <dbl> <dbl> <dbl>
## 1 "" 59 NA NA 323405935
## 2 "East North Central" 150 46798649 311991. 323405935
## 3 "East South Central" 60 18931477 315525. 323405935
## 4 "Middle Atlantic" 159 41601787 261646. 323405935
## 5 "Mountain" 47 23811346 506624. 323405935
## 6 "New England" 58 14757573 254441. 323405935
Join the weights onto the data:
#### JOIN ON TO THE DATA
joinThanks<-useThanks%>%
left_join(divJoin)%>%
filter(!is.na(pop))
## Joining with `by = join_by(DivName)`
We will pretend that these data come from a simple random sample:
library(survey)
library(srvyr)
### MAKE SURVEY OBJECT
### SURVEY DESIGN OBJECT
thanks_des<- joinThanks %>%
as_survey_design(
weights = weight, #WEIGHTS
fpc = FPC # WITHOUT REPLACEMENT
)
thanks_des
## Independent Sampling design
## Called via srvyr
## Sampling variables:
## - ids: `1`
## - fpc: FPC
## - weights: weight
## Data variables:
## - id (dbl), celebrate (chr), main (chr), cooked (chr), stuffing (chr),
## cranberry (chr), gravy (chr), brussel.sprouts (chr), carrots (chr),
## cauliflower (chr), corn (chr), cornbread (chr), fruit.salad (chr),
## green.beans (chr), mac.n.cheese (chr), mashed.potatoes (chr), rolls (chr),
## squash (chr), salad (chr), yams.sweet.potato (chr), apple.pie (chr),
## buttermilk.pie (chr), cherry.pie (chr), chocolate.pie (chr), coconut.pie
## (chr), keylime.pie (chr), peach.pie (chr), pecan.pie (chr), pumpkin.pie
## (chr), sweet.potato.pie (chr), apple.cobbler (chr), blondies (chr),
## brownies (chr), carrot.cake (chr), cheesecake (chr), cookies (chr), fudge
## (chr), ice.cream (chr), peach.cobbler (chr), pray (chr), friendsgiving
## (chr), black.friday (chr), area.live (chr), age (chr), gender (chr), income
## (chr), DivName (chr), celebrate01 (int), gravy01 (int), friendsgiving01
## (int), black.friday01 (int), brussel.sprouts01 (int), carrots01 (int),
## cauliflower01 (int), corn01 (int), cornbread01 (int), fruit.salad01 (int),
## green.beans01 (int), mac.n.cheese01 (int), mashed.potatoes01 (int), rolls01
## (int), squash01 (int), salad01 (int), yams.sweet.potato01 (int),
## apple.pie01 (int), buttermilk.pie01 (int), cherry.pie01 (int),
## chocolate.pie01 (int), coconut.pie01 (int), keylime.pie01 (int),
## peach.pie01 (int), pecan.pie01 (int), pumpkin.pie01 (int),
## sweet.potato.pie01 (int), apple.cobbler01 (int), blondies01 (int),
## brownies01 (int), carrot.cake01 (int), cheesecake01 (int), cookies01 (int),
## fudge01 (int), ice.cream01 (int), peach.cobbler01 (int), samp (int), pop
## (dbl), weight (dbl), FPC (dbl)
Let’s check that the weights sum to the population size of each division:
### ESTIMATE SIZE
thanks_des %>%
survey_count(DivName, name = "N")
## # A tibble: 9 × 3
## DivName N N_se
## <chr> <dbl> <dbl>
## 1 East North Central 46798649 3524322.
## 2 East South Central 18931477 2370695.
## 3 Middle Atlantic 41601787 3026824.
## 4 Mountain 23811346 3392248.
## 5 New England 14757573 1881611.
## 6 Pacific 52833604 4042429.
## 7 South Atlantic 63991523 3879578.
## 8 West North Central 21179519 2370310.
## 9 West South Central 39500457 3949648.
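The point estimates from survey_count() are just the sums of the weights within each division (the design matters for the standard errors), so we can reproduce the totals with ordinary dplyr; a quick sketch:
### CHECK: WEIGHTS SUM TO THE CENSUS POPULATIONS
joinThanks %>%
  group_by(DivName) %>%
  summarise(N_check = sum(weight))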
Now look at the estimated national proportions by division:
### PROPORTIONS BY DIVISION
thanks_des %>%
group_by(DivName) %>%
summarize(p = survey_prop())
## When `proportion` is unspecified, `survey_prop()` now defaults to `proportion = TRUE`.
## ℹ This should improve confidence interval coverage.
## This message is displayed once per session.
## # A tibble: 9 × 3
## DivName p p_se
## <chr> <dbl> <dbl>
## 1 East North Central 0.145 0.0110
## 2 East South Central 0.0585 0.00735
## 3 Middle Atlantic 0.129 0.00973
## 4 Mountain 0.0736 0.0102
## 5 New England 0.0456 0.00590
## 6 Pacific 0.163 0.0123
## 7 South Atlantic 0.198 0.0123
## 8 West North Central 0.0655 0.00741
## 9 West South Central 0.122 0.0118
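Because the weights were built from the Census populations, these estimated proportions should line up with each division’s share of the total Census population; a quick check (sketch):
### CENSUS SHARES FOR COMPARISON
popDiv %>%
  mutate(p_census = pop / sum(pop))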
### SAMPLE PROPORTIONS
thanks_des %>%
group_by(cranberry) %>%
summarize(p = survey_prop())
## # A tibble: 5 × 3
## cranberry p p_se
## <chr> <dbl> <dbl>
## 1 "" 0.0706 0.00844
## 2 "Canned" 0.481 0.0161
## 3 "Homemade" 0.284 0.0146
## 4 "None" 0.140 0.0111
## 5 "Other (please specify)" 0.0244 0.00488
Are there differences in this distribution across divisions?
### COMPARING CANNED VS HOMEMADE
divCran<-thanks_des %>%
filter(cranberry%in%c("Canned", "Homemade"))%>%
group_by(DivName, cranberry) %>%
summarize(p = survey_prop())
divCran
## # A tibble: 18 × 4
## # Groups: DivName [9]
## DivName cranberry p p_se
## <chr> <chr> <dbl> <dbl>
## 1 East North Central Canned 0.667 0.0442
## 2 East North Central Homemade 0.333 0.0442
## 3 East South Central Canned 0.761 0.0629
## 4 East South Central Homemade 0.239 0.0629
## 5 Middle Atlantic Canned 0.571 0.0441
## 6 Middle Atlantic Homemade 0.429 0.0441
## 7 Mountain Canned 0.514 0.0822
## 8 Mountain Homemade 0.486 0.0822
## 9 New England Canned 0.580 0.0698
## 10 New England Homemade 0.420 0.0698
## 11 Pacific Canned 0.505 0.0488
## 12 Pacific Homemade 0.495 0.0488
## 13 South Atlantic Canned 0.754 0.0333
## 14 South Atlantic Homemade 0.246 0.0333
## 15 West North Central Canned 0.627 0.0677
## 16 West North Central Homemade 0.373 0.0677
## 17 West South Central Canned 0.629 0.0578
## 18 West South Central Homemade 0.371 0.0578
We can visualize these proportions with a filled bar graph:
## FILLED BAR GRAPH
thanks_des %>%
filter(cranberry%in%c("Canned", "Homemade"))%>%
ggplot(aes(x=DivName, y=weight, fill=cranberry))+
geom_bar(stat = "identity", position="fill")+
coord_flip()
We can improve this by ordering the divisions by their estimated proportion of canned cranberry sauce:
### REORDER
divCranOrd<-divCran%>%
filter(cranberry=="Canned")%>%
arrange(desc(p))
joinThanks$DivName<-factor(joinThanks$DivName, levels=divCranOrd$DivName)
thanks_des<- joinThanks %>%
as_survey_design(
weights = weight, #WEIGHTS
fpc = FPC # WITHOUT REPLACEMENT
)
thanks_des %>%
filter(cranberry%in%c("Canned", "Homemade"))%>%
ggplot(aes(x=DivName, y=weight, fill=cranberry))+
geom_bar(stat = "identity", position="fill")+
coord_flip()
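Another option is to plot the survey-estimated proportions in divCran directly, which also lets us show the uncertainty. This is a sketch; the error bars use the rough approximation of plus or minus two standard errors for a 95% interval:
### ESTIMATED PROPORTIONS WITH ERROR BARS
divCran %>%
  filter(cranberry == "Canned") %>%
  ggplot(aes(x = fct_reorder(DivName, p), y = p)) +
  geom_col(fill = "firebrick") +
  geom_errorbar(aes(ymin = p - 2 * p_se, ymax = p + 2 * p_se), width = 0.2) +
  coord_flip() +
  labs(x = "Division", y = "Estimated proportion choosing canned")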
Now we can test whether the canned-versus-homemade distribution differs across divisions using a design-adjusted (Rao-Scott) chi-squared test:
### CHISQR TEST
chiCran <- thanks_des %>%
filter(cranberry%in%c("Canned", "Homemade"))%>%
svychisq(
formula = ~ DivName + cranberry,
design = .,
statistic = "Chisq",
na.rm = TRUE
)
chiCran
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: NextMethod()
## X-squared = 27.412, df = 8, p-value = 0.0009414
The small p-value (about 0.0009) suggests that the canned-versus-homemade split does differ across divisions. Raw table of observed proportions with confidence intervals:
## RAW TABLE
chiCranObs<- thanks_des %>%
filter(cranberry%in%c("Canned", "Homemade"))%>%
group_by(DivName, cranberry) %>%
summarize(Observed = round(survey_mean(vartype = "ci"), 3))
chiCranObs
## # A tibble: 18 × 5
## # Groups: DivName [9]
## DivName cranberry Observed Observed_low Observed_upp
## <fct> <chr> <dbl> <dbl> <dbl>
## 1 East South Central Canned 0.761 0.637 0.884
## 2 East South Central Homemade 0.239 0.116 0.363
## 3 South Atlantic Canned 0.754 0.689 0.82
## 4 South Atlantic Homemade 0.246 0.18 0.311
## 5 East North Central Canned 0.667 0.58 0.753
## 6 East North Central Homemade 0.333 0.247 0.42
## 7 West South Central Canned 0.629 0.515 0.742
## 8 West South Central Homemade 0.371 0.258 0.485
## 9 West North Central Canned 0.627 0.494 0.76
## 10 West North Central Homemade 0.373 0.24 0.506
## 11 New England Canned 0.58 0.443 0.717
## 12 New England Homemade 0.42 0.283 0.557
## 13 Middle Atlantic Canned 0.571 0.485 0.658
## 14 Middle Atlantic Homemade 0.429 0.342 0.515
## 15 Mountain Canned 0.514 0.352 0.675
## 16 Mountain Homemade 0.486 0.325 0.648
## 17 Pacific Canned 0.505 0.409 0.601
## 18 Pacific Homemade 0.495 0.399 0.591
Formatted Table:
### FORMATTED TABLE
#install.packages("gt")
library(gt)
chiCranTab<- chiCranObs %>%
mutate(prop = paste0(
Observed, " (", Observed_low, ", ",
Observed_upp, ")"
)) %>%
select(DivName, cranberry, prop) %>%
pivot_wider(
names_from = DivName,
values_from = prop
) %>%
gt(rowname_col = "cranberry") %>%
tab_stubhead(label = "Cranberry")
chiCranTab
Cranberry | East South Central | South Atlantic | East North Central | West South Central | West North Central | New England | Middle Atlantic | Mountain | Pacific |
---|---|---|---|---|---|---|---|---|---|
Canned | 0.761 (0.637, 0.884) | 0.754 (0.689, 0.82) | 0.667 (0.58, 0.753) | 0.629 (0.515, 0.742) | 0.627 (0.494, 0.76) | 0.58 (0.443, 0.717) | 0.571 (0.485, 0.658) | 0.514 (0.352, 0.675) | 0.505 (0.409, 0.601) |
Homemade | 0.239 (0.116, 0.363) | 0.246 (0.18, 0.311) | 0.333 (0.247, 0.42) | 0.371 (0.258, 0.485) | 0.373 (0.24, 0.506) | 0.42 (0.283, 0.557) | 0.429 (0.342, 0.515) | 0.486 (0.325, 0.648) | 0.495 (0.399, 0.591) |
These data come from the fivethirtyeight article “When Does Praying In Public Make Others Uncomfortable?” https://fivethirtyeight.com/features/when-does-praying-in-public-make-others-uncomfortable/
This survey was fielded between July 29 and August 1, 2016.
The original data can be accessed here: https://www.kaggle.com/datasets/tunguz/religion-survey/data.
### DATA ON GITHUB
rel<-read.csv("https://raw.githubusercontent.com/kitadasmalley/Teaching/refs/heads/main/DATA429_599/CODE/religion-survey-results.csv",
header=TRUE)
head(names(rel))
## [1] "What.is.your.present.religion..if.any."
## [2] "X"
## [3] "Do.you.consider.yourself.to.be.an.evangelical."
## [4] "Do.you.attend.religious.services"
## [5] "How.often.do.you..Pray.in.public.with.visible.motions..sign.of.the.cross..bowing..prostration..shokeling..etc."
## [6] "How.often.do.you..Pray.in.public.using.some.kind.of.physical.object..rosary..tefillin..etc."
tail(names(rel))
## [1] "How.comfortable.would.you.be.seeing.someone.who.practices.a.different.religion.from.you..Wear.religious.clothing.jewelry..hijab..kippah..wig..kara..turban..cross..etc."
## [2] "How.comfortable.would.you.be.seeing.someone.who.practices.a.different.religion.from.you..Participate.in.a.public.religious.event.on.the.streets..Corpus.Christi.procession..inauguration.of.Torah.scrolls..etc."
## [3] "What.is.your.age."
## [4] "What.is.your.gender."
## [5] "How.much.total.combined.money.did.all.members.of.your.HOUSEHOLD.earn.last.year."
## [6] "US.Region"
These are very long variable names. Let’s make these shorter and easier to work with.
shortColNames<-c("Religion", "RelOther", "Evangelical", "AttendServices",
### SET 1: HOW OFTEN (cols 5 thru 14)
"PrayMotions_Often", "PrayObject_Often", "PrayMeals_Often", "PrayFor_Often",
"PrayWith_Often", "RelConvo_Often", "RelAsk_Often", "Dietary_Often",
"WearRel_Often","PublicRel_Often",
### SET 2: HOW COMFORTABLE DO YOU FEEL (cols 15 thru 24)
"PrayMotions_Comfort", "PrayObject_Comfort", "PrayMeals_Comfort", "PrayFor_Comfort",
"PrayWith_Comfort", "RelConvo_Comfort", "RelAsk_Comfort", "Dietary_Comfort",
"WearRel_Comfort","PublicRel_Comfort",
### SET 3: HOW COMFORTABLE DO YOU THINK SOMEONE OUTSIDE (cols 25 thru 34)
"PrayMotions_Outside", "PrayObject_Outside", "PrayMeals_Outside", "PrayFor_Outside",
"PrayWith_Outside", "RelConvo_Outside", "RelAsk_Outside", "Dietary_Outside",
"WearRel_Outside","PublicRel_Outside",
### SET 4: HOW COMFORTABLE WOULD YOU BE SEEING SOMEONE WHO PRACTICES A DIFFERENT REL (cols 35 thru 44)
"PrayMotions_Different", "PrayObject_Different", "PrayMeals_Different", "PrayFor_Different",
"PrayWith_Different", "RelConvo_Different", "RelAsk_Different", "Dietary_Different",
"WearRel_Different","PublicRel_Different",
### DEMOS
"Age", "Gender", "Income", "US.Region"
)
#### NEW NAMES
colnames(rel)<-shortColNames
str(rel)
## 'data.frame': 1040 obs. of 48 variables:
## $ Religion : chr "Response" "None of these" "Atheist" "Protestant" ...
## $ RelOther : chr "None of these" "Wesleyan Christian" "" "" ...
## $ Evangelical : chr "Response" "No" "" "No" ...
## $ AttendServices : chr "Response" "Weekly or more" "" "Weekly or more" ...
## $ PrayMotions_Often : chr "Response" "A few times per week" "" "A few times per month" ...
## $ PrayObject_Often : chr "Response" "Not applicable to my religious beliefs" "" "Never" ...
## $ PrayMeals_Often : chr "Response" "A few times per month" "" "Once a year or less" ...
## $ PrayFor_Often : chr "Response" "A few times per week" "" "A few times per month" ...
## $ PrayWith_Often : chr "Response" "Never" "" "A few times per month" ...
## $ RelConvo_Often : chr "Response" "A few times per month" "" "A few times per month" ...
## $ RelAsk_Often : chr "Response" "A few times per month" "" "Once a month or less" ...
## $ Dietary_Often : chr "Response" "Once a year or less" "" "Never" ...
## $ WearRel_Often : chr "Response" "A few times per week" "" "Once a month or less" ...
## $ PublicRel_Often : chr "Response" "Once a year or less" "" "Never" ...
## $ PrayMotions_Comfort : chr "Response" "Not so comfortable" "" "Extremely comfortable" ...
## $ PrayObject_Comfort : chr "Response" "I don't do this" "" "I don't do this" ...
## $ PrayMeals_Comfort : chr "Response" "Not at all comfortable" "" "Extremely comfortable" ...
## $ PrayFor_Comfort : chr "Response" "Extremely comfortable" "" "Extremely comfortable" ...
## $ PrayWith_Comfort : chr "Response" "I don't do this" "" "Extremely comfortable" ...
## $ RelConvo_Comfort : chr "Response" "Very comfortable" "" "Extremely comfortable" ...
## $ RelAsk_Comfort : chr "Response" "Very comfortable" "" "Somewhat comfortably" ...
## $ Dietary_Comfort : chr "Response" "Very comfortable" "" "I don't do this" ...
## $ WearRel_Comfort : chr "Response" "Extremely comfortable" "" "Extremely comfortable" ...
## $ PublicRel_Comfort : chr "Response" "Very comfortable" "" "Extremely comfortable" ...
## $ PrayMotions_Outside : chr "Response" "I don't do this" "" "Somewhat comfortably" ...
## $ PrayObject_Outside : chr "Response" "I don't do this" "" "I don't do this" ...
## $ PrayMeals_Outside : chr "Response" "Somewhat comfortably" "" "Somewhat comfortably" ...
## $ PrayFor_Outside : chr "Response" "Somewhat comfortably" "" "Somewhat comfortably" ...
## $ PrayWith_Outside : chr "Response" "Somewhat comfortably" "" "Somewhat comfortably" ...
## $ RelConvo_Outside : chr "Response" "Somewhat comfortably" "" "Somewhat comfortably" ...
## $ RelAsk_Outside : chr "Response" "Somewhat comfortably" "" "Not so comfortable" ...
## $ Dietary_Outside : chr "Response" "Somewhat comfortably" "" "Very comfortable" ...
## $ WearRel_Outside : chr "Response" "Somewhat comfortably" "" "Very comfortable" ...
## $ PublicRel_Outside : chr "Response" "I don't do this" "" "Somewhat comfortably" ...
## $ PrayMotions_Different: chr "Response" "Extremely comfortable" "Extremely comfortable" "Extremely comfortable" ...
## $ PrayObject_Different : chr "Response" "Extremely comfortable" "Extremely comfortable" "Extremely comfortable" ...
## $ PrayMeals_Different : chr "Response" "Extremely comfortable" "Very comfortable" "Extremely comfortable" ...
## $ PrayFor_Different : chr "Response" "Very comfortable" "Extremely comfortable" "Extremely comfortable" ...
## $ PrayWith_Different : chr "Response" "Extremely comfortable" "Very comfortable" "Extremely comfortable" ...
## $ RelConvo_Different : chr "Response" "Extremely comfortable" "Somewhat comfortably" "Extremely comfortable" ...
## $ RelAsk_Different : chr "Response" "Extremely comfortable" "Very comfortable" "Extremely comfortable" ...
## $ Dietary_Different : chr "Response" "Extremely comfortable" "Extremely comfortable" "Extremely comfortable" ...
## $ WearRel_Different : chr "Response" "Extremely comfortable" "Extremely comfortable" "Extremely comfortable" ...
## $ PublicRel_Different : chr "Response" "Extremely comfortable" "Extremely comfortable" "Extremely comfortable" ...
## $ Age : chr "Response" "18 - 29" "18 - 29" "18 - 29" ...
## $ Gender : chr "Response" "Male" "Male" "Male" ...
## $ Income : chr "Response" "$0 to $9,999" "$10,000 to $24,999" "$25,000 to $49,999" ...
## $ US.Region : chr "Response" "East North Central" "Middle Atlantic" "East North Central" ...
How many people in the sample are from each division (which in this dataset is called US.Region)?
library(tidyverse)
### SAMPLE DIVISION (IN THIS DATASET USE US.Region)
thisSamp2<-rel%>%
group_by(US.Region)%>%
summarise(samp=n())
thisSamp2
## # A tibble: 11 × 2
## US.Region samp
## <chr> <int>
## 1 "" 13
## 2 "East North Central" 180
## 3 "East South Central" 54
## 4 "Middle Atlantic" 135
## 5 "Mountain" 74
## 6 "New England" 67
## 7 "Pacific" 150
## 8 "Response" 1
## 9 "South Atlantic" 196
## 10 "West North Central" 73
## 11 "West South Central" 97
Now we can join the sample counts to the Census data and calculate weights again. Starting the join from popDiv keeps only the nine named divisions, which drops the blank region and the stray “Response” header row.
### CALCULATE WEIGHTS
divJoin2<-popDiv%>%
rename(US.Region=DivName)%>%
left_join(thisSamp2)%>%
mutate(weight=pop/samp)%>%
mutate(FPC=sum(popDiv$pop))
## Joining with `by = join_by(US.Region)`
divJoin2
## US.Region pop samp weight FPC
## 1 East North Central 46798649 180 259992.5 323405935
## 2 East South Central 18931477 54 350582.9 323405935
## 3 Middle Atlantic 41601787 135 308161.4 323405935
## 4 Mountain 23811346 74 321774.9 323405935
## 5 New England 14757573 67 220262.3 323405935
## 6 Pacific 52833604 150 352224.0 323405935
## 7 South Atlantic 63991523 196 326487.4 323405935
## 8 West North Central 21179519 73 290130.4 323405935
## 9 West South Central 39500457 97 407221.2 323405935
Finally, we join the weights back onto the data:
### JOIN RELIGION DATA
joinRel<-rel%>%
left_join(divJoin2)%>%
filter(!is.na(pop))
## Joining with `by = join_by(US.Region)`
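As a quick check (sketch), the rows that survive the filter should be exactly the respondents living in the nine named divisions, 1,026 people based on the sample counts above:
### HOW MANY RESPONDENTS REMAIN?
nrow(joinRel)        # respondents in the nine named divisions
sum(divJoin2$samp)   # should match: 1,026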
We will pretend that these data come from a simple random sample:
library(survey)
library(srvyr)
### MAKE SURVEY OBJECT
### SURVEY DESIGN OBJECT
rel_des<- joinRel %>%
as_survey_design(
weights = weight, #WEIGHTS
fpc = FPC # WITHOUT REPLACEMENT
)
rel_des
## Independent Sampling design
## Called via srvyr
## Sampling variables:
## - ids: `1`
## - fpc: FPC
## - weights: weight
## Data variables:
## - Religion (chr), RelOther (chr), Evangelical (chr), AttendServices (chr),
## PrayMotions_Often (chr), PrayObject_Often (chr), PrayMeals_Often (chr),
## PrayFor_Often (chr), PrayWith_Often (chr), RelConvo_Often (chr),
## RelAsk_Often (chr), Dietary_Often (chr), WearRel_Often (chr),
## PublicRel_Often (chr), PrayMotions_Comfort (chr), PrayObject_Comfort (chr),
## PrayMeals_Comfort (chr), PrayFor_Comfort (chr), PrayWith_Comfort (chr),
## RelConvo_Comfort (chr), RelAsk_Comfort (chr), Dietary_Comfort (chr),
## WearRel_Comfort (chr), PublicRel_Comfort (chr), PrayMotions_Outside (chr),
## PrayObject_Outside (chr), PrayMeals_Outside (chr), PrayFor_Outside (chr),
## PrayWith_Outside (chr), RelConvo_Outside (chr), RelAsk_Outside (chr),
## Dietary_Outside (chr), WearRel_Outside (chr), PublicRel_Outside (chr),
## PrayMotions_Different (chr), PrayObject_Different (chr),
## PrayMeals_Different (chr), PrayFor_Different (chr), PrayWith_Different
## (chr), RelConvo_Different (chr), RelAsk_Different (chr), Dietary_Different
## (chr), WearRel_Different (chr), PublicRel_Different (chr), Age (chr),
## Gender (chr), Income (chr), US.Region (chr), pop (dbl), samp (int), weight
## (dbl), FPC (dbl)
Estimated population counts by religious identity:
### COUNTS
### RELIGIOUS IDENTITY
rel_des %>%
survey_count(Religion, name = "N")
## # A tibble: 11 × 3
## Religion N N_se
## <chr> <dbl> <dbl>
## 1 Agnostic 38717639. 3317103.
## 2 Atheist 34627270. 3139165.
## 3 Buddhist 4663575. 1251213.
## 4 Hindu 1877408. 770484.
## 5 Jewish 12604604. 1949303.
## 6 Mormon 3879265. 1116394.
## 7 Muslim 3096895. 981165.
## 8 None of these 76088262. 4388539.
## 9 Orthodox Christian 16684537. 2289042.
## 10 Protestant 73375592. 4301248.
## 11 Roman Catholic 57790889. 3883068.
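Since every respondent in the design reports some religion category, these estimated counts should sum (up to rounding) to the Census total of 323,405,935; a quick sketch:
### CHECK: ESTIMATED COUNTS SUM TO THE CENSUS TOTAL
rel_des %>%
  survey_count(Religion, name = "N") %>%
  summarise(total = sum(N))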
Estimated proportions, with standard errors and confidence intervals:
### PROP BY GROUP
rel_des %>%
group_by(Religion)%>%
summarize(p = survey_prop(vartype = c("se", "ci")))
## # A tibble: 11 × 5
## Religion p p_se p_low p_upp
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 Agnostic 0.120 0.0102 0.101 0.141
## 2 Atheist 0.107 0.00971 0.0895 0.128
## 3 Buddhist 0.0144 0.00387 0.00851 0.0243
## 4 Hindu 0.00581 0.00238 0.00259 0.0130
## 5 Jewish 0.0390 0.00603 0.0287 0.0527
## 6 Mormon 0.0120 0.00345 0.00681 0.0210
## 7 Muslim 0.00958 0.00303 0.00513 0.0178
## 8 None of these 0.235 0.0135 0.210 0.263
## 9 Orthodox Christian 0.0516 0.00706 0.0394 0.0674
## 10 Protestant 0.227 0.0132 0.202 0.254
## 11 Roman Catholic 0.179 0.0120 0.156 0.204
Many religions observe dietary laws (such as kosher or halal) and fasting. Let’s explore the level of comfort that survey respondents reported when asked: “How comfortable do you feel when you: Decline some kind of food or beverage for religious reasons (kosher, halal, fasting rules, etc)?” We will use the variable ‘Dietary_Comfort’.
### HOW MANY PEOPLE REPORTED EACH LEVEL OF COMFORT
### USE Dietary_Comfort
### FASTING
rel_des%>%
survey_count(Dietary_Comfort, name="N")
## # A tibble: 7 × 3
## Dietary_Comfort N N_se
## <chr> <dbl> <dbl>
## 1 "" 38938560. 3300885.
## 2 "Extremely comfortable" 26598908. 2816464.
## 3 "I don't do this" 202611464. 5013345.
## 4 "Not at all comfortable" 4330079. 1205909.
## 5 "Not so comfortable" 6347042. 1418744.
## 6 "Somewhat comfortably" 12831700. 2017265.
## 7 "Very comfortable" 31748182. 3074206.
We might want to look at the proportions for each level:
### PROPORTIONS FOR Dietary_Comfort
rel_des %>%
group_by(Dietary_Comfort)%>%
summarize(p = survey_prop(vartype = c("se", "ci")))
## # A tibble: 7 × 5
## Dietary_Comfort p p_se p_low p_upp
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 "" 0.120 0.0102 0.102 0.142
## 2 "Extremely comfortable" 0.0822 0.00870 0.0667 0.101
## 3 "I don't do this" 0.626 0.0153 0.596 0.656
## 4 "Not at all comfortable" 0.0134 0.00373 0.00774 0.0231
## 5 "Not so comfortable" 0.0196 0.00439 0.0126 0.0304
## 6 "Somewhat comfortably" 0.0397 0.00623 0.0291 0.0539
## 7 "Very comfortable" 0.0982 0.00948 0.0811 0.118
It looks like there is a fair amount of non-response, along with many people who do not observe dietary laws or fasting. Given the estimated proportions for these two groups, how many Americans do we estimate observe dietary laws or fasting?
### YOUR TURN
Let’s look only at people who observe dietary laws or fasting.
We are also going to group the top two categories, which convey the most positive sentiment.
### SHOULD CONDITION ON PEOPLE WHO PRACTICE FASTING
rel_des_fast <- rel_des %>%
mutate(DietaryComfort = case_when(
Dietary_Comfort=="" ~ NA,
Dietary_Comfort=="I don't do this" ~NA,
TRUE ~ Dietary_Comfort %in% c("Extremely comfortable", "Very comfortable") ## TRUE AS THE LAST CONDITION ACTS AS THE "ELSE" (CATCH-ALL)
))
rel_des_fast%>%
survey_count(DietaryComfort, name="N")
## # A tibble: 3 × 3
## DietaryComfort N N_se
## <lgl> <dbl> <dbl>
## 1 FALSE 23508821. 2686160.
## 2 TRUE 58347090. 3966796.
## 3 NA 241550024. 4542639.
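Before breaking this down by religion, we can estimate the overall share who feel comfortable among those who observe these practices; a quick sketch using the same srvyr verbs as above:
### OVERALL COMFORT AMONG THOSE WHO PRACTICE
rel_des_fast %>%
  filter(!is.na(DietaryComfort)) %>%
  group_by(DietaryComfort) %>%
  summarize(p = survey_prop(vartype = "ci"))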
Suppose that we want to look at the relationship between religion and comfort with dietary laws and/or fasting.
### Observations
fastObs<- rel_des_fast %>%
filter(is.na(DietaryComfort)==FALSE)%>%
group_by(Religion, DietaryComfort) %>%
summarize(Observed = round(survey_mean(vartype = "ci"), 3))
fastObs
## # A tibble: 19 × 5
## # Groups: Religion [10]
## Religion DietaryComfort Observed Observed_low Observed_upp
## <chr> <lgl> <dbl> <dbl> <dbl>
## 1 Agnostic TRUE 1 1 1
## 2 Buddhist FALSE 0.258 -0.182 0.697
## 3 Buddhist TRUE 0.742 0.303 1.18
## 4 Hindu FALSE 0.436 -0.008 0.879
## 5 Hindu TRUE 0.564 0.121 1.01
## 6 Jewish FALSE 0.079 -0.028 0.186
## 7 Jewish TRUE 0.921 0.814 1.03
## 8 Mormon FALSE 0.302 0.015 0.589
## 9 Mormon TRUE 0.698 0.411 0.985
## 10 Muslim FALSE 0.253 -0.052 0.557
## 11 Muslim TRUE 0.747 0.443 1.05
## 12 None of these FALSE 0.235 0.112 0.358
## 13 None of these TRUE 0.765 0.642 0.888
## 14 Orthodox Christian FALSE 0.497 0.288 0.706
## 15 Orthodox Christian TRUE 0.503 0.294 0.712
## 16 Protestant FALSE 0.326 0.173 0.478
## 17 Protestant TRUE 0.674 0.522 0.827
## 18 Roman Catholic FALSE 0.288 0.198 0.378
## 19 Roman Catholic TRUE 0.712 0.622 0.802
Now we can organize this in a nice table:
#install.packages("gt")
library(gt)
fastTab<- fastObs %>%
mutate(prop = paste0(
Observed, " (", Observed_low, ", ",
Observed_upp, ")"
)) %>%
select(Religion, DietaryComfort, prop) %>%
pivot_wider(
names_from = Religion,
values_from = prop
) %>%
gt(rowname_col = "DietaryComfort") %>%
tab_stubhead(label = "Fasting Comfort")
fastTab
Fasting Comfort | Agnostic | Buddhist | Hindu | Jewish | Mormon | Muslim | None of these | Orthodox Christian | Protestant | Roman Catholic |
---|---|---|---|---|---|---|---|---|---|---|
TRUE | 1 (1, 1) | 0.742 (0.303, 1.182) | 0.564 (0.121, 1.008) | 0.921 (0.814, 1.028) | 0.698 (0.411, 0.985) | 0.747 (0.443, 1.052) | 0.765 (0.642, 0.888) | 0.503 (0.294, 0.712) | 0.674 (0.522, 0.827) | 0.712 (0.622, 0.802) |
FALSE | NA | 0.258 (-0.182, 0.697) | 0.436 (-0.008, 0.879) | 0.079 (-0.028, 0.186) | 0.302 (0.015, 0.589) | 0.253 (-0.052, 0.557) | 0.235 (0.112, 0.358) | 0.497 (0.288, 0.706) | 0.326 (0.173, 0.478) | 0.288 (0.198, 0.378) |
Note the NA for Agnostic respondents: everyone in that group who observes these practices was estimated to be comfortable, so there is no FALSE row for that cell.
Let’s make a graph to go with that:
## FILLED BAR GRAPH
rel_des_fast %>%
filter(is.na(DietaryComfort)==FALSE)%>% ## REMOVE NAs
ggplot(aes(x=Religion, y=weight, fill=DietaryComfort))+
geom_bar(stat = "identity", position="fill")+
coord_flip()
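As with the cranberry example, the plot is easier to read if we order the religions by their estimated comfort level. A sketch reusing the reordering trick from above, this time working from the summarized fastObs tibble:
### REORDER RELIGIONS BY ESTIMATED COMFORT
fastOrd <- fastObs %>%
  filter(DietaryComfort == TRUE) %>%
  arrange(desc(Observed))

fastObs %>%
  mutate(Religion = factor(Religion, levels = fastOrd$Religion)) %>%
  ggplot(aes(x = Religion, y = Observed, fill = DietaryComfort)) +
  geom_col(position = "fill") +
  coord_flip()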
Suppose we want to test whether there are differences in comfort across the religious groups.
### CHI SQUARED
### CHISQ
chiRel <- rel_des_fast %>%
filter(is.na(DietaryComfort)==FALSE)%>% ## REMOVE NAs
filter(Religion!="Agnostic")%>% ## REMOVE AGNOSTIC
svychisq(
formula = ~ Religion + DietaryComfort,
design = .,
statistic = "Chisq",
na.rm = TRUE
)
chiRel
##
## Pearson's X^2: Rao & Scott adjustment
##
## data: NextMethod()
## X-squared = 10.391, df = 8, p-value = 0.2341
Here the p-value (about 0.23) provides little evidence of differences across religious groups. What if we want to model the odds of feeling comfortable using more than one variable? We can do that with a survey-weighted logistic regression!
### LOGISTIC
### LOOK AT RELIGION, AGE, AND GENDER
logistic_fastingComf <- rel_des_fast %>%
filter(Religion!="Agnostic")%>%
svyglm(
design = .,
formula = DietaryComfort ~ Religion+Gender+Age,
family = quasibinomial
)
summary(logistic_fastingComf)
##
## Call:
## svyglm(formula = DietaryComfort ~ Religion + Gender + Age, design = .,
## family = quasibinomial)
##
## Survey design:
## Called via srvyr
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.80989 1.52791 0.530 0.59655
## ReligionHindu -1.29063 1.82402 -0.708 0.47989
## ReligionJewish 0.84686 1.68856 0.502 0.61645
## ReligionMormon -0.31274 1.64275 -0.190 0.84917
## ReligionMuslim -0.02705 1.73504 -0.016 0.98757
## ReligionNone of these -0.07177 1.55258 -0.046 0.96317
## ReligionOrthodox Christian -1.31309 1.59459 -0.823 0.41105
## ReligionProtestant -0.76564 1.58021 -0.485 0.62846
## ReligionRoman Catholic -0.48700 1.53613 -0.317 0.75150
## GenderMale 0.21587 0.30009 0.719 0.47262
## Age30 - 44 -0.17813 0.38645 -0.461 0.64526
## Age45 - 59 1.17562 0.42427 2.771 0.00602 **
## Age60+ 1.06466 0.46953 2.268 0.02424 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for quasibinomial family taken to be 0.9810634)
##
## Number of Fisher Scoring iterations: 5