library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
## here() starts at /Users/caoanjie/Desktop/projects/thrive_coho
library(patchwork)
# Read the tidied survey responses; here() resolves the path relative to the
# project root so the script does not depend on the working directory.
td <- here("data", "tidy_data.csv") %>%
  read_csv()
## Rows: 684 Columns: 90
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (79): community_name, community_zipcode, community_stage, community_form...
## dbl (6): Progress, community_households, individual_household_size, individ...
## lgl (5): community_total_units_extra_1_5, community_total_units_extra_6_10,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Headline sample sizes and age summaries for the whole sample ("na_" =
# national) and for the California subsample ("ca_").
# filter() keeps only rows where community_is_california is TRUE, so
# respondents whose flag is NA are excluded from every California figure.
ca_td <- td %>% filter(community_is_california)

total_n <- nrow(td)
total_ca_n <- nrow(ca_td)

# Count distinct community names, ignoring a missing name: the previous
# distinct() %>% count() treated NA as one more community.
total_community_n <- n_distinct(td$community_name, na.rm = TRUE)

# individual_age is already a numeric column, so no as.numeric() conversion is
# needed; na.rm = TRUE skips respondents who did not report an age.
na_age_m <- round(mean(td$individual_age, na.rm = TRUE), 2)
na_age_sd <- round(sd(td$individual_age, na.rm = TRUE), 2)
ca_age_m <- round(mean(ca_td$individual_age, na.rm = TRUE), 2)
ca_age_sd <- round(sd(ca_td$individual_age, na.rm = TRUE), 2)
684 participants from 173 unique co-housing communities responded to this national survey in XXXX.
Respondents in this survey tended to be older (national average: 66.29 years, SD = 12.22; California average: 67.41 years, SD = 11.85).
# Proportion of all respondents whose race/ethnicity answer is exactly
# "White", rounded to four decimals. Answers containing a comma (several
# categories selected) are relabelled "Multiracial" before counting.
na_white <- td %>%
  mutate(
    individual_race_ethnic = if_else(
      grepl(",", individual_race_ethnic),
      "Multiracial",
      individual_race_ethnic
    )
  ) %>%
  count(individual_race_ethnic) %>%
  filter(individual_race_ethnic == "White") %>%
  pull(n) %>%
  {
    round(. / total_n, 4)
  }
# Proportion of California respondents whose race/ethnicity answer is exactly
# "White", rounded to four decimals. Answers containing a comma (several
# categories selected) are relabelled "Multiracial" before counting.
ca_white <- td %>%
  filter(community_is_california) %>%
  mutate(
    individual_race_ethnic = if_else(
      grepl(",", individual_race_ethnic),
      "Multiracial",
      individual_race_ethnic
    )
  ) %>%
  count(individual_race_ethnic) %>%
  filter(individual_race_ethnic == "White") %>%
  pull(n) %>%
  {
    round(. / total_ca_n, 4)
  }
# Pie chart of self-reported race/ethnicity across all respondents.
# Answers containing a comma (several categories selected) are shown as
# "Multiracial". The pie is a single stacked bar drawn in polar coordinates.
na_race_piechart <- td %>%
  mutate(
    individual_race_ethnic = if_else(
      grepl(",", individual_race_ethnic),
      "Multiracial",
      individual_race_ethnic
    )
  ) %>%
  count(individual_race_ethnic) %>%
  # Share of the full sample; computed for reference, not mapped in the plot.
  mutate(percent = n / total_n) %>%
  ggplot(aes(x = "", y = n, fill = individual_race_ethnic)) +
  geom_col(color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme_void() +
  guides(fill = guide_legend(title = "Self-reported race and ethnicity")) +
  labs(title = "National respondents") +
  theme(axis.text.x = element_blank())
# Pie chart of self-reported race/ethnicity for California respondents only.
# Answers containing a comma (several categories selected) are shown as
# "Multiracial". filter() also drops rows whose California flag is NA.
ca_race_piechart <- td %>%
  filter(community_is_california) %>%
  mutate(
    individual_race_ethnic = case_when(
      grepl(",", individual_race_ethnic) ~ "Multiracial",
      TRUE ~ individual_race_ethnic
    )
  ) %>%
  group_by(individual_race_ethnic) %>%
  count() %>%
  # Share within the California subsample, so divide by the California total
  # (the national total was used here before, understating every share).
  mutate(percent = n / total_ca_n) %>%
  ggplot(aes(x = "", y = n, fill = individual_race_ethnic)) +
  geom_bar(stat = "identity", color = "white") +
  coord_polar("y", start = 0) +
  # theme_void() is a complete theme and replaces any theme() settings added
  # before it, so it goes first and the tweak follows, as in the national chart.
  theme_void() +
  theme(axis.text.x = element_blank()) +
  guides(fill = guide_legend(title = "Self-reported race and ethnicity")) +
  labs(title = "California respondents")

# Display the national and California charts.
na_race_piechart
ca_race_piechart
- The majority of respondents had received graduate degrees
(National: XXXX; California: XXXX)
# Pie chart of the highest degree received, across all respondents.
# One slice per distinct answer (missing answers form their own slice).
na_education <- td %>%
  count(individual_highest_degree) %>%
  ggplot(aes(x = "", y = n, fill = individual_highest_degree)) +
  geom_col(color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme(axis.text.x = element_blank()) +
  theme_void() +
  guides(fill = guide_legend(title = "Highest degree received")) +
  labs(title = "National respondents")
# Pie chart of the highest degree received, California respondents only
# (rows whose California flag is NA are dropped by filter()).
ca_education <- td %>%
  filter(community_is_california) %>%
  count(individual_highest_degree) %>%
  ggplot(aes(x = "", y = n, fill = individual_highest_degree)) +
  geom_col(color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme(axis.text.x = element_blank()) +
  theme_void() +
  guides(fill = guide_legend(title = "Highest degree received")) +
  labs(title = "California respondents")

# patchwork's `+` combines the national and California charts in one figure.
na_education + ca_education
# Mean and standard deviation of household size, nationally ("na_") and for
# California ("ca_"), rounded to two decimals. Each object is the rounded
# summarise() result (a one-row table), not a bare number.
na_household_size_m <- td %>%
  summarise(mean = mean(individual_household_size, na.rm = TRUE)) %>%
  round(2)

na_household_size_sd <- td %>%
  summarise(sd = sd(individual_household_size, na.rm = TRUE)) %>%
  round(2)

ca_household_size_m <- td %>%
  filter(community_is_california) %>%
  summarise(mean = mean(individual_household_size, na.rm = TRUE)) %>%
  round(2)

ca_household_size_sd <- td %>%
  filter(community_is_california) %>%
  summarise(sd = sd(individual_household_size, na.rm = TRUE)) %>%
  round(2)
# Pie chart of how many children younger than 17 respondents reported,
# across all respondents. The count is tallied as a number and then turned
# into a factor so each value gets its own discrete fill colour.
na_children <- td %>%
  mutate(
    individual_children_younger_than_17 =
      as.numeric(individual_children_younger_than_17)
  ) %>%
  count(individual_children_younger_than_17) %>%
  mutate(
    individual_children_younger_than_17 =
      as.factor(individual_children_younger_than_17)
  ) %>%
  ggplot(aes(x = "", y = n, fill = individual_children_younger_than_17)) +
  geom_col(color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme(axis.text.x = element_blank()) +
  theme_void() +
  guides(fill = guide_legend(title = "Number of Children Younger than 17")) +
  labs(title = "National respondents")
# Pie chart of how many children younger than 17 respondents reported,
# California respondents only. The count is tallied as a number and then
# turned into a factor so each value gets its own discrete fill colour.
ca_children <- td %>%
  filter(community_is_california) %>%
  mutate(
    individual_children_younger_than_17 =
      as.numeric(individual_children_younger_than_17)
  ) %>%
  count(individual_children_younger_than_17) %>%
  mutate(
    individual_children_younger_than_17 =
      as.factor(individual_children_younger_than_17)
  ) %>%
  ggplot(aes(x = "", y = n, fill = individual_children_younger_than_17)) +
  geom_col(color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme(axis.text.x = element_blank()) +
  theme_void() +
  guides(fill = guide_legend(title = "Number of Children Younger than 17")) +
  labs(title = "California respondents")

# patchwork's `+` combines the national and California charts in one figure.
na_children + ca_children
# Turn 2021 household income into a factor ordered from the highest bracket
# to the lowest, with "Prefer not to answer" last, so legends follow that
# order. Missing answers stay NA: factor() does not create an NA level by
# default, so NA is not listed among the levels.
td <- td %>%
  mutate(
    individual_household_income_2021 = factor(
      individual_household_income_2021,
      levels = c(
        "$350,000 or more",
        "$250,000 to $349,999",
        "$150,000 to $249,999",
        "$100,000 to $149,999",
        "$75,000 to $99,999",
        "$50,000 to $74,999",
        "$35,000 to $49,999",
        "$20,000 to $34,999",
        "Less than $20,000",
        "Prefer not to answer"
      )
    )
  )
# Pie chart of 2021 household income brackets across all respondents.
na_income <- td %>%
  count(individual_household_income_2021) %>%
  ggplot(aes(x = "", y = n, fill = individual_household_income_2021)) +
  geom_col(color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme(axis.text.x = element_blank()) +
  theme_void() +
  guides(fill = guide_legend(title = "Household incomes in 2021")) +
  labs(title = "National respondents")
# Pie chart of 2021 household income brackets, California respondents only
# (rows whose California flag is NA are dropped by filter()).
ca_income <- td %>%
  filter(community_is_california) %>%
  count(individual_household_income_2021) %>%
  ggplot(aes(x = "", y = n, fill = individual_household_income_2021)) +
  geom_col(color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme(axis.text.x = element_blank()) +
  theme_void() +
  guides(fill = guide_legend(title = "Household incomes in 2021")) +
  labs(title = "California respondents")

# patchwork's `+` combines the national and California charts in one figure.
na_income + ca_income
# Turn total household assets into a factor ordered from the highest bracket
# to the lowest, with "Prefer not to answer" last, so legends follow that
# order. Missing answers stay NA (factor() creates no NA level by default).
#
# The level previously spelled "$100,000 to $249,99" lacked its final digit.
# factor() converts any value not listed in `levels` to NA, so a correctly
# spelled bracket in the data would have been silently dropped. The truncated
# spelling is normalised first (exact match only), so either form of the raw
# data ends up in the corrected "$100,000 to $249,999" level.
# NOTE(review): confirm which spelling the raw survey export actually uses.
td <- td %>%
  mutate(
    individual_household_asset = as.character(individual_household_asset),
    individual_household_asset = if_else(
      individual_household_asset %in% "$100,000 to $249,99",
      "$100,000 to $249,999",
      individual_household_asset
    ),
    individual_household_asset = factor(
      individual_household_asset,
      levels = c(
        "$2,500,000 or more",
        "$1,000,000 to $2,499,999",
        "$500,000 to $999,999",
        "$250,000 to $499,999",
        "$100,000 to $249,999",
        "$50,000 to $99,999",
        "$25,000 to $49,999",
        "$10,000 to $24,999",
        "$0 to $9,999",
        "Less than $0",
        "Prefer not to answer"
      )
    )
  )
# Pie chart of total household asset brackets across all respondents.
na_assets <- td %>%
  count(individual_household_asset) %>%
  ggplot(aes(x = "", y = n, fill = individual_household_asset)) +
  geom_col(color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme(axis.text.x = element_blank()) +
  theme_void() +
  guides(fill = guide_legend(title = "Total household asset")) +
  labs(title = "National respondents")
# Pie chart of total household asset brackets, California respondents only
# (rows whose California flag is NA are dropped by filter()).
ca_assets <- td %>%
  filter(community_is_california) %>%
  count(individual_household_asset) %>%
  ggplot(aes(x = "", y = n, fill = individual_household_asset)) +
  geom_col(color = "white") +
  coord_polar(theta = "y", start = 0) +
  theme(axis.text.x = element_blank()) +
  theme_void() +
  guides(fill = guide_legend(title = "Total household asset")) +
  labs(title = "California respondents")

# patchwork's `+` combines the national and California charts in one figure.
na_assets + ca_assets
Most participants did not respond to this question
# Tally respondents per involved-time answer, keeping missing answers as
# their own row. The result stays grouped by the answer, as count() on a
# grouped table would leave it.
td %>%
  group_by(individual_involved_time) %>%
  summarise(n = n(), .groups = "keep")
## # A tibble: 9 × 2
## # Groups: individual_involved_time [9]
## individual_involved_time n
## <chr> <int>
## 1 1-3 27
## 2 13-15 1
## 3 19-21 1
## 4 22-24 2
## 5 31-33 1
## 6 4-6 8
## 7 7-9 2
## 8 Less than one year 12
## 9 <NA> 630
How long have they been living in the co-housing community?
# Histogram of years lived in the community, one panel per value of the
# California flag. individual_years is stored as text, so it is converted to
# numeric first; entries that are not plain numbers become NA (with a
# coercion warning) and are dropped by the histogram.
td %>%
  mutate(individual_years = as.numeric(individual_years)) %>%
  ggplot(mapping = aes(x = individual_years)) +
  geom_histogram() +
  facet_wrap(vars(community_is_california))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `individual_years = as.numeric(individual_years)`.
## Caused by warning:
## ! NAs introduced by coercion
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 252 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Preview the respondent-level columns (names beginning with "individual").
select(td, starts_with("individual"))
## # A tibble: 684 × 15
## individual_involved_time individual_years individual_ownership
## <chr> <chr> <chr>
## 1 <NA> 6 Own
## 2 <NA> 16 Own
## 3 <NA> 22 Own
## 4 <NA> 21 Own
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## 7 <NA> 21 Own
## 8 <NA> 14 Own
## 9 <NA> 2 Own
## 10 <NA> 23 Own
## # ℹ 674 more rows
## # ℹ 12 more variables: individual_household_size <dbl>, individual_age <dbl>,
## # individual_children_younger_than_17 <dbl>, individual_highest_degree <chr>,
## # individual_household_income_2021 <fct>, individual_household_asset <fct>,
## # individual_relationship_status <chr>, individual_race_ethnic <chr>,
## # individual_gender <chr>, individual_religious <chr>,
## # individual_political_party <chr>, individual_political_orientation <chr>
Relationship status (the data are messy and need further cleaning)
# Preview the respondent-level columns (names beginning with "individual").
select(td, starts_with("individual"))
## # A tibble: 684 × 15
## individual_involved_time individual_years individual_ownership
## <chr> <chr> <chr>
## 1 <NA> 6 Own
## 2 <NA> 16 Own
## 3 <NA> 22 Own
## 4 <NA> 21 Own
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## 7 <NA> 21 Own
## 8 <NA> 14 Own
## 9 <NA> 2 Own
## 10 <NA> 23 Own
## # ℹ 674 more rows
## # ℹ 12 more variables: individual_household_size <dbl>, individual_age <dbl>,
## # individual_children_younger_than_17 <dbl>, individual_highest_degree <chr>,
## # individual_household_income_2021 <fct>, individual_household_asset <fct>,
## # individual_relationship_status <chr>, individual_race_ethnic <chr>,
## # individual_gender <chr>, individual_religious <chr>,
## # individual_political_party <chr>, individual_political_orientation <chr>
# Dot plot of respondent counts per relationship status, one panel per value
# of the California flag. X-axis labels are rotated 90 degrees so the long
# status labels do not overlap.
td %>%
  count(individual_relationship_status, community_is_california) %>%
  ggplot(mapping = aes(x = individual_relationship_status, y = n)) +
  geom_point() +
  facet_wrap(vars(community_is_california)) +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
Many women?
# Preview the respondent-level columns (names beginning with "individual").
select(td, starts_with("individual"))
## # A tibble: 684 × 15
## individual_involved_time individual_years individual_ownership
## <chr> <chr> <chr>
## 1 <NA> 6 Own
## 2 <NA> 16 Own
## 3 <NA> 22 Own
## 4 <NA> 21 Own
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## 7 <NA> 21 Own
## 8 <NA> 14 Own
## 9 <NA> 2 Own
## 10 <NA> 23 Own
## # ℹ 674 more rows
## # ℹ 12 more variables: individual_household_size <dbl>, individual_age <dbl>,
## # individual_children_younger_than_17 <dbl>, individual_highest_degree <chr>,
## # individual_household_income_2021 <fct>, individual_household_asset <fct>,
## # individual_relationship_status <chr>, individual_race_ethnic <chr>,
## # individual_gender <chr>, individual_religious <chr>,
## # individual_political_party <chr>, individual_political_orientation <chr>
# Tally respondents by self-reported gender and California flag; the result
# stays grouped by both columns.
td %>%
  group_by(individual_gender, community_is_california) %>%
  summarise(n = n(), .groups = "keep")
## # A tibble: 21 × 3
## # Groups: individual_gender, community_is_california [21]
## individual_gender community_is_califor…¹ n
## <chr> <lgl> <int>
## 1 Cisgender (gender identity corresponds with bir… FALSE 25
## 2 Cisgender (gender identity corresponds with bir… TRUE 1
## 3 Cisgender (gender identity corresponds with bir… NA 1
## 4 Genderfluid FALSE 1
## 5 I want to name my own custom gender Female FALSE 1
## 6 I want to name my own custom gender Queer FALSE 1
## 7 I want to name my own custom gender Trans and c… FALSE 1
## 8 I want to name my own custom gender androgynous FALSE 1
## 9 I want to name my own custom gender femail FALSE 1
## 10 I want to name my own custom gender irrelevant FALSE 1
## # ℹ 11 more rows
## # ℹ abbreviated name: ¹community_is_california
A huge variety of religions?
# Tally respondents by religious affiliation and California flag; the result
# stays grouped by both columns.
td %>%
  group_by(individual_religious, community_is_california) %>%
  summarise(n = n(), .groups = "keep")
## # A tibble: 122 × 3
## # Groups: individual_religious, community_is_california [122]
## individual_religious community_is_california n
## <chr> <lgl> <int>
## 1 Agnosticism NA FALSE 55
## 2 Agnosticism NA TRUE 15
## 3 Atheism NA FALSE 55
## 4 Atheism NA TRUE 7
## 5 Atheism NA NA 2
## 6 Buddhism NA FALSE 31
## 7 Buddhism NA TRUE 5
## 8 Buddhism NA NA 3
## 9 Catholicism NA FALSE 12
## 10 Catholicism NA TRUE 1
## # ℹ 112 more rows
Very left-leaning?
# Preview the respondent-level columns (names beginning with "individual").
select(td, starts_with("individual"))
## # A tibble: 684 × 15
## individual_involved_time individual_years individual_ownership
## <chr> <chr> <chr>
## 1 <NA> 6 Own
## 2 <NA> 16 Own
## 3 <NA> 22 Own
## 4 <NA> 21 Own
## 5 <NA> <NA> <NA>
## 6 <NA> <NA> <NA>
## 7 <NA> 21 Own
## 8 <NA> 14 Own
## 9 <NA> 2 Own
## 10 <NA> 23 Own
## # ℹ 674 more rows
## # ℹ 12 more variables: individual_household_size <dbl>, individual_age <dbl>,
## # individual_children_younger_than_17 <dbl>, individual_highest_degree <chr>,
## # individual_household_income_2021 <fct>, individual_household_asset <fct>,
## # individual_relationship_status <chr>, individual_race_ethnic <chr>,
## # individual_gender <chr>, individual_religious <chr>,
## # individual_political_party <chr>, individual_political_orientation <chr>
# Tally respondents by political party and California flag; the result stays
# grouped by both columns.
td %>%
  group_by(individual_political_party, community_is_california) %>%
  summarise(n = n(), .groups = "keep")
## # A tibble: 13 × 3
## # Groups: individual_political_party, community_is_california [13]
## individual_political_party community_is_california n
## <chr> <lgl> <int>
## 1 Democrat FALSE 319
## 2 Democrat TRUE 49
## 3 Democrat NA 6
## 4 Independent FALSE 57
## 5 Independent TRUE 5
## 6 Independent NA 4
## 7 Other (please specify) FALSE 33
## 8 Other (please specify) TRUE 6
## 9 Other (please specify) NA 1
## 10 Republican FALSE 2
## 11 <NA> FALSE 170
## 12 <NA> TRUE 19
## 13 <NA> NA 13
very liberal
# Tally respondents by political orientation and California flag; the result
# stays grouped by both columns.
td %>%
  group_by(individual_political_orientation, community_is_california) %>%
  summarise(n = n(), .groups = "keep")
## # A tibble: 15 × 3
## # Groups: individual_political_orientation, community_is_california [15]
## individual_political_orientation community_is_califor…¹ n
## <chr> <lgl> <int>
## 1 Conservative on all or most issues FALSE 3
## 2 Conservative on social issues, liberal on econo… FALSE 2
## 3 Conservative on social issues, liberal on econo… TRUE 1
## 4 Liberal on all or most issues FALSE 320
## 5 Liberal on all or most issues TRUE 51
## 6 Liberal on all or most issues NA 6
## 7 Liberal on social issues, conservative on econo… FALSE 57
## 8 Liberal on social issues, conservative on econo… TRUE 6
## 9 Liberal on social issues, conservative on econo… NA 2
## 10 Other (please specify) FALSE 26
## 11 Other (please specify) TRUE 2
## 12 Other (please specify) NA 2
## 13 <NA> FALSE 173
## 14 <NA> TRUE 19
## 15 <NA> NA 14
## # ℹ abbreviated name: ¹community_is_california