library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(here)
## here() starts at /Users/caoanjie/Desktop/projects/thrive_coho
library(patchwork)
library(ggthemes)
library(zipcodeR)
# Read the tidied survey data; the path is resolved relative to the project
# root by here().
td <- here("data/tidy_data.csv") %>%
  read_csv()
## Rows: 684 Columns: 90
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (79): community_name, community_zipcode, community_stage, community_form...
## dbl  (6): Progress, community_households, individual_household_size, individ...
## lgl  (5): community_total_units_extra_1_5, community_total_units_extra_6_10,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the rows of `td` where `community_is_california` is TRUE
# (filter() drops rows where the condition is FALSE or NA).
cd <- filter(td, community_is_california)

plot map – where in california?

There are many CoHo communities in California (N = 26), many of them located in Northern California. Interestingly, none of them is located in San Mateo County.

# Number of distinct communities per ZIP code, with the columns returned by
# geocode_zip() for each ZIP code attached.
cd_community_zipcode <- cd %>%
  distinct(community_name, community_zipcode) %>%
  # count(col) returns an ungrouped tibble, whereas group_by() %>% count()
  # would leave the result grouped by ZIP code.
  count(community_zipcode) %>%
  # geocode_zip() is called once per ZIP code; the results are stored in a
  # list-column and then expanded into regular columns by unnest().
  mutate(coords = map(community_zipcode, geocode_zip)) %>%
  unnest(coords)


# County polygons for California, with a flag marking San Mateo County so it
# can be filled differently on the map.
# `region` is the documented argument of map_data(); county names in the
# "county" database are prefixed with the full state name.
ca_map <- map_data("county", region = "california") %>%
  mutate(is_san_mateo = (subregion == "san mateo"))

# California county map: polygons are filled by `is_san_mateo`, and one point
# is drawn per row of `cd_community_zipcode`.
ggplot() +
  geom_polygon(
    data = ca_map,
    mapping = aes(x = long, y = lat, group = group, fill = is_san_mateo),
    color = "black"
  ) +
  geom_point(
    data = cd_community_zipcode,
    mapping = aes(x = lng, y = lat),
    color = "blue",
    fill = "white"
  ) +
  # First value is used for FALSE, second for TRUE.
  scale_fill_manual(values = c("white", "red")) +
  xlim(-125, -110) +
  theme_few() +
  # Hide axis decorations and the legend; the subtitle explains the colour.
  theme(
    axis.ticks = element_blank(),
    axis.text = element_blank(),
    legend.position = "none"
  ) +
  labs(
    x = "",
    y = "",
    title = "Distribution of CoHo community in California",
    subtitle = "Red indicates San Mateo County"
  )

demographic overview

see the other docs with national comparison

land acquisition time

Sometimes members of the same community give different responses; if that’s the case, count all of them! It looks like there was a peak around 2000, but not much after that.

# Number of distinct (community, land acquisition time) responses per year.
# If one community has several different responses, each distinct response
# is counted.
cd %>%
  distinct(community_name, community_land_acquisition_time) %>%
  # Drop non-answers and missing values.
  filter(!community_land_acquisition_time %in% c(
    "Don't know",
    "N/A",
    NA,
    "We don't have land yet"
  )) %>%
  # The first four characters of the response are used as the year.
  mutate(single_year = substring(community_land_acquisition_time, 1, 4)) %>%
  count(single_year) %>%
  # single_year is a character column, so the x axis is discrete; group = 1
  # lets geom_line() connect the points across the discrete values.
  ggplot(aes(x = single_year, y = n, group = 1)) +
  geom_point() +
  geom_line() +
  theme_few() +
  # Labels follow the mapping: x is the year, y is the number of responses.
  xlab("Year") +
  ylab("California CoHo Community reporting land acquisition")

individual living time?

Some evidence for a right-skewed distribution of living time – many people joined recently?

# Histogram of `individual_years` after converting it to a number.
# The two text answers are mapped to "0.5" and "0" before the conversion.
cd %>%
  select(individual_years) %>%
  drop_na(individual_years) %>%
  mutate(
    individual_years = as.numeric(case_when(
      individual_years == "Less than one year" ~ "0.5",
      individual_years == "I am not living there yet" ~ "0",
      TRUE ~ individual_years
    ))
  ) %>%
  ggplot(aes(x = individual_years)) +
  geom_histogram() +
  theme_few() +
  labs(
    x = "Number of years individual has been living in CoHo",
    y = "Number of participants"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

total units

Sometimes members of the same community give different answers; if that’s the case, take the average. Since the responses are ranges, we use the midpoint of each range (e.g. a response of 11-15 is converted to 13). The responses are relatively sparse (half of the CoHo communities in CA don’t know how many units they have in total), but in the existing data the scale of the CoHo communities is not small: most have 10+ units, and some are close to 50.

# Subset of `cd` holding the community name and the unit-related columns used
# by the summaries below.
cd_units <- select(
  cd,
  community_name,
  community_total_units,
  community_units_for_sale,
  community_total_units_extra_1_5,  # no responses in this subset
  community_total_units_extra_6_10, # no responses in this subset
  community_total_affordable_units_subsidized_by_ngo,
  community_total_affordable_units_subsidized_by_private,
  community_total_affordable_units_subsidized_by_public
)

# Histogram of the mean number of total units reported per community.
# Each range answer is replaced by its midpoint; any answer not listed in the
# mapping (including missing values) becomes NA.
cd_units %>%
  distinct(community_name, community_total_units) %>%
  mutate(community_total_units_num = case_when(
    community_total_units == "6-10" ~ 8,
    community_total_units == "11-15" ~ 13,
    community_total_units == "16-20" ~ 18,
    community_total_units == "26-30" ~ 28,
    community_total_units == "31-35" ~ 33,
    community_total_units == "41-45" ~ 43,
    community_total_units == "46-50" ~ 48
  )) %>%
  group_by(community_name) %>%
  summarise(
    # na.rm = TRUE averages over the usable answers only, so one unmapped or
    # missing answer does not discard a community that also has valid answers.
    mean_units_reported = mean(community_total_units_num, na.rm = TRUE)
  ) %>%
  # Communities with no usable answer yield NaN, which is.na() also catches.
  filter(!is.na(mean_units_reported)) %>%
  ggplot(aes(x = mean_units_reported)) +
  geom_histogram() +
  theme_few() +
  ylab("Number of CoHo with information available") +
  xlab("Mean number of total units reported")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

units for sale?

Most CoHo communities don’t have units available for sale (saturated?). A few have a lot. Some communities have conflicting responses (so the counts don’t add up to the total number of unique CoHo communities).

# Count distinct (community, units-for-sale) responses per answer, leaving out
# missing values and "Don't know".
cd_units %>%
  distinct(community_name, community_units_for_sale) %>%
  drop_na(community_units_for_sale) %>%
  filter(community_units_for_sale != "Don't know") %>%
  group_by(community_units_for_sale) %>%
  count()
## # A tibble: 5 × 2
## # Groups:   community_units_for_sale [5]
##   community_units_for_sale     n
##   <chr>                    <int>
## 1 0                           15
## 2 1-2                          8
## 3 5-6                          1
## 4 7-8                          1
## 5 More than 10                 5

affordable units?

Most communities do not have any affordable units subsidized by any source.

# Count distinct (community, answer) pairs for NGO-subsidized affordable
# units, leaving out missing answers.
cd_units %>%
  distinct(community_name, community_total_affordable_units_subsidized_by_ngo) %>%
  drop_na(community_total_affordable_units_subsidized_by_ngo) %>%
  group_by(community_total_affordable_units_subsidized_by_ngo) %>%
  count()
## # A tibble: 4 × 2
## # Groups:   community_total_affordable_units_subsidized_by_ngo [4]
##   community_total_affordable_units_subsidized_by_ngo     n
##   <chr>                                              <int>
## 1 0                                                     21
## 2 1-5                                                    2
## 3 Don't know                                             3
## 4 More than 20                                           1
# Count distinct (community, answer) pairs for privately subsidized affordable
# units, leaving out missing answers.
cd_units %>%
  distinct(community_name, community_total_affordable_units_subsidized_by_private) %>%
  drop_na(community_total_affordable_units_subsidized_by_private) %>%
  group_by(community_total_affordable_units_subsidized_by_private) %>%
  count()
## # A tibble: 3 × 2
## # Groups:   community_total_affordable_units_subsidized_by_private [3]
##   community_total_affordable_units_subsidized_by_private     n
##   <chr>                                                  <int>
## 1 0                                                         19
## 2 1-5                                                        5
## 3 Don't know                                                 4
# Count distinct (community, answer) pairs for publicly subsidized affordable
# units, leaving out missing answers.
cd_units %>%
  distinct(community_name, community_total_affordable_units_subsidized_by_public) %>%
  drop_na(community_total_affordable_units_subsidized_by_public) %>%
  group_by(community_total_affordable_units_subsidized_by_public) %>%
  count()
## # A tibble: 3 × 2
## # Groups:   community_total_affordable_units_subsidized_by_public [3]
##   community_total_affordable_units_subsidized_by_public     n
##   <chr>                                                 <int>
## 1 0                                                        17
## 2 1-5                                                       7
## 3 Don't know                                                4

a range of diversity

recoded:

None at all = 0, A little = 1, A moderate amount = 2, A lot = 3, A great deal = 4, Don’t know = NA

Diversity is lacking in all aspects

# Recode a diversity rating from text to a numeric score.
#
# Mapping: "None at all" = 0, "A little" = 1, "A moderate amount" = 2,
# "A lot" = 3, "A great deal" = 4. Anything else ("Don't know", missing
# values, unrecognised text) becomes NA.
#
# text: a single value or a vector of ratings.
# Returns an unnamed numeric vector of the same length as `text`.
recode_function <- function(text) {
  scale_scores <- c(
    "None at all" = 0,
    "A little" = 1,
    "A moderate amount" = 2,
    "A lot" = 3,
    "A great deal" = 4
  )
  # as.character() matters: indexing with a logical NA would recycle over the
  # whole lookup vector instead of returning a single NA.
  unname(scale_scores[as.character(text)])
}

# Self-reported diversity scores per category: jittered individual responses
# plus a bootstrapped mean and confidence interval, ordered by mean score.
cd %>%
  select(contains("diversity")) %>%
  # These two columns are about recruiting, not ratings, so they are excluded.
  select(
    -community_recruiting_increasing_diversity,
    -community_recruiting_increasing_diversity_yes_how
  ) %>%
  # Recode every rating cell by cell, then drop the row-wise grouping so the
  # following steps operate on a plain tibble.
  rowwise() %>%
  mutate(across(everything(), ~ recode_function(.x))) %>%
  ungroup() %>%
  pivot_longer(cols = everything()) %>%
  mutate(name_short = sub("community_diversity_", "", name)) %>%
  # Missing scores are excluded explicitly (.na_rm / na.rm) rather than being
  # dropped implicitly with a warning from each function.
  ggplot(aes(
    x = forcats::fct_reorder(factor(name_short), value, mean, .na_rm = TRUE),
    y = value
  )) +
  geom_point(
    position = position_jitter(width = .2),
    alpha = .3,
    na.rm = TRUE
  ) +
  stat_summary(fun.data = "mean_cl_boot", color = "red", na.rm = TRUE) +
  theme_few() +
  xlab("Diversity category") +
  ylab("Self-reported score") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1)) +
  # Reference line at 2, the score assigned to "A moderate amount".
  geom_hline(yintercept = 2, alpha = .8, linetype = "dotted", color = "gray")
## Warning: `fct_reorder()` removing 218 missing values.
## ℹ Use `.na_rm = TRUE` to silence this message.
## ℹ Use `.na_rm = FALSE` to preserve NAs.
## `fct_reorder()` removing 218 missing values.
## ℹ Use `.na_rm = TRUE` to silence this message.
## ℹ Use `.na_rm = FALSE` to preserve NAs.
## Warning: Removed 218 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Warning: Removed 218 rows containing missing values or values outside the scale range
## (`geom_point()`).

how community has helped with increasing diversity?

# Print the non-missing free-text answers from
# `community_recruiting_increasing_diversity_yes_how`.
cd %>%
  drop_na(community_recruiting_increasing_diversity_yes_how) %>%
  pull(community_recruiting_increasing_diversity_yes_how)
##  [1] "We welcome diversity but we don't get much of it."                                                                                                                                                                                                                                                                                                                       
##  [2] "Our Common House was rebuilt to included an accessible one-bedroom - lower kitchenette, roll-in shower, wider doorways, ramp to door."                                                                                                                                                                                                                                   
##  [3] "Recruiting among the minority groups in the area; assisting with financial arrangements for families with children"                                                                                                                                                                                                                                                      
##  [4] "We have reviewed our recruitment materials and worked to make them more welcoming of diversity."                                                                                                                                                                                                                                                                         
##  [5] "We have met with a consultant to help make our marketing inclusive."                                                                                                                                                                                                                                                                                                     
##  [6] "We hired a co-housing diversity specialist who helped us develop marketing tools for diverse communities. We currently have members from South Korea, India, Israel, and China."                                                                                                                                                                                         
##  [7] "We hired a racial diversity consultant"                                                                                                                                                                                                                                                                                                                                  
##  [8] "there was an effort, but it seems to have fallen by the wayside"                                                                                                                                                                                                                                                                                                         
##  [9] "We encourage active adults by emphasizing how active we are in our newsletters and website."                                                                                                                                                                                                                                                                             
## [10] "We have an Equity, Diversity and Belonging group but it is the minority and there appears to be significant resistance to diversification."                                                                                                                                                                                                                              
## [11] "In process of formation"                                                                                                                                                                                                                                                                                                                                                 
## [12] "we have a shared culture that values inclusion. That trickles down through the outreach and interactions with interested parties."                                                                                                                                                                                                                                       
## [13] "We are very welcoming when they come to our open houses and encourage people we know in this group to check us out and make an offer."                                                                                                                                                                                                                                   
## [14] "First of all, please clarify your hypothesis with regard to the design of this survey.  I see lists of incoherent elements across this survey, and now I see \"kinds of diversity\" with a list.  Please be informed:  We are professionals, and read the research literature.  Please read David Graeber or Kim Stanley Robinson or other interesting forward thinkers."
## [15] "Its not working"