Overview

Number of Participations

df %>% 
  ggplot(aes(x = participations)) + 
  geom_histogram(bins = 20) + 
  scale_x_continuous(breaks = seq(0,6,1)) + 
  xlab("Number of participations")

Age

df %>% 
  ggplot(aes(x = `Child's Age Today`)) + 
  geom_histogram(bins = 20) + 
  #scale_x_continuous(breaks = seq(0,6,1)) + 
  xlab("Child's Age Today")

Race

Information Availability

df %>% 
  rowwise() %>% 
  mutate(
    has_info = case_when(
      race == "No information" ~ FALSE, 
      race == "Prefer not to answer" ~ FALSE, 
      TRUE ~ TRUE
    ), 
    has_info_print = case_when(
      has_info == FALSE ~ "Unavailable", 
      has_info == TRUE ~ "Available"
    ), 
    type_info_missing = case_when(
      race == "No information" ~ "No information", 
      race == "Prefer not to answer" ~ "Prefer Not to answer", 
      TRUE ~ "Has information available"
    )
  ) %>% 
ggplot(aes(x=has_info_print, fill = type_info_missing)) + 
  geom_bar() +
  #theme(axis.text.x = element_text(angle = 90)) +
  geom_text(stat='count', aes(label=..count..), vjust=-1) + 
  xlab("Race information availabilities") + 
  labs(title ="Race categories breakdown" )

Available information breakdown

df_race = df %>% 
  rowwise() %>% 
  mutate(
    has_info = case_when(
      race == "No information" ~ FALSE, 
      race == "Prefer not to answer" ~ FALSE, 
      TRUE ~ TRUE
    )  
  ) %>% 
  filter(has_info==TRUE) %>% 
  group_by(race) 
ggplot(df_race, aes(x=reorder(race,race,
                     function(x)-length(x)))) + 
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90)) +
  geom_text(stat='count', aes(label=..count..), vjust=-1) + 
  xlab("Race") + 
  labs(title ="Race categories breakdown" )

only look at people who have participated

df %>% 
  rowwise() %>% 
  mutate(
    has_info = case_when(
      race == "No information" ~ FALSE, 
      race == "Prefer not to answer" ~ FALSE, 
      TRUE ~ TRUE
    ), 
    has_info_print = case_when(
      has_info == FALSE ~ "Unavailable", 
      has_info == TRUE ~ "Available"
    ), 
    type_info_missing = case_when(
      race == "No information" ~ "No information", 
      race == "Prefer not to answer" ~ "Prefer Not to answer", 
      TRUE ~ "Has information available"
    )
  ) %>%
  filter(participations != 0) %>% 
ggplot(aes(x=has_info_print, fill = type_info_missing)) + 
  geom_bar() +
  #theme(axis.text.x = element_text(angle = 90)) +
  geom_text(stat='count', aes(label=..count..), vjust=-1) + 
  xlab("Race information availabilities") + 
  labs(title ="Race categories breakdown" )

df_race = df %>% 
  rowwise() %>% 
  mutate(
    has_info = case_when(
      race == "No information" ~ FALSE, 
      race == "Prefer not to answer" ~ FALSE, 
      TRUE ~ TRUE
    )  
  ) %>% 
  filter(participations!=0) %>% 
  group_by(race) 
ggplot(df_race, aes(x=reorder(race,race,
                     function(x)-length(x)))) + 
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90)) +
  geom_text(stat='count', aes(label=..count..), vjust=-1) + 
  xlab("Race") + 
  labs(title ="Race categories breakdown" )

Income

#read in US median household income by zipcode
HH.income = read_csv(here("data/income_by_zipcode.csv"))

#filter TCL.zipcodes for actual zipcodes
zipcodes = df %>% 
  select(`Zip Code`) %>% 
  filter(`Zip Code` %in% HH.income$Zip.Code)

#reformat HH.income
HH.income = HH.income %>% 
  pivot_wider(names_from = Zip.Code, values_from = Median.Income)

#create lookup function
lookup <- function(x) {  
  return(HH.income[[x]])
}

#look up TCL zipcodes' median incomes
zipcodes = zipcodes %>% 
  mutate(median_income = lapply(`Zip Code`, lookup) %>% 
           unlist() %>% 
           as.numeric()
         )

General income information

zipcodes %>% 
  summarise(mean_income = mean(median_income),
            median_median_income = median(median_income),
            sd_income = sd(median_income),
            lowest_income = min(median_income),
            highest_income = max(median_income),
            total_participants = length(`Zip Code`),
            unique_zipcodes = length(unique(`Zip Code`))
            ) %>% 
  kableExtra::kable()
mean_income median_median_income sd_income lowest_income highest_income total_participants unique_zipcodes
106459.6 100474 44800.58 20564 200001 1314 589

Density plot of median incomes

ggplot(zipcodes, aes(x = median_income)) +
  geom_histogram(aes(y=..density..), binwidth = 3000) +
  geom_density(alpha = .5, size = 2, color = "red")

Income and Participations

zipcodes <- janitor::clean_names(zipcodes)
df <- janitor::clean_names(df)
left_join(zipcodes, df,
          by = "zip_code") %>% 
  mutate(
    has_info = case_when(
      race == "No information" ~ FALSE, 
      race == "Prefer not to answer" ~ FALSE, 
      TRUE ~ TRUE
    )  
  ) %>% 
  filter(has_info==TRUE) %>% 
  filter(participations != 0) %>%
  ggplot(aes(x = median_income, 
         y = participations, 
         color = race)) + 
  geom_point()