Overview

Number of Participations

df %>% 
  ggplot(aes(x = participations)) + 
  geom_histogram(bins = 20) + 
  scale_x_continuous(breaks = seq(0,6,1)) + 
  xlab("Number of participations")

Age

df %>% 
  ggplot(aes(x = `Child's Age Today`)) + 
  geom_histogram(bins = 20) + 
  #scale_x_continuous(breaks = seq(0,6,1)) + 
  xlab("Child's Age Today")

Race

Information Availability

df %>% 
  rowwise() %>% 
  mutate(
    has_info = case_when(
      race == "No information" ~ FALSE, 
      race == "Prefer not to answer" ~ FALSE, 
      TRUE ~ TRUE
    ), 
    has_info_print = case_when(
      has_info == FALSE ~ "Unavailable", 
      has_info == TRUE ~ "Available"
    ), 
    type_info_missing = case_when(
      race == "No information" ~ "No information", 
      race == "Prefer not to answer" ~ "Prefer Not to answer", 
      TRUE ~ "Has information available"
    )
  ) %>% 
ggplot(aes(x=has_info_print, fill = type_info_missing)) + 
  geom_bar() +
  #theme(axis.text.x = element_text(angle = 90)) +
  geom_text(stat='count', aes(label=..count..), vjust=-1) + 
  xlab("Race information availabilities") + 
  labs(title ="Race categories breakdown" )

Available information breakdown

df_race = df %>% 
  rowwise() %>% 
  mutate(
    has_info = case_when(
      race == "No information" ~ FALSE, 
      race == "Prefer not to answer" ~ FALSE, 
      TRUE ~ TRUE
    )  
  ) %>% 
  filter(has_info==TRUE) %>% 
  group_by(race) 
ggplot(df_race, aes(x=reorder(race,race,
                     function(x)-length(x)))) + 
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90)) +
  geom_text(stat='count', aes(label=..count..), vjust=-1) + 
  xlab("Race") + 
  labs(title ="Race categories breakdown" )

only look at people who have participated

df %>% 
  rowwise() %>% 
  mutate(
    has_info = case_when(
      race == "No information" ~ FALSE, 
      race == "Prefer not to answer" ~ FALSE, 
      TRUE ~ TRUE
    ), 
    has_info_print = case_when(
      has_info == FALSE ~ "Unavailable", 
      has_info == TRUE ~ "Available"
    ), 
    type_info_missing = case_when(
      race == "No information" ~ "No information", 
      race == "Prefer not to answer" ~ "Prefer Not to answer", 
      TRUE ~ "Has information available"
    )
  ) %>%
  filter(participations != 0) %>% 
ggplot(aes(x=has_info_print, fill = type_info_missing)) + 
  geom_bar() +
  #theme(axis.text.x = element_text(angle = 90)) +
  geom_text(stat='count', aes(label=..count..), vjust=-1) + 
  xlab("Race information availabilities") + 
  labs(title ="Race categories breakdown" )

df_race = df %>% 
  rowwise() %>% 
  mutate(
    has_info = case_when(
      race == "No information" ~ FALSE, 
      race == "Prefer not to answer" ~ FALSE, 
      TRUE ~ TRUE
    )  
  ) %>% 
  filter(participations!=0) %>% 
  group_by(race) 
ggplot(df_race, aes(x=reorder(race,race,
                     function(x)-length(x)))) + 
  geom_bar() +
  theme(axis.text.x = element_text(angle = 90)) +
  geom_text(stat='count', aes(label=..count..), vjust=-1) + 
  xlab("Race") + 
  labs(title ="Race categories breakdown" )

Income

#read in US median household income by zipcode
HH.income = read_csv(here("data/income_by_zipcode.csv"))

#filter TCL.zipcodes for actual zipcodes
zipcodes = df %>% 
  select(`Zip Code`) %>% 
  filter(`Zip Code` %in% HH.income$Zip.Code)

#reformat HH.income
HH.income = HH.income %>% 
  pivot_wider(names_from = Zip.Code, values_from = Median.Income)

#create lookup function
lookup <- function(x) {  
  return(HH.income[[x]])
}

#look up TCL zipcodes' median incomes
zipcodes = zipcodes %>% 
  mutate(median_income = lapply(`Zip Code`, lookup) %>% 
           unlist() %>% 
           as.numeric()
         )

General income information

zipcodes %>% 
  summarise(mean_income = mean(median_income),
            median_median_income = median(median_income),
            sd_income = sd(median_income),
            lowest_income = min(median_income),
            highest_income = max(median_income),
            total_participants = length(`Zip Code`),
            unique_zipcodes = length(unique(`Zip Code`))
            ) %>% 
  kableExtra::kable()

mean_income	median_median_income	sd_income	lowest_income	highest_income	total_participants	unique_zipcodes
106459.6	100474	44800.58	20564	200001	1314	589

Density plot of median incomes

ggplot(zipcodes, aes(x = median_income)) +
  geom_histogram(aes(y=..density..), binwidth = 3000) +
  geom_density(alpha = .5, size = 2, color = "red")

Income and Participations

zipcodes <- janitor::clean_names(zipcodes)
df <- janitor::clean_names(df)
left_join(zipcodes, df,
          by = "zip_code") %>% 
  mutate(
    has_info = case_when(
      race == "No information" ~ FALSE, 
      race == "Prefer not to answer" ~ FALSE, 
      TRUE ~ TRUE
    )  
  ) %>% 
  filter(has_info==TRUE) %>% 
  filter(participations != 0) %>%
  ggplot(aes(x = median_income, 
         y = participations, 
         color = race)) + 
  geom_point()

visualization

anjie & aaron

3/31/2021

Overview

Number of Participations

Age

Race

Information Availability

Available information breakdown

only look at people who have participated

Income

Density plot of median incomes

Income and Participations