df %>%
ggplot(aes(x = participations)) +
geom_histogram(bins = 20) +
scale_x_continuous(breaks = seq(0,6,1)) +
xlab("Number of participations")
df %>%
ggplot(aes(x = `Child's Age Today`)) +
geom_histogram(bins = 20) +
#scale_x_continuous(breaks = seq(0,6,1)) +
xlab("Child's Age Today")
df %>%
rowwise() %>%
mutate(
has_info = case_when(
race == "No information" ~ FALSE,
race == "Prefer not to answer" ~ FALSE,
TRUE ~ TRUE
),
has_info_print = case_when(
has_info == FALSE ~ "Unavailable",
has_info == TRUE ~ "Available"
),
type_info_missing = case_when(
race == "No information" ~ "No information",
race == "Prefer not to answer" ~ "Prefer Not to answer",
TRUE ~ "Has information available"
)
) %>%
ggplot(aes(x=has_info_print, fill = type_info_missing)) +
geom_bar() +
#theme(axis.text.x = element_text(angle = 90)) +
geom_text(stat='count', aes(label=..count..), vjust=-1) +
xlab("Race information availabilities") +
labs(title ="Race categories breakdown" )
df_race = df %>%
rowwise() %>%
mutate(
has_info = case_when(
race == "No information" ~ FALSE,
race == "Prefer not to answer" ~ FALSE,
TRUE ~ TRUE
)
) %>%
filter(has_info==TRUE) %>%
group_by(race)
ggplot(df_race, aes(x=reorder(race,race,
function(x)-length(x)))) +
geom_bar() +
theme(axis.text.x = element_text(angle = 90)) +
geom_text(stat='count', aes(label=..count..), vjust=-1) +
xlab("Race") +
labs(title ="Race categories breakdown" )
df %>%
rowwise() %>%
mutate(
has_info = case_when(
race == "No information" ~ FALSE,
race == "Prefer not to answer" ~ FALSE,
TRUE ~ TRUE
),
has_info_print = case_when(
has_info == FALSE ~ "Unavailable",
has_info == TRUE ~ "Available"
),
type_info_missing = case_when(
race == "No information" ~ "No information",
race == "Prefer not to answer" ~ "Prefer Not to answer",
TRUE ~ "Has information available"
)
) %>%
filter(participations != 0) %>%
ggplot(aes(x=has_info_print, fill = type_info_missing)) +
geom_bar() +
#theme(axis.text.x = element_text(angle = 90)) +
geom_text(stat='count', aes(label=..count..), vjust=-1) +
xlab("Race information availabilities") +
labs(title ="Race categories breakdown" )
df_race = df %>%
rowwise() %>%
mutate(
has_info = case_when(
race == "No information" ~ FALSE,
race == "Prefer not to answer" ~ FALSE,
TRUE ~ TRUE
)
) %>%
filter(participations!=0) %>%
group_by(race)
ggplot(df_race, aes(x=reorder(race,race,
function(x)-length(x)))) +
geom_bar() +
theme(axis.text.x = element_text(angle = 90)) +
geom_text(stat='count', aes(label=..count..), vjust=-1) +
xlab("Race") +
labs(title ="Race categories breakdown" )
#read in US median household income by zipcode
HH.income = read_csv(here("data/income_by_zipcode.csv"))
#filter TCL.zipcodes for actual zipcodes
zipcodes = df %>%
select(`Zip Code`) %>%
filter(`Zip Code` %in% HH.income$Zip.Code)
#reformat HH.income
HH.income = HH.income %>%
pivot_wider(names_from = Zip.Code, values_from = Median.Income)
#create lookup function
lookup <- function(x) {
return(HH.income[[x]])
}
#look up TCL zipcodes' median incomes
zipcodes = zipcodes %>%
mutate(median_income = lapply(`Zip Code`, lookup) %>%
unlist() %>%
as.numeric()
)
General income information
zipcodes %>%
summarise(mean_income = mean(median_income),
median_median_income = median(median_income),
sd_income = sd(median_income),
lowest_income = min(median_income),
highest_income = max(median_income),
total_participants = length(`Zip Code`),
unique_zipcodes = length(unique(`Zip Code`))
) %>%
kableExtra::kable()
mean_income | median_median_income | sd_income | lowest_income | highest_income | total_participants | unique_zipcodes |
---|---|---|---|---|---|---|
106459.6 | 100474 | 44800.58 | 20564 | 200001 | 1314 | 589 |
ggplot(zipcodes, aes(x = median_income)) +
geom_histogram(aes(y=..density..), binwidth = 3000) +
geom_density(alpha = .5, size = 2, color = "red")
zipcodes <- janitor::clean_names(zipcodes)
df <- janitor::clean_names(df)
left_join(zipcodes, df,
by = "zip_code") %>%
mutate(
has_info = case_when(
race == "No information" ~ FALSE,
race == "Prefer not to answer" ~ FALSE,
TRUE ~ TRUE
)
) %>%
filter(has_info==TRUE) %>%
filter(participations != 0) %>%
ggplot(aes(x = median_income,
y = participations,
color = race)) +
geom_point()