This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
colorado_income <- get_acs(geography = "school district (unified)",
variables = "B19013_001",
state = "CO", key = apikey)
## Getting data from the 2013-2017 5-year ACS
colorado_school <- school_districts(state = "CO",
type = "unified",
class = "sf")
##
|
| | 0%
|
| | 1%
|
|= | 1%
|
|== | 3%
|
|=== | 4%
|
|=== | 5%
|
|==== | 6%
|
|==== | 7%
|
|===== | 7%
|
|===== | 8%
|
|====== | 9%
|
|======= | 10%
|
|======= | 11%
|
|======== | 12%
|
|======== | 13%
|
|========= | 13%
|
|========= | 14%
|
|========== | 15%
|
|========== | 16%
|
|=========== | 17%
|
|============ | 18%
|
|============ | 19%
|
|============= | 19%
|
|============= | 20%
|
|============== | 21%
|
|============== | 22%
|
|=============== | 23%
|
|================ | 24%
|
|================ | 25%
|
|================= | 26%
|
|================= | 27%
|
|================== | 27%
|
|================== | 28%
|
|=================== | 29%
|
|==================== | 30%
|
|==================== | 31%
|
|===================== | 32%
|
|===================== | 33%
|
|====================== | 33%
|
|====================== | 34%
|
|======================= | 35%
|
|======================= | 36%
|
|======================== | 37%
|
|========================= | 38%
|
|========================= | 39%
|
|========================== | 39%
|
|========================== | 40%
|
|=========================== | 41%
|
|=========================== | 42%
|
|============================ | 43%
|
|============================= | 44%
|
|============================= | 45%
|
|============================== | 45%
|
|============================== | 46%
|
|============================== | 47%
|
|=============================== | 47%
|
|=============================== | 48%
|
|================================ | 49%
|
|================================= | 50%
|
|================================= | 51%
|
|================================= | 52%
|
|================================== | 52%
|
|================================== | 53%
|
|=================================== | 53%
|
|=================================== | 54%
|
|==================================== | 55%
|
|==================================== | 56%
|
|===================================== | 57%
|
|====================================== | 58%
|
|====================================== | 59%
|
|======================================= | 60%
|
|======================================== | 61%
|
|======================================== | 62%
|
|========================================= | 63%
|
|========================================= | 64%
|
|========================================== | 64%
|
|========================================== | 65%
|
|=========================================== | 65%
|
|=========================================== | 66%
|
|=========================================== | 67%
|
|============================================ | 68%
|
|============================================= | 69%
|
|============================================== | 70%
|
|============================================== | 71%
|
|=============================================== | 72%
|
|=============================================== | 73%
|
|================================================ | 73%
|
|================================================ | 74%
|
|================================================= | 75%
|
|================================================= | 76%
|
|================================================== | 77%
|
|=================================================== | 79%
|
|==================================================== | 80%
|
|===================================================== | 81%
|
|====================================================== | 83%
|
|====================================================== | 84%
|
|======================================================= | 84%
|
|======================================================= | 85%
|
|======================================================== | 85%
|
|======================================================== | 87%
|
|========================================================= | 88%
|
|========================================================== | 89%
|
|=========================================================== | 90%
|
|=========================================================== | 91%
|
|============================================================ | 92%
|
|============================================================ | 93%
|
|============================================================= | 94%
|
|============================================================== | 95%
|
|============================================================== | 96%
|
|=============================================================== | 96%
|
|=============================================================== | 97%
|
|=============================================================== | 98%
|
|================================================================ | 99%
|
|=================================================================| 100%
co_school_joined <- left_join(colorado_school,
colorado_income,
by = "GEOID")
# Remove
rm("colorado_income", 'colorado_school')
You can also embed plots, for example:
profiles <- read_excel("./data/blueprint_designed/SFA_profile_Feb_2019.xls",
col_types = c("text", "text", "text","logical",
"numeric","numeric",
"numeric", "logical",
"logical", "logical",
"logical","logical"))
profiles <- profiles %>%
dplyr::rename(CDE_AGREEMENT = "Sponsor #",
day4 = "4 Day School Week",
student_count = "Pupil Count",
free_perc = "Free %",
redu_perc = "Reduced-Price %",
sbp = "Breakfast",
nslp = "Lunch",
smp = "Milk",
snack = "Snack",
sfsp = "SFSP",
sfa_name = "Sponsor Name") %>%
mutate(sfa_name = str_to_lower(sfa_name)) %>%
mutate(free_students = free_perc * student_count) %>% # count of free students
mutate(redu_students = redu_perc * student_count) %>% # count of redu students
mutate(free_and_red_perc = free_perc + redu_perc) # combined free & redu
# The above actually takes the raw excel into a semi-usable dataframe
profiles <- filter(profiles, Profile == "Public")
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.
# Import all the schools with GEOID!
# Must have current directory above data
site_all <- read_excel("./data/CDE_to_GEOID.xlsx", col_types = "text") %>%
mutate(GEOID = ACTUAL_GEOID) %>%
select(GEOID, CDE_AGREEMENT, everything(), -ACTUAL_GEOID)
site_all <- group_by(site_all,
CDE_AGREEMENT)
# Gets all the districts with their site count and GEOID
(districts <- summarise(site_all,
count_schools = n(),
GEOID = first(GEOID)))
## # A tibble: 176 x 3
## CDE_AGREEMENT count_schools GEOID
## <chr> <int> <chr>
## 1 0010 17 0805550
## 2 0020 47 0806900
## 3 0030 11 0801950
## 4 0040 25 0802580
## 5 0050 3 0802430
## 6 0060 4 0806750
## 7 0070 20 0807230
## 8 0100 3 0802070
## 9 0110 2 0806420
## 10 0120 8 0803780
## # ... with 166 more rows
# Combines districts with GEOID number
geoid_districts <- left_join(districts, profiles, by = "CDE_AGREEMENT")
### This WORKS!!
combined_geo_dist <- left_join(co_school_joined, geoid_districts,
by = "GEOID")
#remove
rm("site_all", "districts", "geoid_districts")
list.files("./data/meal_count", pattern = "^SY")
## [1] "SY10-11.xls" "SY11-12.xls" "SY12-13.xls" "SY13-14.xls" "SY14-15.xls"
## [6] "SY15-16.xls" "SY16-17.xls" "SY17-18.xls"
meal_count_directory <- "./data/meal_count/"
count_files <- list.files("./data/meal_count", pattern = "^SY")
# creates a list of the matching files path
count_files <- paste0(meal_count_directory,count_files)
# Creates list of with col_type = col_name, use names() to use the col_type
meal_count_column_unknown <- c("text" = "sfa_num",
"text" = "sfa_name",
"text" = "site_num",
"text" ="site_name",
"date"= "claim_date",
"text" ="meal_type",
"text" = "earning_type",
"text" = "rate_level",
"text" = "provision",
"numeric" = "count_days_served",
"numeric" = "free_meals_served",
"numeric" = "reduced_meals_served",
"numeric" = "paid_meals_served",
"numeric" = "total_meals_served")
# Imports all the files into a large list of lists
import_meal_count <-lapply(count_files,
read_excel,
range = "a2:n50000",
col_names = meal_count_column_unknown,
col_types = names(meal_count_column_unknown)
)
# Combines all the lists into a single dataframe, removes all the NAs
# created by the large loaded range from the excels
huge_meal_count <- bind_rows(import_meal_count) %>%
filter(!is.na(sfa_num))
# reduces working memory load by removing the imported dataframe
rm("import_meal_count", "meal_count_column_unknown",
"meal_count_directory", "count_files")
# creates the sfa num format and site number needed for matching 0000
# creates the school year for the claim.
district_meal_count <- mutate(huge_meal_count,
sfa_num_char = str_pad(sfa_num, width = 4,
side = "left", pad = "0"),
site_num_char = str_pad(site_num, width = 4,
side = "left", pad = "0"),
claim_year = year(claim_date),
claim_month = month(claim_date),
school_year = year(claim_date) +
(month(claim_date) <= 7),
meal_type = rename_meal_types(meal_type))
# remove and reorder a number of un-needed variables
district_meal_count <- select(district_meal_count, sfa_num_char, site_num_char,
school_year, meal_type,
everything(), -sfa_num, -site_num,
-earning_type, -claim_year)
district_meal_count %>%
group_by(sfa_num_char) %>%
ggplot() +
geom_point(aes(y = total_meals_served, x = claim_date), position = "jitter") +
facet_wrap(~rate_level, nrow = 2) +
geom_smooth(aes(y = total_meals_served, x = claim_date))
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
district_meal_count %>%
ggplot() +
geom_point(aes(y = total_meals_served, x = claim_date), position = "jitter")
district_meal_count %>%
group_by(sfa_num_char) %>%
ggplot() +
geom_point(aes(y = total_meals_served, x = claim_date), position = "jitter") +
facet_wrap(~provision) +
geom_smooth(aes(y = total_meals_served, x = claim_date))
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
directory_four_day <- list.dirs("./data/four_day")
file_four_day <- list.files("./data/four_day")
path_four_day <- paste0(directory_four_day,"/", file_four_day)
column_names_four_day <- c("text" = "school_year",
"text" = "sfa_num",
"text" = "sfa_name",
"text" = "site_num",
"text" ="site_name",
"text" ="meal_type",
"numeric" = "pk_12_enrollment",
"text" = "free_perc",
"text" = "redu_perc",
"text" = "free_and_redu_perc",
"text" = "four_day",
"text" = "county",
"text" = "urban_rural")
# imports the file with appropriate types and names
imported_four_day <- read_excel(path_four_day,
col_names = column_names_four_day,
col_types = names(column_names_four_day),
skip = 1)
# creates a working file to prevent need to reload.
# Selects only the variables needed for this analysis
working_four_day <- imported_four_day %>%
select(school_year,
sfa_num,
site_num,
four_day,
urban_rural)
mutate_four_day <- mutate(working_four_day, week_length = case_when(
four_day == "T-F" ~4,
four_day == "M-Th" ~4,
four_day == "N" ~5),
school_year = SY_YY_XX(school_year))
# Summarize by week length
sum_mutate_four_day <- group_by(mutate_four_day, school_year, sfa_num, urban_rural) %>%
summarize(week_length = mean(week_length))
# create the score 4-day score and removes extra variables
score_four_day <- mutate(sum_mutate_four_day, score_4day = case_when(
week_length == 5 ~1,
week_length == 4 ~0)) %>%
select(everything(),-week_length)
# need to convert following uses of score_four_day to final_four_day
final_four_day <- score_four_day
# Extra elements to remove
# mutate_four_day
# score_four_day
# sum_mutate_four_day
# working_four_day
# directory_four_day
# file_four_day
# path_four_day
# Elements to Keep
# imported_four_day
# final_four_day
# Take the monthly claims into a yearly claim
by_month <- group_by(district_meal_count, school_year,
claim_month, sfa_num_char, site_num_char, meal_type ) %>%
summarize(total_meals = sum(total_meals_served),
total_days = sum(count_days_served),
adp_average = total_meals/total_days)
# makes a df with district/year/mealtype
by_district <- group_by(district_meal_count,
school_year,
sfa_num_char,
meal_type,
provision) %>%
summarize(total_meals = sum(total_meals_served),
total_days = sum(count_days_served),
adp_average = total_meals/total_days,
free_meals = sum(free_meals_served),
reduced_meals = sum(reduced_meals_served),
paid_meals = sum(paid_meals_served))
by_district_17 <- filter(by_district, school_year == 2017)
# Makes a df with sites/year/meal
by_site <- group_by(district_meal_count,
school_year,
sfa_num_char,
site_num_char,
meal_type) %>%
summarize(total_meals = sum(total_meals_served),
total_days = sum(count_days_served),
adp_average = total_meals/total_days)
This Graph shows the ADP of all districts by month between 2010-2018
ggplot(data = by_month) +
geom_point(mapping=aes(x = claim_month,
y = adp_average), position = "jitter") +
labs(
title = "District ADP average by month 2010-2019",
x = "Claim Month",
y = "Average ADP"
)
rm("by_month")
raw_student_count <- select(imported_four_day,
school_year:site_name,
pk_12_enrollment:redu_perc) %>%
filter( !is.na(pk_12_enrollment)) %>%
mutate(school_year_num = SY_YY_XX(.$school_year),
free_perc = parse_number(free_perc)/100,
redu_perc = parse_number(redu_perc)/100)
# I need to take the different district sizes, compare them to the list above
working_student_count <- group_by(raw_student_count,
sfa_num, school_year_num) %>%
summarize(pk_12_enrollment_total = sum(pk_12_enrollment),
free_perc = mean(free_perc),
redu_perc = mean(redu_perc),
district_size_score = Score_District_Size(pk_12_enrollment_total))
ggplot(data = working_student_count) +
geom_histogram(mapping=aes(x = district_size_score))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
final_student_count <- working_student_count
#Can now remove the imported_four_day
rm("imported_four_day", "raw_student_count")
# SFSP
sfsp_path <- "./data/sfsp/" # place the file storage here
(sfsp_file <- list.files(sfsp_path))
## [1] "SFSP_2017_2018_Comparison.xlsx"
sfsp_file_path <- paste0(sfsp_path,sfsp_file)
# Names and types for each column in the excel file
column_names_sfsp <- c("text" = "sfa_num",
"text" = "sfa_name",
"text" = "site_num",
"text" ="site_name",
"numeric" = "meal_2017_sfsp",
"numeric" = "meal_2018_sfsp",
"numeric" = "change_meal_count_sfsp",
"numeric" = "change_meal_percent_sfsp",
"numeric" = "adp_2017_sfsp",
"numeric" = "adp_2018_sfsp",
"numeric" = "change_adp_count_sfsp",
"numeric" = "change_adp_percent_sfsp")
# Imports raw excel using above column names and types skipping the header line
raw_sfsp <- read_excel(sfsp_file_path,
col_names = column_names_sfsp,
col_types = names(column_names_sfsp),
skip = 1)
# Creates padded sfa_num & site_num to compare with other data sources
imported_sfsp <- mutate(raw_sfsp,
sfa_num_char = Pad_to_character(sfa_num),
site_num_char = Pad_to_character(site_num)) %>%
select(sfa_num_char, site_num_char, everything())
sfsp_1718 <- group_by(imported_sfsp,
sfa_num_char) %>%
summarize(site_count_sfsp = n(),
meal_count_sfsp_2017 = sum(meal_2017_sfsp),
meal_count_sfsp_2018 = sum(meal_2018_sfsp),
adp_sfsp_2017 = sum(adp_2017_sfsp),
adp_sfsp_2018 = sum(adp_2018_sfsp))
combined_sfsp_geodist <- left_join(combined_geo_dist,sfsp_1718,
by = c("CDE_AGREEMENT" = "sfa_num_char"))
# Removes all the temporary variables that we wont be using
rm("raw_sfsp", "column_names_sfsp", "sfsp_file", "sfsp_file_path", "sfsp_path", "sfsp_1718")