R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

colorado_income <- get_acs(geography = "school district (unified)",
                           variables = "B19013_001",
                           state = "CO", key = apikey)

## Getting data from the 2013-2017 5-year ACS

colorado_school <- school_districts(state = "CO",
                                    type = "unified",
                                    class = "sf")

## 
  |                                                                       
  |                                                                 |   0%
  |                                                                       
  |                                                                 |   1%
  |                                                                       
  |=                                                                |   1%
  |                                                                       
  |==                                                               |   3%
  |                                                                       
  |===                                                              |   4%
  |                                                                       
  |===                                                              |   5%
  |                                                                       
  |====                                                             |   6%
  |                                                                       
  |====                                                             |   7%
  |                                                                       
  |=====                                                            |   7%
  |                                                                       
  |=====                                                            |   8%
  |                                                                       
  |======                                                           |   9%
  |                                                                       
  |=======                                                          |  10%
  |                                                                       
  |=======                                                          |  11%
  |                                                                       
  |========                                                         |  12%
  |                                                                       
  |========                                                         |  13%
  |                                                                       
  |=========                                                        |  13%
  |                                                                       
  |=========                                                        |  14%
  |                                                                       
  |==========                                                       |  15%
  |                                                                       
  |==========                                                       |  16%
  |                                                                       
  |===========                                                      |  17%
  |                                                                       
  |============                                                     |  18%
  |                                                                       
  |============                                                     |  19%
  |                                                                       
  |=============                                                    |  19%
  |                                                                       
  |=============                                                    |  20%
  |                                                                       
  |==============                                                   |  21%
  |                                                                       
  |==============                                                   |  22%
  |                                                                       
  |===============                                                  |  23%
  |                                                                       
  |================                                                 |  24%
  |                                                                       
  |================                                                 |  25%
  |                                                                       
  |=================                                                |  26%
  |                                                                       
  |=================                                                |  27%
  |                                                                       
  |==================                                               |  27%
  |                                                                       
  |==================                                               |  28%
  |                                                                       
  |===================                                              |  29%
  |                                                                       
  |====================                                             |  30%
  |                                                                       
  |====================                                             |  31%
  |                                                                       
  |=====================                                            |  32%
  |                                                                       
  |=====================                                            |  33%
  |                                                                       
  |======================                                           |  33%
  |                                                                       
  |======================                                           |  34%
  |                                                                       
  |=======================                                          |  35%
  |                                                                       
  |=======================                                          |  36%
  |                                                                       
  |========================                                         |  37%
  |                                                                       
  |=========================                                        |  38%
  |                                                                       
  |=========================                                        |  39%
  |                                                                       
  |==========================                                       |  39%
  |                                                                       
  |==========================                                       |  40%
  |                                                                       
  |===========================                                      |  41%
  |                                                                       
  |===========================                                      |  42%
  |                                                                       
  |============================                                     |  43%
  |                                                                       
  |=============================                                    |  44%
  |                                                                       
  |=============================                                    |  45%
  |                                                                       
  |==============================                                   |  45%
  |                                                                       
  |==============================                                   |  46%
  |                                                                       
  |==============================                                   |  47%
  |                                                                       
  |===============================                                  |  47%
  |                                                                       
  |===============================                                  |  48%
  |                                                                       
  |================================                                 |  49%
  |                                                                       
  |=================================                                |  50%
  |                                                                       
  |=================================                                |  51%
  |                                                                       
  |=================================                                |  52%
  |                                                                       
  |==================================                               |  52%
  |                                                                       
  |==================================                               |  53%
  |                                                                       
  |===================================                              |  53%
  |                                                                       
  |===================================                              |  54%
  |                                                                       
  |====================================                             |  55%
  |                                                                       
  |====================================                             |  56%
  |                                                                       
  |=====================================                            |  57%
  |                                                                       
  |======================================                           |  58%
  |                                                                       
  |======================================                           |  59%
  |                                                                       
  |=======================================                          |  60%
  |                                                                       
  |========================================                         |  61%
  |                                                                       
  |========================================                         |  62%
  |                                                                       
  |=========================================                        |  63%
  |                                                                       
  |=========================================                        |  64%
  |                                                                       
  |==========================================                       |  64%
  |                                                                       
  |==========================================                       |  65%
  |                                                                       
  |===========================================                      |  65%
  |                                                                       
  |===========================================                      |  66%
  |                                                                       
  |===========================================                      |  67%
  |                                                                       
  |============================================                     |  68%
  |                                                                       
  |=============================================                    |  69%
  |                                                                       
  |==============================================                   |  70%
  |                                                                       
  |==============================================                   |  71%
  |                                                                       
  |===============================================                  |  72%
  |                                                                       
  |===============================================                  |  73%
  |                                                                       
  |================================================                 |  73%
  |                                                                       
  |================================================                 |  74%
  |                                                                       
  |=================================================                |  75%
  |                                                                       
  |=================================================                |  76%
  |                                                                       
  |==================================================               |  77%
  |                                                                       
  |===================================================              |  79%
  |                                                                       
  |====================================================             |  80%
  |                                                                       
  |=====================================================            |  81%
  |                                                                       
  |======================================================           |  83%
  |                                                                       
  |======================================================           |  84%
  |                                                                       
  |=======================================================          |  84%
  |                                                                       
  |=======================================================          |  85%
  |                                                                       
  |========================================================         |  85%
  |                                                                       
  |========================================================         |  87%
  |                                                                       
  |=========================================================        |  88%
  |                                                                       
  |==========================================================       |  89%
  |                                                                       
  |===========================================================      |  90%
  |                                                                       
  |===========================================================      |  91%
  |                                                                       
  |============================================================     |  92%
  |                                                                       
  |============================================================     |  93%
  |                                                                       
  |=============================================================    |  94%
  |                                                                       
  |==============================================================   |  95%
  |                                                                       
  |==============================================================   |  96%
  |                                                                       
  |===============================================================  |  96%
  |                                                                       
  |===============================================================  |  97%
  |                                                                       
  |===============================================================  |  98%
  |                                                                       
  |================================================================ |  99%
  |                                                                       
  |=================================================================| 100%

co_school_joined <- left_join(colorado_school,
                              colorado_income,
                              by = "GEOID")

# Remove 
rm("colorado_income", 'colorado_school')

Including Plots

You can also embed plots, for example:

profiles <- read_excel("./data/blueprint_designed/SFA_profile_Feb_2019.xls", 
                       col_types = c("text", "text", "text","logical", 
                                     "numeric","numeric", 
                                     "numeric", "logical",
                                     "logical", "logical", 
                                     "logical","logical"))

profiles <- profiles %>% 
  dplyr::rename(CDE_AGREEMENT = "Sponsor #", 
                day4 = "4 Day School Week",
                student_count = "Pupil Count",
                free_perc = "Free %",
                redu_perc = "Reduced-Price %", 
                sbp = "Breakfast", 
                nslp = "Lunch",
                smp = "Milk",
                snack = "Snack",
                sfsp = "SFSP",
                sfa_name = "Sponsor Name") %>% 
  mutate(sfa_name = str_to_lower(sfa_name)) %>% 
  mutate(free_students = free_perc * student_count) %>% # count of free students
  mutate(redu_students = redu_perc * student_count) %>% # count of redu students
  mutate(free_and_red_perc = free_perc + redu_perc) # combined free & redu


# The above actually takes the raw excel into a semi-usable dataframe
profiles <- filter(profiles, Profile == "Public")

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Import all the schools with GEOID!
# Must have current directory above data

site_all <- read_excel("./data/CDE_to_GEOID.xlsx", col_types = "text") %>% 
  mutate(GEOID = ACTUAL_GEOID) %>% 
  select(GEOID, CDE_AGREEMENT, everything(), -ACTUAL_GEOID)

site_all <- group_by(site_all,
                     CDE_AGREEMENT)

# Gets all the districts with their site count and GEOID
(districts <- summarise(site_all, 
                        count_schools = n(),
                        GEOID = first(GEOID)))

## # A tibble: 176 x 3
##    CDE_AGREEMENT count_schools GEOID  
##    <chr>                 <int> <chr>  
##  1 0010                     17 0805550
##  2 0020                     47 0806900
##  3 0030                     11 0801950
##  4 0040                     25 0802580
##  5 0050                      3 0802430
##  6 0060                      4 0806750
##  7 0070                     20 0807230
##  8 0100                      3 0802070
##  9 0110                      2 0806420
## 10 0120                      8 0803780
## # ... with 166 more rows

# Combines districts with GEOID number
geoid_districts <- left_join(districts, profiles, by = "CDE_AGREEMENT")

### This WORKS!!
combined_geo_dist <- left_join(co_school_joined, geoid_districts, 
                               by = "GEOID")

#remove
rm("site_all", "districts", "geoid_districts")

list.files("./data/meal_count", pattern = "^SY")

## [1] "SY10-11.xls" "SY11-12.xls" "SY12-13.xls" "SY13-14.xls" "SY14-15.xls"
## [6] "SY15-16.xls" "SY16-17.xls" "SY17-18.xls"

meal_count_directory <-  "./data/meal_count/"
count_files <- list.files("./data/meal_count", pattern = "^SY")

# creates a list of the matching files path
count_files <- paste0(meal_count_directory,count_files)

# Creates list of with col_type = col_name, use names() to use the col_type
meal_count_column_unknown <- c("text" = "sfa_num",
                               "text" = "sfa_name", 
                               "text" = "site_num",
                               "text" ="site_name", 
                               "date"= "claim_date", 
                               "text" ="meal_type",
                               "text" = "earning_type",
                               "text" = "rate_level",
                               "text" = "provision",
                               "numeric" = "count_days_served", 
                               "numeric" = "free_meals_served", 
                               "numeric" = "reduced_meals_served", 
                               "numeric" = "paid_meals_served", 
                               "numeric" = "total_meals_served")

# Imports all the files into a large list of lists
import_meal_count <-lapply(count_files, 
                           read_excel,
                           range = "a2:n50000",
                           col_names = meal_count_column_unknown,
                           col_types = names(meal_count_column_unknown)
)

# Combines all the lists into a single dataframe, removes all the NAs 
# created by the large loaded range from the excels
huge_meal_count <- bind_rows(import_meal_count) %>% 
  filter(!is.na(sfa_num))

# reduces working memory load by removing the imported dataframe
rm("import_meal_count", "meal_count_column_unknown",
   "meal_count_directory", "count_files")

# creates the sfa num format and site number needed for matching 0000
# creates the school year for the claim.  
district_meal_count <- mutate(huge_meal_count, 
                              sfa_num_char = str_pad(sfa_num, width = 4, 
                                                     side = "left", pad = "0"),
                              site_num_char = str_pad(site_num, width = 4, 
                                                      side = "left", pad = "0"),
                              claim_year = year(claim_date),
                              claim_month = month(claim_date),
                              school_year = year(claim_date) + 
                                (month(claim_date) <= 7),
                              meal_type = rename_meal_types(meal_type))


# remove and reorder a number of un-needed variables
district_meal_count <- select(district_meal_count, sfa_num_char, site_num_char, 
                              school_year, meal_type,
                              everything(), -sfa_num, -site_num, 
                              -earning_type, -claim_year)

 district_meal_count %>% 
  group_by(sfa_num_char) %>% 
  ggplot() +
  geom_point(aes(y = total_meals_served, x = claim_date), position = "jitter") +
  facet_wrap(~rate_level, nrow = 2) +
  geom_smooth(aes(y = total_meals_served, x = claim_date))

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

district_meal_count %>% 
  ggplot() +
  geom_point(aes(y = total_meals_served, x = claim_date), position = "jitter")

district_meal_count %>% 
  group_by(sfa_num_char) %>% 
  ggplot() +
  geom_point(aes(y = total_meals_served, x = claim_date), position = "jitter") +
  facet_wrap(~provision) +
  geom_smooth(aes(y = total_meals_served, x = claim_date))

## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

directory_four_day <- list.dirs("./data/four_day")
file_four_day <- list.files("./data/four_day")
path_four_day <- paste0(directory_four_day,"/", file_four_day)

column_names_four_day <- c("text" = "school_year",
                           "text" = "sfa_num",
                           "text" = "sfa_name", 
                           "text" = "site_num",
                           "text" ="site_name",
                           "text" ="meal_type",
                           "numeric" = "pk_12_enrollment",
                           "text" = "free_perc",
                           "text" = "redu_perc",
                           "text" = "free_and_redu_perc",
                           "text" = "four_day",
                           "text" = "county",
                           "text" = "urban_rural")

# imports the file with appropriate types and names
imported_four_day <- read_excel(path_four_day, 
                                col_names = column_names_four_day,
                                col_types = names(column_names_four_day),
                                skip = 1)

# creates a working file to prevent need to reload. 
# Selects only the variables needed for this analysis
working_four_day <- imported_four_day %>% 
  select(school_year, 
         sfa_num, 
         site_num, 
         four_day, 
         urban_rural)

mutate_four_day <- mutate(working_four_day, week_length = case_when(
  four_day == "T-F" ~4, 
  four_day == "M-Th" ~4, 
  four_day == "N" ~5),
  school_year = SY_YY_XX(school_year))


# Summarize by week length
sum_mutate_four_day <- group_by(mutate_four_day, school_year, sfa_num, urban_rural) %>% 
  summarize(week_length = mean(week_length))

# create the score 4-day score and removes extra variables
score_four_day <- mutate(sum_mutate_four_day, score_4day = case_when(
  week_length == 5 ~1,
  week_length == 4 ~0)) %>% 
  select(everything(),-week_length)
# need to convert following uses of score_four_day to final_four_day
final_four_day <- score_four_day 


# Extra elements to remove
# mutate_four_day
# score_four_day
# sum_mutate_four_day
# working_four_day
# directory_four_day
# file_four_day
# path_four_day

# Elements to Keep
# imported_four_day
# final_four_day

# Take the monthly claims into a yearly claim
by_month <- group_by(district_meal_count, school_year,
                     claim_month, sfa_num_char, site_num_char, meal_type ) %>%
  summarize(total_meals = sum(total_meals_served),
            total_days = sum(count_days_served),
            adp_average = total_meals/total_days)

# makes a df with district/year/mealtype
by_district <- group_by(district_meal_count,
                        school_year, 
                        sfa_num_char,
                        meal_type,
                        provision) %>% 
  summarize(total_meals = sum(total_meals_served),
            total_days = sum(count_days_served),
            adp_average = total_meals/total_days,
            free_meals = sum(free_meals_served),
            reduced_meals = sum(reduced_meals_served),
            paid_meals = sum(paid_meals_served))

by_district_17 <- filter(by_district, school_year == 2017)

# Makes a df with sites/year/meal
by_site <- group_by(district_meal_count,
                    school_year,
                    sfa_num_char,
                    site_num_char,
                    meal_type) %>%
  summarize(total_meals = sum(total_meals_served),
            total_days = sum(count_days_served),
            adp_average = total_meals/total_days)

Monthly ADP over time

This Graph shows the ADP of all districts by month between 2010-2018

ggplot(data = by_month) +
  geom_point(mapping=aes(x = claim_month,
                         y = adp_average), position = "jitter") +
  labs(
    title = "District ADP average by month 2010-2019",
    x = "Claim Month",
    y = "Average ADP"
  )

rm("by_month")

raw_student_count <- select(imported_four_day,
                            school_year:site_name, 
                            pk_12_enrollment:redu_perc) %>% 
  filter( !is.na(pk_12_enrollment)) %>% 
  mutate(school_year_num = SY_YY_XX(.$school_year),
         free_perc = parse_number(free_perc)/100,
         redu_perc = parse_number(redu_perc)/100)
  
# I need to take the different district sizes, compare them to the list above




working_student_count <- group_by(raw_student_count, 
                                  sfa_num, school_year_num) %>% 
  summarize(pk_12_enrollment_total = sum(pk_12_enrollment),
            free_perc = mean(free_perc),
            redu_perc = mean(redu_perc),
            district_size_score = Score_District_Size(pk_12_enrollment_total))


ggplot(data = working_student_count) +
  geom_histogram(mapping=aes(x = district_size_score))

## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

final_student_count <- working_student_count
#Can now remove the imported_four_day
rm("imported_four_day", "raw_student_count")

# SFSP

sfsp_path <- "./data/sfsp/" # place the file storage here
(sfsp_file <- list.files(sfsp_path))

## [1] "SFSP_2017_2018_Comparison.xlsx"

sfsp_file_path <- paste0(sfsp_path,sfsp_file)

# Names and types for each column in the excel file
column_names_sfsp <- c("text" = "sfa_num",
                                "text" = "sfa_name",
                                "text" = "site_num",
                                "text" ="site_name",
                                "numeric" = "meal_2017_sfsp",
                                "numeric" = "meal_2018_sfsp",
                                "numeric" = "change_meal_count_sfsp",
                                "numeric" = "change_meal_percent_sfsp",
                                "numeric" = "adp_2017_sfsp",
                                "numeric" = "adp_2018_sfsp",
                                "numeric" = "change_adp_count_sfsp",
                                "numeric" = "change_adp_percent_sfsp")

# Imports raw excel using above column names and types skipping the header line
raw_sfsp <- read_excel(sfsp_file_path, 
           col_names = column_names_sfsp,
           col_types = names(column_names_sfsp),
           skip = 1)  

# Creates padded sfa_num & site_num to compare with other data sources
imported_sfsp <- mutate(raw_sfsp,
                        sfa_num_char = Pad_to_character(sfa_num),
                        site_num_char = Pad_to_character(site_num)) %>% 
  select(sfa_num_char, site_num_char, everything())



sfsp_1718 <- group_by(imported_sfsp, 
                      sfa_num_char) %>% 
  summarize(site_count_sfsp = n(),
            meal_count_sfsp_2017 = sum(meal_2017_sfsp),
            meal_count_sfsp_2018 = sum(meal_2018_sfsp),
            adp_sfsp_2017 = sum(adp_2017_sfsp),
            adp_sfsp_2018 = sum(adp_2018_sfsp))


combined_sfsp_geodist <- left_join(combined_geo_dist,sfsp_1718, 
                                   by = c("CDE_AGREEMENT" = "sfa_num_char"))            


# Removes all the temporary variables that we wont be using
rm("raw_sfsp", "column_names_sfsp", "sfsp_file", "sfsp_file_path", "sfsp_path", "sfsp_1718")

Untitled

Cameron

4/18/2019

R Markdown

Including Plots

Monthly ADP over time