Loading Data
Data was downloaded from: https://rptsvr1.tea.texas.gov/adhocrpt/adstc.html
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.2 ✔ tibble 3.2.1
✔ lubridate 1.9.4 ✔ tidyr 1.3.1
✔ purrr 1.0.4
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(tidyr)
# Load the 2023-2024 county-level economically disadvantaged report.
econ_dis_23_24 <- read.csv(
  "Economically Disadvantaged Report_Statewide_Counties_2023-2024.csv"
)

# Load the location lookup, keeping only the name and id columns that are
# joined onto the final table.
locations <- select(
  read.csv("../../location_countyNames copy.csv"),
  Location,
  LocationId
)
County.Name Eligible.For.Free.Meals.Count
1 ANDERSON COUNTY 4349
2 ANDREWS COUNTY 1886
3 ANGELINA COUNTY 9006
4 ARANSAS COUNTY 332
5 ARCHER COUNTY 571
6 ARMSTRONG COUNTY -999
Eligible.For.Free.Meals.Percent Eligible.For.Reduced.Price.Meals.Count
1 55.34 366
2 44.18 214
3 56.75 640
4 11.19 0
5 26.57 138
6 -999.00 -999
Eligible.For.Reduced.Price.Meals.Percent
1 4.66
2 5.01
3 4.03
4 0.00
5 6.42
6 -999.00
Other.Economically.Disadvantaged.Count
1 724
2 12
3 1765
4 1476
5 0
6 0
Other.Economically.Disadvantaged.Percent Not.Economically.Disadvantaged.Count
1 9.21 2419
2 0.28 2157
3 11.12 4460
4 49.73 1160
5 0.00 1440
6 0.00 236
Not.Economically.Disadvantaged.Percent Total.Count
1 30.78 7858
2 50.53 4269
3 28.10 15871
4 39.08 2968
5 67.01 2149
6 73.98 319
#' Normalize county names to title case with special-case capitalization.
#'
#' Title-cases each name and then restores the interior capital for the
#' counties whose spelling title-casing gets wrong (DeWitt, McCulloch,
#' McLennan, McMullen).
#'
#' Vectorized: accepts a character vector of any length (including length 0)
#' and passes `NA` through unchanged, so it can be used directly inside
#' `mutate()` as well as element-by-element.
#'
#' @param name Character vector of county names, in any letter case.
#' @return Character vector the same length as `name`.
normalize_county_name <- function(name) {
  # Keys are the title-cased spellings produced by str_to_title(); values are
  # the corrected spellings.
  special_cases <- c(
    Dewitt = "DeWitt",
    Mcculloch = "McCulloch",
    Mclennan = "McLennan",
    Mcmullen = "McMullen"
  )

  name <- str_to_title(name)

  # %in% never yields NA, so missing names are simply left as NA.
  needs_fix <- name %in% names(special_cases)
  name[needs_fix] <- unname(special_cases[name[needs_fix]])
  name
}
#' Build a placeholder row for a location that has no reported data.
#'
#' @param county_name Value for the `Location` column.
#' @param time_frame Value for the `TimeFrame` column.
#' @return A data frame with columns `Location`, `Nums`, `Pct`, `TimeFrame`,
#'   where `Nums` and `Pct` are `NA`.
make_data_row <- function(county_name, time_frame) {
  data.frame(
    Location = county_name,
    Nums = NA,
    Pct = NA,
    TimeFrame = time_frame
  )
}
Cleaning data
The data contains values of -999.00. Per previous documentation (guidance from TEA), these values are replaced with 2.5 as part of the cleaning process.
# Replace the -999 sentinel with 2.5 in every non-name column, then keep only
# the columns needed to derive the economically disadvantaged figures.
clean_start <- econ_dis_23_24 |>
  mutate(across(-County.Name, ~ ifelse(.x == -999.00, 2.5, .x))) |>
  select(
    County.Name,
    Not.Economically.Disadvantaged.Count,
    Not.Economically.Disadvantaged.Percent,
    Total.Count
  )

# Turn "ANDERSON COUNTY" into "Anderson" and aggregate to one row per county.
cleaned_data <- clean_start |>
  mutate(
    County.Name = gsub(" County", "", str_to_title(County.Name)),
    # Only normalise whitespace (trim the ends, collapse repeats). Deleting
    # every space would merge multi-word names ("Van Zandt" -> "VanZandt"),
    # which the title-casing in normalize_county_name() then turns into
    # "Vanzandt".
    # NOTE(review): assumes the location lookup spells multi-word counties
    # with spaces ("El Paso", "Van Zandt") - confirm against that file.
    County.Name = trimws(gsub("\\s+", " ", County.Name)),
    # vapply() guarantees a plain, unnamed character vector.
    County.Name = vapply(
      County.Name,
      normalize_county_name,
      character(1),
      USE.NAMES = FALSE
    )
  ) |>
  rename(Location = County.Name) |>
  group_by(Location) |>
  # NOTE(review): summing the percent column is only meaningful when each
  # county appears on a single row - confirm against the source file.
  summarise(
    Total_Not_ED = sum(Not.Economically.Disadvantaged.Count),
    Pct_Not_ED = sum(Not.Economically.Disadvantaged.Percent),
    Total_Num = sum(Total.Count)
  )
# state
# Statewide figures are derived from the county totals in cleaned_data.
state_total <- sum(cleaned_data$Total_Num)
state_not_ed <- sum(cleaned_data$Total_Not_ED)

# Economically disadvantaged = everyone not counted as "not disadvantaged".
state_num <- state_total - state_not_ed

# Expressed on the 0-100 scale so the Texas row is comparable with the county
# rows, whose Pct is computed as 100 - Pct_Not_ED (a bare 1 - ratio would put
# the state on a 0-1 scale in the same column).
state_pct <- 100 * (1 - state_not_ed / state_total)

texas <- data.frame(
  Location = "Texas",
  Nums = state_num,
  Pct = state_pct,
  TimeFrame = "2023-2024"
)
# Placeholder row (NA count and percent) for Loving County.
# NOTE(review): presumably Loving is absent from the source file - confirm.
loving <- make_data_row("Loving", "2023-2024")

# County rows with the economically disadvantaged count and percent, followed
# by the Loving placeholder and the statewide row.
joined <- cleaned_data |>
  transmute(
    Location,
    Nums = Total_Num - Total_Not_ED,
    Pct = 100 - Pct_Not_ED,
    TimeFrame = "2023-2024"
  ) |>
  rbind(loving, texas)

# Split into one long table per data format, sharing a single Data column.
nums <- joined |>
  transmute(Location, Data = Nums, TimeFrame, DataFormat = "Number")

pcts <- joined |>
  transmute(Location, Data = Pct, TimeFrame, DataFormat = "Percent")

# Stack both formats, tag the location type, sort with the state first, and
# attach the location ids.
final_data <- rbind(nums, pcts) |>
  mutate(LocationType = ifelse(Location == "Texas", "State", "County")) |>
  arrange(desc(LocationType), Location, DataFormat) |>
  left_join(locations, by = "Location")

write.csv(
  final_data,
  file = "CLEANED_9.6_Economically_Disadvantaged_2023_2024.csv",
  row.names = FALSE
)