Load assessment data
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(rpart)
library(readxl)
assessment_path <- 'Historical_AssessmentResults_SY15-to-SY21.xlsx'
t_assess_raw_school <- read_excel(path = assessment_path,
sheet = 'SY21 School & District',
range = 'b2:f7312')
t_assess_raw_science <- read_excel(path = assessment_path,
sheet = 'SY21 School & District',
range = 'db3:db7312',
col_names = c('science_proficiency'),
na = '**')
t_assess_raw <- t_assess_raw_school %>%
bind_cols(t_assess_raw_science) %>%
janitor::clean_names()
# Remove subgroups
t_assess <- t_assess_raw %>%
filter(school == 999) %>%
filter(population_group == 'Total Population') %>%
filter(county != 'Statewide') %>%
mutate(proficiency = science_proficiency)
print(t_assess)
## # A tibble: 55 × 7
## county school school_name population_group subgroup science_proficiency
## <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Barbour 999 Barbour Count… Total Population Total 26.0
## 2 Berkeley 999 Berkeley Coun… Total Population Total 28.6
## 3 Boone 999 Boone County … Total Population Total 19.6
## 4 Braxton 999 Braxton Count… Total Population Total 22.6
## 5 Brooke 999 Brooke County… Total Population Total 21.1
## 6 Cabell 999 Cabell County… Total Population Total 30.8
## 7 Calhoun 999 Calhoun Count… Total Population Total 27.8
## 8 Clay 999 Clay County T… Total Population Total 23.3
## 9 Doddridge 999 Doddridge Cou… Total Population Total 31.3
## 10 Fayette 999 Fayette Count… Total Population Total 17.4
## # ℹ 45 more rows
## # ℹ 1 more variable: proficiency <dbl>
Load spending data
spending_path <- 'elsec22t.xls'
t_spending_raw <- read_excel(path = spending_path,,
sheet = 'elsec22t',
range = 'a1:gb14106') %>%
janitor::clean_names()
## New names:
## • `` -> `...73`
## • `` -> `...74`
## • `` -> `...75`
## • `` -> `...76`
## • `` -> `...77`
## • `` -> `...78`
## • `` -> `...79`
## • `` -> `...80`
## • `` -> `...81`
## • `` -> `...82`
## • `` -> `...83`
## • `` -> `...84`
## • `` -> `...85`
## • `` -> `...86`
## • `` -> `...87`
## • `` -> `...88`
## • `` -> `...89`
## • `` -> `...90`
## • `` -> `...91`
## • `` -> `...92`
## • `` -> `...93`
## • `` -> `...94`
## • `` -> `...95`
## • `` -> `...96`
## • `` -> `...97`
## • `` -> `...98`
## • `` -> `...99`
## • `` -> `...100`
## • `` -> `...101`
## • `` -> `...102`
## • `` -> `...103`
## • `` -> `...104`
## • `` -> `...105`
## • `` -> `...106`
## • `` -> `...107`
## • `` -> `...108`
## • `` -> `...109`
## • `` -> `...110`
## • `` -> `...111`
## • `` -> `...112`
## • `` -> `...113`
## • `` -> `...114`
## • `` -> `...115`
## • `` -> `...116`
## • `` -> `...117`
## • `` -> `...118`
## • `` -> `...119`
## • `` -> `...120`
## • `` -> `...121`
## • `` -> `...122`
## • `` -> `...123`
## • `` -> `...124`
## • `` -> `...125`
## • `` -> `...126`
## • `` -> `...127`
## • `` -> `...128`
## • `` -> `...129`
## • `` -> `...130`
## • `` -> `...131`
## • `` -> `...132`
## • `` -> `...133`
## • `` -> `...134`
## • `` -> `...135`
## • `` -> `...136`
## • `` -> `...137`
## • `` -> `...138`
## • `` -> `...139`
## • `` -> `...140`
## • `` -> `...141`
## • `` -> `...142`
## • `` -> `...143`
## • `` -> `...144`
## • `` -> `...145`
## • `` -> `...146`
## • `` -> `...147`
## • `` -> `...148`
## • `` -> `...149`
## • `` -> `...150`
## • `` -> `...151`
## • `` -> `...152`
## • `` -> `...153`
## • `` -> `...154`
## • `` -> `...155`
## • `` -> `...156`
## • `` -> `...157`
## • `` -> `...158`
## • `` -> `...159`
## • `` -> `...160`
## • `` -> `...161`
## • `` -> `...162`
## • `` -> `...163`
## • `` -> `...164`
## • `` -> `...165`
## • `` -> `...166`
## • `` -> `...167`
## • `` -> `...168`
## • `` -> `...169`
## • `` -> `...170`
## • `` -> `...171`
## • `` -> `...172`
## • `` -> `...173`
## • `` -> `...174`
## • `` -> `...175`
## • `` -> `...176`
## • `` -> `...177`
## • `` -> `...178`
## • `` -> `...179`
## • `` -> `...180`
## • `` -> `...181`
## • `` -> `...182`
## • `` -> `...183`
## • `` -> `...184`
cooperates <- c('MOUNTAIN STATE EDUCATIONAL SERVICES COOPERATIVE',
'EASTERN PANHANDLE INSTRUCTIONAL COOPERATIVE',
'SOUTHERN EDUCATIONAL SERVICES COOPERATIVE')
t_spending <- t_spending_raw %>%
filter(state == 49) %>%
filter(!name %in% cooperates) %>%
select(name, enroll, tfedrev, tstrev, tlocrev, totalexp, ppcstot) %>%
mutate(county = str_to_title(str_split_i(name, ' ',1)),
county = ifelse(county == 'Mc', 'McDowell', county))
print(t_spending)
## # A tibble: 55 × 8
## name enroll tfedrev tstrev tlocrev totalexp ppcstot county
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 BARBOUR CO SCH DIST 2144 7559 16584 5872 28021 11885 Barbour
## 2 BERKELEY CO SCH DIST 19722 48407 140127 86699 264253 12704 Berkeley
## 3 BOONE CO SCH DIST 3177 8194 26858 14564 48642 14663 Boone
## 4 BRAXTON CO SCH DIST 1747 5479 12748 6404 24417 13153 Braxton
## 5 BROOKE CO SCH DIST 2582 6791 17114 21352 41908 15642 Brooke
## 6 CABELL CO SCH DIST 11667 42518 88337 66699 183621 14538 Cabell
## 7 CALHOUN CO SCH DIST 861 3254 9953 3190 15154 16085 Calhoun
## 8 CLAY CO SCH DIST 1669 6157 17655 2791 25963 13825 Clay
## 9 DODDRIDGE CO SCH DIST 1082 3455 3999 31752 38493 23563 Doddrid…
## 10 FAYETTE CO SCH DIST 5594 15293 51759 23477 83373 13777 Fayette
## # ℹ 45 more rows
Load demographic data
t_demographics_unemployed <- read_csv('unemployed.csv',
skip = 4,
na = 'N/A') %>%
janitor::clean_names() %>%
filter(county != 'West Virginia',
county != 'United States',
!is.na(value_percent) ) %>%
select(county, value_percent) %>%
rename(unemployed = value_percent) %>%
mutate(county = str_remove(county, "\\s*County$"))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 62 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): County, FIPS, Rank within US (of 3143 counties)
## dbl (2): Value (Percent), People (Unemployed)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
t_demographics <- t_demographics_unemployed
print(t_demographics)
## # A tibble: 55 × 2
## county unemployed
## <chr> <dbl>
## 1 McDowell 15.1
## 2 Braxton 14.4
## 3 Logan 13.3
## 4 Calhoun 12.2
## 5 Roane 11.7
## 6 Clay 11.2
## 7 Mingo 11.2
## 8 Webster 11.1
## 9 Monroe 10.6
## 10 Barbour 10.1
## # ℹ 45 more rows
Joined data
# Merge data
t2 <- t_assess %>%
full_join(t_spending, by = 'county')
t <- t2 %>%
full_join(t_demographics, by = 'county') %>%
mutate(totalrev = tfedrev + tstrev +tlocrev,
profit = totalrev - totalexp,
state = 'West Virginia')
Correlations
library(ggcorrplot)
tcorr <- t %>%
select(where(is.numeric))
pairs(tcorr)

ggcorrplot(cor(tcorr),
lab = T)

Linear Regression Model
m3 <- lm(science_proficiency ~ ppcstot + I(ppcstot^2) + unemployed + enroll, data = t)
summary(m3)
##
## Call:
## lm(formula = science_proficiency ~ ppcstot + I(ppcstot^2) + unemployed +
## enroll, data = t)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.8181 -4.1178 0.0407 3.3716 13.3661
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.661e+01 2.761e+01 0.602 0.5501
## ppcstot 7.928e-04 3.320e-03 0.239 0.8122
## I(ppcstot^2) -9.082e-09 9.784e-08 -0.093 0.9264
## unemployed -4.013e-01 2.730e-01 -1.470 0.1478
## enroll 4.353e-04 1.837e-04 2.369 0.0217 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.413 on 50 degrees of freedom
## Multiple R-squared: 0.1982, Adjusted R-squared: 0.1341
## F-statistic: 3.091 on 4 and 50 DF, p-value: 0.02378
library(usmap)
data("countypop", package = "usmap")
wv_fips <- countypop %>%
filter(abbr == "WV") %>%
select(county_name = county, fips) %>%
mutate(county = str_remove(county_name, "\\s*County$"))
t <- t %>%
mutate(county = str_to_title(county)) %>%
left_join(wv_fips, by = c("county" = "county"))
plot_usmap(data = t,
values = "proficiency",
include = 'West Virginia',
regions = "counties") +
scale_fill_continuous(name = "proficiency",
low = 'red',
high = 'blue') +
theme(legend.position = "right") +
labs(title = 'Proficiency')
