library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(rpart)
library(readxl)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(stargazer)
## 
## Please cite as: 
## 
##  Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
##  R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
#loading testing (joinable) data and filtering down to just math scores
historical_raw <- excel_sheets("C:/Users/paulm/OneDrive/Desktop/Acct426_Project2/Historical_AssessmentResults_SY15-to-SY21.xlsx")

current_raw <- read_excel("C:/Users/paulm/OneDrive/Desktop/Acct426_Project2/SY22_AssessmentProficiency_Public_All_Final.xlsx")
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...38`
## • `` -> `...39`
## • `` -> `...40`
## • `` -> `...41`
## • `` -> `...43`
## • `` -> `...44`
## • `` -> `...45`
## • `` -> `...46`
## • `` -> `...48`
## • `` -> `...49`
## • `` -> `...50`
## • `` -> `...51`
## • `` -> `...53`
## • `` -> `...54`
## • `` -> `...55`
## • `` -> `...56`
## • `` -> `...58`
## • `` -> `...59`
## • `` -> `...60`
## • `` -> `...61`
## • `` -> `...63`
## • `` -> `...64`
## • `` -> `...65`
## • `` -> `...66`
## • `` -> `...68`
## • `` -> `...69`
## • `` -> `...70`
## • `` -> `...71`
## • `` -> `...73`
## • `` -> `...74`
## • `` -> `...75`
## • `` -> `...76`
## • `` -> `...78`
## • `` -> `...79`
## • `` -> `...80`
## • `` -> `...81`
## • `` -> `...83`
## • `` -> `...84`
## • `` -> `...85`
## • `` -> `...86`
## • `` -> `...88`
## • `` -> `...89`
## • `` -> `...90`
## • `` -> `...91`
## • `` -> `...93`
## • `` -> `...94`
## • `` -> `...95`
## • `` -> `...96`
## • `` -> `...98`
## • `` -> `...99`
## • `` -> `...100`
## • `` -> `...101`
## • `` -> `...103`
## • `` -> `...104`
## • `` -> `...105`
## • `` -> `...106`
current_math <- current_raw %>% 
  select(1:41)

historical_test_raw_2021 <- read_excel("C:/Users/paulm/OneDrive/Desktop/Acct426_Project2/Historical_AssessmentResults_SY15-to-SY21.xlsx",
                                  sheet=1)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...38`
## • `` -> `...39`
## • `` -> `...40`
## • `` -> `...41`
## • `` -> `...43`
## • `` -> `...44`
## • `` -> `...45`
## • `` -> `...46`
## • `` -> `...48`
## • `` -> `...49`
## • `` -> `...50`
## • `` -> `...51`
## • `` -> `...53`
## • `` -> `...54`
## • `` -> `...55`
## • `` -> `...56`
## • `` -> `...58`
## • `` -> `...59`
## • `` -> `...60`
## • `` -> `...61`
## • `` -> `...63`
## • `` -> `...64`
## • `` -> `...65`
## • `` -> `...66`
## • `` -> `...68`
## • `` -> `...69`
## • `` -> `...70`
## • `` -> `...71`
## • `` -> `...73`
## • `` -> `...74`
## • `` -> `...75`
## • `` -> `...76`
## • `` -> `...78`
## • `` -> `...79`
## • `` -> `...80`
## • `` -> `...81`
## • `` -> `...83`
## • `` -> `...84`
## • `` -> `...85`
## • `` -> `...86`
## • `` -> `...88`
## • `` -> `...89`
## • `` -> `...90`
## • `` -> `...91`
## • `` -> `...93`
## • `` -> `...94`
## • `` -> `...95`
## • `` -> `...96`
## • `` -> `...98`
## • `` -> `...99`
## • `` -> `...100`
## • `` -> `...101`
## • `` -> `...103`
## • `` -> `...104`
## • `` -> `...105`
## • `` -> `...106`
test_2021_math <- historical_test_raw_2021 %>% 
  select(1:41)
historical_test_raw_2019 <- read_excel("C:/Users/paulm/OneDrive/Desktop/Acct426_Project2/Historical_AssessmentResults_SY15-to-SY21.xlsx",
                                       sheet=2)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...38`
## • `` -> `...39`
## • `` -> `...40`
## • `` -> `...41`
## • `` -> `...43`
## • `` -> `...44`
## • `` -> `...45`
## • `` -> `...46`
## • `` -> `...48`
## • `` -> `...49`
## • `` -> `...50`
## • `` -> `...51`
## • `` -> `...53`
## • `` -> `...54`
## • `` -> `...55`
## • `` -> `...56`
## • `` -> `...58`
## • `` -> `...59`
## • `` -> `...60`
## • `` -> `...61`
## • `` -> `...63`
## • `` -> `...64`
## • `` -> `...65`
## • `` -> `...66`
## • `` -> `...68`
## • `` -> `...69`
## • `` -> `...70`
## • `` -> `...71`
## • `` -> `...73`
## • `` -> `...74`
## • `` -> `...75`
## • `` -> `...76`
## • `` -> `...78`
## • `` -> `...79`
## • `` -> `...80`
## • `` -> `...81`
## • `` -> `...83`
## • `` -> `...84`
## • `` -> `...85`
## • `` -> `...86`
## • `` -> `...88`
## • `` -> `...89`
## • `` -> `...90`
## • `` -> `...91`
## • `` -> `...93`
## • `` -> `...94`
## • `` -> `...95`
## • `` -> `...96`
## • `` -> `...98`
## • `` -> `...99`
## • `` -> `...100`
## • `` -> `...101`
## • `` -> `...103`
## • `` -> `...104`
## • `` -> `...105`
## • `` -> `...106`
test_2019_math <- historical_test_raw_2019 %>% 
  select(1:41)

historical_test_raw_2018 <- read_excel("C:/Users/paulm/OneDrive/Desktop/Acct426_Project2/Historical_AssessmentResults_SY15-to-SY21.xlsx",
                                       sheet=3)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...38`
## • `` -> `...39`
## • `` -> `...40`
## • `` -> `...41`
## • `` -> `...43`
## • `` -> `...44`
## • `` -> `...45`
## • `` -> `...46`
## • `` -> `...48`
## • `` -> `...49`
## • `` -> `...50`
## • `` -> `...51`
## • `` -> `...53`
## • `` -> `...54`
## • `` -> `...55`
## • `` -> `...56`
## • `` -> `...58`
## • `` -> `...59`
## • `` -> `...60`
## • `` -> `...61`
## • `` -> `...63`
## • `` -> `...64`
## • `` -> `...65`
## • `` -> `...66`
## • `` -> `...68`
## • `` -> `...69`
## • `` -> `...70`
## • `` -> `...71`
## • `` -> `...73`
## • `` -> `...74`
## • `` -> `...75`
## • `` -> `...76`
## • `` -> `...78`
## • `` -> `...79`
## • `` -> `...80`
## • `` -> `...81`
## • `` -> `...83`
## • `` -> `...84`
## • `` -> `...85`
## • `` -> `...86`
test_2018_math <- historical_test_raw_2018 %>% 
  select(1:41)

historical_test_raw_2017 <- read_excel("C:/Users/paulm/OneDrive/Desktop/Acct426_Project2/Historical_AssessmentResults_SY15-to-SY21.xlsx",
                                       sheet=4)
## New names:
## • `` -> `...1`
## • `` -> `...2`
## • `` -> `...3`
## • `` -> `...4`
## • `` -> `...5`
## • `` -> `...6`
## • `` -> `...8`
## • `` -> `...9`
## • `` -> `...10`
## • `` -> `...11`
## • `` -> `...13`
## • `` -> `...14`
## • `` -> `...15`
## • `` -> `...16`
## • `` -> `...18`
## • `` -> `...19`
## • `` -> `...20`
## • `` -> `...21`
## • `` -> `...23`
## • `` -> `...24`
## • `` -> `...25`
## • `` -> `...26`
## • `` -> `...28`
## • `` -> `...29`
## • `` -> `...30`
## • `` -> `...31`
## • `` -> `...33`
## • `` -> `...34`
## • `` -> `...35`
## • `` -> `...36`
## • `` -> `...38`
## • `` -> `...39`
## • `` -> `...40`
## • `` -> `...41`
## • `` -> `...43`
## • `` -> `...44`
## • `` -> `...45`
## • `` -> `...46`
## • `` -> `...48`
## • `` -> `...49`
## • `` -> `...50`
## • `` -> `...51`
## • `` -> `...53`
## • `` -> `...54`
## • `` -> `...55`
## • `` -> `...56`
## • `` -> `...58`
## • `` -> `...59`
## • `` -> `...60`
## • `` -> `...61`
## • `` -> `...63`
## • `` -> `...64`
## • `` -> `...65`
## • `` -> `...66`
## • `` -> `...68`
## • `` -> `...69`
## • `` -> `...70`
## • `` -> `...71`
## • `` -> `...73`
## • `` -> `...74`
## • `` -> `...75`
## • `` -> `...76`
## • `` -> `...78`
## • `` -> `...79`
## • `` -> `...80`
## • `` -> `...81`
## • `` -> `...83`
## • `` -> `...84`
## • `` -> `...85`
## • `` -> `...86`
test_2017_math <- historical_test_raw_2017 %>%
  select(1:41)


#fixing column names
test_2021_math <- test_2021_math %>% 
  row_to_names(row_number = 1)
## Warning: Row 1 does not provide unique names. Consider running clean_names()
## after row_to_names().
test_2021_math <- test_2021_math %>% 
  clean_names()

test_2019_math <- test_2019_math %>% 
  row_to_names(row_number=1)
## Warning: Row 1 does not provide unique names. Consider running clean_names()
## after row_to_names().
test_2019_math <- test_2019_math %>% 
  clean_names()

test_2018_math <- test_2018_math %>% 
  row_to_names(row_number=1)
## Warning: Row 1 does not provide unique names. Consider running clean_names()
## after row_to_names().
test_2018_math <- test_2018_math %>% 
  clean_names()

test_2017_math <- test_2017_math %>% 
  row_to_names(row_number=1)
## Warning: Row 1 does not provide unique names. Consider running clean_names()
## after row_to_names().
test_2017_math <- test_2017_math %>% 
  clean_names()

current_math <- current_math %>% 
  row_to_names(row_number = 1)
## Warning: Row 1 does not provide unique names. Consider running clean_names()
## after row_to_names().
current_math <- current_math %>% 
  clean_names()

#filter county name (Kanawha) in each of the tables
current_math <- current_math %>% 
  filter(district=="Kanawha")

test_2021_math <- test_2021_math %>% 
  filter(county=="Kanawha")

test_2019_math <- test_2019_math %>% 
  filter(county=="Kanawha")

test_2018_math <- test_2018_math %>% 
  filter(county=="Kanawha")

test_2017_math <- test_2017_math %>% 
  filter(county=="Kanawha")

# Changing N/A's  and ** to zero
current_math <- current_math %>%
  mutate(across(where(is.character), ~ replace_na(., "0")),
         across(where(is.numeric),   ~ replace_na(., 0)))

current_math <- current_math %>%
  mutate(across(everything(), ~ ifelse(. == "**", 0, .)))

test_2021_math <- test_2021_math %>%
  mutate(across(where(is.character), ~ replace_na(., "0")),
         across(where(is.numeric),   ~ replace_na(., 0)))

test_2021_math <- test_2021_math %>%
  mutate(across(everything(), ~ ifelse(. == "**", 0, .)))

test_2019_math <- test_2019_math %>%
  mutate(across(where(is.character), ~ replace_na(., "0")),
         across(where(is.numeric),   ~ replace_na(., 0)))

test_2019_math <- test_2019_math %>%
  mutate(across(everything(), ~ ifelse(. == "**", 0, .)))

test_2018_math <- test_2018_math %>%
  mutate(across(where(is.character), ~ replace_na(., "0")),
         across(where(is.numeric),   ~ replace_na(., 0)))

test_2018_math <- test_2018_math %>%
  mutate(across(everything(), ~ ifelse(. == "**", 0, .)))

test_2017_math <- test_2017_math %>%
  mutate(across(where(is.character), ~ replace_na(., "0")),
         across(where(is.numeric),   ~ replace_na(., 0)))

test_2017_math <- test_2017_math %>%
  mutate(across(everything(), ~ ifelse(. == "**", 0, .)))
#loading spending data pulled from checkbook

edu_spending_raw <- read_csv("C:/Users/paulm/OneDrive/Desktop/Acct426_Project2/West Virginia Department of Education Data Snapshot.csv")
## New names:
## Rows: 61 Columns: 8
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): ...1 num (7): 2016-17 Actual, 2017-18 Actual, 2018-19 Actual, 2019-20
## Actual, 202...
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
#dropping N/A values
edu_spending_clean <- edu_spending_raw %>% 
  drop_na()

# Drop the second column due to it falling outside the scope of the analysis
edu_spending_clean <- edu_spending_clean %>% 
  select(-2)