library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(readxl)
# Load the district-level spreadsheet.
district_data <- read_excel("district.xls")

# Keep only the variables used in this analysis, under readable names.
# Any analysis column that was read as text is converted with
# parse_number(). district_name is deliberately excluded from that
# conversion: it holds labels such as "CAYUGA ISD", and parsing it as a
# number replaced every name with NA (an earlier run reported 1206 parsing
# failures for exactly this reason).
# Rows missing any of the five analysis variables are then dropped;
# district_name is not part of that completeness check.
clean_data <- district_data |>
  select(
    district_name = DISTNAME,
    staar_meets = DDA00A001222R,
    exp_instruction = DPFEAINSP,
    exp_stuservices = DPFPAREGP,
    econ_disadv = DPETECOP,
    teacher_exp = DPSTEXPA
  ) |>
  mutate(
    across(where(is.character) & !district_name, readr::parse_number)
  ) |>
  drop_na(
    staar_meets,
    exp_instruction,
    exp_stuservices,
    econ_disadv,
    teacher_exp
  )
# Five-number summary (plus mean) of every column in the cleaned data.
# Note on the recorded output below: it comes from a run in which
# district_name had been coerced to numeric, which is why that column shows
# only NA (1198 of them). A character district_name column is summarised as
# Length / Class / Mode instead.
clean_data |>
  summary()
## district_name staar_meets exp_instruction exp_stuservices
## Min. : NA Min. : 0.00 Min. :18.50 Min. : 2.00
## 1st Qu.: NA 1st Qu.:37.00 1st Qu.:52.02 1st Qu.:35.12
## Median : NA Median :46.00 Median :55.10 Median :39.70
## Mean :NaN Mean :46.37 Mean :54.77 Mean :39.81
## 3rd Qu.: NA 3rd Qu.:55.00 3rd Qu.:57.80 3rd Qu.:43.90
## Max. : NA Max. :88.00 Max. :84.40 Max. :79.10
## NA's :1198
## econ_disadv teacher_exp
## Min. : 0.00 Min. : 0.00
## 1st Qu.: 48.00 1st Qu.:10.10
## Median : 61.85 Median :12.00
## Mean : 60.82 Mean :11.78
## 3rd Qu.: 76.97 3rd Qu.:13.90
## Max. :100.00 Max. :22.90
##
# Scatter plot: instructional expenditure share (x) against STAAR Meets (y).
plot(
  x = clean_data[["exp_instruction"]],
  y = clean_data[["staar_meets"]],
  main = "Instructional Spending vs STAAR Meets",
  xlab = "Instructional Expenditure (%)",
  ylab = "STAAR Meets (%)",
  pch = 19,
  col = "red"
)

# Scatter plot: economically disadvantaged share (x) against STAAR Meets (y).
plot(
  x = clean_data[["econ_disadv"]],
  y = clean_data[["staar_meets"]],
  main = "Economic Disadvantage vs STAAR Meets",
  xlab = "Economically Disadvantaged (%)",
  ylab = "STAAR Meets (%)",
  pch = 19,
  col = "blue"
)

# Pairwise correlations between the five numeric analysis variables.
# use = "complete.obs" restricts the calculation to rows with no missing
# values in any of the selected columns.
clean_data |>
  select(
    staar_meets,
    exp_instruction,
    exp_stuservices,
    econ_disadv,
    teacher_exp
  ) |>
  cor(use = "complete.obs")
## staar_meets exp_instruction exp_stuservices econ_disadv
## staar_meets 1.0000000 0.2150228 0.35432970 -0.6964191
## exp_instruction 0.2150228 1.0000000 0.48358599 -0.1924036
## exp_stuservices 0.3543297 0.4835860 1.00000000 -0.4761955
## econ_disadv -0.6964191 -0.1924036 -0.47619545 1.0000000
## teacher_exp 0.3333607 0.1297148 -0.02474583 -0.2327761
## teacher_exp
## staar_meets 0.33336067
## exp_instruction 0.12971478
## exp_stuservices -0.02474583
## econ_disadv -0.23277614
## teacher_exp 1.00000000
# Distribution of each analysis variable, one histogram per variable.

# Share of economically disadvantaged students.
hist(
  clean_data[["econ_disadv"]],
  main = "Distribution of Economic Disadvantage (%)",
  xlab = "Economically Disadvantaged Students (%)",
  col = "Red",
  border = "white"
)

# Instructional expenditure share.
hist(
  clean_data[["exp_instruction"]],
  main = "Distribution of Instructional Expenditure (%)",
  xlab = "Instructional Expenditure (%)",
  col = "green",
  border = "white"
)

# STAAR Meets rate.
hist(
  clean_data[["staar_meets"]],
  main = "Distribution of STAAR Meets (%)",
  xlab = "STAAR Meets (%)",
  col = "blue",
  border = "white"
)

# Student services expenditure share.
hist(
  clean_data[["exp_stuservices"]],
  main = "Distribution of Student Services Expenditure (%)",
  xlab = "Student Services Expenditure (%)",
  col = "pink",
  border = "white"
)

# Average teacher experience in years.
hist(
  clean_data[["teacher_exp"]],
  main = "Distribution of Teacher Experience (Years)",
  xlab = "Average Years of Experience",
  col = "gray",
  border = "white"
)
