library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(readxl)

# Load the district-level workbook and keep only the variables used in the
# analysis, renamed from their source codes to readable identifiers.
district_data <- read_excel("district.xls")

clean_data <- district_data |>
  select(
    district_name = DISTNAME,
    staar_meets = DDA00A001222R,
    exp_instruction = DPFEAINSP,
    exp_stuservices = DPFPAREGP,
    econ_disadv = DPETECOP,
    teacher_exp = DPSTEXPA
  ) |>
  # Convert any measure that was read in as text to numeric, but leave the
  # district name alone. Running parse_number() over the names (e.g.
  # "CAYUGA ISD") raised 1206 parsing failures and turned every name into NA.
  mutate(across(where(is.character) & !district_name, readr::parse_number)) |>
  # Keep only districts with a value for every analysis variable.
  drop_na(
    staar_meets,
    exp_instruction,
    exp_stuservices,
    econ_disadv,
    teacher_exp
  )
# Descriptive statistics for the economically disadvantaged percentage;
# the recorded console output follows.
pastecs::stat.desc(clean_data[["econ_disadv"]])
##      nbr.val     nbr.null       nbr.na          min          max        range 
## 1.198000e+03 3.000000e+00 0.000000e+00 0.000000e+00 1.000000e+02 1.000000e+02 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
## 7.285790e+04 6.185000e+01 6.081628e+01 6.212789e-01 1.218917e+00 4.624129e+02 
##      std.dev     coef.var 
## 2.150379e+01 3.535861e-01

# Histogram of the raw (untransformed) percentage.
hist(
  clean_data[["econ_disadv"]],
  main = "Distribution of Economically Disadvantaged Students",
  xlab = "Economically Disadvantaged (%)",
  col = "blue",
  border = "white"
)

# Add a log-transformed copy of the percentage. The +1 offset keeps districts
# with a value of 0 finite, since log(0) is -Inf.
clean_data <- mutate(clean_data, log_econ_disadv = log(econ_disadv + 1))

# Histogram of the transformed variable, for comparison with the raw one.
hist(
  clean_data[["log_econ_disadv"]],
  main = "Log-Transformed Distribution of Economic Disadvantage",
  xlab = "Log(Economically Disadvantaged + 1)",
  col = "Red",
  border = "white"
)