library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)

# Load the two raw input tables from the working directory.
grad_rates <- read.csv(file = "grad_rates.csv")
# NOTE(review): the structure dump of this table shows the table title used as
# the first column name and a raw "\x96" byte in one header cell, so the file
# presumably needs header rows skipped and an explicit encoding -- confirm
# before using it for analysis.
student_spending <- read.csv(file = "student_spending.csv")

# Show which columns the graduation-rate table provides.
colnames(grad_rates)
##  [1] "School.Year"      "State"            "NCES.LEA.ID"      "LEA"             
##  [5] "School"           "NCES.SCH.ID"      "Data.Group"       "Data.Description"
##  [9] "Value"            "Numerator"        "Denominator"      "Population"      
## [13] "Subgroup"         "Characteristics"  "Age.Grade"        "Academic.Subject"
## [17] "Outcome"          "Program.Type"
# Descriptive statistics for Value as imported. The column is still character
# at this point ("86.60%", "N/A", ...), so every statistic is reported as NA.
pastecs::stat.desc(grad_rates[["Value"]])
##  nbr.val nbr.null   nbr.na      min      max    range      sum   median 
##       NA       NA       NA       NA       NA       NA       NA       NA 
##     mean  SE.mean  CI.mean      var  std.dev coef.var 
##       NA       NA       NA       NA       NA       NA
# Inspect the storage type and first entries of the raw Value column.
str(grad_rates[["Value"]])
##  chr [1:701] "86.60%" "73.90%" "93.70%" "N/A" "N/A" "81.00%" "82.80%" "N/A" ...
# Inspect the structure of the spending table as it was imported.
utils::str(object = student_spending)
## 'data.frame':    91 obs. of  28 variables:
##  $ Table.4..Student.membership.and.current.expenditures.per.pupil.for.public.elementary.and.secondary.education..by.function..subfunction..and.state.or.jurisdiction..FY.2021: chr  "State or jurisdiction" "" "" "United States6" ...
##  $ X                                                                                                                                                                         : chr  "School year 2020\x9621 \nstudent membership1" "" "" "49,211,213" ...
##  $ X.1                                                                                                                                                                       : logi  NA NA NA NA NA NA ...
##  $ X.2                                                                                                                                                                       : chr  "Current expenditures per pupil2" "" "Total" "$14,295" ...
##  $ X.3                                                                                                                                                                       : chr  "" "" "" "7, 8" ...
##  $ X.4                                                                                                                                                                       : chr  "" "" "Instruction" "$8,680" ...
##  $ X.5                                                                                                                                                                       : chr  "" "" "" "7, 8" ...
##  $ X.6                                                                                                                                                                       : chr  "" "Support services3" "Total support services" "$5,161" ...
##  $ X.7                                                                                                                                                                       : chr  "" "" "" "7, 8" ...
##  $ X.8                                                                                                                                                                       : chr  "" "" "Student support services5" "$927" ...
##  $ X.9                                                                                                                                                                       : chr  "" "" "" "7, 8" ...
##  $ X.10                                                                                                                                                                      : chr  "" "" "Instruc-\ntional staff\nsupport" "$727" ...
##  $ X.11                                                                                                                                                                      : chr  "" "" "" "7, 8" ...
##  $ X.12                                                                                                                                                                      : chr  "" "" "General \nadminis-\ntration" "$279" ...
##  $ X.13                                                                                                                                                                      : chr  "" "" "" "7, 8" ...
##  $ X.14                                                                                                                                                                      : chr  "" "" "School \nadminis-\ntration" "$818" ...
##  $ X.15                                                                                                                                                                      : chr  "" "" "" "7, 8" ...
##  $ X.16                                                                                                                                                                      : chr  "" "" "Operations \nand \nmaintenance" "$1,331" ...
##  $ X.17                                                                                                                                                                      : chr  "" "" "" "7, 8" ...
##  $ X.18                                                                                                                                                                      : chr  "" "" "Student \ntrans-\nportation" "$501" ...
##  $ X.19                                                                                                                                                                      : chr  "" "" "" "7, 8" ...
##  $ X.20                                                                                                                                                                      : chr  "" "" "Other \nsupport \nservices" "$580" ...
##  $ X.21                                                                                                                                                                      : chr  "" "" "" "7, 8" ...
##  $ X.22                                                                                                                                                                      : chr  "" "" "Food services" "$435" ...
##  $ X.23                                                                                                                                                                      : chr  "" "" "" "7, 8" ...
##  $ X.24                                                                                                                                                                      : chr  "" "" "Enterprise operations4" "$19" ...
##  $ X.25                                                                                                                                                                      : int  NA NA NA 8 NA NA NA NA NA 8 ...
##  $ X.26                                                                                                                                                                      : logi  NA NA NA NA NA NA ...
# List the column names of the spending table.
names(student_spending)
##  [1] "Table.4..Student.membership.and.current.expenditures.per.pupil.for.public.elementary.and.secondary.education..by.function..subfunction..and.state.or.jurisdiction..FY.2021"
##  [2] "X"                                                                                                                                                                         
##  [3] "X.1"                                                                                                                                                                       
##  [4] "X.2"                                                                                                                                                                       
##  [5] "X.3"                                                                                                                                                                       
##  [6] "X.4"                                                                                                                                                                       
##  [7] "X.5"                                                                                                                                                                       
##  [8] "X.6"                                                                                                                                                                       
##  [9] "X.7"                                                                                                                                                                       
## [10] "X.8"                                                                                                                                                                       
## [11] "X.9"                                                                                                                                                                       
## [12] "X.10"                                                                                                                                                                      
## [13] "X.11"                                                                                                                                                                      
## [14] "X.12"                                                                                                                                                                      
## [15] "X.13"                                                                                                                                                                      
## [16] "X.14"                                                                                                                                                                      
## [17] "X.15"                                                                                                                                                                      
## [18] "X.16"                                                                                                                                                                      
## [19] "X.17"                                                                                                                                                                      
## [20] "X.18"                                                                                                                                                                      
## [21] "X.19"                                                                                                                                                                      
## [22] "X.20"                                                                                                                                                                      
## [23] "X.21"                                                                                                                                                                      
## [24] "X.22"                                                                                                                                                                      
## [25] "X.23"                                                                                                                                                                      
## [26] "X.24"                                                                                                                                                                      
## [27] "X.25"                                                                                                                                                                      
## [28] "X.26"
# Convert the percentage strings (e.g. "86.60%") to numeric percentages.
# The "N/A" placeholders are mapped to NA explicitly before the conversion
# instead of relying on a failed coercion. as.numeric() therefore only warns
# when an entry is malformed in some other, unexpected way, rather than
# emitting a blanket "NAs introduced by coercion" warning on every run.
grad_rates$Value <- grad_rates$Value %>%
  trimws() %>%
  dplyr::na_if("N/A") %>%
  gsub(pattern = "%", replacement = "", x = ., fixed = TRUE) %>%
  as.numeric()
# Confirm that Value is now stored as a numeric vector.
str(grad_rates[["Value"]])
##  num [1:701] 86.6 73.9 93.7 NA NA 81 82.8 NA 89.8 71.4 ...
# Descriptive statistics for the numeric graduation rates.
with(grad_rates, pastecs::stat.desc(Value))
##      nbr.val     nbr.null       nbr.na          min          max        range 
## 6.760000e+02 0.000000e+00 2.500000e+01 3.000000e+01 9.700000e+01 6.700000e+01 
##          sum       median         mean      SE.mean CI.mean.0.95          var 
## 5.265680e+04 8.000000e+01 7.789467e+01 4.721739e-01 9.271061e-01 1.507129e+02 
##      std.dev     coef.var 
## 1.227652e+01 1.576041e-01

The average graduation rate is 77.9%, with a range from 30% to 97%, indicating variation across different states and districts.

# Histogram of graduation rates in 5-percentage-point bins. Rows whose Value
# is NA are dropped by the binning step, which reports them in a warning.
ggplot(data = grad_rates, mapping = aes(x = Value)) +
  geom_histogram(binwidth = 5) +
  ggtitle("Distribution of Graduation Rates") +
  xlab("Graduation Rate (%)") +
  ylab("Count") +
  theme_minimal()
## Warning: Removed 25 rows containing non-finite outside the scale range
## (`stat_bin()`).

# Add a square-root-transformed copy of the graduation rate. Rows with a
# missing Value are kept, so Sqrt_Grad_Rate is NA for those rows as well.
grad_rates_clean <- grad_rates %>%
  dplyr::mutate(Sqrt_Grad_Rate = sqrt(Value))

# Histogram of the transformed rates. In this data the square roots only span
# about 5.5 to 9.8 (sqrt of 30 and 97), so a bin width of 1 would squeeze the
# whole distribution into roughly five bars. A width of 0.25 gives a number of
# bars similar to the 5-point bins used on the untransformed scale, so the
# shape of the two distributions can be compared.
ggplot(grad_rates_clean, aes(x = Sqrt_Grad_Rate)) +
  geom_histogram(binwidth = 0.25) +
  labs(
    title = "Histogram of Transformed Graduation Rates",
    x = "Square Root of Graduation Rate",
    y = "Count"
  ) +
  theme_minimal()
## Warning: Removed 25 rows containing non-finite outside the scale range
## (`stat_bin()`).