Data Set

This homework shows the requested code chunks, together with their output, for the district.xls data set.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
# Read the district workbook from the working directory into a tibble
district <- readxl::read_xls(path = "district.xls")
# Print a compact overview of every column: name, type, and leading values
str(object = district)
## tibble [1,207 × 137] (S3: tbl_df/tbl/data.frame)
##  $ DISTNAME     : chr [1:1207] "CAYUGA ISD" "ELKHART ISD" "FRANKSTON ISD" "NECHES ISD" ...
##  $ DISTRICT     : chr [1:1207] "001902" "001903" "001904" "001906" ...
##  $ DZCNTYNM     : chr [1:1207] "001 ANDERSON" "001 ANDERSON" "001 ANDERSON" "001 ANDERSON" ...
##  $ REGION       : chr [1:1207] "07" "07" "07" "07" ...
##  $ DZRATING     : chr [1:1207] "A" "A" "A" "A" ...
##  $ DZCAMPUS     : num [1:1207] 3 4 3 2 6 4 2 6 4 5 ...
##  $ DPETALLC     : num [1:1207] 574 1150 808 342 3360 ...
##  $ DPETBLAP     : num [1:1207] 4.4 4 8.5 8.2 25.1 19.7 0.3 0.8 15.7 7.2 ...
##  $ DPETHISP     : num [1:1207] 11.5 11.8 11.3 13.5 42.9 26.2 8.6 68.7 31.2 27.9 ...
##  $ DPETWHIP     : num [1:1207] 79.1 80.3 75.2 75.1 27.3 48 87 28.2 48.5 60.6 ...
##  $ DPETINDP     : num [1:1207] 0 0.3 0.4 0.3 0.2 0.7 0 0.3 0.1 0.3 ...
##  $ DPETASIP     : num [1:1207] 0.5 0.2 1 0.3 0.7 0.5 0.6 0.3 1 1 ...
##  $ DPETPCIP     : num [1:1207] 0 0 0 0 0.1 0.1 0 0 0.1 0.1 ...
##  $ DPETTWOP     : num [1:1207] 4.5 3.4 3.6 2.6 3.7 4.9 3.6 1.7 3.4 3 ...
##  $ DPETECOP     : num [1:1207] 40.8 45.4 54.2 54.1 81.6 74 46.8 49.6 57.8 50.1 ...
##  $ DPETLEPP     : num [1:1207] 1 2.8 4.1 2 17.7 7.1 0.6 14.2 5.1 6.9 ...
##  $ DPETSPEP     : num [1:1207] 14.6 12.1 13.1 10.5 13.5 14.5 14.7 10.4 11.6 11.9 ...
##  $ DPETBILP     : num [1:1207] 1 2.7 4.1 2 16.1 6.8 0.6 15.2 5 6 ...
##  $ DPETVOCP     : num [1:1207] 30.5 31.8 43.9 29.5 30.6 38.7 37.7 24.8 18.9 34.4 ...
##  $ DPETGIFP     : num [1:1207] 6.1 4.6 7.3 5.6 2.3 3.2 3.3 6.8 9.2 6 ...
##  $ DA0AT21R     : num [1:1207] 96.7 96 95.4 95.8 93.7 94.5 96.7 92.8 97.3 95.2 ...
##  $ DA0912DR21R  : num [1:1207] 0 0.3 0.4 0 0 0 0 0.4 0.4 0.7 ...
##  $ DAGC4X21R    : num [1:1207] 100 100 95.2 95.8 99 97.8 100 96.8 100 94.1 ...
##  $ DAGC5X20R    : num [1:1207] 100 98.9 100 97 99.6 97 100 97.2 100 95.6 ...
##  $ DAGC6X19R    : num [1:1207] 96 98.8 33.3 100 98.6 97.4 100 96.7 100 95.9 ...
##  $ DA0GR21N     : num [1:1207] 36 91 41 23 201 95 32 293 52 196 ...
##  $ DA0GS21N     : num [1:1207] 34 79 40 17 198 77 27 238 52 154 ...
##  $ DDA00A001S22R: num [1:1207] 84 85 83 90 74 69 86 76 82 86 ...
##  $ DDA00A001222R: num [1:1207] 62 59 57 64 46 40 55 47 56 60 ...
##  $ DDA00A001322R: num [1:1207] 33 30 25 27 20 16 25 21 30 31 ...
##  $ DDA00AR01S22R: num [1:1207] 81 85 84 87 72 70 86 75 82 84 ...
##  $ DDA00AR01222R: num [1:1207] 67 64 63 67 48 45 66 50 60 62 ...
##  $ DDA00AR01322R: num [1:1207] 39 34 24 30 20 19 31 22 31 31 ...
##  $ DDA00AM01S22R: num [1:1207] 88 84 85 94 75 66 81 76 81 88 ...
##  $ DDA00AM01222R: num [1:1207] 65 49 57 69 44 34 42 44 53 62 ...
##  $ DDA00AM01322R: num [1:1207] 34 23 26 27 20 14 19 21 29 33 ...
##  $ DDA00AC01S22R: num [1:1207] 85 86 81 90 78 73 96 75 83 84 ...
##  $ DDA00AC01222R: num [1:1207] 54 63 49 54 48 41 45 46 57 52 ...
##  $ DDA00AC01322R: num [1:1207] 22 29 21 23 22 15 16 18 27 21 ...
##  $ DDA00AS01S22R: num [1:1207] 78 90 74 83 72 68 92 81 82 87 ...
##  $ DDA00AS01222R: num [1:1207] 47 63 48 51 42 38 73 50 51 60 ...
##  $ DDA00AS01322R: num [1:1207] 21 42 26 26 20 15 38 27 32 36 ...
##  $ DDB00A001S22R: num [1:1207] 60 46 74 88 64 56 -1 71 68 71 ...
##  $ DDB00A001222R: num [1:1207] 17 22 38 48 33 26 -1 41 38 37 ...
##  $ DDB00A001322R: num [1:1207] 3 8 6 19 11 11 -1 13 14 14 ...
##  $ DDH00A001S22R: num [1:1207] 74 85 75 91 73 69 87 72 81 81 ...
##  $ DDH00A001222R: num [1:1207] 53 56 46 69 44 36 57 42 50 53 ...
##  $ DDH00A001322R: num [1:1207] 24 25 19 26 19 12 20 17 24 24 ...
##  $ DDW00A001S22R: num [1:1207] 87 88 85 89 83 75 86 84 88 89 ...
##  $ DDW00A001222R: num [1:1207] 66 61 62 66 60 48 55 58 67 66 ...
##  $ DDW00A001322R: num [1:1207] 35 32 28 29 29 21 26 29 40 35 ...
##  $ DDI00A001S22R: num [1:1207] NA 100 80 -1 75 NA NA 83 -1 62 ...
##  $ DDI00A001222R: num [1:1207] NA 100 20 -1 50 NA NA 28 -1 8 ...
##  $ DDI00A001322R: num [1:1207] NA 100 20 -1 17 NA NA 6 -1 0 ...
##  $ DD300A001S22R: num [1:1207] 33 -1 84 -1 85 100 NA 100 93 97 ...
##  $ DD300A001222R: num [1:1207] 33 -1 53 -1 77 100 NA 87 73 82 ...
##  $ DD300A001322R: num [1:1207] 17 -1 16 -1 44 88 NA 67 53 56 ...
##  $ DD400A001S22R: num [1:1207] NA NA NA NA -1 -1 NA NA -1 -1 ...
##  $ DD400A001222R: num [1:1207] NA NA NA NA -1 -1 NA NA -1 -1 ...
##  $ DD400A001322R: num [1:1207] NA NA NA NA -1 -1 NA NA -1 -1 ...
##  $ DD200A001S22R: num [1:1207] 83 77 75 -1 74 62 88 85 74 83 ...
##  $ DD200A001222R: num [1:1207] 54 46 58 -1 44 38 50 58 48 50 ...
##  $ DD200A001322R: num [1:1207] 34 23 28 -1 18 13 6 31 13 29 ...
##  $ DDE00A001S22R: num [1:1207] 76 77 77 86 70 65 81 67 77 78 ...
##  $ DDE00A001222R: num [1:1207] 50 42 49 53 40 34 45 36 48 46 ...
##  $ DDE00A001322R: num [1:1207] 23 19 17 17 16 14 17 14 23 19 ...
##  $ DA0CT21R     : num [1:1207] 58.3 51.6 92.7 87 43.3 40 12.5 42 9.6 38.3 ...
##  $ DA0CC21R     : num [1:1207] 19 27.7 36.8 15 49.4 28.9 -1 35.8 60 60 ...
##  $ DA0CSA21R    : num [1:1207] 980 979 980 1007 1048 ...
##  $ DA0CAA21R    : num [1:1207] NA -1 -1 18.8 21 -1 -1 22.3 NA 23.1 ...
##  $ DPSATOFC     : num [1:1207] 99.9 186.6 146.7 60.1 553.4 ...
##  $ DPSTTOFC     : num [1:1207] 46.7 104.9 74.5 30.2 260.3 ...
##  $ DPSCTOFP     : num [1:1207] 1.5 1.1 1.4 3.1 2.1 1.1 4.1 1.5 4.5 0.9 ...
##  $ DPSSTOFP     : num [1:1207] 5 2.1 3.5 5 3.4 4.6 3.4 2.6 3.1 3.9 ...
##  $ DPSUTOFP     : num [1:1207] 5.4 4.9 2 1.7 8.3 4.4 3 5.8 10 6 ...
##  $ DPSTTOFP     : num [1:1207] 46.8 56.2 50.8 50.3 47 45.5 56.7 50.8 50 49.7 ...
##  $ DPSETOFP     : num [1:1207] 14.8 16.2 15 13.7 19.7 19.2 9.8 15.4 11.1 8.2 ...
##  $ DPSXTOFP     : num [1:1207] 26.5 19.5 27.4 26.2 19.5 25.2 23 23.9 21.4 31.3 ...
##  $ DPSCTOSA     : num [1:1207] 93333 100313 98293 85537 99324 ...
##  $ DPSSTOSA     : num [1:1207] 73300 79305 71215 81593 80415 ...
##  $ DPSUTOSA     : num [1:1207] 59550 60616 58022 77642 63829 ...
##  $ DPSTTOSA     : num [1:1207] 55570 47916 50382 55346 48825 ...
##  $ DPSAMIFP     : num [1:1207] 15.6 13.4 10.9 16.3 32.1 29.9 1.9 41.3 22.2 18.8 ...
##  $ DPSAKIDR     : num [1:1207] 5.7 6.2 5.5 5.7 6.1 5 5.2 7.3 7.4 6.5 ...
##  $ DPSTKIDR     : num [1:1207] 12.3 11 10.8 11.3 12.9 11 9.3 14.4 14.8 13.2 ...
##  $ DPST05FP     : num [1:1207] 10.4 23.8 32.7 9.7 33.8 44.8 17.9 21.5 35 21.9 ...
##  $ DPSTEXPA     : num [1:1207] 16.7 13.5 12.8 14.8 12.7 10.3 15.4 13.8 10.2 13.8 ...
##  $ DPSTADFP     : num [1:1207] 14.8 19 30.7 9.6 15.4 17.4 16.9 24.3 18.5 22.4 ...
##  $ DPSTURNR     : num [1:1207] 19.1 13.9 21.6 18.3 17.9 30.6 14.6 11.5 17 9.5 ...
##  $ DPSTBLFP     : num [1:1207] 8.3 2.9 4 6.5 9.6 11.6 0 1.4 4.4 0.5 ...
##  $ DPSTHIFP     : num [1:1207] 0 6.7 1.3 0 13.8 6.6 0 25.7 8.9 5.6 ...
##  $ DPSTWHFP     : num [1:1207] 91.7 90.5 93.3 93.5 74.6 80.9 100 69 86.7 93.9 ...
##  $ DPSTINFP     : num [1:1207] 0 0 0 0 0 0.8 0 0.3 0 0 ...
##  $ DPSTASFP     : num [1:1207] 0 0 0 0 0 0 0 0.7 0 0 ...
##  $ DPSTPIFP     : num [1:1207] 0 0 0 0 0 0 0 0 0 0 ...
##  $ DPSTTWFP     : num [1:1207] 0 0 1.3 0 1.9 0 0 2.8 0 0 ...
##  $ DPSTREFP     : num [1:1207] 81.6 71.5 87.6 70 71.4 71.4 61 41.7 82.7 66.4 ...
##  $ DPSTSPFP     : num [1:1207] 9.9 8.4 7.5 5.5 10.2 6.4 5.8 14.4 6.8 9.6 ...
##  $ DPSTCOFP     : num [1:1207] 0 4.9 2.7 12 5 6.1 19.2 6.5 7.4 9.2 ...
##   [list output truncated]

New Data Frame

# Keep just the district name plus the two special-education columns
dist_df <- select(district, DISTNAME, DPETSPEP, DPFPASPEP)
# Preview the first six rows of the narrowed data frame
head(dist_df, n = 6)
## # A tibble: 6 × 3
##   DISTNAME      DPETSPEP DPFPASPEP
##   <chr>            <dbl>     <dbl>
## 1 CAYUGA ISD        14.6      28.9
## 2 ELKHART ISD       12.1       8.8
## 3 FRANKSTON ISD     13.1       8.4
## 4 NECHES ISD        10.5      10.1
## 5 PALESTINE ISD     13.5       6.1
## 6 WESTWOOD ISD      14.5       9.4

Summary Percent of Special Ed

# Min, quartiles, median, mean, and max of the percent-special-education column
summary(dist_df[["DPETSPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    9.90   12.10   12.27   14.20   51.70

Summary Money Spent on Special Ed

# Min, quartiles, median, mean, max, and NA count of the special education
# spending column.
# NOTE(review): the values span 0-49, which looks like a percentage of
# spending rather than a dollar amount -- confirm against the data dictionary.
summary(dist_df[["DPFPASPEP"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   0.000   5.800   8.900   9.711  12.500  49.000       5

Variables with Missing Values

# Flag every NA cell, then total the flags column by column
dist_df %>%
  is.na() %>%
  colSums()
##  DISTNAME  DPETSPEP DPFPASPEP 
##         0         0         5

Remove Rows with Missing Values

# Drop every row that has an NA in any of the selected columns
dist_clean <- dist_df %>%
  na.omit()

Rows with Remaining Values

# Number of complete rows left after dropping NAs (first element of dim())
dim(dist_clean)[1]
## [1] 1202

Comparison Point Graph

# Scatterplot of the special education spending measure (y) against the
# percent of students in special education (x), one point per district.
# Semi-transparent points keep dense regions readable.
# NOTE(review): DPFPASPEP ranges 0-49 in the summary output, so it may be a
# percentage of spending rather than an amount -- confirm before relying on
# the y-axis wording.
ggplot(data = dist_clean, mapping = aes(x = DPETSPEP, y = DPFPASPEP)) +
  geom_point(colour = "blue", alpha = 0.6) +
  ggtitle("Special Education Spending vs Percent of Students in Special Education") +
  xlab("Percent Special Education (DPETSPEP)") +
  ylab("Spending on Special Education (DPFPASPEP)")

Mathematical Check of DPFPASPEP and DPETSPEP

# Correlation between percent special education and the special education
# spending measure, using cor()'s default (Pearson) method on complete rows
cor_result <- with(dist_clean, cor(x = DPETSPEP, y = DPFPASPEP))
cor_result
## [1] 0.3700234

Interpretation

Districts tend to allocate more resources to special education where student need is higher, but the relationship is far from exact (r ≈ 0.37).

The correlation coefficient is r = 0.37. This number tells us how closely two things move together. If it’s closer to +1, there is a strong positive relationship—districts with a higher percentage of special education students also tend to spend more on special education. If it’s closer to 0, there is little to no relationship. A value near -1 would mean the opposite, that districts with more special education students actually spend less, though that is unlikely in this case.

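Based on both the scatterplot and the correlation result (r ≈ 0.37), the data suggest that spending generally increases as the percentage of special education students increases. However, the relationship is only weak to moderate: the percentage of special education students accounts for roughly 14% of the variation in spending (r² ≈ 0.14), so other factors clearly matter as well.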