library(ggplot2)
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.1.0 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(readxl)
# Load the district-level spreadsheet (path is relative to the working
# directory) and preview its first rows.
district <- read_excel(
  path = "Desktop/My Class Stuff/Tuesday Class/district.xls"
)
district %>% head()
## # A tibble: 6 × 137
## DISTNAME DISTRICT DZCNTYNM REGION DZRATING DZCAMPUS DPETALLC DPETBLAP DPETHISP
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 CAYUGA … 001902 001 AND… 07 A 3 574 4.4 11.5
## 2 ELKHART… 001903 001 AND… 07 A 4 1150 4 11.8
## 3 FRANKST… 001904 001 AND… 07 A 3 808 8.5 11.3
## 4 NECHES … 001906 001 AND… 07 A 2 342 8.2 13.5
## 5 PALESTI… 001907 001 AND… 07 B 6 3360 25.1 42.9
## 6 WESTWOO… 001908 001 AND… 07 B 4 1332 19.7 26.2
## # ℹ 128 more variables: DPETWHIP <dbl>, DPETINDP <dbl>, DPETASIP <dbl>,
## # DPETPCIP <dbl>, DPETTWOP <dbl>, DPETECOP <dbl>, DPETLEPP <dbl>,
## # DPETSPEP <dbl>, DPETBILP <dbl>, DPETVOCP <dbl>, DPETGIFP <dbl>,
## # DA0AT21R <dbl>, DA0912DR21R <dbl>, DAGC4X21R <dbl>, DAGC5X20R <dbl>,
## # DAGC6X19R <dbl>, DA0GR21N <dbl>, DA0GS21N <dbl>, DDA00A001S22R <dbl>,
## # DDA00A001222R <dbl>, DDA00A001322R <dbl>, DDA00AR01S22R <dbl>,
## # DDA00AR01222R <dbl>, DDA00AR01322R <dbl>, DDA00AM01S22R <dbl>, …
# Show the type and leading values of every column in the raw data.
district %>% str()
## tibble [1,207 × 137] (S3: tbl_df/tbl/data.frame)
## $ DISTNAME : chr [1:1207] "CAYUGA ISD" "ELKHART ISD" "FRANKSTON ISD" "NECHES ISD" ...
## $ DISTRICT : chr [1:1207] "001902" "001903" "001904" "001906" ...
## $ DZCNTYNM : chr [1:1207] "001 ANDERSON" "001 ANDERSON" "001 ANDERSON" "001 ANDERSON" ...
## $ REGION : chr [1:1207] "07" "07" "07" "07" ...
## $ DZRATING : chr [1:1207] "A" "A" "A" "A" ...
## $ DZCAMPUS : num [1:1207] 3 4 3 2 6 4 2 6 4 5 ...
## $ DPETALLC : num [1:1207] 574 1150 808 342 3360 ...
## $ DPETBLAP : num [1:1207] 4.4 4 8.5 8.2 25.1 19.7 0.3 0.8 15.7 7.2 ...
## $ DPETHISP : num [1:1207] 11.5 11.8 11.3 13.5 42.9 26.2 8.6 68.7 31.2 27.9 ...
## $ DPETWHIP : num [1:1207] 79.1 80.3 75.2 75.1 27.3 48 87 28.2 48.5 60.6 ...
## $ DPETINDP : num [1:1207] 0 0.3 0.4 0.3 0.2 0.7 0 0.3 0.1 0.3 ...
## $ DPETASIP : num [1:1207] 0.5 0.2 1 0.3 0.7 0.5 0.6 0.3 1 1 ...
## $ DPETPCIP : num [1:1207] 0 0 0 0 0.1 0.1 0 0 0.1 0.1 ...
## $ DPETTWOP : num [1:1207] 4.5 3.4 3.6 2.6 3.7 4.9 3.6 1.7 3.4 3 ...
## $ DPETECOP : num [1:1207] 40.8 45.4 54.2 54.1 81.6 74 46.8 49.6 57.8 50.1 ...
## $ DPETLEPP : num [1:1207] 1 2.8 4.1 2 17.7 7.1 0.6 14.2 5.1 6.9 ...
## $ DPETSPEP : num [1:1207] 14.6 12.1 13.1 10.5 13.5 14.5 14.7 10.4 11.6 11.9 ...
## $ DPETBILP : num [1:1207] 1 2.7 4.1 2 16.1 6.8 0.6 15.2 5 6 ...
## $ DPETVOCP : num [1:1207] 30.5 31.8 43.9 29.5 30.6 38.7 37.7 24.8 18.9 34.4 ...
## $ DPETGIFP : num [1:1207] 6.1 4.6 7.3 5.6 2.3 3.2 3.3 6.8 9.2 6 ...
## $ DA0AT21R : num [1:1207] 96.7 96 95.4 95.8 93.7 94.5 96.7 92.8 97.3 95.2 ...
## $ DA0912DR21R : num [1:1207] 0 0.3 0.4 0 0 0 0 0.4 0.4 0.7 ...
## $ DAGC4X21R : num [1:1207] 100 100 95.2 95.8 99 97.8 100 96.8 100 94.1 ...
## $ DAGC5X20R : num [1:1207] 100 98.9 100 97 99.6 97 100 97.2 100 95.6 ...
## $ DAGC6X19R : num [1:1207] 96 98.8 33.3 100 98.6 97.4 100 96.7 100 95.9 ...
## $ DA0GR21N : num [1:1207] 36 91 41 23 201 95 32 293 52 196 ...
## $ DA0GS21N : num [1:1207] 34 79 40 17 198 77 27 238 52 154 ...
## $ DDA00A001S22R: num [1:1207] 84 85 83 90 74 69 86 76 82 86 ...
## $ DDA00A001222R: num [1:1207] 62 59 57 64 46 40 55 47 56 60 ...
## $ DDA00A001322R: num [1:1207] 33 30 25 27 20 16 25 21 30 31 ...
## $ DDA00AR01S22R: num [1:1207] 81 85 84 87 72 70 86 75 82 84 ...
## $ DDA00AR01222R: num [1:1207] 67 64 63 67 48 45 66 50 60 62 ...
## $ DDA00AR01322R: num [1:1207] 39 34 24 30 20 19 31 22 31 31 ...
## $ DDA00AM01S22R: num [1:1207] 88 84 85 94 75 66 81 76 81 88 ...
## $ DDA00AM01222R: num [1:1207] 65 49 57 69 44 34 42 44 53 62 ...
## $ DDA00AM01322R: num [1:1207] 34 23 26 27 20 14 19 21 29 33 ...
## $ DDA00AC01S22R: num [1:1207] 85 86 81 90 78 73 96 75 83 84 ...
## $ DDA00AC01222R: num [1:1207] 54 63 49 54 48 41 45 46 57 52 ...
## $ DDA00AC01322R: num [1:1207] 22 29 21 23 22 15 16 18 27 21 ...
## $ DDA00AS01S22R: num [1:1207] 78 90 74 83 72 68 92 81 82 87 ...
## $ DDA00AS01222R: num [1:1207] 47 63 48 51 42 38 73 50 51 60 ...
## $ DDA00AS01322R: num [1:1207] 21 42 26 26 20 15 38 27 32 36 ...
## $ DDB00A001S22R: num [1:1207] 60 46 74 88 64 56 -1 71 68 71 ...
## $ DDB00A001222R: num [1:1207] 17 22 38 48 33 26 -1 41 38 37 ...
## $ DDB00A001322R: num [1:1207] 3 8 6 19 11 11 -1 13 14 14 ...
## $ DDH00A001S22R: num [1:1207] 74 85 75 91 73 69 87 72 81 81 ...
## $ DDH00A001222R: num [1:1207] 53 56 46 69 44 36 57 42 50 53 ...
## $ DDH00A001322R: num [1:1207] 24 25 19 26 19 12 20 17 24 24 ...
## $ DDW00A001S22R: num [1:1207] 87 88 85 89 83 75 86 84 88 89 ...
## $ DDW00A001222R: num [1:1207] 66 61 62 66 60 48 55 58 67 66 ...
## $ DDW00A001322R: num [1:1207] 35 32 28 29 29 21 26 29 40 35 ...
## $ DDI00A001S22R: num [1:1207] NA 100 80 -1 75 NA NA 83 -1 62 ...
## $ DDI00A001222R: num [1:1207] NA 100 20 -1 50 NA NA 28 -1 8 ...
## $ DDI00A001322R: num [1:1207] NA 100 20 -1 17 NA NA 6 -1 0 ...
## $ DD300A001S22R: num [1:1207] 33 -1 84 -1 85 100 NA 100 93 97 ...
## $ DD300A001222R: num [1:1207] 33 -1 53 -1 77 100 NA 87 73 82 ...
## $ DD300A001322R: num [1:1207] 17 -1 16 -1 44 88 NA 67 53 56 ...
## $ DD400A001S22R: num [1:1207] NA NA NA NA -1 -1 NA NA -1 -1 ...
## $ DD400A001222R: num [1:1207] NA NA NA NA -1 -1 NA NA -1 -1 ...
## $ DD400A001322R: num [1:1207] NA NA NA NA -1 -1 NA NA -1 -1 ...
## $ DD200A001S22R: num [1:1207] 83 77 75 -1 74 62 88 85 74 83 ...
## $ DD200A001222R: num [1:1207] 54 46 58 -1 44 38 50 58 48 50 ...
## $ DD200A001322R: num [1:1207] 34 23 28 -1 18 13 6 31 13 29 ...
## $ DDE00A001S22R: num [1:1207] 76 77 77 86 70 65 81 67 77 78 ...
## $ DDE00A001222R: num [1:1207] 50 42 49 53 40 34 45 36 48 46 ...
## $ DDE00A001322R: num [1:1207] 23 19 17 17 16 14 17 14 23 19 ...
## $ DA0CT21R : num [1:1207] 58.3 51.6 92.7 87 43.3 40 12.5 42 9.6 38.3 ...
## $ DA0CC21R : num [1:1207] 19 27.7 36.8 15 49.4 28.9 -1 35.8 60 60 ...
## $ DA0CSA21R : num [1:1207] 980 979 980 1007 1048 ...
## $ DA0CAA21R : num [1:1207] NA -1 -1 18.8 21 -1 -1 22.3 NA 23.1 ...
## $ DPSATOFC : num [1:1207] 99.9 186.6 146.7 60.1 553.4 ...
## $ DPSTTOFC : num [1:1207] 46.7 104.9 74.5 30.2 260.3 ...
## $ DPSCTOFP : num [1:1207] 1.5 1.1 1.4 3.1 2.1 1.1 4.1 1.5 4.5 0.9 ...
## $ DPSSTOFP : num [1:1207] 5 2.1 3.5 5 3.4 4.6 3.4 2.6 3.1 3.9 ...
## $ DPSUTOFP : num [1:1207] 5.4 4.9 2 1.7 8.3 4.4 3 5.8 10 6 ...
## $ DPSTTOFP : num [1:1207] 46.8 56.2 50.8 50.3 47 45.5 56.7 50.8 50 49.7 ...
## $ DPSETOFP : num [1:1207] 14.8 16.2 15 13.7 19.7 19.2 9.8 15.4 11.1 8.2 ...
## $ DPSXTOFP : num [1:1207] 26.5 19.5 27.4 26.2 19.5 25.2 23 23.9 21.4 31.3 ...
## $ DPSCTOSA : num [1:1207] 93333 100313 98293 85537 99324 ...
## $ DPSSTOSA : num [1:1207] 73300 79305 71215 81593 80415 ...
## $ DPSUTOSA : num [1:1207] 59550 60616 58022 77642 63829 ...
## $ DPSTTOSA : num [1:1207] 55570 47916 50382 55346 48825 ...
## $ DPSAMIFP : num [1:1207] 15.6 13.4 10.9 16.3 32.1 29.9 1.9 41.3 22.2 18.8 ...
## $ DPSAKIDR : num [1:1207] 5.7 6.2 5.5 5.7 6.1 5 5.2 7.3 7.4 6.5 ...
## $ DPSTKIDR : num [1:1207] 12.3 11 10.8 11.3 12.9 11 9.3 14.4 14.8 13.2 ...
## $ DPST05FP : num [1:1207] 10.4 23.8 32.7 9.7 33.8 44.8 17.9 21.5 35 21.9 ...
## $ DPSTEXPA : num [1:1207] 16.7 13.5 12.8 14.8 12.7 10.3 15.4 13.8 10.2 13.8 ...
## $ DPSTADFP : num [1:1207] 14.8 19 30.7 9.6 15.4 17.4 16.9 24.3 18.5 22.4 ...
## $ DPSTURNR : num [1:1207] 19.1 13.9 21.6 18.3 17.9 30.6 14.6 11.5 17 9.5 ...
## $ DPSTBLFP : num [1:1207] 8.3 2.9 4 6.5 9.6 11.6 0 1.4 4.4 0.5 ...
## $ DPSTHIFP : num [1:1207] 0 6.7 1.3 0 13.8 6.6 0 25.7 8.9 5.6 ...
## $ DPSTWHFP : num [1:1207] 91.7 90.5 93.3 93.5 74.6 80.9 100 69 86.7 93.9 ...
## $ DPSTINFP : num [1:1207] 0 0 0 0 0 0.8 0 0.3 0 0 ...
## $ DPSTASFP : num [1:1207] 0 0 0 0 0 0 0 0.7 0 0 ...
## $ DPSTPIFP : num [1:1207] 0 0 0 0 0 0 0 0 0 0 ...
## $ DPSTTWFP : num [1:1207] 0 0 1.3 0 1.9 0 0 2.8 0 0 ...
## $ DPSTREFP : num [1:1207] 81.6 71.5 87.6 70 71.4 71.4 61 41.7 82.7 66.4 ...
## $ DPSTSPFP : num [1:1207] 9.9 8.4 7.5 5.5 10.2 6.4 5.8 14.4 6.8 9.6 ...
## $ DPSTCOFP : num [1:1207] 0 4.9 2.7 12 5 6.1 19.2 6.5 7.4 9.2 ...
## [list output truncated]
2. Create a new data frame with “DISTNAME”, “DPETSPEP” (percent special education), and “DPFPASPEP” (money spent on special education). Call the data frame whatever you want.
# Keep only the district name and the two special-education columns.
district_data_frame <- select(district, DISTNAME, DPETSPEP, DPFPASPEP)
Give me “summary()” statistics for both DPETSPEP and DPFPASPEP. You can summarize them separately if you want.
Which variable has missing values?
# Summary statistics for every column of the reduced data frame; the NA's
# row of the output reveals which variable has missing values.
district_data_frame %>% summary()
## DISTNAME DPETSPEP DPFPASPEP
## Length:1207 Min. : 0.00 Min. : 0.000
## Class :character 1st Qu.: 9.90 1st Qu.: 5.800
## Mode :character Median :12.10 Median : 8.900
## Mean :12.27 Mean : 9.711
## 3rd Qu.:14.20 3rd Qu.:12.500
## Max. :51.70 Max. :49.000
## NA's :5
# Summary of the spending column on its own, including its NA count.
summary(district_data_frame[["DPFPASPEP"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 0.000 5.800 8.900 9.711 12.500 49.000 5
# Count the missing values in each column.
district_data_frame %>%
  is.na() %>%
  colSums()
## DISTNAME DPETSPEP DPFPASPEP
## 0 0 5
# Drop every row that has a missing value in any column, then re-check the
# summary statistics on the cleaned data.
district_data_frame_clean <- drop_na(district_data_frame)
district_data_frame_clean %>% summary()
## DISTNAME DPETSPEP DPFPASPEP
## Length:1202 Min. : 0.0 Min. : 0.000
## Class :character 1st Qu.: 9.9 1st Qu.: 5.800
## Mode :character Median :12.2 Median : 8.900
## Mean :12.3 Mean : 9.711
## 3rd Qu.:14.2 3rd Qu.:12.500
## Max. :51.7 Max. :49.000
##Because I realized that this is similar to my previous issue, I found that I had to install this package as well. I’m confused because I have already used both of the functions; will I have to install the package each time? I believe that is a step I have been confused about.
library(ggplot2)
# Scatterplot of percent special education (DPETSPEP, y-axis) against
# money spent on special education (DPFPASPEP, x-axis), using the rows
# that have no missing values.
# The axis labels must follow the aes() mapping: DPFPASPEP is mapped to x
# and DPETSPEP to y, so each label names the variable drawn on that axis.
ggplot(district_data_frame_clean, aes(x = DPFPASPEP, y = DPETSPEP)) +
  geom_point() +
  labs(
    x = "Money Spent on Special Education (DPFPASPEP)",
    y = "Percent Special Education (DPETSPEP)",
    title = "Special Education % vs. Spending"
  )
7. Do a mathematical check (cor()) of DPFPASPEP and DPETSPEP. What is
the result?
# Pearson correlation (cor()'s default method) between spending and percent
# special education, computed on the cleaned data.
with(district_data_frame_clean, cor(DPFPASPEP, DPETSPEP))
## [1] 0.3700234