Looking at the structure of the data, I noticed that several variables have a few rows with no reported data. According to the Education Data Portal, this is expected because the information is not available for certain years.
# Load the district-level extract from the Education Data Portal.
# Some cells of the finance columns (e.g. rev_total) hold the literal text
# "Missing/not reported" instead of a number. Declaring that text as an NA
# marker makes readr import those cells as NA directly, rather than recording
# a parsing failure for each of them.
Education <- readr::read_csv(
  "C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv",
  na = c("", "NA", "Missing/not reported")
)
## Parsed with column specification:
## cols(
## .default = col_double(),
## lea_name = col_character(),
## state_name = col_character(),
## city_location = col_character(),
## enrollment = col_character(),
## english_language_learners = col_character(),
## teachers_total_fte = col_character(),
## read_test_pct_prof_midpt = col_character(),
## read_test_pct_prof_high = col_character(),
## read_test_pct_prof_low = col_character(),
## math_test_pct_prof_midpt = col_character(),
## math_test_pct_prof_high = col_character(),
## math_test_pct_prof_low = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 167 parsing failures.
## row col expected actual file
## 1038 rev_total a double Missing/not reported 'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv'
## 1038 rev_fed_total a double Missing/not reported 'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv'
## 1038 rev_state_total a double Missing/not reported 'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv'
## 1038 rev_local_total a double Missing/not reported 'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv'
## 1038 salaries_total a double Missing/not reported 'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv'
## .... ............... ........ .................... ........................................................................................................................................
## See problems(...) for more details.
# Teachers (full-time equivalent) per enrolled student for every row.
# as.numeric() is applied to both inputs first; entries that are not numbers
# become NA and trigger an "NAs introduced by coercion" warning.
Education[["teacherstudentratio"]] <- with(
  Education,
  as.numeric(teachers_total_fte) / as.numeric(enrollment)
)
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
str(Education)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 12387 obs. of 29 variables:
## $ year : num 2015 2015 2015 2015 2015 ...
## $ leaid : num 4800001 4800002 4800003 4800005 4800006 ...
## $ lea_name : chr "CROSBYTON CISD" "SPUR ISD" "ROCKSPRINGS ISD" "BLACKWELL CISD" ...
## $ state_name : chr "Texas" "Texas" "Texas" "Texas" ...
## $ city_location : chr "CROSBYTON" "SPUR" "ROCKSPRINGS" "BLACKWELL" ...
## $ latitude : num 33.7 33.5 30 32.1 33.1 ...
## $ longitude : num -101 -101 -100 -100 -100 ...
## $ number_of_schools : num 4 1 1 2 2 1 7 4 24 7 ...
## $ enrollment : chr "387" "272" "266" "136" ...
## $ english_language_learners : chr "4" "3" "12" "3" ...
## $ teachers_total_fte : chr "36" "29" "29" "22" ...
## $ read_test_num_valid : num 241 166 174 80 148 ...
## $ read_test_pct_prof_midpt : chr "63" "77" "72" "87" ...
## $ read_test_pct_prof_high : chr "63" "79" "74" "89" ...
## $ read_test_pct_prof_low : chr "63" "75" "70" "85" ...
## $ math_test_num_valid : num 209 147 144 68 130 ...
## $ math_test_pct_prof_midpt : chr "62" "67" "47" "82" ...
## $ math_test_pct_prof_high : chr "62" "69" "49" "84" ...
## $ math_test_pct_prof_low : chr "62" "65" "45" "80" ...
## $ rev_total : num 5076000 5111000 4626000 6183000 3338000 ...
## $ rev_fed_total : num 571000 222000 329000 50000 145000 ...
## $ rev_state_total : num 3391000 1981000 922000 1098000 242000 ...
## $ rev_local_total : num 1114000 2908000 3375000 5035000 2951000 ...
## $ salaries_total : num 2823000 2272000 2248000 2020000 1852000 ...
## $ salaries_instruction : num 1830000 1418000 1497000 1293000 1284000 ...
## $ salaries_teachers_regular_prog: num 1274000 1005000 928000 961000 684000 ...
## $ salaries_teachers_sped : num 67000 38000 118000 46000 83000 ...
## $ benefits_employee_total : num 467000 317000 452000 325000 317000 ...
## $ teacherstudentratio : num 0.093 0.107 0.109 0.162 0.103 ...
## - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 167 obs. of 5 variables:
## ..$ row : int 1038 1038 1038 1038 1038 1038 1038 1038 1038 1039 ...
## ..$ col : chr "rev_total" "rev_fed_total" "rev_state_total" "rev_local_total" ...
## ..$ expected: chr "a double" "a double" "a double" "a double" ...
## ..$ actual : chr "Missing/not reported" "Missing/not reported" "Missing/not reported" "Missing/not reported" ...
## ..$ file : chr "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ ...
## - attr(*, "spec")=
## .. cols(
## .. year = col_double(),
## .. leaid = col_double(),
## .. lea_name = col_character(),
## .. state_name = col_character(),
## .. city_location = col_character(),
## .. latitude = col_double(),
## .. longitude = col_double(),
## .. number_of_schools = col_double(),
## .. enrollment = col_character(),
## .. english_language_learners = col_character(),
## .. teachers_total_fte = col_character(),
## .. read_test_num_valid = col_double(),
## .. read_test_pct_prof_midpt = col_character(),
## .. read_test_pct_prof_high = col_character(),
## .. read_test_pct_prof_low = col_character(),
## .. math_test_num_valid = col_double(),
## .. math_test_pct_prof_midpt = col_character(),
## .. math_test_pct_prof_high = col_character(),
## .. math_test_pct_prof_low = col_character(),
## .. rev_total = col_double(),
## .. rev_fed_total = col_double(),
## .. rev_state_total = col_double(),
## .. rev_local_total = col_double(),
## .. salaries_total = col_double(),
## .. salaries_instruction = col_double(),
## .. salaries_teachers_regular_prog = col_double(),
## .. salaries_teachers_sped = col_double(),
## .. benefits_employee_total = col_double()
## .. )
summary(Education)
## year leaid lea_name state_name
## Min. :2004 Min. :4800001 Length:12387 Length:12387
## 1st Qu.:2006 1st Qu.:4816860 Class :character Class :character
## Median :2009 Median :4826340 Mode :character Mode :character
## Mean :2009 Mean :4826747
## 3rd Qu.:2012 3rd Qu.:4836960
## Max. :2015 Max. :4899130
##
## city_location latitude longitude number_of_schools
## Length:12387 Min. :25.93 Min. :-106.60 Min. : 0.000
## Class :character 1st Qu.:30.10 1st Qu.: -99.18 1st Qu.: 2.000
## Mode :character Median :31.83 Median : -97.42 Median : 4.000
## Mean :31.55 Mean : -97.83 Mean : 8.395
## 3rd Qu.:33.02 3rd Qu.: -95.97 3rd Qu.: 7.000
## Max. :36.49 Max. : -93.67 Max. :313.000
## NA's :2077 NA's :2077 NA's :14
## enrollment english_language_learners teachers_total_fte
## Length:12387 Length:12387 Length:12387
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
##
##
##
##
## read_test_num_valid read_test_pct_prof_midpt read_test_pct_prof_high
## Min. : 4 Length:12387 Length:12387
## 1st Qu.: 218 Class :character Class :character
## Median : 562 Mode :character Mode :character
## Mean : 2806
## 3rd Qu.: 1730
## Max. :132328
## NA's :5200
## read_test_pct_prof_low math_test_num_valid math_test_pct_prof_midpt
## Length:12387 Min. : 4 Length:12387
## Class :character 1st Qu.: 197 Class :character
## Mode :character Median : 515 Mode :character
## Mean : 2543
## 3rd Qu.: 1599
## Max. :122246
## NA's :5200
## math_test_pct_prof_high math_test_pct_prof_low rev_total
## Length:12387 Length:12387 Min. :2.970e+05
## Class :character Class :character 1st Qu.:4.935e+06
## Mode :character Mode :character Median :1.102e+07
## Mean :4.692e+07
## 3rd Qu.:3.090e+07
## Max. :2.480e+09
## NA's :33
## rev_fed_total rev_state_total rev_local_total
## Min. : 0 Min. : 18000 Min. :0.000e+00
## 1st Qu.: 302000 1st Qu.: 1828000 1st Qu.:1.762e+06
## Median : 919000 Median : 4644500 Median :4.574e+06
## Mean : 5235680 Mean : 17983539 Mean :2.370e+07
## 3rd Qu.: 3169000 3rd Qu.: 11746000 3rd Qu.:1.447e+07
## Max. :428684000 Max. :597926000 Max. :1.843e+09
## NA's :33 NA's :33 NA's :33
## salaries_total salaries_instruction salaries_teachers_regular_prog
## Min. :0.000e+00 Min. : 0 Min. : 0
## 1st Qu.:2.400e+06 1st Qu.: 1654000 1st Qu.: 0
## Median :5.762e+06 Median : 3949000 Median : 0
## Mean :2.611e+07 Mean : 17727361 Mean : 4667369
## 3rd Qu.:1.606e+07 3rd Qu.: 10992500 3rd Qu.: 1929000
## Max. :1.256e+09 Max. :824620000 Max. :482246000
## NA's :33 NA's :33 NA's :33
## salaries_teachers_sped benefits_employee_total teacherstudentratio
## Min. : 0 Min. : 0 Min. :0.00000
## 1st Qu.: 0 1st Qu.: 392000 1st Qu.:0.06921
## Median : 0 Median : 966500 Median :0.07869
## Mean : 593797 Mean : 4470621 Mean :0.08302
## 3rd Qu.: 157750 3rd Qu.: 2621750 3rd Qu.:0.09122
## Max. :60292000 Max. :223138000 Max. :0.35714
## NA's :33 NA's :33 NA's :100
dim(Education)
## [1] 12387 29
# Give every analysis column an explicit type in one place.
#
# Columns imported as text are converted with as.numeric(); entries that are
# not numbers become NA and as.numeric() warns "NAs introduced by coercion"
# for each column where that happens. Columns that were already imported with
# the right type are listed too, so the cast is a no-op for them.
#
# salaries_teachers_sped is converted in place. Writing the result to
# `salaries_teacher_sped` (without the "s") would instead append a duplicate
# column and leave the real one untouched.
numeric_columns <- c(
  "english_language_learners",
  "teachers_total_fte",
  "read_test_pct_prof_midpt",
  "read_test_pct_prof_high",
  "read_test_pct_prof_low",
  "math_test_pct_prof_midpt",
  "math_test_pct_prof_high",
  "math_test_pct_prof_low",
  "rev_fed_total",
  "rev_state_total",
  "rev_local_total",
  "salaries_total",
  "salaries_teachers_regular_prog",
  "benefits_employee_total",
  "year",
  "leaid",
  "read_test_num_valid",
  "math_test_num_valid",
  "enrollment",
  "rev_total",
  "salaries_instruction",
  "salaries_teachers_sped"
)
character_columns <- c("lea_name", "state_name", "city_location")

Education[numeric_columns] <- lapply(Education[numeric_columns], as.numeric)
Education[character_columns] <- lapply(
  Education[character_columns],
  as.character
)
str(Education)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 12387 obs. of 30 variables:
## $ year : num 2015 2015 2015 2015 2015 ...
## $ leaid : num 4800001 4800002 4800003 4800005 4800006 ...
## $ lea_name : chr "CROSBYTON CISD" "SPUR ISD" "ROCKSPRINGS ISD" "BLACKWELL CISD" ...
## $ state_name : chr "Texas" "Texas" "Texas" "Texas" ...
## $ city_location : chr "CROSBYTON" "SPUR" "ROCKSPRINGS" "BLACKWELL" ...
## $ latitude : num 33.7 33.5 30 32.1 33.1 ...
## $ longitude : num -101 -101 -100 -100 -100 ...
## $ number_of_schools : num 4 1 1 2 2 1 7 4 24 7 ...
## $ enrollment : num 387 272 266 136 242 ...
## $ english_language_learners : num 4 3 12 3 7 ...
## $ teachers_total_fte : num 36 29 29 22 25 ...
## $ read_test_num_valid : num 241 166 174 80 148 ...
## $ read_test_pct_prof_midpt : num 63 77 72 87 77 82 90 83 69 65 ...
## $ read_test_pct_prof_high : num 63 79 74 89 79 84 90 83 69 65 ...
## $ read_test_pct_prof_low : num 63 75 70 85 75 80 90 83 69 65 ...
## $ math_test_num_valid : num 209 147 144 68 130 ...
## $ math_test_pct_prof_midpt : num 62 67 47 82 82 87 91 82 69 63 ...
## $ math_test_pct_prof_high : num 62 69 49 84 84 89 91 82 69 63 ...
## $ math_test_pct_prof_low : num 62 65 45 80 80 85 91 82 69 63 ...
## $ rev_total : num 5076000 5111000 4626000 6183000 3338000 ...
## $ rev_fed_total : num 571000 222000 329000 50000 145000 ...
## $ rev_state_total : num 3391000 1981000 922000 1098000 242000 ...
## $ rev_local_total : num 1114000 2908000 3375000 5035000 2951000 ...
## $ salaries_total : num 2823000 2272000 2248000 2020000 1852000 ...
## $ salaries_instruction : num 1830000 1418000 1497000 1293000 1284000 ...
## $ salaries_teachers_regular_prog: num 1274000 1005000 928000 961000 684000 ...
## $ salaries_teachers_sped : num 67000 38000 118000 46000 83000 ...
## $ benefits_employee_total : num 467000 317000 452000 325000 317000 ...
## $ teacherstudentratio : num 0.093 0.107 0.109 0.162 0.103 ...
## $ salaries_teacher_sped : num 67000 38000 118000 46000 83000 ...
## - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 167 obs. of 5 variables:
## ..$ row : int 1038 1038 1038 1038 1038 1038 1038 1038 1038 1039 ...
## ..$ col : chr "rev_total" "rev_fed_total" "rev_state_total" "rev_local_total" ...
## ..$ expected: chr "a double" "a double" "a double" "a double" ...
## ..$ actual : chr "Missing/not reported" "Missing/not reported" "Missing/not reported" "Missing/not reported" ...
## ..$ file : chr "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ ...
## - attr(*, "spec")=
## .. cols(
## .. year = col_double(),
## .. leaid = col_double(),
## .. lea_name = col_character(),
## .. state_name = col_character(),
## .. city_location = col_character(),
## .. latitude = col_double(),
## .. longitude = col_double(),
## .. number_of_schools = col_double(),
## .. enrollment = col_character(),
## .. english_language_learners = col_character(),
## .. teachers_total_fte = col_character(),
## .. read_test_num_valid = col_double(),
## .. read_test_pct_prof_midpt = col_character(),
## .. read_test_pct_prof_high = col_character(),
## .. read_test_pct_prof_low = col_character(),
## .. math_test_num_valid = col_double(),
## .. math_test_pct_prof_midpt = col_character(),
## .. math_test_pct_prof_high = col_character(),
## .. math_test_pct_prof_low = col_character(),
## .. rev_total = col_double(),
## .. rev_fed_total = col_double(),
## .. rev_state_total = col_double(),
## .. rev_local_total = col_double(),
## .. salaries_total = col_double(),
## .. salaries_instruction = col_double(),
## .. salaries_teachers_regular_prog = col_double(),
## .. salaries_teachers_sped = col_double(),
## .. benefits_employee_total = col_double()
## .. )
One thing to note is that teacher salary and funding information was not recorded for 2016. We will therefore need to complete the analysis of the student:teacher ratio vs. the average teacher salary and federal funding for 2006 - 2015.
# Preview the rows that have no missing value in any column.
# The result is only printed, not assigned, so Education itself keeps all of
# its rows (the dim() call that follows still reports the full row count).
na.omit(Education)
## # A tibble: 7,001 x 30
## year leaid lea_name state_name city_location latitude longitude
## <dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2015 4.80e6 CROSBYT~ Texas CROSBYTON 33.7 -101.
## 2 2015 4.80e6 SPUR ISD Texas SPUR 33.5 -101.
## 3 2015 4.80e6 ROCKSPR~ Texas ROCKSPRINGS 30.0 -100.
## 4 2015 4.80e6 BLACKWE~ Texas BLACKWELL 32.1 -100.
## 5 2015 4.80e6 ASPERMO~ Texas ASPERMONT 33.1 -100.
## 6 2015 4.80e6 DRIPPIN~ Texas DRIPPING SPR~ 30.2 -98.1
## 7 2015 4.80e6 WIMBERL~ Texas WIMBERLEY 30.0 -98.1
## 8 2015 4.80e6 HAYS CI~ Texas KYLE 30.0 -97.9
## 9 2015 4.80e6 ALVARAD~ Texas ALVARADO 32.4 -97.2
## 10 2015 4.80e6 CHISUM ~ Texas PARIS 33.6 -95.6
## # ... with 6,991 more rows, and 23 more variables:
## # number_of_schools <dbl>, enrollment <dbl>,
## # english_language_learners <dbl>, teachers_total_fte <dbl>,
## # read_test_num_valid <dbl>, read_test_pct_prof_midpt <dbl>,
## # read_test_pct_prof_high <dbl>, read_test_pct_prof_low <dbl>,
## # math_test_num_valid <dbl>, math_test_pct_prof_midpt <dbl>,
## # math_test_pct_prof_high <dbl>, math_test_pct_prof_low <dbl>,
## # rev_total <dbl>, rev_fed_total <dbl>, rev_state_total <dbl>,
## # rev_local_total <dbl>, salaries_total <dbl>,
## # salaries_instruction <dbl>, salaries_teachers_regular_prog <dbl>,
## # salaries_teachers_sped <dbl>, benefits_employee_total <dbl>,
## # teacherstudentratio <dbl>, salaries_teacher_sped <dbl>
dim(Education)
## [1] 12387 30
head(Education)
## # A tibble: 6 x 30
## year leaid lea_name state_name city_location latitude longitude
## <dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2015 4.80e6 CROSBYT~ Texas CROSBYTON 33.7 -101.
## 2 2015 4.80e6 SPUR ISD Texas SPUR 33.5 -101.
## 3 2015 4.80e6 ROCKSPR~ Texas ROCKSPRINGS 30.0 -100.
## 4 2015 4.80e6 BLACKWE~ Texas BLACKWELL 32.1 -100.
## 5 2015 4.80e6 ASPERMO~ Texas ASPERMONT 33.1 -100.
## 6 2015 4.80e6 PANTHER~ Texas VALERA 31.6 -99.6
## # ... with 23 more variables: number_of_schools <dbl>, enrollment <dbl>,
## # english_language_learners <dbl>, teachers_total_fte <dbl>,
## # read_test_num_valid <dbl>, read_test_pct_prof_midpt <dbl>,
## # read_test_pct_prof_high <dbl>, read_test_pct_prof_low <dbl>,
## # math_test_num_valid <dbl>, math_test_pct_prof_midpt <dbl>,
## # math_test_pct_prof_high <dbl>, math_test_pct_prof_low <dbl>,
## # rev_total <dbl>, rev_fed_total <dbl>, rev_state_total <dbl>,
## # rev_local_total <dbl>, salaries_total <dbl>,
## # salaries_instruction <dbl>, salaries_teachers_regular_prog <dbl>,
## # salaries_teachers_sped <dbl>, benefits_employee_total <dbl>,
## # teacherstudentratio <dbl>, salaries_teacher_sped <dbl>
tail(Education)
## # A tibble: 6 x 30
## year leaid lea_name state_name city_location latitude longitude
## <dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 2004 4.85e6 YORKTOW~ Texas YORKTOWN NA NA
## 2 2004 4.85e6 YSLETA ~ Texas EL PASO NA NA
## 3 2004 4.85e6 ZAPATA ~ Texas ZAPATA NA NA
## 4 2004 4.85e6 ZAVALLA~ Texas ZAVALLA NA NA
## 5 2004 4.85e6 ZEPHYR ~ Texas ZEPHYR NA NA
## 6 2004 4.90e6 BENAVID~ Texas BENAVIDES NA NA
## # ... with 23 more variables: number_of_schools <dbl>, enrollment <dbl>,
## # english_language_learners <dbl>, teachers_total_fte <dbl>,
## # read_test_num_valid <dbl>, read_test_pct_prof_midpt <dbl>,
## # read_test_pct_prof_high <dbl>, read_test_pct_prof_low <dbl>,
## # math_test_num_valid <dbl>, math_test_pct_prof_midpt <dbl>,
## # math_test_pct_prof_high <dbl>, math_test_pct_prof_low <dbl>,
## # rev_total <dbl>, rev_fed_total <dbl>, rev_state_total <dbl>,
## # rev_local_total <dbl>, salaries_total <dbl>,
## # salaries_instruction <dbl>, salaries_teachers_regular_prog <dbl>,
## # salaries_teachers_sped <dbl>, benefits_employee_total <dbl>,
## # teacherstudentratio <dbl>, salaries_teacher_sped <dbl>
I also noticed that many of the variables are coded as character variables (see the column specification above). To avoid any confusion, I will convert the appropriate variables to numeric.
library(data.table)
# Normalise the column names: lower-case them, then remove spaces,
# underscores and literal dots (in that order), and print the result.
names(Education) <- Reduce(
  function(current_names, pattern) gsub(pattern, "", current_names),
  c(" ", "_", "\\."),
  tolower(names(Education))
)
names(Education)
## [1] "year" "leaid"
## [3] "leaname" "statename"
## [5] "citylocation" "latitude"
## [7] "longitude" "numberofschools"
## [9] "enrollment" "englishlanguagelearners"
## [11] "teacherstotalfte" "readtestnumvalid"
## [13] "readtestpctprofmidpt" "readtestpctprofhigh"
## [15] "readtestpctproflow" "mathtestnumvalid"
## [17] "mathtestpctprofmidpt" "mathtestpctprofhigh"
## [19] "mathtestpctproflow" "revtotal"
## [21] "revfedtotal" "revstatetotal"
## [23] "revlocaltotal" "salariestotal"
## [25] "salariesinstruction" "salariesteachersregularprog"
## [27] "salariesteacherssped" "benefitsemployeetotal"
## [29] "teacherstudentratio" "salariesteachersped"
# Rows of Education that have both coordinates, i.e. neither latitude nor
# longitude is NA. Education itself is left unchanged.
eliminate <- Education[
  !(is.na(Education$latitude) | is.na(Education$longitude)),
]
str(Education)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 12387 obs. of 30 variables:
## $ year : num 2015 2015 2015 2015 2015 ...
## $ leaid : num 4800001 4800002 4800003 4800005 4800006 ...
## $ leaname : chr "CROSBYTON CISD" "SPUR ISD" "ROCKSPRINGS ISD" "BLACKWELL CISD" ...
## $ statename : chr "Texas" "Texas" "Texas" "Texas" ...
## $ citylocation : chr "CROSBYTON" "SPUR" "ROCKSPRINGS" "BLACKWELL" ...
## $ latitude : num 33.7 33.5 30 32.1 33.1 ...
## $ longitude : num -101 -101 -100 -100 -100 ...
## $ numberofschools : num 4 1 1 2 2 1 7 4 24 7 ...
## $ enrollment : num 387 272 266 136 242 ...
## $ englishlanguagelearners : num 4 3 12 3 7 ...
## $ teacherstotalfte : num 36 29 29 22 25 ...
## $ readtestnumvalid : num 241 166 174 80 148 ...
## $ readtestpctprofmidpt : num 63 77 72 87 77 82 90 83 69 65 ...
## $ readtestpctprofhigh : num 63 79 74 89 79 84 90 83 69 65 ...
## $ readtestpctproflow : num 63 75 70 85 75 80 90 83 69 65 ...
## $ mathtestnumvalid : num 209 147 144 68 130 ...
## $ mathtestpctprofmidpt : num 62 67 47 82 82 87 91 82 69 63 ...
## $ mathtestpctprofhigh : num 62 69 49 84 84 89 91 82 69 63 ...
## $ mathtestpctproflow : num 62 65 45 80 80 85 91 82 69 63 ...
## $ revtotal : num 5076000 5111000 4626000 6183000 3338000 ...
## $ revfedtotal : num 571000 222000 329000 50000 145000 ...
## $ revstatetotal : num 3391000 1981000 922000 1098000 242000 ...
## $ revlocaltotal : num 1114000 2908000 3375000 5035000 2951000 ...
## $ salariestotal : num 2823000 2272000 2248000 2020000 1852000 ...
## $ salariesinstruction : num 1830000 1418000 1497000 1293000 1284000 ...
## $ salariesteachersregularprog: num 1274000 1005000 928000 961000 684000 ...
## $ salariesteacherssped : num 67000 38000 118000 46000 83000 ...
## $ benefitsemployeetotal : num 467000 317000 452000 325000 317000 ...
## $ teacherstudentratio : num 0.093 0.107 0.109 0.162 0.103 ...
## $ salariesteachersped : num 67000 38000 118000 46000 83000 ...
## - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 167 obs. of 5 variables:
## ..$ row : int 1038 1038 1038 1038 1038 1038 1038 1038 1038 1039 ...
## ..$ col : chr "rev_total" "rev_fed_total" "rev_state_total" "rev_local_total" ...
## ..$ expected: chr "a double" "a double" "a double" "a double" ...
## ..$ actual : chr "Missing/not reported" "Missing/not reported" "Missing/not reported" "Missing/not reported" ...
## ..$ file : chr "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ ...
## - attr(*, "spec")=
## .. cols(
## .. year = col_double(),
## .. leaid = col_double(),
## .. lea_name = col_character(),
## .. state_name = col_character(),
## .. city_location = col_character(),
## .. latitude = col_double(),
## .. longitude = col_double(),
## .. number_of_schools = col_double(),
## .. enrollment = col_character(),
## .. english_language_learners = col_character(),
## .. teachers_total_fte = col_character(),
## .. read_test_num_valid = col_double(),
## .. read_test_pct_prof_midpt = col_character(),
## .. read_test_pct_prof_high = col_character(),
## .. read_test_pct_prof_low = col_character(),
## .. math_test_num_valid = col_double(),
## .. math_test_pct_prof_midpt = col_character(),
## .. math_test_pct_prof_high = col_character(),
## .. math_test_pct_prof_low = col_character(),
## .. rev_total = col_double(),
## .. rev_fed_total = col_double(),
## .. rev_state_total = col_double(),
## .. rev_local_total = col_double(),
## .. salaries_total = col_double(),
## .. salaries_instruction = col_double(),
## .. salaries_teachers_regular_prog = col_double(),
## .. salaries_teachers_sped = col_double(),
## .. benefits_employee_total = col_double()
## .. )
https://stackoverflow.com/questions/48696395/leaflet-mixing-continuous-and-discrete-colors
library(raster)
## Loading required package: sp
##
## Attaching package: 'raster'
## The following object is masked from 'package:data.table':
##
## shift
## The following object is masked from 'package:dplyr':
##
## select
# Keep only the 2015 records for the map.
Edu <- Education[which(Education$year == 2015), ]
# Fetch the GADM administrative level-2 boundaries for the USA, keep the
# polygons whose NAME_1 is "Texas", and draw them as a quick visual check.
US <- getData("GADM", country = "USA", level = 2)
Texas <- US[which(US$NAME_1 == "Texas"), ]
plot(Texas)
# Classify each 2015 district by its share of English language learners
# (learners / enrollment):
#   ratio <= 0.06921            -> "Low"
#   0.06921 < ratio <= 0.09122  -> "Moderate"
#   ratio >  0.09122            -> "High"
# The ratio is binned with cut() while it is still numeric. Writing the labels
# into the column one threshold at a time would turn the column into character
# after the first assignment, so the remaining threshold tests would compare
# text instead of numbers (e.g. a ratio stored as "5e-05" would never be
# labelled "Low"). Districts whose ratio cannot be computed stay NA.
# NOTE(review): the two cut points equal the 1st and 3rd quartiles printed for
# teacherstudentratio by summary(Education) -- confirm they are the intended
# thresholds for this ratio.
Edu$englishlearnerratio <- as.numeric(Edu$englishlanguagelearners) /
  as.numeric(Edu$enrollment)
Edu$englishlearnerratio <- as.character(cut(
  Edu$englishlearnerratio,
  breaks = c(-Inf, 0.06921, 0.09122, Inf),
  labels = c("Low", "Moderate", "High"),
  right = TRUE
))
# Colour lookup for the three labels. Levels and colours are paired by
# position (High = red, Low = green, Moderate = yellow); NA is drawn in black.
dispal <- colorFactor(
  palette = c("red", "green", "yellow"),
  domain = Edu$englishlearnerratio,
  levels = c("High", "Low", "Moderate"),
  na.color = "black"
)
https://cfss.uchicago.edu/notes/leaflet/
# Marker icon for the districts, loaded from a local PNG and drawn at 40 x 50.
districticon <- makeIcon(iconUrl = "C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/districticon.png", iconWidth = 40, iconHeight = 50)
# NOTE(review): `markers` is not referenced by the map code below, which reads
# the coordinates straight from Edu -- confirm whether it is still needed.
markers <- data.frame(lat = Edu$latitude, lng = Edu$longitude)
# Interactive map: base tiles, the Texas polygons, one clustered marker per
# row of Edu, and a legend for the English learner level.
leaflet(data = Edu) %>%
addTiles()%>%
# NOTE(review): this layer draws the Texas polygons, but the fill colours are
# computed from Edu$englishlearnerratio (one value per district row). Edu and
# Texas are not joined anywhere in the visible code, so the colours presumably
# do not correspond to the districts inside each polygon -- confirm, and
# aggregate the ratio per polygon if per-area shading is intended.
addPolygons(data = Texas,
group = "discrete",
stroke = FALSE,
smoothFactor = 0.2,
fillOpacity = 0.1,
fillColor = ~dispal(Edu$englishlearnerratio)) %>%
# One marker per district at its longitude/latitude, clustered; the popup
# shows the agency name, its English language learner count and enrollment.
addMarkers(~longitude, ~latitude,
icon = districticon,
clusterOptions = markerClusterOptions(),
popup = paste("Local Education Agency Name:", Edu$leaname, "<br>", "# of English Language Learners: ", Edu$englishlanguagelearners, "<br>", "Total # of Students Enrolled:", Edu$enrollment)) %>%
# Legend built from the same palette and labels as the polygon fill.
addLegend(position = "bottomleft",
pal = dispal,
values = Edu$englishlearnerratio,
title = "English Learner Level",
opacity = 0.3)