Loading and Cleaning the Dataset

Loading the Data

When looking at the structure of the data, I noticed that several variables have a few rows without any reported data. Per the Education Data Portal, this is expected because information is not available for specific years.

# Read the district-level Education Data Portal extract into a tibble; column
# types are guessed by readr.
Education <- readr::read_csv(
  file = "C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv"
)
## Parsed with column specification:
## cols(
##   .default = col_double(),
##   lea_name = col_character(),
##   state_name = col_character(),
##   city_location = col_character(),
##   enrollment = col_character(),
##   english_language_learners = col_character(),
##   teachers_total_fte = col_character(),
##   read_test_pct_prof_midpt = col_character(),
##   read_test_pct_prof_high = col_character(),
##   read_test_pct_prof_low = col_character(),
##   math_test_pct_prof_midpt = col_character(),
##   math_test_pct_prof_high = col_character(),
##   math_test_pct_prof_low = col_character()
## )
## See spec(...) for full column specifications.
## Warning: 167 parsing failures.
##  row             col expected               actual                                                                                                                                     file
## 1038 rev_total       a double Missing/not reported 'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv'
## 1038 rev_fed_total   a double Missing/not reported 'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv'
## 1038 rev_state_total a double Missing/not reported 'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv'
## 1038 rev_local_total a double Missing/not reported 'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv'
## 1038 salaries_total  a double Missing/not reported 'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/EducationDataPortalDistricts.csv'
## .... ............... ........ .................... ........................................................................................................................................
## See problems(...) for more details.
# Teachers (FTE) per enrolled student. Both inputs were parsed as character,
# so each is coerced to numeric first; non-numeric entries become NA.
Education[["teacherstudentratio"]] <-
  as.numeric(Education[["teachers_total_fte"]]) /
  as.numeric(Education[["enrollment"]])
## Warning: NAs introduced by coercion
## Warning: NAs introduced by coercion
# Inspect the column types after adding the ratio.
str(Education)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 12387 obs. of  29 variables:
##  $ year                          : num  2015 2015 2015 2015 2015 ...
##  $ leaid                         : num  4800001 4800002 4800003 4800005 4800006 ...
##  $ lea_name                      : chr  "CROSBYTON CISD" "SPUR ISD" "ROCKSPRINGS ISD" "BLACKWELL CISD" ...
##  $ state_name                    : chr  "Texas" "Texas" "Texas" "Texas" ...
##  $ city_location                 : chr  "CROSBYTON" "SPUR" "ROCKSPRINGS" "BLACKWELL" ...
##  $ latitude                      : num  33.7 33.5 30 32.1 33.1 ...
##  $ longitude                     : num  -101 -101 -100 -100 -100 ...
##  $ number_of_schools             : num  4 1 1 2 2 1 7 4 24 7 ...
##  $ enrollment                    : chr  "387" "272" "266" "136" ...
##  $ english_language_learners     : chr  "4" "3" "12" "3" ...
##  $ teachers_total_fte            : chr  "36" "29" "29" "22" ...
##  $ read_test_num_valid           : num  241 166 174 80 148 ...
##  $ read_test_pct_prof_midpt      : chr  "63" "77" "72" "87" ...
##  $ read_test_pct_prof_high       : chr  "63" "79" "74" "89" ...
##  $ read_test_pct_prof_low        : chr  "63" "75" "70" "85" ...
##  $ math_test_num_valid           : num  209 147 144 68 130 ...
##  $ math_test_pct_prof_midpt      : chr  "62" "67" "47" "82" ...
##  $ math_test_pct_prof_high       : chr  "62" "69" "49" "84" ...
##  $ math_test_pct_prof_low        : chr  "62" "65" "45" "80" ...
##  $ rev_total                     : num  5076000 5111000 4626000 6183000 3338000 ...
##  $ rev_fed_total                 : num  571000 222000 329000 50000 145000 ...
##  $ rev_state_total               : num  3391000 1981000 922000 1098000 242000 ...
##  $ rev_local_total               : num  1114000 2908000 3375000 5035000 2951000 ...
##  $ salaries_total                : num  2823000 2272000 2248000 2020000 1852000 ...
##  $ salaries_instruction          : num  1830000 1418000 1497000 1293000 1284000 ...
##  $ salaries_teachers_regular_prog: num  1274000 1005000 928000 961000 684000 ...
##  $ salaries_teachers_sped        : num  67000 38000 118000 46000 83000 ...
##  $ benefits_employee_total       : num  467000 317000 452000 325000 317000 ...
##  $ teacherstudentratio           : num  0.093 0.107 0.109 0.162 0.103 ...
##  - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 167 obs. of  5 variables:
##   ..$ row     : int  1038 1038 1038 1038 1038 1038 1038 1038 1038 1039 ...
##   ..$ col     : chr  "rev_total" "rev_fed_total" "rev_state_total" "rev_local_total" ...
##   ..$ expected: chr  "a double" "a double" "a double" "a double" ...
##   ..$ actual  : chr  "Missing/not reported" "Missing/not reported" "Missing/not reported" "Missing/not reported" ...
##   ..$ file    : chr  "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   year = col_double(),
##   ..   leaid = col_double(),
##   ..   lea_name = col_character(),
##   ..   state_name = col_character(),
##   ..   city_location = col_character(),
##   ..   latitude = col_double(),
##   ..   longitude = col_double(),
##   ..   number_of_schools = col_double(),
##   ..   enrollment = col_character(),
##   ..   english_language_learners = col_character(),
##   ..   teachers_total_fte = col_character(),
##   ..   read_test_num_valid = col_double(),
##   ..   read_test_pct_prof_midpt = col_character(),
##   ..   read_test_pct_prof_high = col_character(),
##   ..   read_test_pct_prof_low = col_character(),
##   ..   math_test_num_valid = col_double(),
##   ..   math_test_pct_prof_midpt = col_character(),
##   ..   math_test_pct_prof_high = col_character(),
##   ..   math_test_pct_prof_low = col_character(),
##   ..   rev_total = col_double(),
##   ..   rev_fed_total = col_double(),
##   ..   rev_state_total = col_double(),
##   ..   rev_local_total = col_double(),
##   ..   salaries_total = col_double(),
##   ..   salaries_instruction = col_double(),
##   ..   salaries_teachers_regular_prog = col_double(),
##   ..   salaries_teachers_sped = col_double(),
##   ..   benefits_employee_total = col_double()
##   .. )
# Per-column summary; columns still stored as character only report their
# length and class here.
summary(Education)
##       year          leaid           lea_name          state_name       
##  Min.   :2004   Min.   :4800001   Length:12387       Length:12387      
##  1st Qu.:2006   1st Qu.:4816860   Class :character   Class :character  
##  Median :2009   Median :4826340   Mode  :character   Mode  :character  
##  Mean   :2009   Mean   :4826747                                        
##  3rd Qu.:2012   3rd Qu.:4836960                                        
##  Max.   :2015   Max.   :4899130                                        
##                                                                        
##  city_location         latitude       longitude       number_of_schools
##  Length:12387       Min.   :25.93   Min.   :-106.60   Min.   :  0.000  
##  Class :character   1st Qu.:30.10   1st Qu.: -99.18   1st Qu.:  2.000  
##  Mode  :character   Median :31.83   Median : -97.42   Median :  4.000  
##                     Mean   :31.55   Mean   : -97.83   Mean   :  8.395  
##                     3rd Qu.:33.02   3rd Qu.: -95.97   3rd Qu.:  7.000  
##                     Max.   :36.49   Max.   : -93.67   Max.   :313.000  
##                     NA's   :2077    NA's   :2077      NA's   :14       
##   enrollment        english_language_learners teachers_total_fte
##  Length:12387       Length:12387              Length:12387      
##  Class :character   Class :character          Class :character  
##  Mode  :character   Mode  :character          Mode  :character  
##                                                                 
##                                                                 
##                                                                 
##                                                                 
##  read_test_num_valid read_test_pct_prof_midpt read_test_pct_prof_high
##  Min.   :     4      Length:12387             Length:12387           
##  1st Qu.:   218      Class :character         Class :character       
##  Median :   562      Mode  :character         Mode  :character       
##  Mean   :  2806                                                      
##  3rd Qu.:  1730                                                      
##  Max.   :132328                                                      
##  NA's   :5200                                                        
##  read_test_pct_prof_low math_test_num_valid math_test_pct_prof_midpt
##  Length:12387           Min.   :     4      Length:12387            
##  Class :character       1st Qu.:   197      Class :character        
##  Mode  :character       Median :   515      Mode  :character        
##                         Mean   :  2543                              
##                         3rd Qu.:  1599                              
##                         Max.   :122246                              
##                         NA's   :5200                                
##  math_test_pct_prof_high math_test_pct_prof_low   rev_total        
##  Length:12387            Length:12387           Min.   :2.970e+05  
##  Class :character        Class :character       1st Qu.:4.935e+06  
##  Mode  :character        Mode  :character       Median :1.102e+07  
##                                                 Mean   :4.692e+07  
##                                                 3rd Qu.:3.090e+07  
##                                                 Max.   :2.480e+09  
##                                                 NA's   :33         
##  rev_fed_total       rev_state_total     rev_local_total    
##  Min.   :        0   Min.   :    18000   Min.   :0.000e+00  
##  1st Qu.:   302000   1st Qu.:  1828000   1st Qu.:1.762e+06  
##  Median :   919000   Median :  4644500   Median :4.574e+06  
##  Mean   :  5235680   Mean   : 17983539   Mean   :2.370e+07  
##  3rd Qu.:  3169000   3rd Qu.: 11746000   3rd Qu.:1.447e+07  
##  Max.   :428684000   Max.   :597926000   Max.   :1.843e+09  
##  NA's   :33          NA's   :33          NA's   :33         
##  salaries_total      salaries_instruction salaries_teachers_regular_prog
##  Min.   :0.000e+00   Min.   :        0    Min.   :        0             
##  1st Qu.:2.400e+06   1st Qu.:  1654000    1st Qu.:        0             
##  Median :5.762e+06   Median :  3949000    Median :        0             
##  Mean   :2.611e+07   Mean   : 17727361    Mean   :  4667369             
##  3rd Qu.:1.606e+07   3rd Qu.: 10992500    3rd Qu.:  1929000             
##  Max.   :1.256e+09   Max.   :824620000    Max.   :482246000             
##  NA's   :33          NA's   :33           NA's   :33                    
##  salaries_teachers_sped benefits_employee_total teacherstudentratio
##  Min.   :       0       Min.   :        0       Min.   :0.00000    
##  1st Qu.:       0       1st Qu.:   392000       1st Qu.:0.06921    
##  Median :       0       Median :   966500       Median :0.07869    
##  Mean   :  593797       Mean   :  4470621       Mean   :0.08302    
##  3rd Qu.:  157750       3rd Qu.:  2621750       3rd Qu.:0.09122    
##  Max.   :60292000       Max.   :223138000       Max.   :0.35714    
##  NA's   :33             NA's   :33              NA's   :100
# Row/column count before the type conversions.
dim(Education)
## [1] 12387    29
# Coerce every measure to numeric and every label to character. readr parsed
# several measures as character, so as.numeric() turns their non-numeric
# entries into NA (with a warning); columns that already have the target type
# pass through unchanged.
Education$english_language_learners <- as.numeric(Education$english_language_learners)
## Warning: NAs introduced by coercion
Education$teachers_total_fte <- as.numeric(Education$teachers_total_fte)
## Warning: NAs introduced by coercion
Education$read_test_pct_prof_midpt <- as.numeric(Education$read_test_pct_prof_midpt)
## Warning: NAs introduced by coercion
Education$read_test_pct_prof_high <- as.numeric(Education$read_test_pct_prof_high)
## Warning: NAs introduced by coercion
Education$read_test_pct_prof_low <- as.numeric(Education$read_test_pct_prof_low)
## Warning: NAs introduced by coercion
Education$math_test_pct_prof_midpt <- as.numeric(Education$math_test_pct_prof_midpt)
## Warning: NAs introduced by coercion
Education$math_test_pct_prof_high <- as.numeric(Education$math_test_pct_prof_high)
## Warning: NAs introduced by coercion
Education$math_test_pct_prof_low <- as.numeric(Education$math_test_pct_prof_low)
## Warning: NAs introduced by coercion
Education$rev_fed_total <- as.numeric(Education$rev_fed_total)
Education$rev_state_total <- as.numeric(Education$rev_state_total)
Education$rev_local_total <- as.numeric(Education$rev_local_total)
Education$salaries_total <- as.numeric(Education$salaries_total)
Education$salaries_teachers_regular_prog <- as.numeric(Education$salaries_teachers_regular_prog)
Education$benefits_employee_total <- as.numeric(Education$benefits_employee_total)
Education$year <- as.numeric(Education$year)
Education$leaid <- as.numeric(Education$leaid)
Education$read_test_num_valid <- as.numeric(Education$read_test_num_valid)
Education$math_test_num_valid <- as.numeric(Education$math_test_num_valid)
Education$lea_name <- as.character(Education$lea_name)
Education$state_name <- as.character(Education$state_name)
Education$city_location <- as.character(Education$city_location)
Education$enrollment <- as.numeric(Education$enrollment)
## Warning: NAs introduced by coercion
Education$rev_total <- as.numeric(Education$rev_total)
Education$salaries_instruction <- as.numeric(Education$salaries_instruction)
# Assign back to salaries_teachers_sped itself so the conversion updates the
# existing column rather than adding a second, differently named copy of it.
Education$salaries_teachers_sped <- as.numeric(Education$salaries_teachers_sped)
# Re-inspect the structure to confirm the resulting column types.
str(Education)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 12387 obs. of  30 variables:
##  $ year                          : num  2015 2015 2015 2015 2015 ...
##  $ leaid                         : num  4800001 4800002 4800003 4800005 4800006 ...
##  $ lea_name                      : chr  "CROSBYTON CISD" "SPUR ISD" "ROCKSPRINGS ISD" "BLACKWELL CISD" ...
##  $ state_name                    : chr  "Texas" "Texas" "Texas" "Texas" ...
##  $ city_location                 : chr  "CROSBYTON" "SPUR" "ROCKSPRINGS" "BLACKWELL" ...
##  $ latitude                      : num  33.7 33.5 30 32.1 33.1 ...
##  $ longitude                     : num  -101 -101 -100 -100 -100 ...
##  $ number_of_schools             : num  4 1 1 2 2 1 7 4 24 7 ...
##  $ enrollment                    : num  387 272 266 136 242 ...
##  $ english_language_learners     : num  4 3 12 3 7 ...
##  $ teachers_total_fte            : num  36 29 29 22 25 ...
##  $ read_test_num_valid           : num  241 166 174 80 148 ...
##  $ read_test_pct_prof_midpt      : num  63 77 72 87 77 82 90 83 69 65 ...
##  $ read_test_pct_prof_high       : num  63 79 74 89 79 84 90 83 69 65 ...
##  $ read_test_pct_prof_low        : num  63 75 70 85 75 80 90 83 69 65 ...
##  $ math_test_num_valid           : num  209 147 144 68 130 ...
##  $ math_test_pct_prof_midpt      : num  62 67 47 82 82 87 91 82 69 63 ...
##  $ math_test_pct_prof_high       : num  62 69 49 84 84 89 91 82 69 63 ...
##  $ math_test_pct_prof_low        : num  62 65 45 80 80 85 91 82 69 63 ...
##  $ rev_total                     : num  5076000 5111000 4626000 6183000 3338000 ...
##  $ rev_fed_total                 : num  571000 222000 329000 50000 145000 ...
##  $ rev_state_total               : num  3391000 1981000 922000 1098000 242000 ...
##  $ rev_local_total               : num  1114000 2908000 3375000 5035000 2951000 ...
##  $ salaries_total                : num  2823000 2272000 2248000 2020000 1852000 ...
##  $ salaries_instruction          : num  1830000 1418000 1497000 1293000 1284000 ...
##  $ salaries_teachers_regular_prog: num  1274000 1005000 928000 961000 684000 ...
##  $ salaries_teachers_sped        : num  67000 38000 118000 46000 83000 ...
##  $ benefits_employee_total       : num  467000 317000 452000 325000 317000 ...
##  $ teacherstudentratio           : num  0.093 0.107 0.109 0.162 0.103 ...
##  $ salaries_teacher_sped         : num  67000 38000 118000 46000 83000 ...
##  - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 167 obs. of  5 variables:
##   ..$ row     : int  1038 1038 1038 1038 1038 1038 1038 1038 1038 1039 ...
##   ..$ col     : chr  "rev_total" "rev_fed_total" "rev_state_total" "rev_local_total" ...
##   ..$ expected: chr  "a double" "a double" "a double" "a double" ...
##   ..$ actual  : chr  "Missing/not reported" "Missing/not reported" "Missing/not reported" "Missing/not reported" ...
##   ..$ file    : chr  "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   year = col_double(),
##   ..   leaid = col_double(),
##   ..   lea_name = col_character(),
##   ..   state_name = col_character(),
##   ..   city_location = col_character(),
##   ..   latitude = col_double(),
##   ..   longitude = col_double(),
##   ..   number_of_schools = col_double(),
##   ..   enrollment = col_character(),
##   ..   english_language_learners = col_character(),
##   ..   teachers_total_fte = col_character(),
##   ..   read_test_num_valid = col_double(),
##   ..   read_test_pct_prof_midpt = col_character(),
##   ..   read_test_pct_prof_high = col_character(),
##   ..   read_test_pct_prof_low = col_character(),
##   ..   math_test_num_valid = col_double(),
##   ..   math_test_pct_prof_midpt = col_character(),
##   ..   math_test_pct_prof_high = col_character(),
##   ..   math_test_pct_prof_low = col_character(),
##   ..   rev_total = col_double(),
##   ..   rev_fed_total = col_double(),
##   ..   rev_state_total = col_double(),
##   ..   rev_local_total = col_double(),
##   ..   salaries_total = col_double(),
##   ..   salaries_instruction = col_double(),
##   ..   salaries_teachers_regular_prog = col_double(),
##   ..   salaries_teachers_sped = col_double(),
##   ..   benefits_employee_total = col_double()
##   .. )

One thing to note is that teacher salary and funding information was not recorded for 2016. We will need to complete the analysis of the student:teacher ratio vs. the average teacher salary and federal funding for 2006 - 2015.

# NOTE(review): the NA-free result is only printed, not assigned, so Education
# itself keeps all of its rows (dim() below still reports 12387). Assign the
# result if the incomplete rows are meant to be dropped.
na.omit(Education)
## # A tibble: 7,001 x 30
##     year  leaid lea_name state_name city_location latitude longitude
##    <dbl>  <dbl> <chr>    <chr>      <chr>            <dbl>     <dbl>
##  1  2015 4.80e6 CROSBYT~ Texas      CROSBYTON         33.7    -101. 
##  2  2015 4.80e6 SPUR ISD Texas      SPUR              33.5    -101. 
##  3  2015 4.80e6 ROCKSPR~ Texas      ROCKSPRINGS       30.0    -100. 
##  4  2015 4.80e6 BLACKWE~ Texas      BLACKWELL         32.1    -100. 
##  5  2015 4.80e6 ASPERMO~ Texas      ASPERMONT         33.1    -100. 
##  6  2015 4.80e6 DRIPPIN~ Texas      DRIPPING SPR~     30.2     -98.1
##  7  2015 4.80e6 WIMBERL~ Texas      WIMBERLEY         30.0     -98.1
##  8  2015 4.80e6 HAYS CI~ Texas      KYLE              30.0     -97.9
##  9  2015 4.80e6 ALVARAD~ Texas      ALVARADO          32.4     -97.2
## 10  2015 4.80e6 CHISUM ~ Texas      PARIS             33.6     -95.6
## # ... with 6,991 more rows, and 23 more variables:
## #   number_of_schools <dbl>, enrollment <dbl>,
## #   english_language_learners <dbl>, teachers_total_fte <dbl>,
## #   read_test_num_valid <dbl>, read_test_pct_prof_midpt <dbl>,
## #   read_test_pct_prof_high <dbl>, read_test_pct_prof_low <dbl>,
## #   math_test_num_valid <dbl>, math_test_pct_prof_midpt <dbl>,
## #   math_test_pct_prof_high <dbl>, math_test_pct_prof_low <dbl>,
## #   rev_total <dbl>, rev_fed_total <dbl>, rev_state_total <dbl>,
## #   rev_local_total <dbl>, salaries_total <dbl>,
## #   salaries_instruction <dbl>, salaries_teachers_regular_prog <dbl>,
## #   salaries_teachers_sped <dbl>, benefits_employee_total <dbl>,
## #   teacherstudentratio <dbl>, salaries_teacher_sped <dbl>
# Row/column count of the full table, followed by a look at its first rows.
dim(Education)
## [1] 12387    30
head(Education)
## # A tibble: 6 x 30
##    year  leaid lea_name state_name city_location latitude longitude
##   <dbl>  <dbl> <chr>    <chr>      <chr>            <dbl>     <dbl>
## 1  2015 4.80e6 CROSBYT~ Texas      CROSBYTON         33.7    -101. 
## 2  2015 4.80e6 SPUR ISD Texas      SPUR              33.5    -101. 
## 3  2015 4.80e6 ROCKSPR~ Texas      ROCKSPRINGS       30.0    -100. 
## 4  2015 4.80e6 BLACKWE~ Texas      BLACKWELL         32.1    -100. 
## 5  2015 4.80e6 ASPERMO~ Texas      ASPERMONT         33.1    -100. 
## 6  2015 4.80e6 PANTHER~ Texas      VALERA            31.6     -99.6
## # ... with 23 more variables: number_of_schools <dbl>, enrollment <dbl>,
## #   english_language_learners <dbl>, teachers_total_fte <dbl>,
## #   read_test_num_valid <dbl>, read_test_pct_prof_midpt <dbl>,
## #   read_test_pct_prof_high <dbl>, read_test_pct_prof_low <dbl>,
## #   math_test_num_valid <dbl>, math_test_pct_prof_midpt <dbl>,
## #   math_test_pct_prof_high <dbl>, math_test_pct_prof_low <dbl>,
## #   rev_total <dbl>, rev_fed_total <dbl>, rev_state_total <dbl>,
## #   rev_local_total <dbl>, salaries_total <dbl>,
## #   salaries_instruction <dbl>, salaries_teachers_regular_prog <dbl>,
## #   salaries_teachers_sped <dbl>, benefits_employee_total <dbl>,
## #   teacherstudentratio <dbl>, salaries_teacher_sped <dbl>
# Last rows of the table; the 2004 records shown have no latitude/longitude.
tail(Education)
## # A tibble: 6 x 30
##    year  leaid lea_name state_name city_location latitude longitude
##   <dbl>  <dbl> <chr>    <chr>      <chr>            <dbl>     <dbl>
## 1  2004 4.85e6 YORKTOW~ Texas      YORKTOWN            NA        NA
## 2  2004 4.85e6 YSLETA ~ Texas      EL PASO             NA        NA
## 3  2004 4.85e6 ZAPATA ~ Texas      ZAPATA              NA        NA
## 4  2004 4.85e6 ZAVALLA~ Texas      ZAVALLA             NA        NA
## 5  2004 4.85e6 ZEPHYR ~ Texas      ZEPHYR              NA        NA
## 6  2004 4.90e6 BENAVID~ Texas      BENAVIDES           NA        NA
## # ... with 23 more variables: number_of_schools <dbl>, enrollment <dbl>,
## #   english_language_learners <dbl>, teachers_total_fte <dbl>,
## #   read_test_num_valid <dbl>, read_test_pct_prof_midpt <dbl>,
## #   read_test_pct_prof_high <dbl>, read_test_pct_prof_low <dbl>,
## #   math_test_num_valid <dbl>, math_test_pct_prof_midpt <dbl>,
## #   math_test_pct_prof_high <dbl>, math_test_pct_prof_low <dbl>,
## #   rev_total <dbl>, rev_fed_total <dbl>, rev_state_total <dbl>,
## #   rev_local_total <dbl>, salaries_total <dbl>,
## #   salaries_instruction <dbl>, salaries_teachers_regular_prog <dbl>,
## #   salaries_teachers_sped <dbl>, benefits_employee_total <dbl>,
## #   teacherstudentratio <dbl>, salaries_teacher_sped <dbl>

I also noticed that several of the numeric variables were read in as character variables. To avoid any confusion, I will be converting the appropriate variables to numeric.

Cleaning the Data

library(data.table)
# Normalise the column names: lower-case them, then strip every space,
# underscore and dot in a single pass.
colnames(Education) <- gsub("[ _.]", "", tolower(colnames(Education)))
# Print the cleaned names.
colnames(Education)
##  [1] "year"                        "leaid"                      
##  [3] "leaname"                     "statename"                  
##  [5] "citylocation"                "latitude"                   
##  [7] "longitude"                   "numberofschools"            
##  [9] "enrollment"                  "englishlanguagelearners"    
## [11] "teacherstotalfte"            "readtestnumvalid"           
## [13] "readtestpctprofmidpt"        "readtestpctprofhigh"        
## [15] "readtestpctproflow"          "mathtestnumvalid"           
## [17] "mathtestpctprofmidpt"        "mathtestpctprofhigh"        
## [19] "mathtestpctproflow"          "revtotal"                   
## [21] "revfedtotal"                 "revstatetotal"              
## [23] "revlocaltotal"               "salariestotal"              
## [25] "salariesinstruction"         "salariesteachersregularprog"
## [27] "salariesteacherssped"        "benefitsemployeetotal"      
## [29] "teacherstudentratio"         "salariesteachersped"
# Keep only the rows that have both coordinates: drop a row when either its
# latitude or its longitude is missing.
eliminate <- Education[
  !(is.na(Education[["latitude"]]) | is.na(Education[["longitude"]])),
]
# Structure of the full (unfiltered) Education table.
str(Education)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 12387 obs. of  30 variables:
##  $ year                       : num  2015 2015 2015 2015 2015 ...
##  $ leaid                      : num  4800001 4800002 4800003 4800005 4800006 ...
##  $ leaname                    : chr  "CROSBYTON CISD" "SPUR ISD" "ROCKSPRINGS ISD" "BLACKWELL CISD" ...
##  $ statename                  : chr  "Texas" "Texas" "Texas" "Texas" ...
##  $ citylocation               : chr  "CROSBYTON" "SPUR" "ROCKSPRINGS" "BLACKWELL" ...
##  $ latitude                   : num  33.7 33.5 30 32.1 33.1 ...
##  $ longitude                  : num  -101 -101 -100 -100 -100 ...
##  $ numberofschools            : num  4 1 1 2 2 1 7 4 24 7 ...
##  $ enrollment                 : num  387 272 266 136 242 ...
##  $ englishlanguagelearners    : num  4 3 12 3 7 ...
##  $ teacherstotalfte           : num  36 29 29 22 25 ...
##  $ readtestnumvalid           : num  241 166 174 80 148 ...
##  $ readtestpctprofmidpt       : num  63 77 72 87 77 82 90 83 69 65 ...
##  $ readtestpctprofhigh        : num  63 79 74 89 79 84 90 83 69 65 ...
##  $ readtestpctproflow         : num  63 75 70 85 75 80 90 83 69 65 ...
##  $ mathtestnumvalid           : num  209 147 144 68 130 ...
##  $ mathtestpctprofmidpt       : num  62 67 47 82 82 87 91 82 69 63 ...
##  $ mathtestpctprofhigh        : num  62 69 49 84 84 89 91 82 69 63 ...
##  $ mathtestpctproflow         : num  62 65 45 80 80 85 91 82 69 63 ...
##  $ revtotal                   : num  5076000 5111000 4626000 6183000 3338000 ...
##  $ revfedtotal                : num  571000 222000 329000 50000 145000 ...
##  $ revstatetotal              : num  3391000 1981000 922000 1098000 242000 ...
##  $ revlocaltotal              : num  1114000 2908000 3375000 5035000 2951000 ...
##  $ salariestotal              : num  2823000 2272000 2248000 2020000 1852000 ...
##  $ salariesinstruction        : num  1830000 1418000 1497000 1293000 1284000 ...
##  $ salariesteachersregularprog: num  1274000 1005000 928000 961000 684000 ...
##  $ salariesteacherssped       : num  67000 38000 118000 46000 83000 ...
##  $ benefitsemployeetotal      : num  467000 317000 452000 325000 317000 ...
##  $ teacherstudentratio        : num  0.093 0.107 0.109 0.162 0.103 ...
##  $ salariesteachersped        : num  67000 38000 118000 46000 83000 ...
##  - attr(*, "problems")=Classes 'tbl_df', 'tbl' and 'data.frame': 167 obs. of  5 variables:
##   ..$ row     : int  1038 1038 1038 1038 1038 1038 1038 1038 1038 1039 ...
##   ..$ col     : chr  "rev_total" "rev_fed_total" "rev_state_total" "rev_local_total" ...
##   ..$ expected: chr  "a double" "a double" "a double" "a double" ...
##   ..$ actual  : chr  "Missing/not reported" "Missing/not reported" "Missing/not reported" "Missing/not reported" ...
##   ..$ file    : chr  "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ "'C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/Educatio"| __truncated__ ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   year = col_double(),
##   ..   leaid = col_double(),
##   ..   lea_name = col_character(),
##   ..   state_name = col_character(),
##   ..   city_location = col_character(),
##   ..   latitude = col_double(),
##   ..   longitude = col_double(),
##   ..   number_of_schools = col_double(),
##   ..   enrollment = col_character(),
##   ..   english_language_learners = col_character(),
##   ..   teachers_total_fte = col_character(),
##   ..   read_test_num_valid = col_double(),
##   ..   read_test_pct_prof_midpt = col_character(),
##   ..   read_test_pct_prof_high = col_character(),
##   ..   read_test_pct_prof_low = col_character(),
##   ..   math_test_num_valid = col_double(),
##   ..   math_test_pct_prof_midpt = col_character(),
##   ..   math_test_pct_prof_high = col_character(),
##   ..   math_test_pct_prof_low = col_character(),
##   ..   rev_total = col_double(),
##   ..   rev_fed_total = col_double(),
##   ..   rev_state_total = col_double(),
##   ..   rev_local_total = col_double(),
##   ..   salaries_total = col_double(),
##   ..   salaries_instruction = col_double(),
##   ..   salaries_teachers_regular_prog = col_double(),
##   ..   salaries_teachers_sped = col_double(),
##   ..   benefits_employee_total = col_double()
##   .. )

https://stackoverflow.com/questions/48696395/leaflet-mixing-continuous-and-discrete-colors

library(raster)
## Loading required package: sp
## 
## Attaching package: 'raster'
## The following object is masked from 'package:data.table':
## 
##     shift
## The following object is masked from 'package:dplyr':
## 
##     select
# Restrict the district table to 2015, the latest year present in the data.
Edu <- subset(Education, subset = year == 2015)

# Fetch the GADM level-2 boundaries for the USA (presumably county outlines -
# confirm), keep the Texas features, and draw them as a quick check.
US <- raster::getData(name = "GADM", country = "USA", level = 2)
Texas <- subset(US, NAME_1 == "Texas")
plot(Texas)

# Share of enrolled students who are English language learners, per district.
Edu$englishlearnerratio <- as.numeric(Edu$englishlanguagelearners) / as.numeric(Edu$enrollment)

# Label the share as "Low" (<= 0.06921), "Moderate" (> 0.06921 and <= 0.09122)
# or "High" (> 0.09122). The labels are derived from the numeric values in a
# single cut() call: assigning a label into the numeric column would coerce the
# whole column to character, after which the remaining threshold comparisons
# would compare text and leave values such as "5e-04" unlabelled. Shares that
# cannot be computed (NA/NaN) stay NA. The result is kept as a character
# column.
# NOTE(review): 0.06921 and 0.09122 equal the 1st and 3rd quartiles that
# summary(Education) printed for teacherstudentratio - confirm they are the
# intended cut points for the English-learner share.
Edu$englishlearnerratio <- as.character(cut(
  Edu$englishlearnerratio,
  breaks = c(-Inf, 0.06921, 0.09122, Inf),
  labels = c("Low", "Moderate", "High")
))

# Discrete palette for the English-learner level. Colours pair with `levels`
# in order (High = red, Low = green, Moderate = yellow); values outside the
# levels, including NA, are drawn in black.
dispal <- colorFactor(
  palette = c("red", "green", "yellow"),
  domain = Edu$englishlearnerratio,
  levels = c("High", "Low", "Moderate"),
  na.color = "black"
)

https://cfss.uchicago.edu/notes/leaflet/

# Custom 40 x 50 marker icon loaded from a local PNG file.
districticon <- makeIcon(
  iconUrl = "C:/Users/selen/OneDrive/Documents/Fall 2019 - MSDA/Data Visualization and Communication/Final Project/districticon.png",
  iconWidth = 40,
  iconHeight = 50
)

# Coordinates of the 2015 districts as a plain lat/lng data frame.
markers <- data.frame(lat = Edu[["latitude"]], lng = Edu[["longitude"]])

# Interactive map of the 2015 districts: base tiles, the Texas boundary
# polygons, one clustered marker per district row with a popup, and a legend
# for the English-learner level palette.
leaflet(data = Edu) %>%
  addTiles()%>% 
  # NOTE(review): the polygons come from Texas, but the fill colours are
  # computed from Edu, whose rows are districts rather than Texas polygons.
  # Nothing here matches the two by a key, so confirm that each polygon
  # receives the colour intended for it.
  addPolygons(data = Texas,
              group = "discrete",
              stroke = FALSE, 
              smoothFactor = 0.2, 
              fillOpacity = 0.1,
              fillColor = ~dispal(Edu$englishlearnerratio)) %>%
  # Markers are positioned from the longitude/latitude columns of Edu and
  # clustered; each popup lists the agency name, its English language learner
  # count and its total enrollment.
  addMarkers(~longitude, ~latitude, 
    icon = districticon, 
             clusterOptions = markerClusterOptions(),
             popup = paste("Local Education Agency Name:", Edu$leaname, "<br>", "# of English Language Learners: ", Edu$englishlanguagelearners, "<br>", "Total # of Students Enrolled:", Edu$enrollment)) %>% 
  # Legend for the High / Low / Moderate palette.
  addLegend(position = "bottomleft", 
            pal = dispal, 
            values = Edu$englishlearnerratio, 
            title = "English Learner Level", 
            opacity = 0.3)