library(kableExtra)
library(stringr)
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1 v readr 1.3.1
## v tibble 2.1.3 v purrr 0.3.2
## v tidyr 0.8.3 v dplyr 0.8.3
## v ggplot2 3.2.1 v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag() masks stats::lag()
#DOE Demographics
demographics_DOE <- read.csv("demographic-snapshot-2014-15-to-2018-19-(public).csv", header = TRUE, stringsAsFactors = FALSE, sep = ",")
head(demographics_DOE)
## ï..DBN School.Name Year Total.Enrollment
## 1 01M015 P.S. 015 Roberto Clemente 2014-15 183
## 2 01M015 P.S. 015 Roberto Clemente 2015-16 176
## 3 01M015 P.S. 015 Roberto Clemente 2016-17 178
## 4 01M015 P.S. 015 Roberto Clemente 2017-18 190
## 5 01M015 P.S. 015 Roberto Clemente 2018-19 174
## 6 01M019 P.S. 019 Asher Levy 2014-15 270
## Grade.PK..Half.Day...Full.Day. Grade.K Grade.1 Grade.2 Grade.3 Grade.4
## 1 18 27 47 31 19 17
## 2 14 32 33 39 23 17
## 3 17 28 33 27 31 24
## 4 17 28 32 33 23 31
## 5 13 20 33 30 30 20
## 6 30 44 40 39 35 40
## Grade.5 Grade.6 Grade.7 Grade.8 Grade.9 Grade.10 Grade.11 Grade.12
## 1 24 0 0 0 0 0 0 0
## 2 18 0 0 0 0 0 0 0
## 3 18 0 0 0 0 0 0 0
## 4 26 0 0 0 0 0 0 0
## 5 28 0 0 0 0 0 0 0
## 6 42 0 0 0 0 0 0 0
## X..Female X..Female.1 X..Male X..Male.1 X..Asian X..Asian.1 X..Black
## 1 84 45.9% 99 54.1% 8 4.4% 65
## 2 83 47.2% 93 52.8% 9 5.1% 57
## 3 83 46.6% 95 53.4% 14 7.9% 51
## 4 99 52.1% 91 47.9% 20 10.5% 52
## 5 85 48.9% 89 51.1% 24 13.8% 48
## 6 132 48.9% 138 51.1% 30 11.1% 47
## X..Black.1 X..Hispanic X..Hispanic.1
## 1 35.5% 107 58.5%
## 2 32.4% 105 59.7%
## 3 28.7% 105 59.0%
## 4 27.4% 110 57.9%
## 5 27.6% 95 54.6%
## 6 17.4% 158 58.5%
## X..Multiple.Race.Categories.Not.Represented
## 1 1
## 2 3
## 3 4
## 4 2
## 5 1
## 6 8
## X..Multiple.Race.Categories.Not.Represented.1 X..White X..White.1
## 1 0.5% 2 1.1%
## 2 1.7% 2 1.1%
## 3 2.2% 4 2.2%
## 4 1.1% 6 3.2%
## 5 0.6% 6 3.4%
## 6 3.0% 27 10.0%
## X..Students.with.Disabilities X..Students.with.Disabilities.1
## 1 64 35.0%
## 2 60 34.1%
## 3 51 28.7%
## 4 49 25.8%
## 5 38 21.8%
## 6 82 30.4%
## X..English.Language.Learners X..English.Language.Learners.1 X..Poverty
## 1 17 9.3% 169
## 2 16 9.1% 149
## 3 12 6.7% 152
## 4 8 4.2% 161
## 5 8 4.6% 145
## 6 18 6.7% 200
## X..Poverty.1 Economic.Need.Index
## 1 92.3% 93.0%
## 2 84.7% 88.9%
## 3 85.4% 88.2%
## 4 84.7% 89.0%
## 5 83.3% 88.0%
## 6 74.1% 60.5%
# Column titles
names(demographics_DOE)
## [1] "ï..DBN"
## [2] "School.Name"
## [3] "Year"
## [4] "Total.Enrollment"
## [5] "Grade.PK..Half.Day...Full.Day."
## [6] "Grade.K"
## [7] "Grade.1"
## [8] "Grade.2"
## [9] "Grade.3"
## [10] "Grade.4"
## [11] "Grade.5"
## [12] "Grade.6"
## [13] "Grade.7"
## [14] "Grade.8"
## [15] "Grade.9"
## [16] "Grade.10"
## [17] "Grade.11"
## [18] "Grade.12"
## [19] "X..Female"
## [20] "X..Female.1"
## [21] "X..Male"
## [22] "X..Male.1"
## [23] "X..Asian"
## [24] "X..Asian.1"
## [25] "X..Black"
## [26] "X..Black.1"
## [27] "X..Hispanic"
## [28] "X..Hispanic.1"
## [29] "X..Multiple.Race.Categories.Not.Represented"
## [30] "X..Multiple.Race.Categories.Not.Represented.1"
## [31] "X..White"
## [32] "X..White.1"
## [33] "X..Students.with.Disabilities"
## [34] "X..Students.with.Disabilities.1"
## [35] "X..English.Language.Learners"
## [36] "X..English.Language.Learners.1"
## [37] "X..Poverty"
## [38] "X..Poverty.1"
## [39] "Economic.Need.Index"
#Creating a subset
subset <- demographics_DOE[, c(1,2,3,9,38)]
head(subset,5)
## ï..DBN School.Name Year Grade.3 X..Poverty.1
## 1 01M015 P.S. 015 Roberto Clemente 2014-15 19 92.3%
## 2 01M015 P.S. 015 Roberto Clemente 2015-16 23 84.7%
## 3 01M015 P.S. 015 Roberto Clemente 2016-17 31 85.4%
## 4 01M015 P.S. 015 Roberto Clemente 2017-18 23 84.7%
## 5 01M015 P.S. 015 Roberto Clemente 2018-19 30 83.3%
# rename columns
#subset with 8973 entries
names(subset)[1] <- "DBN"
names(subset)[4] <- "Grade3"
names(subset)[5] <- "%Poverty"
head(subset)
## DBN School.Name Year Grade3 %Poverty
## 1 01M015 P.S. 015 Roberto Clemente 2014-15 19 92.3%
## 2 01M015 P.S. 015 Roberto Clemente 2015-16 23 84.7%
## 3 01M015 P.S. 015 Roberto Clemente 2016-17 31 85.4%
## 4 01M015 P.S. 015 Roberto Clemente 2017-18 23 84.7%
## 5 01M015 P.S. 015 Roberto Clemente 2018-19 30 83.3%
## 6 01M019 P.S. 019 Asher Levy 2014-15 35 74.1%
# Subset for Brooklyn Schools Grade 3 with poverty level in year 2018-2019 with 569 Entries
brooklyn_demographics <- subset %>%
filter(str_detect(DBN, "K"))%>%
filter(Year == "2018-19")
head(brooklyn_demographics)
## DBN School.Name Year Grade3 %Poverty
## 1 13K003 P.S. 003 The Bedford Village 2018-19 48 92.5%
## 2 13K008 P.S. 008 Robert Fulton 2018-19 89 19.5%
## 3 13K009 P.S. 009 Teunis G. Bergen 2018-19 132 35.8%
## 4 13K011 P.S. 011 Purvis J. Behan 2018-19 136 39.3%
## 5 13K020 P.S. 020 Clinton Hill 2018-19 67 58.4%
## 6 13K044 P.S. 044 Marcus Garvey 2018-19 15 95.0%
#ELA Demographics
demographics_ELA <- read.csv("school-ela-results-2013-2019-(public).csv", header = TRUE, stringsAsFactors = FALSE, sep = ",")
head(demographics_ELA)
## ï.. DBN School.Name Grade Year
## 1 01M01532013All Students 01M015 P.S. 015 ROBERTO CLEMENTE 3 2013
## 2 01M01532014All Students 01M015 P.S. 015 ROBERTO CLEMENTE 3 2014
## 3 01M01532015All Students 01M015 P.S. 015 ROBERTO CLEMENTE 3 2015
## 4 01M01532016All Students 01M015 P.S. 015 ROBERTO CLEMENTE 3 2016
## 5 01M01532017All Students 01M015 P.S. 015 ROBERTO CLEMENTE 3 2017
## 6 01M01532018All Students 01M015 P.S. 015 ROBERTO CLEMENTE 3 2018
## Category Number.Tested Mean.Scale.Score X..Level.1 X..Level.1.1
## 1 All Students 27 289 14 51.9
## 2 All Students 18 285 10 55.6
## 3 All Students 16 282 9 56.3
## 4 All Students 20 293 10 50.0
## 5 All Students 27 302 10 37.0
## 6 All Students 20 613 0 0.0
## X..Level.2 X..Level.2.1 X..Level.3 X..Level.3.1 X..Level.4 X..Level.4.1
## 1 11 40.7 2 7.4 0 0.0
## 2 8 44.4 0 0.0 0 0.0
## 3 5 31.3 2 12.5 0 0.0
## 4 6 30.0 4 20.0 0 0.0
## 5 8 29.6 7 25.9 2 7.4
## 6 4 20.0 13 65.0 3 15.0
## X..Level.3.4 X..Level.3.4.1
## 1 2 7.4
## 2 0 0.0
## 3 2 12.5
## 4 4 20.0
## 5 9 33.3
## 6 16 80.0
#Creating a subset for ELA with Level 3 and 4 score % in Grade 3
subset2 <- demographics_ELA[, c(2,3,4,5,14,16)]
names(subset2)[5] <- "%Level3"
names(subset2)[6] <- "%Level4"
head(subset2)
## DBN School.Name Grade Year %Level3 %Level4
## 1 01M015 P.S. 015 ROBERTO CLEMENTE 3 2013 7.4 0.0
## 2 01M015 P.S. 015 ROBERTO CLEMENTE 3 2014 0.0 0.0
## 3 01M015 P.S. 015 ROBERTO CLEMENTE 3 2015 12.5 0.0
## 4 01M015 P.S. 015 ROBERTO CLEMENTE 3 2016 20.0 0.0
## 5 01M015 P.S. 015 ROBERTO CLEMENTE 3 2017 25.9 7.4
## 6 01M015 P.S. 015 ROBERTO CLEMENTE 3 2018 65.0 15.0
# All the Brooklyn Borough school ELA results with level 3 and 4 in grade 3 in 2018
brooklyn_ELA_Grade3 <- subset2 %>%
filter(str_detect(DBN, "K"))%>%
filter(Grade == 3)%>%
filter(Year == "2018")
head(brooklyn_ELA_Grade3)
## DBN School.Name Grade Year %Level3 %Level4
## 1 13K003 P.S. 003 THE BEDFORD VILLAGE 3 2018 33.3 2.2
## 2 13K008 P.S. 008 ROBERT FULTON 3 2018 70.2 14.9
## 3 13K009 P.S. 009 TEUNIS G. BERGEN 3 2018 50.5 18.7
## 4 13K011 P.S. 011 PURVIS J. BEHAN 3 2018 55.5 25.5
## 5 13K020 P.S. 020 CLINTON HILL 3 2018 50.7 17.9
## 6 13K044 P.S. 044 MARCUS GARVEY 3 2018 11.8 0.0
#Merge two subsets of Brooklyn DOE demographics and Brooklyn ELA scores for Grade 3. This Gives us the proficiency of ELA in Brooklyn schools Grade 3 students.
brooklyn_ELA_Grade3_prof<- merge(brooklyn_ELA_Grade3, brooklyn_demographics, by = "School.Name")
brooklyn_ELA_Grade3_prof
## School.Name DBN.x Grade Year.x %Level3 %Level4 DBN.y Year.y Grade3
## 1 M.S. K394 17K394 3 2018 29.0 0.0 17K394 2018-19 39
## 2 P.S. 024 15K024 3 2018 14.6 3.4 15K024 2018-19 93
## 3 P.S. 065 19K065 3 2018 52.3 5.8 19K065 2018-19 104
## 4 P.S. 109 22K109 3 2018 17.9 1.8 22K109 2018-19 60
## 5 P.S. 245 22K245 3 2018 39.5 0.0 22K245 2018-19 47
## 6 P.S. 253 21K253 3 2018 71.9 13.7 21K253 2018-19 138
## 7 P.S. 295 15K295 3 2018 36.0 14.0 15K295 2018-19 71
## 8 P.S. 376 32K376 3 2018 36.1 31.3 32K376 2018-19 102
## 9 P.S. 66 18K066 3 2018 48.6 2.9 18K066 2018-19 66
## 10 P.S. K134 22K134 3 2018 55.6 27.3 22K134 2018-19 100
## 11 P.S. K315 22K315 3 2018 46.0 6.2 22K315 2018-19 91
## 12 P.S./I.S. 323 23K323 3 2018 23.9 0.0 23K323 2018-19 55
## %Poverty
## 1 85.2%
## 2 93.8%
## 3 81.6%
## 4 84.0%
## 5 95.2%
## 6 79.4%
## 7 39.0%
## 8 82.4%
## 9 67.5%
## 10 88.4%
## 11 87.7%
## 12 93.8%
#Re-arranging the Brooklyn school ELA Grade 3 proficiency subset with required columns.
brooklyn_ELA_Grade3_prof <- brooklyn_ELA_Grade3_prof[,c(1,2,5,6,9,10)]
brooklyn_ELA_Grade3_prof
## School.Name DBN.x %Level3 %Level4 Grade3 %Poverty
## 1 M.S. K394 17K394 29.0 0.0 39 85.2%
## 2 P.S. 024 15K024 14.6 3.4 93 93.8%
## 3 P.S. 065 19K065 52.3 5.8 104 81.6%
## 4 P.S. 109 22K109 17.9 1.8 60 84.0%
## 5 P.S. 245 22K245 39.5 0.0 47 95.2%
## 6 P.S. 253 21K253 71.9 13.7 138 79.4%
## 7 P.S. 295 15K295 36.0 14.0 71 39.0%
## 8 P.S. 376 32K376 36.1 31.3 102 82.4%
## 9 P.S. 66 18K066 48.6 2.9 66 67.5%
## 10 P.S. K134 22K134 55.6 27.3 100 88.4%
## 11 P.S. K315 22K315 46.0 6.2 91 87.7%
## 12 P.S./I.S. 323 23K323 23.9 0.0 55 93.8%
#Filtering all the schools with poverty level more than 60%
brooklyn_ELA_Grade3_prof[,6] <- as.numeric(sub("%","",brooklyn_ELA_Grade3_prof[,6]))
brooklyn_ELA_Grade3_prof <- brooklyn_ELA_Grade3_prof %>%
filter(brooklyn_ELA_Grade3_prof$`%Poverty` > 60)
brooklyn_ELA_Grade3_prof
## School.Name DBN.x %Level3 %Level4 Grade3 %Poverty
## 1 M.S. K394 17K394 29.0 0.0 39 85.2
## 2 P.S. 024 15K024 14.6 3.4 93 93.8
## 3 P.S. 065 19K065 52.3 5.8 104 81.6
## 4 P.S. 109 22K109 17.9 1.8 60 84.0
## 5 P.S. 245 22K245 39.5 0.0 47 95.2
## 6 P.S. 253 21K253 71.9 13.7 138 79.4
## 7 P.S. 376 32K376 36.1 31.3 102 82.4
## 8 P.S. 66 18K066 48.6 2.9 66 67.5
## 9 P.S. K134 22K134 55.6 27.3 100 88.4
## 10 P.S. K315 22K315 46.0 6.2 91 87.7
## 11 P.S./I.S. 323 23K323 23.9 0.0 55 93.8
- I converted both excel files to .csv as it is convenient to read using read.csv
- after reading my first demographic data file I decided to go with only the information that I need for the task
- I did the same steps to read my second data file.
- As the data of the criteria to eligible for the grant are coming from both datasets, I merge both datasets.
- I used filter function to filter out brooklyn borough schools, grade 3 students who scored low percentage in level3 and level4.
I ended up with 11 possible candidates for the library makeover grant. All the schools are in high rate of poverty or more than 60%
#Large population of ELL
ELL_Schools <- demographics_DOE[,c(1,2,3,36)]%>%
filter(Year == "2018-19")
names(ELL_Schools)[4] <- "ELL_Percentage"
head(ELL_Schools,20)
## ï..DBN School.Name Year ELL_Percentage
## 1 01M015 P.S. 015 Roberto Clemente 2018-19 4.6%
## 2 01M019 P.S. 019 Asher Levy 2018-19 3.2%
## 3 01M020 P.S. 020 Anna Silver 2018-19 13.1%
## 4 01M034 P.S. 034 Franklin D. Roosevelt 2018-19 7.2%
## 5 01M063 The STAR Academy - P.S.63 2018-19 1.7%
## 6 01M064 P.S. 064 Robert Simon 2018-19 2.7%
## 7 01M110 P.S. 110 Florence Nightingale 2018-19 2.2%
## 8 01M134 P.S. 134 Henrietta Szold 2018-19 8.5%
## 9 01M140 P.S. 140 Nathan Straus 2018-19 11.2%
## 10 01M142 P.S. 142 Amalia Castro 2018-19 7.4%
## 11 01M184 P.S. 184m Shuang Wen 2018-19 14.6%
## 12 01M188 P.S. 188 The Island School 2018-19 7.4%
## 13 01M292 Orchard Collegiate Academy 2018-19 11.7%
## 14 01M315 The East Village Community School 2018-19 1.5%
## 15 01M332 University Neighborhood Middle School 2018-19 3.3%
## 16 01M361 The Children's Workshop School 2018-19 3.4%
## 17 01M363 Neighborhood School 2018-19 1.3%
## 18 01M364 Earth School 2018-19 6.0%
## 19 01M378 School for Global Leaders 2018-19 10.0%
## 20 01M448 University Neighborhood High School 2018-19 14.7%