library(kableExtra)
library(stringr)
library(tidyverse)
## -- Attaching packages ---------------------------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.2.1     v readr   1.3.1
## v tibble  2.1.3     v purrr   0.3.2
## v tidyr   0.8.3     v dplyr   0.8.3
## v ggplot2 3.2.1     v forcats 0.4.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter()     masks stats::filter()
## x dplyr::group_rows() masks kableExtra::group_rows()
## x dplyr::lag()        masks stats::lag()

Part 1

Library Makeover

Read Data

#DOE Demographics  
demographics_DOE <- read.csv("demographic-snapshot-2014-15-to-2018-19-(public).csv", header = TRUE, stringsAsFactors = FALSE, sep = ",")

head(demographics_DOE)
##   ï..DBN               School.Name    Year Total.Enrollment
## 1 01M015 P.S. 015 Roberto Clemente 2014-15              183
## 2 01M015 P.S. 015 Roberto Clemente 2015-16              176
## 3 01M015 P.S. 015 Roberto Clemente 2016-17              178
## 4 01M015 P.S. 015 Roberto Clemente 2017-18              190
## 5 01M015 P.S. 015 Roberto Clemente 2018-19              174
## 6 01M019       P.S. 019 Asher Levy 2014-15              270
##   Grade.PK..Half.Day...Full.Day. Grade.K Grade.1 Grade.2 Grade.3 Grade.4
## 1                             18      27      47      31      19      17
## 2                             14      32      33      39      23      17
## 3                             17      28      33      27      31      24
## 4                             17      28      32      33      23      31
## 5                             13      20      33      30      30      20
## 6                             30      44      40      39      35      40
##   Grade.5 Grade.6 Grade.7 Grade.8 Grade.9 Grade.10 Grade.11 Grade.12
## 1      24       0       0       0       0        0        0        0
## 2      18       0       0       0       0        0        0        0
## 3      18       0       0       0       0        0        0        0
## 4      26       0       0       0       0        0        0        0
## 5      28       0       0       0       0        0        0        0
## 6      42       0       0       0       0        0        0        0
##   X..Female X..Female.1 X..Male X..Male.1 X..Asian X..Asian.1 X..Black
## 1        84       45.9%      99     54.1%        8       4.4%       65
## 2        83       47.2%      93     52.8%        9       5.1%       57
## 3        83       46.6%      95     53.4%       14       7.9%       51
## 4        99       52.1%      91     47.9%       20      10.5%       52
## 5        85       48.9%      89     51.1%       24      13.8%       48
## 6       132       48.9%     138     51.1%       30      11.1%       47
##   X..Black.1 X..Hispanic X..Hispanic.1
## 1      35.5%         107         58.5%
## 2      32.4%         105         59.7%
## 3      28.7%         105         59.0%
## 4      27.4%         110         57.9%
## 5      27.6%          95         54.6%
## 6      17.4%         158         58.5%
##   X..Multiple.Race.Categories.Not.Represented
## 1                                           1
## 2                                           3
## 3                                           4
## 4                                           2
## 5                                           1
## 6                                           8
##   X..Multiple.Race.Categories.Not.Represented.1 X..White X..White.1
## 1                                          0.5%        2       1.1%
## 2                                          1.7%        2       1.1%
## 3                                          2.2%        4       2.2%
## 4                                          1.1%        6       3.2%
## 5                                          0.6%        6       3.4%
## 6                                          3.0%       27      10.0%
##   X..Students.with.Disabilities X..Students.with.Disabilities.1
## 1                            64                           35.0%
## 2                            60                           34.1%
## 3                            51                           28.7%
## 4                            49                           25.8%
## 5                            38                           21.8%
## 6                            82                           30.4%
##   X..English.Language.Learners X..English.Language.Learners.1 X..Poverty
## 1                           17                           9.3%        169
## 2                           16                           9.1%        149
## 3                           12                           6.7%        152
## 4                            8                           4.2%        161
## 5                            8                           4.6%        145
## 6                           18                           6.7%        200
##   X..Poverty.1 Economic.Need.Index
## 1        92.3%               93.0%
## 2        84.7%               88.9%
## 3        85.4%               88.2%
## 4        84.7%               89.0%
## 5        83.3%               88.0%
## 6        74.1%               60.5%
# Column titles
names(demographics_DOE)
##  [1] "ï..DBN"                                       
##  [2] "School.Name"                                  
##  [3] "Year"                                         
##  [4] "Total.Enrollment"                             
##  [5] "Grade.PK..Half.Day...Full.Day."               
##  [6] "Grade.K"                                      
##  [7] "Grade.1"                                      
##  [8] "Grade.2"                                      
##  [9] "Grade.3"                                      
## [10] "Grade.4"                                      
## [11] "Grade.5"                                      
## [12] "Grade.6"                                      
## [13] "Grade.7"                                      
## [14] "Grade.8"                                      
## [15] "Grade.9"                                      
## [16] "Grade.10"                                     
## [17] "Grade.11"                                     
## [18] "Grade.12"                                     
## [19] "X..Female"                                    
## [20] "X..Female.1"                                  
## [21] "X..Male"                                      
## [22] "X..Male.1"                                    
## [23] "X..Asian"                                     
## [24] "X..Asian.1"                                   
## [25] "X..Black"                                     
## [26] "X..Black.1"                                   
## [27] "X..Hispanic"                                  
## [28] "X..Hispanic.1"                                
## [29] "X..Multiple.Race.Categories.Not.Represented"  
## [30] "X..Multiple.Race.Categories.Not.Represented.1"
## [31] "X..White"                                     
## [32] "X..White.1"                                   
## [33] "X..Students.with.Disabilities"                
## [34] "X..Students.with.Disabilities.1"              
## [35] "X..English.Language.Learners"                 
## [36] "X..English.Language.Learners.1"               
## [37] "X..Poverty"                                   
## [38] "X..Poverty.1"                                 
## [39] "Economic.Need.Index"
#Creating a subset
subset <- demographics_DOE[, c(1,2,3,9,38)]

head(subset,5)
##   ï..DBN               School.Name    Year Grade.3 X..Poverty.1
## 1 01M015 P.S. 015 Roberto Clemente 2014-15      19        92.3%
## 2 01M015 P.S. 015 Roberto Clemente 2015-16      23        84.7%
## 3 01M015 P.S. 015 Roberto Clemente 2016-17      31        85.4%
## 4 01M015 P.S. 015 Roberto Clemente 2017-18      23        84.7%
## 5 01M015 P.S. 015 Roberto Clemente 2018-19      30        83.3%
# rename columns
#subset with 8973 entries
names(subset)[1] <- "DBN"
names(subset)[4] <- "Grade3"
names(subset)[5] <- "%Poverty"

head(subset)
##      DBN               School.Name    Year Grade3 %Poverty
## 1 01M015 P.S. 015 Roberto Clemente 2014-15     19    92.3%
## 2 01M015 P.S. 015 Roberto Clemente 2015-16     23    84.7%
## 3 01M015 P.S. 015 Roberto Clemente 2016-17     31    85.4%
## 4 01M015 P.S. 015 Roberto Clemente 2017-18     23    84.7%
## 5 01M015 P.S. 015 Roberto Clemente 2018-19     30    83.3%
## 6 01M019       P.S. 019 Asher Levy 2014-15     35    74.1%
# Subset for Brooklyn Schools Grade 3 with poverty level in year 2018-2019 with 569 Entries
brooklyn_demographics <- subset %>%
    filter(str_detect(DBN, "K"))%>%
    filter(Year == "2018-19")
head(brooklyn_demographics)
##      DBN                  School.Name    Year Grade3 %Poverty
## 1 13K003 P.S. 003 The Bedford Village 2018-19     48    92.5%
## 2 13K008       P.S. 008 Robert Fulton 2018-19     89    19.5%
## 3 13K009    P.S. 009 Teunis G. Bergen 2018-19    132    35.8%
## 4 13K011     P.S. 011 Purvis J. Behan 2018-19    136    39.3%
## 5 13K020        P.S. 020 Clinton Hill 2018-19     67    58.4%
## 6 13K044       P.S. 044 Marcus Garvey 2018-19     15    95.0%
#ELA Demographics
demographics_ELA <- read.csv("school-ela-results-2013-2019-(public).csv", header = TRUE, stringsAsFactors = FALSE, sep = ",")

head(demographics_ELA)
##                       ï..    DBN               School.Name Grade Year
## 1 01M01532013All Students 01M015 P.S. 015 ROBERTO CLEMENTE     3 2013
## 2 01M01532014All Students 01M015 P.S. 015 ROBERTO CLEMENTE     3 2014
## 3 01M01532015All Students 01M015 P.S. 015 ROBERTO CLEMENTE     3 2015
## 4 01M01532016All Students 01M015 P.S. 015 ROBERTO CLEMENTE     3 2016
## 5 01M01532017All Students 01M015 P.S. 015 ROBERTO CLEMENTE     3 2017
## 6 01M01532018All Students 01M015 P.S. 015 ROBERTO CLEMENTE     3 2018
##       Category Number.Tested Mean.Scale.Score X..Level.1 X..Level.1.1
## 1 All Students            27              289         14         51.9
## 2 All Students            18              285         10         55.6
## 3 All Students            16              282          9         56.3
## 4 All Students            20              293         10         50.0
## 5 All Students            27              302         10         37.0
## 6 All Students            20              613          0          0.0
##   X..Level.2 X..Level.2.1 X..Level.3 X..Level.3.1 X..Level.4 X..Level.4.1
## 1         11         40.7          2          7.4          0          0.0
## 2          8         44.4          0          0.0          0          0.0
## 3          5         31.3          2         12.5          0          0.0
## 4          6         30.0          4         20.0          0          0.0
## 5          8         29.6          7         25.9          2          7.4
## 6          4         20.0         13         65.0          3         15.0
##   X..Level.3.4 X..Level.3.4.1
## 1            2            7.4
## 2            0            0.0
## 3            2           12.5
## 4            4           20.0
## 5            9           33.3
## 6           16           80.0
#Creating a subset for ELA with Level 3 and 4 score % in Grade 3
subset2 <- demographics_ELA[, c(2,3,4,5,14,16)]
names(subset2)[5] <- "%Level3"
names(subset2)[6] <- "%Level4"

head(subset2)
##      DBN               School.Name Grade Year %Level3 %Level4
## 1 01M015 P.S. 015 ROBERTO CLEMENTE     3 2013     7.4     0.0
## 2 01M015 P.S. 015 ROBERTO CLEMENTE     3 2014     0.0     0.0
## 3 01M015 P.S. 015 ROBERTO CLEMENTE     3 2015    12.5     0.0
## 4 01M015 P.S. 015 ROBERTO CLEMENTE     3 2016    20.0     0.0
## 5 01M015 P.S. 015 ROBERTO CLEMENTE     3 2017    25.9     7.4
## 6 01M015 P.S. 015 ROBERTO CLEMENTE     3 2018    65.0    15.0
# All the Brooklyn Borough school ELA results with level 3 and 4 in grade 3 in 2018
brooklyn_ELA_Grade3 <- subset2 %>%
  filter(str_detect(DBN, "K"))%>%
  filter(Grade == 3)%>%
  filter(Year == "2018")
head(brooklyn_ELA_Grade3)
##      DBN                  School.Name Grade Year %Level3 %Level4
## 1 13K003 P.S. 003 THE BEDFORD VILLAGE     3 2018    33.3     2.2
## 2 13K008       P.S. 008 ROBERT FULTON     3 2018    70.2    14.9
## 3 13K009    P.S. 009 TEUNIS G. BERGEN     3 2018    50.5    18.7
## 4 13K011     P.S. 011 PURVIS J. BEHAN     3 2018    55.5    25.5
## 5 13K020        P.S. 020 CLINTON HILL     3 2018    50.7    17.9
## 6 13K044       P.S. 044 MARCUS GARVEY     3 2018    11.8     0.0
#Merge two subsets of Brooklyn DOE demographics and Brooklyn ELA scores for Grade 3. This Gives us the proficiency of ELA in Brooklyn schools Grade 3 students.  

brooklyn_ELA_Grade3_prof<- merge(brooklyn_ELA_Grade3, brooklyn_demographics, by = "School.Name")
brooklyn_ELA_Grade3_prof
##      School.Name  DBN.x Grade Year.x %Level3 %Level4  DBN.y  Year.y Grade3
## 1      M.S. K394 17K394     3   2018    29.0     0.0 17K394 2018-19     39
## 2       P.S. 024 15K024     3   2018    14.6     3.4 15K024 2018-19     93
## 3       P.S. 065 19K065     3   2018    52.3     5.8 19K065 2018-19    104
## 4       P.S. 109 22K109     3   2018    17.9     1.8 22K109 2018-19     60
## 5       P.S. 245 22K245     3   2018    39.5     0.0 22K245 2018-19     47
## 6       P.S. 253 21K253     3   2018    71.9    13.7 21K253 2018-19    138
## 7       P.S. 295 15K295     3   2018    36.0    14.0 15K295 2018-19     71
## 8       P.S. 376 32K376     3   2018    36.1    31.3 32K376 2018-19    102
## 9        P.S. 66 18K066     3   2018    48.6     2.9 18K066 2018-19     66
## 10     P.S. K134 22K134     3   2018    55.6    27.3 22K134 2018-19    100
## 11     P.S. K315 22K315     3   2018    46.0     6.2 22K315 2018-19     91
## 12 P.S./I.S. 323 23K323     3   2018    23.9     0.0 23K323 2018-19     55
##    %Poverty
## 1     85.2%
## 2     93.8%
## 3     81.6%
## 4     84.0%
## 5     95.2%
## 6     79.4%
## 7     39.0%
## 8     82.4%
## 9     67.5%
## 10    88.4%
## 11    87.7%
## 12    93.8%
#Re-arranging the Brooklyn school ELA Grade 3 proficiency subset with required columns.
brooklyn_ELA_Grade3_prof <- brooklyn_ELA_Grade3_prof[,c(1,2,5,6,9,10)]
brooklyn_ELA_Grade3_prof
##      School.Name  DBN.x %Level3 %Level4 Grade3 %Poverty
## 1      M.S. K394 17K394    29.0     0.0     39    85.2%
## 2       P.S. 024 15K024    14.6     3.4     93    93.8%
## 3       P.S. 065 19K065    52.3     5.8    104    81.6%
## 4       P.S. 109 22K109    17.9     1.8     60    84.0%
## 5       P.S. 245 22K245    39.5     0.0     47    95.2%
## 6       P.S. 253 21K253    71.9    13.7    138    79.4%
## 7       P.S. 295 15K295    36.0    14.0     71    39.0%
## 8       P.S. 376 32K376    36.1    31.3    102    82.4%
## 9        P.S. 66 18K066    48.6     2.9     66    67.5%
## 10     P.S. K134 22K134    55.6    27.3    100    88.4%
## 11     P.S. K315 22K315    46.0     6.2     91    87.7%
## 12 P.S./I.S. 323 23K323    23.9     0.0     55    93.8%
#Filtering all the schools with poverty level more than 60%
brooklyn_ELA_Grade3_prof[,6] <- as.numeric(sub("%","",brooklyn_ELA_Grade3_prof[,6]))
brooklyn_ELA_Grade3_prof <- brooklyn_ELA_Grade3_prof %>%
  filter(brooklyn_ELA_Grade3_prof$`%Poverty` > 60)

brooklyn_ELA_Grade3_prof
##      School.Name  DBN.x %Level3 %Level4 Grade3 %Poverty
## 1      M.S. K394 17K394    29.0     0.0     39     85.2
## 2       P.S. 024 15K024    14.6     3.4     93     93.8
## 3       P.S. 065 19K065    52.3     5.8    104     81.6
## 4       P.S. 109 22K109    17.9     1.8     60     84.0
## 5       P.S. 245 22K245    39.5     0.0     47     95.2
## 6       P.S. 253 21K253    71.9    13.7    138     79.4
## 7       P.S. 376 32K376    36.1    31.3    102     82.4
## 8        P.S. 66 18K066    48.6     2.9     66     67.5
## 9      P.S. K134 22K134    55.6    27.3    100     88.4
## 10     P.S. K315 22K315    46.0     6.2     91     87.7
## 11 P.S./I.S. 323 23K323    23.9     0.0     55     93.8

My Approach

  1. I converted both excel files to .csv as it is convenient to read using read.csv
  2. after reading my first demographic data file I decided to go with only the information that I need for the task
  3. I did the same steps to read my second data file.
  4. As the data of the criteria to eligible for the grant are coming from both datasets, I merge both datasets.
  5. I used filter function to filter out brooklyn borough schools, grade 3 students who scored low percentage in level3 and level4.

Conclusion

I ended up with 11 possible candidates for the library makeover grant. All the schools are in high rate of poverty or more than 60%

Part 2

English Language Assessment Scores over Time

#Large population of ELL
ELL_Schools <- demographics_DOE[,c(1,2,3,36)]%>%
  filter(Year == "2018-19")
  
names(ELL_Schools)[4] <- "ELL_Percentage"
head(ELL_Schools,20)
##    ï..DBN                           School.Name    Year ELL_Percentage
## 1  01M015             P.S. 015 Roberto Clemente 2018-19           4.6%
## 2  01M019                   P.S. 019 Asher Levy 2018-19           3.2%
## 3  01M020                  P.S. 020 Anna Silver 2018-19          13.1%
## 4  01M034        P.S. 034 Franklin D. Roosevelt 2018-19           7.2%
## 5  01M063             The STAR Academy - P.S.63 2018-19           1.7%
## 6  01M064                 P.S. 064 Robert Simon 2018-19           2.7%
## 7  01M110         P.S. 110 Florence Nightingale 2018-19           2.2%
## 8  01M134              P.S. 134 Henrietta Szold 2018-19           8.5%
## 9  01M140                P.S. 140 Nathan Straus 2018-19          11.2%
## 10 01M142                P.S. 142 Amalia Castro 2018-19           7.4%
## 11 01M184                  P.S. 184m Shuang Wen 2018-19          14.6%
## 12 01M188            P.S. 188 The Island School 2018-19           7.4%
## 13 01M292            Orchard Collegiate Academy 2018-19          11.7%
## 14 01M315     The East Village Community School 2018-19           1.5%
## 15 01M332 University Neighborhood Middle School 2018-19           3.3%
## 16 01M361        The Children's Workshop School 2018-19           3.4%
## 17 01M363                   Neighborhood School 2018-19           1.3%
## 18 01M364                          Earth School 2018-19           6.0%
## 19 01M378             School for Global Leaders 2018-19          10.0%
## 20 01M448   University Neighborhood High School 2018-19          14.7%