##Set Up Data

#Data 1
phillyschools <- read.csv("philadelphia_schools.csv")

colnames(phillyschools)
##  [1] "School_code"             "Attendance"             
##  [3] "Enrollment"              "New_student"            
##  [5] "Withdrawals"             "African_American"       
##  [7] "White"                   "Asian"                  
##  [9] "Latino"                  "Other"                  
## [11] "Pacific_Islander"        "Low_income_family"      
## [13] "SCHOOL_NAME_1"           "SCHOOL_NAME_2"          
## [15] "ADDRESS"                 "SCHOOL_ZIP"             
## [17] "ZIP_PLUS_4"              "CITY"                   
## [19] "STATE_CD"                "PHONE_NUMBER"           
## [21] "SCH_START_GRADE"         "SCH_TERM_GRADE"         
## [23] "HPADDR"                  "SCHOOL_LEVEL_NAME"      
## [25] "Drugs"                   "Morals"                 
## [27] "Assaults"                "Weapons"                
## [29] "Thefts"                  "Total_suspensions"      
## [31] "One_suspension"          "Two_suspensions"        
## [33] "Three_suspensions"       "Three_plus_suspensions" 
## [35] "Teacher_attendance"      "Special_education"      
## [37] "Gifted_education"        "English_second_language"
## [39] "Average_salary"
head(phillyschools$School_code, 10)
##  [1] 1010 1020 1030 1050 1100 1130 1190 1200 1230 1250
#Data 2
school_scores <- read_excel("SPR_SY1819_School_Metric_Scores_20200127.xlsx") 

#Realized there were multiple sheets within excel
#Lesson: Pay better attention to code books/"README" 

excel_sheets("SPR_SY1819_School_Metric_Scores_20200127.xlsx")
## [1] "README"             "SPR SY2018-2019"    "SPR SY2018-2019 ES"
## [4] "SPR SY2018-2019 MS" "SPR SY2018-2019 K8" "SPR SY2018-2019 HS"
#Elementary Schools
es_scores <- read_excel("SPR_SY1819_School_Metric_Scores_20200127.xlsx",
                        sheet = "SPR SY2018-2019 ES")
colnames(es_scores)
##   [1] "School"                              
##   [2] "SRC School ID"                       
##   [3] "ULCS Code"                           
##   [4] "Report"                              
##   [5] "Rpt Type Long"                       
##   [6] "Street Address"                      
##   [7] "City"                                
##   [8] "State"                               
##   [9] "Zip Code"                            
##  [10] "Phone Number"                        
##  [11] "Fax Number"                          
##  [12] "Governance"                          
##  [13] "Turnaround Model"                    
##  [14] "Enrollment"                          
##  [15] "Website"                             
##  [16] "Grades Served"                       
##  [17] "Admissions Type"                     
##  [18] "Peer Group"                          
##  [19] "2018-19 Learning Network"            
##  [20] "2019-20 Learning Network"            
##  [21] "Overall Score"                       
##  [22] "Overall Pts Earn"                    
##  [23] "Overall Pts Poss"                    
##  [24] "Overall Tier"                        
##  [25] "Overall Peer Rank"                   
##  [26] "Overall City Rank"                   
##  [27] "Ach Score"                           
##  [28] "Ach Pts Earn"                        
##  [29] "Ach Pts Poss"                        
##  [30] "Ach Tier"                            
##  [31] "Ach Peer Rank"                       
##  [32] "Ach City Rank"                       
##  [33] "Prog Score"                          
##  [34] "Prog Pts Earn"                       
##  [35] "Prog Pts Poss"                       
##  [36] "Prog Tier"                           
##  [37] "Prog Peer Rank"                      
##  [38] "Prog City Rank"                      
##  [39] "Clim Score"                          
##  [40] "Clim Pts Earn"                       
##  [41] "Clim Pts Poss"                       
##  [42] "Clim Tier"                           
##  [43] "Clim Peer Rank"                      
##  [44] "Clim City Rank"                      
##  [45] "CC Peer Rank"                        
##  [46] "CC City Rank"                        
##  [47] "K-2 Rdg Score"                       
##  [48] "K-2 Rdg Denom"                       
##  [49] "K-2 Rdg Pts Earn"                    
##  [50] "K-2 Rdg Pts Poss"                    
##  [51] "K-2 Rdg Pct Earn"                    
##  [52] "K-2 Rdg Tier"                        
##  [53] "PSSA ELA Prof/Adv Score"             
##  [54] "PSSA ELA Prof/Adv Denom"             
##  [55] "PSSA ELA Prof/Adv Pts Earn"          
##  [56] "PSSA ELA Prof/Adv Pts Poss"          
##  [57] "PSSA ELA Prof/Adv Pct Earn"          
##  [58] "PSSA ELA Prof/Adv Tier"              
##  [59] "PSSA GR3 ELA Prof/Adv Score"         
##  [60] "PSSA GR3 ELA Prof/Adv Denom"         
##  [61] "PSSA GR4 ELA Prof/Adv Score"         
##  [62] "PSSA GR4 ELA Prof/Adv Denom"         
##  [63] "PSSA GR5 ELA Prof/Adv Score"         
##  [64] "PSSA GR5 ELA Prof/Adv Denom"         
##  [65] "PSSA GR6 ELA Prof/Adv Score"         
##  [66] "PSSA GR6 ELA Prof/Adv Denom"         
##  [67] "PSSA ELA Adv Score"                  
##  [68] "PSSA ELA Adv Denom"                  
##  [69] "PSSA ELA Adv Pts Earn"               
##  [70] "PSSA ELA Adv Pts Poss"               
##  [71] "PSSA ELA Adv Pct Earn"               
##  [72] "PSSA ELA Adv Tier"                   
##  [73] "PSSA Math Prof/Adv Score"            
##  [74] "PSSA Math Prof/Adv Denom"            
##  [75] "PSSA Math Prof/Adv Pts Earn"         
##  [76] "PSSA Math Prof/Adv Pts Poss"         
##  [77] "PSSA Math Prof/Adv Pct Earn"         
##  [78] "PSSA Math Prof/Adv Tier"             
##  [79] "PSSA GR3 Math Prof/Adv Score"        
##  [80] "PSSA GR3 Math Prof/Adv Denom"        
##  [81] "PSSA GR4 Math Prof/Adv Score"        
##  [82] "PSSA GR4 Math Prof/Adv Denom"        
##  [83] "PSSA GR5 Math Prof/Adv Score"        
##  [84] "PSSA GR5 Math Prof/Adv Denom"        
##  [85] "PSSA GR6 Math Prof/Adv Score"        
##  [86] "PSSA GR6 Math Prof/Adv Denom"        
##  [87] "PSSA Math Adv Score"                 
##  [88] "PSSA Math Adv Denom"                 
##  [89] "PSSA Math Adv Pts Earn"              
##  [90] "PSSA Math Adv Pts Poss"              
##  [91] "PSSA Math Adv Pct Earn"              
##  [92] "PSSA Math Adv Tier"                  
##  [93] "PSSA Sci Prof/Adv Score"             
##  [94] "PSSA Sci Prof/Adv Denom"             
##  [95] "PSSA Sci Prof/Adv Pts Earn"          
##  [96] "PSSA Sci Prof/Adv Pts Poss"          
##  [97] "PSSA Sci Prof/Adv Pct Earn"          
##  [98] "PSSA Sci Prof/Adv Tier"              
##  [99] "PSSA GR4 Sci Prof/Adv Score"         
## [100] "PSSA GR4 Sci Prof/Adv Denom"         
## [101] "PSSA Sci Adv Score"                  
## [102] "PSSA Sci Adv Denom"                  
## [103] "PSSA Sci Adv Pts Earn"               
## [104] "PSSA Sci Adv Pts Poss"               
## [105] "PSSA Sci Adv Pct Earn"               
## [106] "PSSA Sci Adv Tier"                   
## [107] "ACCESS Prof Score"                   
## [108] "ACCESS Prof Denom"                   
## [109] "ACCESS Prof Pts Earn"                
## [110] "ACCESS Prof Pts Poss"                
## [111] "ACCESS Prof Pct Earn"                
## [112] "ACCESS Prof Tier"                    
## [113] "PSSA Math AGI Score"                 
## [114] "PSSA Math AGI Pts Earn"              
## [115] "PSSA Math AGI Pts Poss"              
## [116] "PSSA Math AGI Pct Earn"              
## [117] "PSSA Math AGI Tier"                  
## [118] "PSSA ELA AGI Score"                  
## [119] "PSSA ELA AGI Pts Earn"               
## [120] "PSSA ELA AGI Pts Poss"               
## [121] "PSSA ELA AGI Pct Earn"               
## [122] "PSSA ELA AGI Tier"                   
## [123] "PSSA GR4 Sci AGI Score"              
## [124] "PSSA GR4 Sci AGI Pts Earn"           
## [125] "PSSA GR4 Sci AGI Pts Poss"           
## [126] "PSSA GR4 Sci AGI Pct Earn"           
## [127] "PSSA GR4 Sci AGI Tier"               
## [128] "ACCESS Growth Score"                 
## [129] "ACCESS Growth Denom"                 
## [130] "ACCESS Growth Pts Earn"              
## [131] "ACCESS Growth Pts Poss"              
## [132] "ACCESS Growth Pct Earn"              
## [133] "ACCESS Growth Tier"                  
## [134] "PSSA Math AGI (L33P) Score"          
## [135] "PSSA Math AGI (L33P) Pts Earn"       
## [136] "PSSA Math AGI (L33P) Pts Poss"       
## [137] "PSSA Math AGI (L33P) Pct Earn"       
## [138] "PSSA Math AGI (L33P) Tier"           
## [139] "PSSA ELA AGI (L33P) Score"           
## [140] "PSSA ELA AGI (L33P) Pts Earn"        
## [141] "PSSA ELA AGI (L33P) Pts Poss"        
## [142] "PSSA ELA AGI (L33P) Pct Earn"        
## [143] "PSSA ELA AGI (L33P) Tier"            
## [144] "Attendance (95%+) Score"             
## [145] "Attendance (95%+) Denom"             
## [146] "Attendance (95%+) Pts Earn"          
## [147] "Attendance (95%+) Pts Poss"          
## [148] "Attendance (95%+) Pct Earn"          
## [149] "Attendance (95%+) Tier"              
## [150] "Attendance (90%-95%) Score"          
## [151] "Attendance (90%-95%) Denom"          
## [152] "Attendance (85%-90%) Score"          
## [153] "Attendance (85%-90%) Denom"          
## [154] "Attendance (80%-85%) Score"          
## [155] "Attendance (80%-85%) Denom"          
## [156] "Attendance (<80%) Score"             
## [157] "Attendance (<80%) Denom"             
## [158] "Retention Score"                     
## [159] "Retention Denom"                     
## [160] "Retention Pts Earn"                  
## [161] "Retention Pts Poss"                  
## [162] "Retention Pct Earn"                  
## [163] "Retention Tier"                      
## [164] "ISS Score"                           
## [165] "ISS Denom"                           
## [166] "ISS Pts Earn"                        
## [167] "ISS Pts Poss"                        
## [168] "ISS Pct Earn"                        
## [169] "ISS Tier"                            
## [170] "OSS Score"                           
## [171] "OSS Denom"                           
## [172] "OSS Pts Earn"                        
## [173] "OSS Pts Poss"                        
## [174] "OSS Pct Earn"                        
## [175] "OSS Tier"                            
## [176] "Student Survey Climate Score"        
## [177] "Student Survey Climate Pts Earn"     
## [178] "Student Survey Climate Pts Poss"     
## [179] "Student Survey Climate Pct Earn"     
## [180] "Student Survey Climate Tier"         
## [181] "Parent Survey Climate Score"         
## [182] "Parent Survey Climate Pts Earn"      
## [183] "Parent Survey Climate Pts Poss"      
## [184] "Parent Survey Climate Pct Earn"      
## [185] "Parent Survey Climate Tier"          
## [186] "Parent Survey Participation Score"   
## [187] "Parent Survey Participation Pts Earn"
## [188] "Parent Survey Participation Pts Poss"
## [189] "Parent Survey Participation Pct Earn"
## [190] "Parent Survey Participation Tier"    
## [191] "Teach Effect MMS Dist Score"         
## [192] "Teach Effect MMS Prof Score"         
## [193] "Teacher Attendance Score"            
## [194] "Student Survey Teaching Score"
#High Schools
hs_scores <- read_excel("SPR_SY1819_School_Metric_Scores_20200127.xlsx",
                        sheet = "SPR SY2018-2019 HS")
head(es_scores)
## # A tibble: 6 × 194
##   School     `SRC School ID` `ULCS Code` Report `Rpt Type Long` `Street Address`
##   <chr>      <chr>           <chr>       <chr>  <chr>           <chr>           
## 1 Joseph W.… 125             1250        ES     Elementary Sch… 6600 Chester Av…
## 2 Thomas G.… 138             1380        ES     Elementary Sch… 2501 S. 63rd St.
## 3 Samuel Po… 139             1390        ES     Elementary Sch… 301 N. 36th St. 
## 4 John M. P… 140             1400        ES     Elementary Sch… 7000 Buist Ave. 
## 5 Stephen G… 232             2320        ES     Elementary Sch… 1800 Snyder Ave.
## 6 Abram S. … 252             2520        ES     Elementary Sch… 2501 S. 13th St.
## # ℹ 188 more variables: City <chr>, State <chr>, `Zip Code` <chr>,
## #   `Phone Number` <chr>, `Fax Number` <chr>, Governance <chr>,
## #   `Turnaround Model` <chr>, Enrollment <chr>, Website <chr>,
## #   `Grades Served` <chr>, `Admissions Type` <chr>, `Peer Group` <chr>,
## #   `2018-19 Learning Network` <chr>, `2019-20 Learning Network` <chr>,
## #   `Overall Score` <chr>, `Overall Pts Earn` <chr>, `Overall Pts Poss` <chr>,
## #   `Overall Tier` <chr>, `Overall Peer Rank` <chr>, …
#I'm choosing to work with Data 2 first, to work with the overall city ranking that captures parents' interests holistically (like academics, climate, attendance, etc. reflected on colnames(es_scores))

TOP 15 by City Rank

top15_elem <- es_scores %>% 
  slice_min(order_by = as.numeric(str_extract(`Ach City Rank`, "^\\d+")), n = 15) %>%
  select(School, `ULCS Code`, `Ach City Rank`, `Zip Code`) #Ranked out of 57 schools

top15_high <- hs_scores %>%
  slice_min(order_by = as.numeric(str_extract(`Ach City Rank`, "^\\d+")), n = 15) %>%
  select(School, `ULCS Code`, `Ach City Rank`, `Zip Code`) #Ranked out of 72 schools

top15_elem
## # A tibble: 15 × 4
##    School                            `ULCS Code` `Ach City Rank` `Zip Code`
##    <chr>                             <chr>       <chr>           <chr>     
##  1 Anne Frank School                 8400        1st out of 57   19115     
##  2 Abram S. Jenks School             2520        2nd out of 57   19148     
##  3 Watson Comly School               8370        3rd out of 57   19116     
##  4 MaST Community Charter School II  3440        4th out of 57   19111     
##  5 Samuel Powel School               1390        5th out of 57   19104     
##  6 William H. Loesche School         8440        6th out of 57   19116     
##  7 Kennedy C. Crossan School         8230        7th out of 57   19111     
##  8 Thomas Holme School               8270        8th out of 57   19114     
##  9 Horatio B. Hackett School         5300        9th out of 57   19125     
## 10 Fox Chase School                  8260        10th out of 57  19111     
## 11 Rhawnhurst School                 8360        11th out of 57  19152     
## 12 Inquiry Charter School            3308        12th out of 57  19104     
## 13 J. Hampton Moore School           8310        13th out of 57  19111     
## 14 Mastery Charter School at Smedley 3409        14th out of 57  19124     
## 15 Richmond School                   5400        15th out of 57  19134
top15_high
## # A tibble: 15 × 4
##    School                                 `ULCS Code` `Ach City Rank` `Zip Code`
##    <chr>                                  <chr>       <chr>           <chr>     
##  1 Julia R. Masterman School              2140        1st out of 72   19130     
##  2 Central High School                    6010        2nd out of 72   19141     
##  3 High School of Engineering and Science 4030        3rd out of 72   19121     
##  4 Franklin Towne Charter High School     3331        4th out of 72   19137     
##  5 Girard Academic Music Program          2410        5th out of 72   19145     
##  6 Science Leadership Academy             2650        6th out of 72   19103     
##  7 Academy at Palumbo                     2620        7th out of 72   19147     
##  8 Mathematics, Science, and Technology … 3328        8th out of 72   19116     
##  9 Parkway Center City Middle College Hi… 5080        9th out of 72   19123     
## 10 Arts Academy at Benjamin Rush          8040        10th out of 72  19154     
## 11 Philadelphia High School for Girls     6050        11th out of 72  19141     
## 12 Philadelphia Performing Arts: A Strin… 3336        12th out of 72  19145     
## 13 Hill-Freedman World Academy            6460        13th out of 72  19150     
## 14 William W. Bodine High School          5150        14th out of 72  19123     
## 15 High School for Creative and Performi… 2020        15th out of 72  19147

Incident Metrics

#Parents care about the safety of schools and their children being around positive influences, so I will calculate incidents which will capture drug infractions, morals infractions, assaults, weapons infractions, thefts, and average total suspensions per 100 students.
#Note: Keep in mind the same student(s) can commit multiple incidents/ one incident can lead to multiple suspensions. A higher incident number does not necessarily indicate higher number of incidents, but rather, could mean the school has a stronger disciplinary program.

phillyschools <- phillyschools %>%
  mutate(
    Totsus_p100 = (Total_suspensions / Enrollment) * 100,
    Total_incidents_p100 = Assaults + Weapons + Drugs + Thefts + Morals + Totsus_p100)

Present Data

phillyschools <- phillyschools %>%
  mutate(School_code = as.character(School_code)) #Fixed post-error message 

top15_elem_incidents <- top15_elem %>%
  left_join(
    phillyschools %>% select(
      School_code,
      SCHOOL_NAME_1,
      SCHOOL_ZIP,
      Enrollment,
      Total_incidents_p100,
      Attendance,
      Teacher_attendance
    ),
    by = c("ULCS Code" = "School_code")
  )%>%
select(
  School,
  SCHOOL_ZIP,
  `Ach City Rank`,
  Enrollment,
  Total_incidents_p100,
  Attendance,
  Teacher_attendance
)

top15_high_incidents <- top15_high %>%
  left_join(
    phillyschools %>% select(
      School_code,
      SCHOOL_NAME_1,
      SCHOOL_ZIP,
      Enrollment,
      Total_incidents_p100,
      Attendance,
      Teacher_attendance
    ),
    by = c("ULCS Code" = "School_code")
  ) %>%
  select(
    School,
    SCHOOL_ZIP,
    'Ach City Rank',
    Enrollment,
    Total_incidents_p100,
    Attendance,
    Teacher_attendance
  )
top15_elem_incidents <- top15_elem_incidents %>%
  filter(
    !(is.na(Enrollment) &
        is.na(Total_incidents_p100) &
        is.na(Attendance) &
        is.na(Teacher_attendance))
  )

top15_high_incidents <- top15_high_incidents %>%
  filter(!(is.na(Enrollment) &
           is.na(Total_incidents_p100) &
           is.na(Attendance) &
           is.na(Teacher_attendance)))
top15_elem_incidents
## # A tibble: 12 × 7
##    School  SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
##    <chr>        <int> <chr>                <int>                <dbl>      <int>
##  1 Anne F…      19115 1st out of 57          994                3.41          96
##  2 Abram …      19148 2nd out of 57          352                5.27          95
##  3 Watson…      19116 3rd out of 57          445                3.12          95
##  4 Samuel…      19104 5th out of 57          239                4.51          95
##  5 Willia…      19116 6th out of 57          768                9.69          94
##  6 Kenned…      19111 7th out of 57          373                2.88          95
##  7 Thomas…      19114 8th out of 57          454               23.0           93
##  8 Horati…      19125 9th out of 57          422               12.5           93
##  9 Fox Ch…      19111 10th out of 57         396                1.77          95
## 10 Rhawnh…      19152 11th out of 57         525                0.762         95
## 11 J. Ham…      19111 13th out of 57        1134               15.4           95
## 12 Richmo…      19134 15th out of 57         680               17             93
## # ℹ 1 more variable: Teacher_attendance <int>
top15_high_incidents
## # A tibble: 12 × 7
##    School  SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
##    <chr>        <int> <chr>                <int>                <dbl>      <int>
##  1 Julia …      19130 1st out of 72         1190                4.01          97
##  2 Centra…      19141 2nd out of 72         2331               16.1           96
##  3 High S…      19121 3rd out of 72          683               12.3           96
##  4 Girard…      19145 5th out of 72          499                0.200         97
##  5 Scienc…      19103 6th out of 72          462                3.87          95
##  6 Academ…      19147 7th out of 72          554                4.72          96
##  7 Parkwa…      19123 9th out of 72          390               16.4           93
##  8 Arts A…      19154 10th out of 72         364                6.30          95
##  9 Philad…      19141 11th out of 72        1072                6.03          93
## 10 Hill-F…      19138 13th out of 72         224                2.79          96
## 11 Willia…      19123 14th out of 72         543                7.97          95
## 12 High S…      19147 15th out of 72         684                5.19          95
## # ℹ 1 more variable: Teacher_attendance <int>

Final Recommendation to Parents

#Recommendation #1:Academy at Palumbo and Abram S. Jenks School

#Based on the city ranking that holistically captures your interests (academics, climate, attendance, etc.) along with verification of a low incident rate (capturing thefts, assaults, moral infractions, suspensions, etc.), I would like to recommend Academy at Palumbo and Abram S. Jenks School for your children. Another consideration was the number of enrolled students. For new students, a big student body (over a thousand students) could be overwhelming. Both schools are approximately 10 minutes from each other. 

top15_elem_incidents %>%
  filter(School %in% c("Abram S. Jenks School"))
## # A tibble: 1 × 7
##   School   SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
##   <chr>         <int> <chr>                <int>                <dbl>      <int>
## 1 Abram S…      19148 2nd out of 57          352                 5.27         95
## # ℹ 1 more variable: Teacher_attendance <int>
top15_high_incidents %>%
  filter(School %in% c("Academy at Palumbo"))
## # A tibble: 1 × 7
##   School   SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
##   <chr>         <int> <chr>                <int>                <dbl>      <int>
## 1 Academy…      19147 7th out of 72          554                 4.72         96
## # ℹ 1 more variable: Teacher_attendance <int>
#Recommendation #2: Masterman School and Horatio B Hackett School

#If your children attending a big school (more than thousand students) is not a concern, Masterman School and Horatio B Hackett are schools that are highly ranked with equally low numbers of incidents. Both schools are also approximately 10 minutes from each other.

top15_elem_incidents %>%
  filter(School %in% c("Horatio B. Hackett School"))
## # A tibble: 1 × 7
##   School   SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
##   <chr>         <int> <chr>                <int>                <dbl>      <int>
## 1 Horatio…      19125 9th out of 57          422                 12.5         93
## # ℹ 1 more variable: Teacher_attendance <int>
top15_high_incidents %>%
  filter(School %in% c("Julia R. Masterman School"))
## # A tibble: 1 × 7
##   School   SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
##   <chr>         <int> <chr>                <int>                <dbl>      <int>
## 1 Julia R…      19130 1st out of 72         1190                 4.01         97
## # ℹ 1 more variable: Teacher_attendance <int>

Thank You