##Set Up Data
#Data 1
phillyschools <- read.csv("philadelphia_schools.csv")
colnames(phillyschools)
## [1] "School_code" "Attendance"
## [3] "Enrollment" "New_student"
## [5] "Withdrawals" "African_American"
## [7] "White" "Asian"
## [9] "Latino" "Other"
## [11] "Pacific_Islander" "Low_income_family"
## [13] "SCHOOL_NAME_1" "SCHOOL_NAME_2"
## [15] "ADDRESS" "SCHOOL_ZIP"
## [17] "ZIP_PLUS_4" "CITY"
## [19] "STATE_CD" "PHONE_NUMBER"
## [21] "SCH_START_GRADE" "SCH_TERM_GRADE"
## [23] "HPADDR" "SCHOOL_LEVEL_NAME"
## [25] "Drugs" "Morals"
## [27] "Assaults" "Weapons"
## [29] "Thefts" "Total_suspensions"
## [31] "One_suspension" "Two_suspensions"
## [33] "Three_suspensions" "Three_plus_suspensions"
## [35] "Teacher_attendance" "Special_education"
## [37] "Gifted_education" "English_second_language"
## [39] "Average_salary"
head(phillyschools$School_code, 10)
## [1] 1010 1020 1030 1050 1100 1130 1190 1200 1230 1250
#Data 2
school_scores <- read_excel("SPR_SY1819_School_Metric_Scores_20200127.xlsx")
#Realized there were multiple sheets within excel
#Lesson: Pay better attention to code books/"README"
excel_sheets("SPR_SY1819_School_Metric_Scores_20200127.xlsx")
## [1] "README" "SPR SY2018-2019" "SPR SY2018-2019 ES"
## [4] "SPR SY2018-2019 MS" "SPR SY2018-2019 K8" "SPR SY2018-2019 HS"
#Elementary Schools
es_scores <- read_excel("SPR_SY1819_School_Metric_Scores_20200127.xlsx",
sheet = "SPR SY2018-2019 ES")
colnames(es_scores)
## [1] "School"
## [2] "SRC School ID"
## [3] "ULCS Code"
## [4] "Report"
## [5] "Rpt Type Long"
## [6] "Street Address"
## [7] "City"
## [8] "State"
## [9] "Zip Code"
## [10] "Phone Number"
## [11] "Fax Number"
## [12] "Governance"
## [13] "Turnaround Model"
## [14] "Enrollment"
## [15] "Website"
## [16] "Grades Served"
## [17] "Admissions Type"
## [18] "Peer Group"
## [19] "2018-19 Learning Network"
## [20] "2019-20 Learning Network"
## [21] "Overall Score"
## [22] "Overall Pts Earn"
## [23] "Overall Pts Poss"
## [24] "Overall Tier"
## [25] "Overall Peer Rank"
## [26] "Overall City Rank"
## [27] "Ach Score"
## [28] "Ach Pts Earn"
## [29] "Ach Pts Poss"
## [30] "Ach Tier"
## [31] "Ach Peer Rank"
## [32] "Ach City Rank"
## [33] "Prog Score"
## [34] "Prog Pts Earn"
## [35] "Prog Pts Poss"
## [36] "Prog Tier"
## [37] "Prog Peer Rank"
## [38] "Prog City Rank"
## [39] "Clim Score"
## [40] "Clim Pts Earn"
## [41] "Clim Pts Poss"
## [42] "Clim Tier"
## [43] "Clim Peer Rank"
## [44] "Clim City Rank"
## [45] "CC Peer Rank"
## [46] "CC City Rank"
## [47] "K-2 Rdg Score"
## [48] "K-2 Rdg Denom"
## [49] "K-2 Rdg Pts Earn"
## [50] "K-2 Rdg Pts Poss"
## [51] "K-2 Rdg Pct Earn"
## [52] "K-2 Rdg Tier"
## [53] "PSSA ELA Prof/Adv Score"
## [54] "PSSA ELA Prof/Adv Denom"
## [55] "PSSA ELA Prof/Adv Pts Earn"
## [56] "PSSA ELA Prof/Adv Pts Poss"
## [57] "PSSA ELA Prof/Adv Pct Earn"
## [58] "PSSA ELA Prof/Adv Tier"
## [59] "PSSA GR3 ELA Prof/Adv Score"
## [60] "PSSA GR3 ELA Prof/Adv Denom"
## [61] "PSSA GR4 ELA Prof/Adv Score"
## [62] "PSSA GR4 ELA Prof/Adv Denom"
## [63] "PSSA GR5 ELA Prof/Adv Score"
## [64] "PSSA GR5 ELA Prof/Adv Denom"
## [65] "PSSA GR6 ELA Prof/Adv Score"
## [66] "PSSA GR6 ELA Prof/Adv Denom"
## [67] "PSSA ELA Adv Score"
## [68] "PSSA ELA Adv Denom"
## [69] "PSSA ELA Adv Pts Earn"
## [70] "PSSA ELA Adv Pts Poss"
## [71] "PSSA ELA Adv Pct Earn"
## [72] "PSSA ELA Adv Tier"
## [73] "PSSA Math Prof/Adv Score"
## [74] "PSSA Math Prof/Adv Denom"
## [75] "PSSA Math Prof/Adv Pts Earn"
## [76] "PSSA Math Prof/Adv Pts Poss"
## [77] "PSSA Math Prof/Adv Pct Earn"
## [78] "PSSA Math Prof/Adv Tier"
## [79] "PSSA GR3 Math Prof/Adv Score"
## [80] "PSSA GR3 Math Prof/Adv Denom"
## [81] "PSSA GR4 Math Prof/Adv Score"
## [82] "PSSA GR4 Math Prof/Adv Denom"
## [83] "PSSA GR5 Math Prof/Adv Score"
## [84] "PSSA GR5 Math Prof/Adv Denom"
## [85] "PSSA GR6 Math Prof/Adv Score"
## [86] "PSSA GR6 Math Prof/Adv Denom"
## [87] "PSSA Math Adv Score"
## [88] "PSSA Math Adv Denom"
## [89] "PSSA Math Adv Pts Earn"
## [90] "PSSA Math Adv Pts Poss"
## [91] "PSSA Math Adv Pct Earn"
## [92] "PSSA Math Adv Tier"
## [93] "PSSA Sci Prof/Adv Score"
## [94] "PSSA Sci Prof/Adv Denom"
## [95] "PSSA Sci Prof/Adv Pts Earn"
## [96] "PSSA Sci Prof/Adv Pts Poss"
## [97] "PSSA Sci Prof/Adv Pct Earn"
## [98] "PSSA Sci Prof/Adv Tier"
## [99] "PSSA GR4 Sci Prof/Adv Score"
## [100] "PSSA GR4 Sci Prof/Adv Denom"
## [101] "PSSA Sci Adv Score"
## [102] "PSSA Sci Adv Denom"
## [103] "PSSA Sci Adv Pts Earn"
## [104] "PSSA Sci Adv Pts Poss"
## [105] "PSSA Sci Adv Pct Earn"
## [106] "PSSA Sci Adv Tier"
## [107] "ACCESS Prof Score"
## [108] "ACCESS Prof Denom"
## [109] "ACCESS Prof Pts Earn"
## [110] "ACCESS Prof Pts Poss"
## [111] "ACCESS Prof Pct Earn"
## [112] "ACCESS Prof Tier"
## [113] "PSSA Math AGI Score"
## [114] "PSSA Math AGI Pts Earn"
## [115] "PSSA Math AGI Pts Poss"
## [116] "PSSA Math AGI Pct Earn"
## [117] "PSSA Math AGI Tier"
## [118] "PSSA ELA AGI Score"
## [119] "PSSA ELA AGI Pts Earn"
## [120] "PSSA ELA AGI Pts Poss"
## [121] "PSSA ELA AGI Pct Earn"
## [122] "PSSA ELA AGI Tier"
## [123] "PSSA GR4 Sci AGI Score"
## [124] "PSSA GR4 Sci AGI Pts Earn"
## [125] "PSSA GR4 Sci AGI Pts Poss"
## [126] "PSSA GR4 Sci AGI Pct Earn"
## [127] "PSSA GR4 Sci AGI Tier"
## [128] "ACCESS Growth Score"
## [129] "ACCESS Growth Denom"
## [130] "ACCESS Growth Pts Earn"
## [131] "ACCESS Growth Pts Poss"
## [132] "ACCESS Growth Pct Earn"
## [133] "ACCESS Growth Tier"
## [134] "PSSA Math AGI (L33P) Score"
## [135] "PSSA Math AGI (L33P) Pts Earn"
## [136] "PSSA Math AGI (L33P) Pts Poss"
## [137] "PSSA Math AGI (L33P) Pct Earn"
## [138] "PSSA Math AGI (L33P) Tier"
## [139] "PSSA ELA AGI (L33P) Score"
## [140] "PSSA ELA AGI (L33P) Pts Earn"
## [141] "PSSA ELA AGI (L33P) Pts Poss"
## [142] "PSSA ELA AGI (L33P) Pct Earn"
## [143] "PSSA ELA AGI (L33P) Tier"
## [144] "Attendance (95%+) Score"
## [145] "Attendance (95%+) Denom"
## [146] "Attendance (95%+) Pts Earn"
## [147] "Attendance (95%+) Pts Poss"
## [148] "Attendance (95%+) Pct Earn"
## [149] "Attendance (95%+) Tier"
## [150] "Attendance (90%-95%) Score"
## [151] "Attendance (90%-95%) Denom"
## [152] "Attendance (85%-90%) Score"
## [153] "Attendance (85%-90%) Denom"
## [154] "Attendance (80%-85%) Score"
## [155] "Attendance (80%-85%) Denom"
## [156] "Attendance (<80%) Score"
## [157] "Attendance (<80%) Denom"
## [158] "Retention Score"
## [159] "Retention Denom"
## [160] "Retention Pts Earn"
## [161] "Retention Pts Poss"
## [162] "Retention Pct Earn"
## [163] "Retention Tier"
## [164] "ISS Score"
## [165] "ISS Denom"
## [166] "ISS Pts Earn"
## [167] "ISS Pts Poss"
## [168] "ISS Pct Earn"
## [169] "ISS Tier"
## [170] "OSS Score"
## [171] "OSS Denom"
## [172] "OSS Pts Earn"
## [173] "OSS Pts Poss"
## [174] "OSS Pct Earn"
## [175] "OSS Tier"
## [176] "Student Survey Climate Score"
## [177] "Student Survey Climate Pts Earn"
## [178] "Student Survey Climate Pts Poss"
## [179] "Student Survey Climate Pct Earn"
## [180] "Student Survey Climate Tier"
## [181] "Parent Survey Climate Score"
## [182] "Parent Survey Climate Pts Earn"
## [183] "Parent Survey Climate Pts Poss"
## [184] "Parent Survey Climate Pct Earn"
## [185] "Parent Survey Climate Tier"
## [186] "Parent Survey Participation Score"
## [187] "Parent Survey Participation Pts Earn"
## [188] "Parent Survey Participation Pts Poss"
## [189] "Parent Survey Participation Pct Earn"
## [190] "Parent Survey Participation Tier"
## [191] "Teach Effect MMS Dist Score"
## [192] "Teach Effect MMS Prof Score"
## [193] "Teacher Attendance Score"
## [194] "Student Survey Teaching Score"
#High Schools
hs_scores <- read_excel("SPR_SY1819_School_Metric_Scores_20200127.xlsx",
sheet = "SPR SY2018-2019 HS")
head(es_scores)
## # A tibble: 6 × 194
## School `SRC School ID` `ULCS Code` Report `Rpt Type Long` `Street Address`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Joseph W.… 125 1250 ES Elementary Sch… 6600 Chester Av…
## 2 Thomas G.… 138 1380 ES Elementary Sch… 2501 S. 63rd St.
## 3 Samuel Po… 139 1390 ES Elementary Sch… 301 N. 36th St.
## 4 John M. P… 140 1400 ES Elementary Sch… 7000 Buist Ave.
## 5 Stephen G… 232 2320 ES Elementary Sch… 1800 Snyder Ave.
## 6 Abram S. … 252 2520 ES Elementary Sch… 2501 S. 13th St.
## # ℹ 188 more variables: City <chr>, State <chr>, `Zip Code` <chr>,
## # `Phone Number` <chr>, `Fax Number` <chr>, Governance <chr>,
## # `Turnaround Model` <chr>, Enrollment <chr>, Website <chr>,
## # `Grades Served` <chr>, `Admissions Type` <chr>, `Peer Group` <chr>,
## # `2018-19 Learning Network` <chr>, `2019-20 Learning Network` <chr>,
## # `Overall Score` <chr>, `Overall Pts Earn` <chr>, `Overall Pts Poss` <chr>,
## # `Overall Tier` <chr>, `Overall Peer Rank` <chr>, …
#I'm choosing to work with Data 2 first, to work with the overall city ranking that captures parents' interests holistically (like academics, climate, attendance, etc. reflected on colnames(es_scores))
top15_elem <- es_scores %>%
slice_min(order_by = as.numeric(str_extract(`Ach City Rank`, "^\\d+")), n = 15) %>%
select(School, `ULCS Code`, `Ach City Rank`, `Zip Code`) #Ranked out of 57 schools
top15_high <- hs_scores %>%
slice_min(order_by = as.numeric(str_extract(`Ach City Rank`, "^\\d+")), n = 15) %>%
select(School, `ULCS Code`, `Ach City Rank`, `Zip Code`) #Ranked out of 72 schools
top15_elem
## # A tibble: 15 × 4
## School `ULCS Code` `Ach City Rank` `Zip Code`
## <chr> <chr> <chr> <chr>
## 1 Anne Frank School 8400 1st out of 57 19115
## 2 Abram S. Jenks School 2520 2nd out of 57 19148
## 3 Watson Comly School 8370 3rd out of 57 19116
## 4 MaST Community Charter School II 3440 4th out of 57 19111
## 5 Samuel Powel School 1390 5th out of 57 19104
## 6 William H. Loesche School 8440 6th out of 57 19116
## 7 Kennedy C. Crossan School 8230 7th out of 57 19111
## 8 Thomas Holme School 8270 8th out of 57 19114
## 9 Horatio B. Hackett School 5300 9th out of 57 19125
## 10 Fox Chase School 8260 10th out of 57 19111
## 11 Rhawnhurst School 8360 11th out of 57 19152
## 12 Inquiry Charter School 3308 12th out of 57 19104
## 13 J. Hampton Moore School 8310 13th out of 57 19111
## 14 Mastery Charter School at Smedley 3409 14th out of 57 19124
## 15 Richmond School 5400 15th out of 57 19134
top15_high
## # A tibble: 15 × 4
## School `ULCS Code` `Ach City Rank` `Zip Code`
## <chr> <chr> <chr> <chr>
## 1 Julia R. Masterman School 2140 1st out of 72 19130
## 2 Central High School 6010 2nd out of 72 19141
## 3 High School of Engineering and Science 4030 3rd out of 72 19121
## 4 Franklin Towne Charter High School 3331 4th out of 72 19137
## 5 Girard Academic Music Program 2410 5th out of 72 19145
## 6 Science Leadership Academy 2650 6th out of 72 19103
## 7 Academy at Palumbo 2620 7th out of 72 19147
## 8 Mathematics, Science, and Technology … 3328 8th out of 72 19116
## 9 Parkway Center City Middle College Hi… 5080 9th out of 72 19123
## 10 Arts Academy at Benjamin Rush 8040 10th out of 72 19154
## 11 Philadelphia High School for Girls 6050 11th out of 72 19141
## 12 Philadelphia Performing Arts: A Strin… 3336 12th out of 72 19145
## 13 Hill-Freedman World Academy 6460 13th out of 72 19150
## 14 William W. Bodine High School 5150 14th out of 72 19123
## 15 High School for Creative and Performi… 2020 15th out of 72 19147
#Parents care about the safety of schools and their children being around positive influences, so I will calculate incidents which will capture drug infractions, morals infractions, assaults, weapons infractions, thefts, and average total suspensions per 100 students.
#Note: Keep in mind the same student(s) can commit multiple incidents/ one incident can lead to multiple suspensions. A higher incident number does not necessarily indicate higher number of incidents, but rather, could mean the school has a stronger disciplinary program.
phillyschools <- phillyschools %>%
mutate(
Totsus_p100 = (Total_suspensions / Enrollment) * 100,
Total_incidents_p100 = Assaults + Weapons + Drugs + Thefts + Morals + Totsus_p100)
phillyschools <- phillyschools %>%
mutate(School_code = as.character(School_code)) #Fixed post-error message
top15_elem_incidents <- top15_elem %>%
left_join(
phillyschools %>% select(
School_code,
SCHOOL_NAME_1,
SCHOOL_ZIP,
Enrollment,
Total_incidents_p100,
Attendance,
Teacher_attendance
),
by = c("ULCS Code" = "School_code")
)%>%
select(
School,
SCHOOL_ZIP,
`Ach City Rank`,
Enrollment,
Total_incidents_p100,
Attendance,
Teacher_attendance
)
top15_high_incidents <- top15_high %>%
left_join(
phillyschools %>% select(
School_code,
SCHOOL_NAME_1,
SCHOOL_ZIP,
Enrollment,
Total_incidents_p100,
Attendance,
Teacher_attendance
),
by = c("ULCS Code" = "School_code")
) %>%
select(
School,
SCHOOL_ZIP,
'Ach City Rank',
Enrollment,
Total_incidents_p100,
Attendance,
Teacher_attendance
)
top15_elem_incidents <- top15_elem_incidents %>%
filter(
!(is.na(Enrollment) &
is.na(Total_incidents_p100) &
is.na(Attendance) &
is.na(Teacher_attendance))
)
top15_high_incidents <- top15_high_incidents %>%
filter(!(is.na(Enrollment) &
is.na(Total_incidents_p100) &
is.na(Attendance) &
is.na(Teacher_attendance)))
top15_elem_incidents
## # A tibble: 12 × 7
## School SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
## <chr> <int> <chr> <int> <dbl> <int>
## 1 Anne F… 19115 1st out of 57 994 3.41 96
## 2 Abram … 19148 2nd out of 57 352 5.27 95
## 3 Watson… 19116 3rd out of 57 445 3.12 95
## 4 Samuel… 19104 5th out of 57 239 4.51 95
## 5 Willia… 19116 6th out of 57 768 9.69 94
## 6 Kenned… 19111 7th out of 57 373 2.88 95
## 7 Thomas… 19114 8th out of 57 454 23.0 93
## 8 Horati… 19125 9th out of 57 422 12.5 93
## 9 Fox Ch… 19111 10th out of 57 396 1.77 95
## 10 Rhawnh… 19152 11th out of 57 525 0.762 95
## 11 J. Ham… 19111 13th out of 57 1134 15.4 95
## 12 Richmo… 19134 15th out of 57 680 17 93
## # ℹ 1 more variable: Teacher_attendance <int>
top15_high_incidents
## # A tibble: 12 × 7
## School SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
## <chr> <int> <chr> <int> <dbl> <int>
## 1 Julia … 19130 1st out of 72 1190 4.01 97
## 2 Centra… 19141 2nd out of 72 2331 16.1 96
## 3 High S… 19121 3rd out of 72 683 12.3 96
## 4 Girard… 19145 5th out of 72 499 0.200 97
## 5 Scienc… 19103 6th out of 72 462 3.87 95
## 6 Academ… 19147 7th out of 72 554 4.72 96
## 7 Parkwa… 19123 9th out of 72 390 16.4 93
## 8 Arts A… 19154 10th out of 72 364 6.30 95
## 9 Philad… 19141 11th out of 72 1072 6.03 93
## 10 Hill-F… 19138 13th out of 72 224 2.79 96
## 11 Willia… 19123 14th out of 72 543 7.97 95
## 12 High S… 19147 15th out of 72 684 5.19 95
## # ℹ 1 more variable: Teacher_attendance <int>
#Recommendation #1:Academy at Palumbo and Abram S. Jenks School
#Based on the city ranking that holistically captures your interests (academics, climate, attendance, etc.) along with verification of a low incident rate (capturing thefts, assaults, moral infractions, suspensions, etc.), I would like to recommend Academy at Palumbo and Abram S. Jenks School for your children. Another consideration was the number of enrolled students. For new students, a big student body (over a thousand students) could be overwhelming. Both schools are approximately 10 minutes from each other.
top15_elem_incidents %>%
filter(School %in% c("Abram S. Jenks School"))
## # A tibble: 1 × 7
## School SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
## <chr> <int> <chr> <int> <dbl> <int>
## 1 Abram S… 19148 2nd out of 57 352 5.27 95
## # ℹ 1 more variable: Teacher_attendance <int>
top15_high_incidents %>%
filter(School %in% c("Academy at Palumbo"))
## # A tibble: 1 × 7
## School SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
## <chr> <int> <chr> <int> <dbl> <int>
## 1 Academy… 19147 7th out of 72 554 4.72 96
## # ℹ 1 more variable: Teacher_attendance <int>
#Recommendation #2: Masterman School and Horatio B Hackett School
#If your children attending a big school (more than thousand students) is not a concern, Masterman School and Horatio B Hackett are schools that are highly ranked with equally low numbers of incidents. Both schools are also approximately 10 minutes from each other.
top15_elem_incidents %>%
filter(School %in% c("Horatio B. Hackett School"))
## # A tibble: 1 × 7
## School SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
## <chr> <int> <chr> <int> <dbl> <int>
## 1 Horatio… 19125 9th out of 57 422 12.5 93
## # ℹ 1 more variable: Teacher_attendance <int>
top15_high_incidents %>%
filter(School %in% c("Julia R. Masterman School"))
## # A tibble: 1 × 7
## School SCHOOL_ZIP `Ach City Rank` Enrollment Total_incidents_p100 Attendance
## <chr> <int> <chr> <int> <dbl> <int>
## 1 Julia R… 19130 1st out of 72 1190 4.01 97
## # ℹ 1 more variable: Teacher_attendance <int>
Thank You