title: “Assignment 3”
author: “Tyler Mitchell”
output: html_document

Data Import

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(readr)
library(ggplot2)
crime <- read_csv("http://jsuleiman.com/datasets/Crime_2015.csv")
## Rows: 378 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): MSA, State, City
## dbl (5): Murder, Rape, Robbery, AggravatedAssault, MotorVehicleTheft
## num (4): ViolentCrime, PropertyCrime, Burglary, Theft
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
crime <- crime %>% drop_na()

Displaying First Few Rows by Crime

head(crime)
## # A tibble: 6 × 12
##   MSA   Viole…¹ Murder  Rape Robbery Aggra…² Prope…³ Burgl…⁴ Theft Motor…⁵ State
##   <chr>   <dbl>  <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl> <chr>
## 1 Abil…    412.    5.3  56      78.4    273.   3609     852  2494.    263. TX   
## 2 Akro…    238.    5.1  38.2    75.2    120.   2552.    575. 1853     124. OH   
## 3 Alba…    668.    7.8  30.4   158.     472.   3894.   1100. 2653.    142. GA   
## 4 Alba…    114.    2.5  28.2    20.7     63    3208.    485. 2476.    248. OR   
## 5 Albu…    793.    6.1  63.8   207.     516    4608.    883. 3048.    677. NM   
## 6 Alex…    936.    4.5  35.5   120.     776.   4566.   1167  3084.    315. LA   
## # … with 1 more variable: City <chr>, and abbreviated variable names
## #   ¹​ViolentCrime, ²​AggravatedAssault, ³​PropertyCrime, ⁴​Burglary,
## #   ⁵​MotorVehicleTheft

Median of Violent Crimes

crimerate <- crime[!is.na(crime$ViolentCrime), ]
median <- median(crimerate$ViolentCrime)
median
## [1] 332.75

Data Import

scorecard <- read_csv("https://jsuleiman.com/datasets/CollegeScorecard.csv")%>% rename(City=CITY)
## Rows: 7804 Columns: 1729
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (178): INSTNM, CITY, STABBR, ZIP, AccredAgency, INSTURL, NPCURL, RPY_3Y...
## dbl  (377): UNITID, OPEID, opeid6, HCM2, main, NUMBRANCH, PREDDEG, HIGHDEG, ...
## lgl (1174): sch_deg, locale2, UG, UGDS_WHITENH, UGDS_BLACKNH, UGDS_API, UGDS...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Displaying First Few Rows of the Scorecard

head(scorecard)
## # A tibble: 6 × 1,729
##   UNITID   OPEID opeid6 INSTNM City  STABBR ZIP   Accre…¹ INSTURL NPCURL sch_deg
##    <dbl>   <dbl>  <dbl> <chr>  <chr> <chr>  <chr> <chr>   <chr>   <chr>  <lgl>  
## 1 100654  100200   1002 Alaba… Norm… AL     35762 Southe… www.aa… galil… NA     
## 2 100663  105200   1052 Unive… Birm… AL     3529… Southe… www.ua… www.c… NA     
## 3 100690 2503400  25034 Amrid… Mont… AL     3611… Southe… www.am… tcc.n… NA     
## 4 100706  105500   1055 Unive… Hunt… AL     35899 Southe… www.ua… finai… NA     
## 5 100724  100500   1005 Alaba… Mont… AL     3610… Southe… www.al… www.a… NA     
## 6 100751  105100   1051 The U… Tusc… AL     3548… Southe… www.ua… oira.… NA     
## # … with 1,718 more variables: HCM2 <dbl>, main <dbl>, NUMBRANCH <dbl>,
## #   PREDDEG <dbl>, HIGHDEG <dbl>, CONTROL <dbl>, st_fips <dbl>, region <dbl>,
## #   LOCALE <dbl>, locale2 <lgl>, LATITUDE <dbl>, LONGITUDE <dbl>,
## #   CCBASIC <dbl>, CCUGPROF <dbl>, CCSIZSET <dbl>, HBCU <dbl>, PBI <dbl>,
## #   ANNHI <dbl>, TRIBAL <dbl>, AANAPII <dbl>, HSI <dbl>, NANTI <dbl>,
## #   MENONLY <dbl>, WOMENONLY <dbl>, RELAFFIL <dbl>, ADM_RATE <dbl>,
## #   ADM_RATE_ALL <dbl>, SATVR25 <dbl>, SATVR75 <dbl>, SATMT25 <dbl>, …

Joining Data

Listedcolleges <- crime %>% inner_join(scorecard, by="City") %>% filter(!is.na(ViolentCrime) & !is.na(City) & !is.na(INSTNM) & !is.na(AccredAgency) & !is.na(SATMTMID)) %>% select(Institution_Name = "INSTNM", City = "City", SAT_Math_Mid = "SATMTMID", Violent_Crime_Rate = "ViolentCrime", Accreditation_Agency = "AccredAgency")

First Few Rows of Potential Schools

head(Listedcolleges)
## # A tibble: 6 × 5
##   Institution_Name                               City    SAT_M…¹ Viole…² Accre…³
##   <chr>                                          <chr>     <dbl>   <dbl> <chr>  
## 1 Abilene Christian University                   Abilene     545    412. Southe…
## 2 Hardin-Simmons University                      Abilene     520    412. Southe…
## 3 McMurry University                             Abilene     495    412. Southe…
## 4 University of Akron Main Campus                Akron       540    238. North …
## 5 Albany State University                        Albany      435    668. Southe…
## 6 Albany College of Pharmacy and Health Sciences Albany      615    668. Middle…
## # … with abbreviated variable names ¹​SAT_Math_Mid, ²​Violent_Crime_Rate,
## #   ³​Accreditation_Agency

Median SAT Score

mediansatscore <- median(Listedcolleges$SAT_Math_Mid) 
mediansatscore
## [1] 530

Crime Rate Median

CRmedian <- median(Listedcolleges$Violent_Crime_Rate)
CRmedian
## [1] 392.9
listedcolleges_filtered <- Listedcolleges %>% filter(Violent_Crime_Rate < CRmedian) %>% filter(SAT_Math_Mid > mediansatscore) %>% filter(Accreditation_Agency == "Southern Association of Colleges and Schools Commission on Colleges") 
Bestcolleges <- listedcolleges_filtered[c("Institution_Name", "City", "SAT_Math_Mid", "Violent_Crime_Rate", "Accreditation_Agency")] %>% arrange(desc(SAT_Math_Mid))

Number of Schools Fitting Criteria

Numberofschools <- nrow(Bestcolleges)
Numberofschools
## [1] 35

Table of Schools Fitting Criteria

print(as_tibble(Bestcolleges), n = 35)
## # A tibble: 35 × 5
##    Institution_Name                          City        SAT_M…¹ Viole…² Accre…³
##    <chr>                                     <chr>         <dbl>   <dbl> <chr>  
##  1 Washington and Lee University             Lexington       690    256. Southe…
##  2 University of Virginia-Main Campus        Charlottes…     685    165. Southe…
##  3 Southern Methodist University             Dallas          660    338. Southe…
##  4 Southern Methodist University             Dallas          660    331. Southe…
##  5 The University of Texas at Austin         Austin          650    288. Southe…
##  6 University of Florida                     Gainesville     640    189. Southe…
##  7 University of Georgia                     Athens          625    326. Southe…
##  8 Austin College                            Sherman         615    327. Southe…
##  9 Texas A & M University-College Station    College St…     605    316. Southe…
## 10 Texas Christian University                Fort Worth      600    351. Southe…
## 11 University of North Carolina at Asheville Asheville       595    202  Southe…
## 12 Mississippi University for Women          Columbus        595    109. Southe…
## 13 Mississippi University for Women          Columbus        595    298  Southe…
## 14 Union University                          Jackson         595    359. Southe…
## 15 LeTourneau University                     Longview        595    302. Southe…
## 16 University of South Florida-Main Campus   Tampa           595    384. Southe…
## 17 Mercer University                         Macon           590    320. Southe…
## 18 James Madison University                  Harrisonbu…     580    146. Southe…
## 19 Virginia Military Institute               Lexington       580    256. Southe…
## 20 University of North Florida               Jacksonvil…     575    175. Southe…
## 21 Millsaps College                          Jackson         570    359. Southe…
## 22 Transylvania University                   Lexington       570    256. Southe…
## 23 University of Kentucky                    Lexington       565    256. Southe…
## 24 Dallas Baptist University                 Dallas          562    338. Southe…
## 25 Dallas Baptist University                 Dallas          562    331. Southe…
## 26 Saint Edward's University                 Austin          560    288. Southe…
## 27 University of Louisiana at Monroe         Monroe          560    236. Southe…
## 28 Virginia Commonwealth University          Richmond        550    236. Southe…
## 29 University of Louisiana at Lafayette      Lafayette       545    292  Southe…
## 30 Florida Southern College                  Lakeland        545    340. Southe…
## 31 The University of Texas at Tyler          Tyler           544    296. Southe…
## 32 Roanoke College                           Salem           540    240. Southe…
## 33 The University of Tampa                   Tampa           540    384. Southe…
## 34 Bryan College-Dayton                      Dayton          535    305. Southe…
## 35 Randolph College                          Lynchburg       535    180. Southe…
## # … with abbreviated variable names ¹​SAT_Math_Mid, ²​Violent_Crime_Rate,
## #   ³​Accreditation_Agency

Graph of Top 20 Colleges based on SAT Score

ggplot(Bestcolleges[1:20, ], aes(x=reorder(Institution_Name, SAT_Math_Mid), y=SAT_Math_Mid)) + geom_bar(position="dodge", stat="identity", fill="lightblue") + coord_flip() + geom_text(color="black", size=4, aes(label=SAT_Math_Mid), position=position_dodge(width=1.0), hjust=3.0) + labs(x = "Institutions", y = "SAT Score") + ggtitle("Top 20 Colleges by SAT Score") + theme(plot.title = element_text(hjust = .1))