# One-time setup: install the packages that are loaded with library() below.
# Package names must be plain double-quoted strings, one call per line.
install.packages("readr")
install.packages("tidyverse")

library(readr)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ dplyr   1.0.2
## ✓ tibble  3.0.4     ✓ stringr 1.4.0
## ✓ tidyr   1.1.2     ✓ forcats 0.5.0
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()

Reading CSV

# Download the College Scorecard table into a tibble; readr guesses the type of
# every column and prints the specification it chose.
# NOTE(review): the printed specification reports logical as the default type
# for the columns it does not list individually; confirm that any column used
# numerically later was parsed as a number.
CollegeScorecard <- read_csv(
  file = "http://jsuleiman.com/datasets/CollegeScorecard.csv"
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_logical(),
##   UNITID = col_double(),
##   OPEID = col_double(),
##   opeid6 = col_double(),
##   INSTNM = col_character(),
##   CITY = col_character(),
##   STABBR = col_character(),
##   ZIP = col_character(),
##   AccredAgency = col_character(),
##   INSTURL = col_character(),
##   NPCURL = col_character(),
##   HCM2 = col_double(),
##   main = col_double(),
##   NUMBRANCH = col_double(),
##   PREDDEG = col_double(),
##   HIGHDEG = col_double(),
##   CONTROL = col_double(),
##   st_fips = col_double(),
##   region = col_double(),
##   LOCALE = col_double(),
##   LATITUDE = col_double()
##   # ... with 535 more columns
## )
## ℹ Use `spec()` for the full column specifications.
# Download the 2015 crime table into a tibble; readr guesses the column types
# and prints the specification it chose.
Crime_2015 <- read_csv(
  file = "http://jsuleiman.com/datasets/Crime_2015.csv"
)
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   MSA = col_character(),
##   ViolentCrime = col_number(),
##   Murder = col_double(),
##   Rape = col_double(),
##   Robbery = col_double(),
##   AggravatedAssault = col_double(),
##   PropertyCrime = col_number(),
##   Burglary = col_number(),
##   Theft = col_number(),
##   MotorVehicleTheft = col_double(),
##   State = col_character(),
##   City = col_character()
## )

Criterion 1: violent crime rate below the median of the dataset

# Threshold for criterion 1: the median of ViolentCrime over the whole table,
# ignoring missing values. It is kept in a variable so the filter below always
# uses the value computed from the data that was actually loaded.
ViolentCrimeMedian <- median(Crime_2015$ViolentCrime, na.rm = TRUE)
ViolentCrimeMedian
## [1] 333.7
# Keep rows strictly below the median. filter() only keeps rows whose condition
# is TRUE, so rows with a missing ViolentCrime are dropped without any extra
# argument (filter() has no `na.rm` parameter).
AllowableCrimeRate <- filter(Crime_2015, ViolentCrime < ViolentCrimeMedian)

Criteria 2 & 3: accredited by SACSCOC AND midpoint of SAT math scores above the median of the dataset

# Threshold for criterion 3: the median of SATMTMID over all institutions,
# ignoring missing values. It is kept in a variable so the filter below always
# uses the value computed from the data that was actually loaded.
SATMathMedian <- median(CollegeScorecard$SATMTMID, na.rm = TRUE)
SATMathMedian
## [1] 520
# Keep institutions whose accrediting agency is exactly the SACSCOC name and
# whose SATMTMID is strictly above the median. Rows where either condition is
# NA are dropped by filter().
AccreditedSouthSATMath <- filter(
  CollegeScorecard,
  AccredAgency == "Southern Association of Colleges and Schools Commission on Colleges",
  SATMTMID > SATMathMedian
)

Join Data matching criteria

# Combine both criteria: keep each crime row / college pair whose city names
# are equal (Crime_2015's City against the scorecard's CITY).
# NOTE(review): the key is the city name alone, so same-named cities in
# different states can match each other, and a college whose city matches
# several crime rows is repeated (the printed result lists "Mississippi
# University for Women" twice). Consider also matching state (State against
# STABBR) once the two encodings are confirmed to be comparable.
TopCollegeList <- inner_join(
  AllowableCrimeRate,
  AccreditedSouthSATMath,
  by = c("City" = "CITY")
)
# Show only the institution names.
select(TopCollegeList, INSTNM)
## # A tibble: 30 x 1
##    INSTNM                                   
##    <chr>                                    
##  1 University of North Carolina at Asheville
##  2 University of Georgia                    
##  3 Concordia University-Texas               
##  4 Saint Edward's University                
##  5 The University of Texas at Austin        
##  6 University of Virginia-Main Campus       
##  7 Texas A & M University-College Station   
##  8 Mississippi University for Women         
##  9 Mississippi University for Women         
## 10 Dallas Baptist University                
## # … with 20 more rows

Count of colleges fitting criteria

# Number of rows in the joined table. This counts joined rows, not distinct
# institutions, so a college that matched more than one crime row is counted
# once per match.
TopCollegeList %>% count()
## # A tibble: 1 x 1
##       n
##   <int>
## 1    30