Import Libraries

library(DT)
library(tidyverse)
library(readxl)
library(dplyr)

Import Data

# Crime data is downloaded from a remote CSV file.
crime_stats <- read_csv(file = "http://jsuleiman.com/datasets/Crime_2015.csv")

# The College Scorecard data and its data dictionary are read from relative
# paths, i.e. they must be present in the current working directory.
score_card_dict <- read_excel(path = "CollegeScorecardDataDictionary.xlsx")
score_card <- read.csv(file = "CollegeScorecard.csv")

We will conduct some analysis in this section to obtain a dataset that meets the criteria below:

  • A violent crime rate below the median of the dataset.
  • Accredited by the Southern Association of Colleges and Schools Commission on Colleges.
  • The midpoint of SAT scores at the institution (math) above the median of the dataset.
# Keep only the institutions whose accrediting agency is the Southern
# Association of Colleges and Schools Commission on Colleges.
score_card_2 <- filter(
  score_card,
  AccredAgency == "Southern Association of Colleges and Schools Commission on Colleges"
)

# Keep only the institutions whose SAT math midpoint (SATMTMID) is strictly
# above the median. Rows with a missing SATMTMID are removed first, so the
# median is taken over the accredited institutions that report a value.
score_card_3 <- drop_na(score_card_2, SATMTMID)
score_card_3_median <- median(score_card_3[["SATMTMID"]])
score_card_4 <- filter(score_card_3, SATMTMID > score_card_3_median)

# Keep only the institutions whose violent crime rate is strictly below the
# median.

# Attach the crime figures to each remaining institution by matching the
# institution's city and state abbreviation to the crime table's City and State.
data <- score_card_4 %>%
  left_join(crime_stats, by = c("CITY" = "City", "STABBR" = "State"))

# Institutions without a ViolentCrime value cannot be compared against the
# median, so they are dropped before the median is computed.
data_2 <- drop_na(data, ViolentCrime)

# The median is taken over the joined, filtered rows (not the whole crime table).
median_violent_crime_rate <- median(data_2[["ViolentCrime"]])
data_3 <- filter(data_2, ViolentCrime < median_violent_crime_rate)

# Build the final report for Cornelia: keep only the institution name, city,
# violent crime rate, accrediting agency and SAT math midpoint.
data_final <- data_3 %>%
  select(INSTNM, CITY, ViolentCrime, AccredAgency, SATMTMID)

# Sort from the highest to the lowest SAT math midpoint, as the report text
# describes. (The previous order() call sorted ascending, i.e. lowest first.)
data_final_2 <- data_final %>%
  arrange(desc(SATMTMID))

# Reset row names so the displayed table is numbered 1..n in sorted order.
row.names(data_final_2) <- NULL

The total number of institutions that meet Cornelia’s criteria is:

# Number of institutions (rows) in the final report.
nrow(data_final_2)
## [1] 47

See the table below for the list of colleges that meet Cornelia’s criteria.

This table displays the name, city, violent crime rate, accrediting agency, and midpoint SAT math score, and is sorted by the midpoint SAT math score, from highest to lowest.

# Render the final report as an interactive HTML table.
DT::datatable(data = data_final_2)