R Markdown

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(dplyr)
library(knitr)

Crime_2015 <- read_csv("http://jsuleiman.com/datasets/Crime_2015.csv", show_col_types = FALSE)

#Shows first 6 values
head(Crime_2015)
## # A tibble: 6 × 12
##   MSA   Viole…¹ Murder  Rape Robbery Aggra…² Prope…³ Burgl…⁴ Theft Motor…⁵ State
##   <chr>   <dbl>  <dbl> <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <dbl>   <dbl> <chr>
## 1 Abil…    412.    5.3  56      78.4    273.   3609     852  2494.    263. TX   
## 2 Akro…    238.    5.1  38.2    75.2    120.   2552.    575. 1853     124. OH   
## 3 Alba…    668.    7.8  30.4   158.     472.   3894.   1100. 2653.    142. GA   
## 4 Alba…    114.    2.5  28.2    20.7     63    3208.    485. 2476.    248. OR   
## 5 Albu…    793.    6.1  63.8   207.     516    4608.    883. 3048.    677. NM   
## 6 Alex…    936.    4.5  35.5   120.     776.   4566.   1167  3084.    315. LA   
## # … with 1 more variable: City <chr>, and abbreviated variable names
## #   ¹​ViolentCrime, ²​AggravatedAssault, ³​PropertyCrime, ⁴​Burglary,
## #   ⁵​MotorVehicleTheft
#Renames City for CITY, allowing innerjoin to join on City
CollegeScorecard <- read_csv("http://jsuleiman.com/datasets/CollegeScorecard.csv", show_col_types = FALSE) %>% rename(City=CITY)

#Shows first 6 Institutions
head(CollegeScorecard)
## # A tibble: 6 × 1,729
##   UNITID   OPEID opeid6 INSTNM City  STABBR ZIP   Accre…¹ INSTURL NPCURL sch_deg
##    <dbl>   <dbl>  <dbl> <chr>  <chr> <chr>  <chr> <chr>   <chr>   <chr>  <lgl>  
## 1 100654  100200   1002 Alaba… Norm… AL     35762 Southe… www.aa… galil… NA     
## 2 100663  105200   1052 Unive… Birm… AL     3529… Southe… www.ua… www.c… NA     
## 3 100690 2503400  25034 Amrid… Mont… AL     3611… Southe… www.am… tcc.n… NA     
## 4 100706  105500   1055 Unive… Hunt… AL     35899 Southe… www.ua… finai… NA     
## 5 100724  100500   1005 Alaba… Mont… AL     3610… Southe… www.al… www.a… NA     
## 6 100751  105100   1051 The U… Tusc… AL     3548… Southe… www.ua… oira.… NA     
## # … with 1,718 more variables: HCM2 <dbl>, main <dbl>, NUMBRANCH <dbl>,
## #   PREDDEG <dbl>, HIGHDEG <dbl>, CONTROL <dbl>, st_fips <dbl>, region <dbl>,
## #   LOCALE <dbl>, locale2 <lgl>, LATITUDE <dbl>, LONGITUDE <dbl>,
## #   CCBASIC <dbl>, CCUGPROF <dbl>, CCSIZSET <dbl>, HBCU <dbl>, PBI <dbl>,
## #   ANNHI <dbl>, TRIBAL <dbl>, AANAPII <dbl>, HSI <dbl>, NANTI <dbl>,
## #   MENONLY <dbl>, WOMENONLY <dbl>, RELAFFIL <dbl>, ADM_RATE <dbl>,
## #   ADM_RATE_ALL <dbl>, SATVR25 <dbl>, SATVR75 <dbl>, SATMT25 <dbl>, …
#Innerjoin on CollegeScorecard and Crime_2015. Filters on City, Institution Name, Accredited Agency and SAT Midpoint datasets. 
CollegesList <- Crime_2015 %>% inner_join(CollegeScorecard, by="City") %>% 
  filter(!is.na(ViolentCrime) 
         & !is.na(City) 
         & !is.na(INSTNM) 
         & !is.na(AccredAgency) 
         & !is.na(SATMTMID)) %>% 
  
  
  #Selects columns in dataset and renames accordingly
  select(Institution = "INSTNM", City = "City", SAT_Median = "SATMTMID", Violent_Crime_Rate = "ViolentCrime", Accred_Agency = "AccredAgency")

#Drops any NA Values
drop_na(CollegesList)
## # A tibble: 645 × 5
##    Institution                                    City   SAT_M…¹ Viole…² Accre…³
##    <chr>                                          <chr>    <dbl>   <dbl> <chr>  
##  1 Abilene Christian University                   Abile…     545    412. Southe…
##  2 Hardin-Simmons University                      Abile…     520    412. Southe…
##  3 McMurry University                             Abile…     495    412. Southe…
##  4 University of Akron Main Campus                Akron      540    238. North …
##  5 Albany State University                        Albany     435    668. Southe…
##  6 Albany College of Pharmacy and Health Sciences Albany     615    668. Middle…
##  7 Maria College of Albany                        Albany     440    668. Middle…
##  8 SUNY at Albany                                 Albany     565    668. Middle…
##  9 Albany State University                        Albany     435    114. Southe…
## 10 Albany College of Pharmacy and Health Sciences Albany     615    114. Middle…
## # … with 635 more rows, and abbreviated variable names ¹​SAT_Median,
## #   ²​Violent_Crime_Rate, ³​Accred_Agency
CollegesList
## # A tibble: 645 × 5
##    Institution                                    City   SAT_M…¹ Viole…² Accre…³
##    <chr>                                          <chr>    <dbl>   <dbl> <chr>  
##  1 Abilene Christian University                   Abile…     545    412. Southe…
##  2 Hardin-Simmons University                      Abile…     520    412. Southe…
##  3 McMurry University                             Abile…     495    412. Southe…
##  4 University of Akron Main Campus                Akron      540    238. North …
##  5 Albany State University                        Albany     435    668. Southe…
##  6 Albany College of Pharmacy and Health Sciences Albany     615    668. Middle…
##  7 Maria College of Albany                        Albany     440    668. Middle…
##  8 SUNY at Albany                                 Albany     565    668. Middle…
##  9 Albany State University                        Albany     435    114. Southe…
## 10 Albany College of Pharmacy and Health Sciences Albany     615    114. Middle…
## # … with 635 more rows, and abbreviated variable names ¹​SAT_Median,
## #   ²​Violent_Crime_Rate, ³​Accred_Agency
#Shows first 6 Institutions by joined columns 
head(CollegesList)
## # A tibble: 6 × 5
##   Institution                                    City    SAT_M…¹ Viole…² Accre…³
##   <chr>                                          <chr>     <dbl>   <dbl> <chr>  
## 1 Abilene Christian University                   Abilene     545    412. Southe…
## 2 Hardin-Simmons University                      Abilene     520    412. Southe…
## 3 McMurry University                             Abilene     495    412. Southe…
## 4 University of Akron Main Campus                Akron       540    238. North …
## 5 Albany State University                        Albany      435    668. Southe…
## 6 Albany College of Pharmacy and Health Sciences Albany      615    668. Middle…
## # … with abbreviated variable names ¹​SAT_Median, ²​Violent_Crime_Rate,
## #   ³​Accred_Agency
#Median SAT scores from SAT Median DATASET
MedianSAT <- median(CollegesList$SAT_Median) 
MedianSAT
## [1] 530
#Median Crime Rate for CollegesList Joined Dataset
MedianCrimeRate <- median(CollegesList$Violent_Crime_Rate)
MedianCrimeRate
## [1] 395.3
#Filtered on Violent Crime values less than the median and SAT Midpoint dataset is filtered on values above median
TopColleges_Crime <- CollegesList %>% 
  
  filter(Violent_Crime_Rate < MedianCrimeRate) %>% 
  
  filter(SAT_Median > MedianSAT) %>% 
  
  filter(Accred_Agency == "Southern Association of Colleges and Schools Commission on Colleges")

#TopColleges_Crime_1 is the assigned dataframe for the SAT Median column to sort greatest to least
TopColleges_Crime_1 <- TopColleges_Crime[c("Institution", "City", "SAT_Median", "Violent_Crime_Rate", "Accred_Agency")] %>% arrange(desc(SAT_Median))

drop_na(TopColleges_Crime_1)
## # A tibble: 36 × 5
##    Institution                            City           SAT_M…¹ Viole…² Accre…³
##    <chr>                                  <chr>            <dbl>   <dbl> <chr>  
##  1 Washington and Lee University          Lexington          690    256. Southe…
##  2 University of Virginia-Main Campus     Charlottesvil…     685    165. Southe…
##  3 Southern Methodist University          Dallas             660    338. Southe…
##  4 Southern Methodist University          Dallas             660    331. Southe…
##  5 The University of Texas at Austin      Austin             650    288. Southe…
##  6 University of Florida                  Gainesville        640    189. Southe…
##  7 University of Georgia                  Athens             625    326. Southe…
##  8 Austin College                         Sherman            615    327. Southe…
##  9 Texas A & M University-College Station College Stati…     605    316. Southe…
## 10 Texas Christian University             Fort Worth         600    351. Southe…
## # … with 26 more rows, and abbreviated variable names ¹​SAT_Median,
## #   ²​Violent_Crime_Rate, ³​Accred_Agency
TopColleges_Crime_1
## # A tibble: 36 × 5
##    Institution                            City           SAT_M…¹ Viole…² Accre…³
##    <chr>                                  <chr>            <dbl>   <dbl> <chr>  
##  1 Washington and Lee University          Lexington          690    256. Southe…
##  2 University of Virginia-Main Campus     Charlottesvil…     685    165. Southe…
##  3 Southern Methodist University          Dallas             660    338. Southe…
##  4 Southern Methodist University          Dallas             660    331. Southe…
##  5 The University of Texas at Austin      Austin             650    288. Southe…
##  6 University of Florida                  Gainesville        640    189. Southe…
##  7 University of Georgia                  Athens             625    326. Southe…
##  8 Austin College                         Sherman            615    327. Southe…
##  9 Texas A & M University-College Station College Stati…     605    316. Southe…
## 10 Texas Christian University             Fort Worth         600    351. Southe…
## # … with 26 more rows, and abbreviated variable names ¹​SAT_Median,
## #   ²​Violent_Crime_Rate, ³​Accred_Agency
#Counts number of rows that fits criteria given the provided dataframe
CountofColleges <- nrow(TopColleges_Crime_1)

CountofColleges 
## [1] 36