R Markdown
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dplyr)
library(knitr)
Crime_2015 <- read_csv("http://jsuleiman.com/datasets/Crime_2015.csv", show_col_types = FALSE)
#Shows first 6 values
head(Crime_2015)
## # A tibble: 6 × 12
## MSA Viole…¹ Murder Rape Robbery Aggra…² Prope…³ Burgl…⁴ Theft Motor…⁵ State
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Abil… 412. 5.3 56 78.4 273. 3609 852 2494. 263. TX
## 2 Akro… 238. 5.1 38.2 75.2 120. 2552. 575. 1853 124. OH
## 3 Alba… 668. 7.8 30.4 158. 472. 3894. 1100. 2653. 142. GA
## 4 Alba… 114. 2.5 28.2 20.7 63 3208. 485. 2476. 248. OR
## 5 Albu… 793. 6.1 63.8 207. 516 4608. 883. 3048. 677. NM
## 6 Alex… 936. 4.5 35.5 120. 776. 4566. 1167 3084. 315. LA
## # … with 1 more variable: City <chr>, and abbreviated variable names
## # ¹ViolentCrime, ²AggravatedAssault, ³PropertyCrime, ⁴Burglary,
## # ⁵MotorVehicleTheft
#Renames City for CITY, allowing innerjoin to join on City
CollegeScorecard <- read_csv("http://jsuleiman.com/datasets/CollegeScorecard.csv", show_col_types = FALSE) %>% rename(City=CITY)
#Shows first 6 Institutions
head(CollegeScorecard)
## # A tibble: 6 × 1,729
## UNITID OPEID opeid6 INSTNM City STABBR ZIP Accre…¹ INSTURL NPCURL sch_deg
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <lgl>
## 1 100654 100200 1002 Alaba… Norm… AL 35762 Southe… www.aa… galil… NA
## 2 100663 105200 1052 Unive… Birm… AL 3529… Southe… www.ua… www.c… NA
## 3 100690 2503400 25034 Amrid… Mont… AL 3611… Southe… www.am… tcc.n… NA
## 4 100706 105500 1055 Unive… Hunt… AL 35899 Southe… www.ua… finai… NA
## 5 100724 100500 1005 Alaba… Mont… AL 3610… Southe… www.al… www.a… NA
## 6 100751 105100 1051 The U… Tusc… AL 3548… Southe… www.ua… oira.… NA
## # … with 1,718 more variables: HCM2 <dbl>, main <dbl>, NUMBRANCH <dbl>,
## # PREDDEG <dbl>, HIGHDEG <dbl>, CONTROL <dbl>, st_fips <dbl>, region <dbl>,
## # LOCALE <dbl>, locale2 <lgl>, LATITUDE <dbl>, LONGITUDE <dbl>,
## # CCBASIC <dbl>, CCUGPROF <dbl>, CCSIZSET <dbl>, HBCU <dbl>, PBI <dbl>,
## # ANNHI <dbl>, TRIBAL <dbl>, AANAPII <dbl>, HSI <dbl>, NANTI <dbl>,
## # MENONLY <dbl>, WOMENONLY <dbl>, RELAFFIL <dbl>, ADM_RATE <dbl>,
## # ADM_RATE_ALL <dbl>, SATVR25 <dbl>, SATVR75 <dbl>, SATMT25 <dbl>, …
#Innerjoin on CollegeScorecard and Crime_2015. Filters on City, Institution Name, Accredited Agency and SAT Midpoint datasets.
CollegesList <- Crime_2015 %>% inner_join(CollegeScorecard, by="City") %>%
filter(!is.na(ViolentCrime)
& !is.na(City)
& !is.na(INSTNM)
& !is.na(AccredAgency)
& !is.na(SATMTMID)) %>%
#Selects columns in dataset and renames accordingly
select(Institution = "INSTNM", City = "City", SAT_Median = "SATMTMID", Violent_Crime_Rate = "ViolentCrime", Accred_Agency = "AccredAgency")
#Drops any NA Values
drop_na(CollegesList)
## # A tibble: 645 × 5
## Institution City SAT_M…¹ Viole…² Accre…³
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Abilene Christian University Abile… 545 412. Southe…
## 2 Hardin-Simmons University Abile… 520 412. Southe…
## 3 McMurry University Abile… 495 412. Southe…
## 4 University of Akron Main Campus Akron 540 238. North …
## 5 Albany State University Albany 435 668. Southe…
## 6 Albany College of Pharmacy and Health Sciences Albany 615 668. Middle…
## 7 Maria College of Albany Albany 440 668. Middle…
## 8 SUNY at Albany Albany 565 668. Middle…
## 9 Albany State University Albany 435 114. Southe…
## 10 Albany College of Pharmacy and Health Sciences Albany 615 114. Middle…
## # … with 635 more rows, and abbreviated variable names ¹SAT_Median,
## # ²Violent_Crime_Rate, ³Accred_Agency
CollegesList
## # A tibble: 645 × 5
## Institution City SAT_M…¹ Viole…² Accre…³
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Abilene Christian University Abile… 545 412. Southe…
## 2 Hardin-Simmons University Abile… 520 412. Southe…
## 3 McMurry University Abile… 495 412. Southe…
## 4 University of Akron Main Campus Akron 540 238. North …
## 5 Albany State University Albany 435 668. Southe…
## 6 Albany College of Pharmacy and Health Sciences Albany 615 668. Middle…
## 7 Maria College of Albany Albany 440 668. Middle…
## 8 SUNY at Albany Albany 565 668. Middle…
## 9 Albany State University Albany 435 114. Southe…
## 10 Albany College of Pharmacy and Health Sciences Albany 615 114. Middle…
## # … with 635 more rows, and abbreviated variable names ¹SAT_Median,
## # ²Violent_Crime_Rate, ³Accred_Agency
#Shows first 6 Institutions by joined columns
head(CollegesList)
## # A tibble: 6 × 5
## Institution City SAT_M…¹ Viole…² Accre…³
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Abilene Christian University Abilene 545 412. Southe…
## 2 Hardin-Simmons University Abilene 520 412. Southe…
## 3 McMurry University Abilene 495 412. Southe…
## 4 University of Akron Main Campus Akron 540 238. North …
## 5 Albany State University Albany 435 668. Southe…
## 6 Albany College of Pharmacy and Health Sciences Albany 615 668. Middle…
## # … with abbreviated variable names ¹SAT_Median, ²Violent_Crime_Rate,
## # ³Accred_Agency
#Median SAT scores from SAT Median DATASET
MedianSAT <- median(CollegesList$SAT_Median)
MedianSAT
## [1] 530
#Median Crime Rate for CollegesList Joined Dataset
MedianCrimeRate <- median(CollegesList$Violent_Crime_Rate)
MedianCrimeRate
## [1] 395.3
#Filtered on Violent Crime values less than the median and SAT Midpoint dataset is filtered on values above median
TopColleges_Crime <- CollegesList %>%
filter(Violent_Crime_Rate < MedianCrimeRate) %>%
filter(SAT_Median > MedianSAT) %>%
filter(Accred_Agency == "Southern Association of Colleges and Schools Commission on Colleges")
#TopColleges_Crime_1 is the assigned dataframe for the SAT Median column to sort greatest to least
TopColleges_Crime_1 <- TopColleges_Crime[c("Institution", "City", "SAT_Median", "Violent_Crime_Rate", "Accred_Agency")] %>% arrange(desc(SAT_Median))
drop_na(TopColleges_Crime_1)
## # A tibble: 36 × 5
## Institution City SAT_M…¹ Viole…² Accre…³
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Washington and Lee University Lexington 690 256. Southe…
## 2 University of Virginia-Main Campus Charlottesvil… 685 165. Southe…
## 3 Southern Methodist University Dallas 660 338. Southe…
## 4 Southern Methodist University Dallas 660 331. Southe…
## 5 The University of Texas at Austin Austin 650 288. Southe…
## 6 University of Florida Gainesville 640 189. Southe…
## 7 University of Georgia Athens 625 326. Southe…
## 8 Austin College Sherman 615 327. Southe…
## 9 Texas A & M University-College Station College Stati… 605 316. Southe…
## 10 Texas Christian University Fort Worth 600 351. Southe…
## # … with 26 more rows, and abbreviated variable names ¹SAT_Median,
## # ²Violent_Crime_Rate, ³Accred_Agency
TopColleges_Crime_1
## # A tibble: 36 × 5
## Institution City SAT_M…¹ Viole…² Accre…³
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Washington and Lee University Lexington 690 256. Southe…
## 2 University of Virginia-Main Campus Charlottesvil… 685 165. Southe…
## 3 Southern Methodist University Dallas 660 338. Southe…
## 4 Southern Methodist University Dallas 660 331. Southe…
## 5 The University of Texas at Austin Austin 650 288. Southe…
## 6 University of Florida Gainesville 640 189. Southe…
## 7 University of Georgia Athens 625 326. Southe…
## 8 Austin College Sherman 615 327. Southe…
## 9 Texas A & M University-College Station College Stati… 605 316. Southe…
## 10 Texas Christian University Fort Worth 600 351. Southe…
## # … with 26 more rows, and abbreviated variable names ¹SAT_Median,
## # ²Violent_Crime_Rate, ³Accred_Agency
#Counts number of rows that fits criteria given the provided dataframe
CountofColleges <- nrow(TopColleges_Crime_1)
CountofColleges
## [1] 36