---
title: "Assignment 3"
author: "Tyler Mitchell"
output: html_document
---
Data Import
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(readr)
library(ggplot2)
# Load the 2015 crime dataset. Fetched over HTTPS, like the College Scorecard
# download from the same host, so the data is not pulled over unencrypted HTTP.
crime <- read_csv("https://jsuleiman.com/datasets/Crime_2015.csv")
## Rows: 378 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): MSA, State, City
## dbl (5): Murder, Rape, Robbery, AggravatedAssault, MotorVehicleTheft
## num (4): ViolentCrime, PropertyCrime, Burglary, Theft
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Keep only the crime rows that have a value in every column.
crime <- drop_na(crime)
Displaying First Few Rows of the Crime Data
# Preview the first six rows of the crime data.
head(crime, n = 6L)
## # A tibble: 6 × 12
## MSA Viole…¹ Murder Rape Robbery Aggra…² Prope…³ Burgl…⁴ Theft Motor…⁵ State
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 Abil… 412. 5.3 56 78.4 273. 3609 852 2494. 263. TX
## 2 Akro… 238. 5.1 38.2 75.2 120. 2552. 575. 1853 124. OH
## 3 Alba… 668. 7.8 30.4 158. 472. 3894. 1100. 2653. 142. GA
## 4 Alba… 114. 2.5 28.2 20.7 63 3208. 485. 2476. 248. OR
## 5 Albu… 793. 6.1 63.8 207. 516 4608. 883. 3048. 677. NM
## 6 Alex… 936. 4.5 35.5 120. 776. 4566. 1167 3084. 315. LA
## # … with 1 more variable: City <chr>, and abbreviated variable names
## # ¹ViolentCrime, ²AggravatedAssault, ³PropertyCrime, ⁴Burglary,
## # ⁵MotorVehicleTheft
Data Import: College Scorecard
# Load the College Scorecard data and rename CITY to City so the column name
# matches the City column of the crime data.
scorecard <- rename(
  read_csv("https://jsuleiman.com/datasets/CollegeScorecard.csv"),
  City = CITY
)
## Rows: 7804 Columns: 1729
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (178): INSTNM, CITY, STABBR, ZIP, AccredAgency, INSTURL, NPCURL, RPY_3Y...
## dbl (377): UNITID, OPEID, opeid6, HCM2, main, NUMBRANCH, PREDDEG, HIGHDEG, ...
## lgl (1174): sch_deg, locale2, UG, UGDS_WHITENH, UGDS_BLACKNH, UGDS_API, UGDS...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
Displaying First Few Rows of the Scorecard
# Preview the first six rows of the scorecard data.
head(scorecard, n = 6L)
## # A tibble: 6 × 1,729
## UNITID OPEID opeid6 INSTNM City STABBR ZIP Accre…¹ INSTURL NPCURL sch_deg
## <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <lgl>
## 1 100654 100200 1002 Alaba… Norm… AL 35762 Southe… www.aa… galil… NA
## 2 100663 105200 1052 Unive… Birm… AL 3529… Southe… www.ua… www.c… NA
## 3 100690 2503400 25034 Amrid… Mont… AL 3611… Southe… www.am… tcc.n… NA
## 4 100706 105500 1055 Unive… Hunt… AL 35899 Southe… www.ua… finai… NA
## 5 100724 100500 1005 Alaba… Mont… AL 3610… Southe… www.al… www.a… NA
## 6 100751 105100 1051 The U… Tusc… AL 3548… Southe… www.ua… oira.… NA
## # … with 1,718 more variables: HCM2 <dbl>, main <dbl>, NUMBRANCH <dbl>,
## # PREDDEG <dbl>, HIGHDEG <dbl>, CONTROL <dbl>, st_fips <dbl>, region <dbl>,
## # LOCALE <dbl>, locale2 <lgl>, LATITUDE <dbl>, LONGITUDE <dbl>,
## # CCBASIC <dbl>, CCUGPROF <dbl>, CCSIZSET <dbl>, HBCU <dbl>, PBI <dbl>,
## # ANNHI <dbl>, TRIBAL <dbl>, AANAPII <dbl>, HSI <dbl>, NANTI <dbl>,
## # MENONLY <dbl>, WOMENONLY <dbl>, RELAFFIL <dbl>, ADM_RATE <dbl>,
## # ADM_RATE_ALL <dbl>, SATVR25 <dbl>, SATVR75 <dbl>, SATMT25 <dbl>, …
Joining Data
# Match each college to the crime statistics for its own city, then keep the
# five columns used in the rest of the report under friendlier names.
#
# The join uses both the city name and the state abbreviation (State in crime,
# STABBR in scorecard). Joining on City alone pairs same-named cities in
# different states, so every school in any city called "Lexington" or
# "Columbus" would be matched against the same crime rows.
# NOTE(review): assumes crime$State holds a single two-letter abbreviation per
# row, as in the previewed rows - confirm for multi-state MSAs.
# NOTE(review): a city that occurs in more than one crime row still yields one
# output row per match - confirm whether de-duplication is wanted.
Listedcolleges <- crime %>%
  inner_join(scorecard, by = c("City" = "City", "State" = "STABBR")) %>%
  filter(
    !is.na(ViolentCrime),
    !is.na(City),
    !is.na(INSTNM),
    !is.na(AccredAgency),
    !is.na(SATMTMID)
  ) %>%
  select(
    Institution_Name = INSTNM,
    City,
    SAT_Math_Mid = SATMTMID,
    Violent_Crime_Rate = ViolentCrime,
    Accreditation_Agency = AccredAgency
  )
First Few Rows of Potential Schools
# Preview the first six rows of the joined college/crime table.
head(Listedcolleges, n = 6L)
## # A tibble: 6 × 5
## Institution_Name City SAT_M…¹ Viole…² Accre…³
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Abilene Christian University Abilene 545 412. Southe…
## 2 Hardin-Simmons University Abilene 520 412. Southe…
## 3 McMurry University Abilene 495 412. Southe…
## 4 University of Akron Main Campus Akron 540 238. North …
## 5 Albany State University Albany 435 668. Southe…
## 6 Albany College of Pharmacy and Health Sciences Albany 615 668. Middle…
## # … with abbreviated variable names ¹SAT_Math_Mid, ²Violent_Crime_Rate,
## # ³Accreditation_Agency
Number of Schools Fitting Criteria
# Count the rows of Bestcolleges and display the total.
# NOTE(review): Bestcolleges is not created in the code shown here - confirm
# it is defined before this point.
Numberofschools <- nrow(Bestcolleges)
print(Numberofschools)
## [1] 35
Table of Schools Fitting Criteria
# Print every row of Bestcolleges. n = Inf shows all rows whatever the table's
# size, instead of a hard-coded 35 that would truncate a longer table.
print(as_tibble(Bestcolleges), n = Inf)
## # A tibble: 35 × 5
## Institution_Name City SAT_M…¹ Viole…² Accre…³
## <chr> <chr> <dbl> <dbl> <chr>
## 1 Washington and Lee University Lexington 690 256. Southe…
## 2 University of Virginia-Main Campus Charlottes… 685 165. Southe…
## 3 Southern Methodist University Dallas 660 338. Southe…
## 4 Southern Methodist University Dallas 660 331. Southe…
## 5 The University of Texas at Austin Austin 650 288. Southe…
## 6 University of Florida Gainesville 640 189. Southe…
## 7 University of Georgia Athens 625 326. Southe…
## 8 Austin College Sherman 615 327. Southe…
## 9 Texas A & M University-College Station College St… 605 316. Southe…
## 10 Texas Christian University Fort Worth 600 351. Southe…
## 11 University of North Carolina at Asheville Asheville 595 202 Southe…
## 12 Mississippi University for Women Columbus 595 109. Southe…
## 13 Mississippi University for Women Columbus 595 298 Southe…
## 14 Union University Jackson 595 359. Southe…
## 15 LeTourneau University Longview 595 302. Southe…
## 16 University of South Florida-Main Campus Tampa 595 384. Southe…
## 17 Mercer University Macon 590 320. Southe…
## 18 James Madison University Harrisonbu… 580 146. Southe…
## 19 Virginia Military Institute Lexington 580 256. Southe…
## 20 University of North Florida Jacksonvil… 575 175. Southe…
## 21 Millsaps College Jackson 570 359. Southe…
## 22 Transylvania University Lexington 570 256. Southe…
## 23 University of Kentucky Lexington 565 256. Southe…
## 24 Dallas Baptist University Dallas 562 338. Southe…
## 25 Dallas Baptist University Dallas 562 331. Southe…
## 26 Saint Edward's University Austin 560 288. Southe…
## 27 University of Louisiana at Monroe Monroe 560 236. Southe…
## 28 Virginia Commonwealth University Richmond 550 236. Southe…
## 29 University of Louisiana at Lafayette Lafayette 545 292 Southe…
## 30 Florida Southern College Lakeland 545 340. Southe…
## 31 The University of Texas at Tyler Tyler 544 296. Southe…
## 32 Roanoke College Salem 540 240. Southe…
## 33 The University of Tampa Tampa 540 384. Southe…
## 34 Bryan College-Dayton Dayton 535 305. Southe…
## 35 Randolph College Lynchburg 535 180. Southe…
## # … with abbreviated variable names ¹SAT_Math_Mid, ²Violent_Crime_Rate,
## # ³Accreditation_Agency
Graph of Top 20 Colleges Based on SAT Math Midpoint Score
# Horizontal bar chart of the first 20 rows of Bestcolleges, ordered by SAT
# math midpoint, with each bar labelled with its value.
# head() is used rather than Bestcolleges[1:20, ] so that a table with fewer
# than 20 rows is plotted as-is instead of being padded with NA rows.
# NOTE(review): "top 20" presumes Bestcolleges is already sorted by
# SAT_Math_Mid in descending order - confirm where it is built.
ggplot(
  head(Bestcolleges, 20),
  aes(x = reorder(Institution_Name, SAT_Math_Mid), y = SAT_Math_Mid)
) +
  # geom_col() is geom_bar(stat = "identity"); "dodge" is kept so rows sharing
  # an institution name are not stacked into one taller bar.
  geom_col(position = "dodge", fill = "lightblue") +
  coord_flip() +
  geom_text(
    aes(label = SAT_Math_Mid),
    color = "black",
    size = 4,
    position = position_dodge(width = 1.0),
    hjust = 3.0
  ) +
  labs(x = "Institutions", y = "SAT Score") +
  ggtitle("Top 20 Colleges by SAT Score") +
  theme(plot.title = element_text(hjust = .1))
