#Kristen Sosulski
#Exercise 3.1
#1. Set working directory
# NOTE(review): this path is specific to the author's machine; the relative
# file name used in step 2 is resolved against it.
setwd(dir = "~/Dropbox/R_Fundamentals")
#2. import csv file
# readr supplies read_csv(); the result is stored in `march`.
library("readr")
march <- read_csv(file = "march_madness.csv")
## Parsed with column specification:
## cols(
## RANK = col_integer(),
## PREVIOUS = col_integer(),
## SCHOOL = col_character(),
## CONFERENCE = col_character(),
## RECORD = col_character(),
## ROAD = col_character(),
## NEUTRAL = col_character(),
## HOME = col_character(),
## `NON-DI` = col_character()
## )
#3 View our data
# The View() call is left commented out; typing the object name on its own
# prints the tibble summary shown below instead.
# View(march)
march
## # A tibble: 349 x 9
## RANK PREVIOUS SCHOOL CONFERENCE RECORD ROAD NEUTRAL HOME `NON-DI`
## <int> <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 2 Florida Southeaste… 32-2 10-2 5- 0 17-0 0-0
## 2 2 1 Arizona Pac-12 30-4 8-3 4- 1 18-0 0-0
## 3 3 3 Kansas Big 12 24-9 5-6 4- 2 15-1 0-0
## 4 4 4 Wichita… Missouri V… 34-0 12-0 5- 0 16-0 1-0
## 5 5 5 Villano… Big East 28-4 10-2 3- 1 15-1 0-0
## 6 6 6 Wiscons… Big Ten 26-7 8-3 4- 1 14-3 0-0
## 7 7 8 Iowa St. Big 12 26-7 4-6 7- 0 15-1 0-0
## 8 8 11 Virginia Atlantic C… 28-6 7-4 6- 0 15-2 0-0
## 9 9 7 Duke Atlantic C… 26-8 4-5 5- 3 17-0 0-0
## 10 10 10 Creight… Big East 26-7 7-4 3- 3 16-0 0-0
## # ... with 339 more rows
#4 Print the number of rows and columns
# Row count first, then column count.
c(nrow(march), ncol(march))
## [1] 349 9
#5 Print the column names
# For a data frame the column names are the object's names.
colnames(march)
## [1] "RANK" "PREVIOUS" "SCHOOL" "CONFERENCE" "RECORD"
## [6] "ROAD" "NEUTRAL" "HOME" "NON-DI"
#6 Change the column names to lowercase
# tolower() works on the whole character vector, so the header is converted
# in a single call and written back in place.
colnames(march) <- tolower(colnames(march))
# Show the header again to confirm the change.
colnames(march)
## [1] "rank" "previous" "school" "conference" "record"
## [6] "road" "neutral" "home" "non-di"
#7 Explore the variable types
# str() lists each column with its type and first few values.
utils::str(object = march)
## Classes 'tbl_df', 'tbl' and 'data.frame': 349 obs. of 9 variables:
## $ rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ previous : int 2 1 3 4 5 6 8 11 7 10 ...
## $ school : chr "Florida" "Arizona" "Kansas" "Wichita St." ...
## $ conference: chr "Southeastern" "Pac-12" "Big 12" "Missouri Valley" ...
## $ record : chr "32-2" "30-4" "24-9" "34-0" ...
## $ road : chr "10-2" "8-3" "5-6" "12-0" ...
## $ neutral : chr "5- 0" "4- 1" "4- 2" "5- 0" ...
## $ home : chr "17-0" "18-0" "15-1" "16-0" ...
## $ non-di : chr "0-0" "0-0" "0-0" "1-0" ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 9
## .. ..$ RANK : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ PREVIOUS : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ SCHOOL : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ CONFERENCE: list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ RECORD : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ ROAD : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ NEUTRAL : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ HOME : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ NON-DI : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
# How many different conferences are there?
#Approach 1
# Start by looking at the raw column: one conference name per school.
march[["conference"]]
## [1] "Southeastern" "Pac-12"
## [3] "Big 12" "Missouri Valley"
## [5] "Big East" "Big Ten"
## [7] "Big 12" "Atlantic Coast"
## [9] "Atlantic Coast" "Big East"
## [11] "Big Ten" "Mountain West"
## [13] "Atlantic 10" "Pac-12"
## [15] "Mountain West" "Atlantic Coast"
## [17] "Southeastern" "Big Ten"
## [19] "AAC" "West Coast"
## [21] "AAC" "AAC"
## [23] "Atlantic 10" "Big Ten"
## [25] "Atlantic Coast" "Big 12"
## [27] "Atlantic 10" "Pac-12"
## [29] "Atlantic 10" "Big 12"
## [31] "West Coast" "Atlantic 10"
## [33] "Conference USA" "Pac-12"
## [35] "Summit" "Big 12"
## [37] "AAC" "Mid-American"
## [39] "Atlantic Coast" "Big East"
## [41] "Pac-12" "Southeastern"
## [43] "Atlantic 10" "Pac-12"
## [45] "Big 12" "Ivy"
## [47] "Big East" "Big Ten"
## [49] "Southeastern" "Big Ten"
## [51] "Big 12" "Southland"
## [53] "AAC" "Atlantic Coast"
## [55] "Atlantic Coast" "Big Ten"
## [57] "OVC" "Horizon"
## [59] "Metro Atlantic" "Metro Atlantic"
## [61] "West Coast" "Conference USA"
## [63] "Pac-12" "Conference USA"
## [65] "Colonial" "Mid-American"
## [67] "West Coast" "Big East"
## [69] "Big East" "Big Ten"
## [71] "Western Athletic" "Missouri Valley"
## [73] "Conference USA" "Southeastern"
## [75] "Atlantic 10" "Sun Belt"
## [77] "Southeastern" "Atlantic Coast"
## [79] "Atlantic Coast" "Pac-12"
## [81] "Atlantic Sun" "Mountain West"
## [83] "Southeastern" "Patriot"
## [85] "Missouri Valley" "Mid-American"
## [87] "Mid-American" "Big 12"
## [89] "Atlantic 10" "Sun Belt"
## [91] "Horizon" "Big East"
## [93] "Mid-American" "Southeastern"
## [95] "Colonial" "Metro Atlantic"
## [97] "OVC" "Atlantic 10"
## [99] "Mid-Eastern" "Pac-12"
## [101] "Big Ten" "America East"
## [103] "Conference USA" "Pac-12"
## [105] "Metro Atlantic" "Mid-American"
## [107] "Big West" "Mountain West"
## [109] "Missouri Valley" "Atlantic Coast"
## [111] "Big West" "Southland"
## [113] "Atlantic Coast" "Big Ten"
## [115] "Summit" "Mountain West"
## [117] "Mountain West" "Patriot"
## [119] "Colonial" "Southeastern"
## [121] "Big 12" "Ivy"
## [123] "Big Ten" "Southeastern"
## [125] "Sun Belt" "Southern"
## [127] "Patriot" "Big East"
## [129] "Northeast" "Ivy"
## [131] "Horizon" "Colonial"
## [133] "West Coast" "Missouri Valley"
## [135] "Mountain West" "Atlantic Coast"
## [137] "Mountain West" "Summit"
## [139] "Summit" "Southeastern"
## [141] "OVC" "OVC"
## [143] "AAC" "Western Athletic"
## [145] "Big Ten" "Atlantic Sun"
## [147] "Ivy" "Southeastern"
## [149] "Big Sky" "Big East"
## [151] "Conference USA" "Conference USA"
## [153] "Southern" "Big East"
## [155] "Atlantic 10" "West Coast"
## [157] "Atlantic Coast" "Southland"
## [159] "West Coast" "Big West"
## [161] "Mid-American" "Southern"
## [163] "Atlantic Sun" "Atlantic 10"
## [165] "America East" "Big West"
## [167] "Patriot" "Big South"
## [169] "Southeastern" "Conference USA"
## [171] "Southwestern" "Big South"
## [173] "Horizon" "Missouri Valley"
## [175] "AAC" "Pac-12"
## [177] "Northeast" "West Coast"
## [179] "West Coast" "Sun Belt"
## [181] "Horizon" "Mountain West"
## [183] "Big Sky" "Horizon"
## [185] "America East" "Northeast"
## [187] "Sun Belt" "Atlantic 10"
## [189] "Northeast" "Atlantic Sun"
## [191] "Southland" "AAC"
## [193] "Metro Atlantic" "Mid-American"
## [195] "Conference USA" "Big South"
## [197] "Big Sky" "Metro Atlantic"
## [199] "Sun Belt" "Missouri Valley"
## [201] "Big South" "Conference USA"
## [203] "Western Athletic" "Mid-Eastern"
## [205] "Big Sky" "Atlantic Sun"
## [207] "Atlantic Coast" "Big West"
## [209] "Atlantic Sun" "West Coast"
## [211] "Big South" "Big 12"
## [213] "Mid-American" "Missouri Valley"
## [215] "OVC" "Conference USA"
## [217] "Mid-American" "Northeast"
## [219] "Summit" "AAC"
## [221] "Southern" "Colonial"
## [223] "Conference USA" "Pac-12"
## [225] "Mid-Eastern" "Atlantic 10"
## [227] "Mid-Eastern" "Big Sky"
## [229] "Ivy" "Horizon"
## [231] "Metro Atlantic" "AAC"
## [233] "Colonial" "America East"
## [235] "Patriot" "Patriot"
## [237] "Horizon" "Western Athletic"
## [239] "Southwestern" "Big South"
## [241] "Big South" "Colonial"
## [243] "Southern" "Sun Belt"
## [245] "Southeastern" "Atlantic Coast"
## [247] "Big Sky" "Southland"
## [249] "Metro Atlantic" "Mountain West"
## [251] "Western Athletic" "Big Sky"
## [253] "Big West" "Southwestern"
## [255] "Summit" "Patriot"
## [257] "OVC" "Patriot"
## [259] "Western Athletic" "Southland"
## [261] "Mid-Eastern" "Southland"
## [263] "Big Sky" "Southwestern"
## [265] "Missouri Valley" "Ivy"
## [267] "Western Athletic" "Big South"
## [269] "Northeast" "Conference USA"
## [271] "Southwestern" "Southland"
## [273] "Mid-Eastern" "Patriot"
## [275] "Big Sky" "Conference USA"
## [277] "Colonial" "Mountain West"
## [279] "Western Athletic" "Sun Belt"
## [281] "Colonial" "Atlantic Sun"
## [283] "America East" "Ivy"
## [285] "Sun Belt" "Big West"
## [287] "Big Sky" "Missouri Valley"
## [289] "Metro Atlantic" "Summit"
## [291] "OVC" "Metro Atlantic"
## [293] "Northeast" "Mid-Eastern"
## [295] "Mid-American" "Southwestern"
## [297] "Northeast" "Northeast"
## [299] "Conference USA" "OVC"
## [301] "Southern" "Metro Atlantic"
## [303] "Southern" "Southwestern"
## [305] "OVC" "Conference USA"
## [307] "Sun Belt" "Division I Independents"
## [309] "Mid-American" "Atlantic Sun"
## [311] "Big South" "Western Athletic"
## [313] "Southern" "Southwestern"
## [315] "Horizon" "Big South"
## [317] "OVC" "Southland"
## [319] "Big West" "OVC"
## [321] "Patriot" "Summit"
## [323] "Mid-Eastern" "Atlantic Sun"
## [325] "America East" "Big West"
## [327] "America East" "Southland"
## [329] "Mid-Eastern" "America East"
## [331] "Mid-Eastern" "OVC"
## [333] "Atlantic Sun" "Ivy"
## [335] "Southern" "Mid-Eastern"
## [337] "Big South" "Northeast"
## [339] "Southern" "America East"
## [341] "Southland" "Southwestern"
## [343] "Mid-Eastern" "Southland"
## [345] "Southern" "Big Sky"
## [347] "Mid-Eastern" "Southwestern"
## [349] "Big South"
# Collapse the column to its distinct values (order of first appearance).
unique(march[["conference"]])
## [1] "Southeastern" "Pac-12"
## [3] "Big 12" "Missouri Valley"
## [5] "Big East" "Big Ten"
## [7] "Atlantic Coast" "Mountain West"
## [9] "Atlantic 10" "AAC"
## [11] "West Coast" "Conference USA"
## [13] "Summit" "Mid-American"
## [15] "Ivy" "Southland"
## [17] "OVC" "Horizon"
## [19] "Metro Atlantic" "Colonial"
## [21] "Western Athletic" "Sun Belt"
## [23] "Atlantic Sun" "Patriot"
## [25] "Mid-Eastern" "America East"
## [27] "Big West" "Southern"
## [29] "Northeast" "Big Sky"
## [31] "Big South" "Southwestern"
## [33] "Division I Independents"
# Count the distinct conferences: every first occurrence is a non-duplicate.
sum(!duplicated(march$conference))
## [1] 33
# Approach 2
# Store the column as a factor so each distinct conference becomes one level.
# This overwrites the character column in `march`.
march$conference <- factor(march$conference)
march[["conference"]]
## [1] Southeastern Pac-12
## [3] Big 12 Missouri Valley
## [5] Big East Big Ten
## [7] Big 12 Atlantic Coast
## [9] Atlantic Coast Big East
## [11] Big Ten Mountain West
## [13] Atlantic 10 Pac-12
## [15] Mountain West Atlantic Coast
## [17] Southeastern Big Ten
## [19] AAC West Coast
## [21] AAC AAC
## [23] Atlantic 10 Big Ten
## [25] Atlantic Coast Big 12
## [27] Atlantic 10 Pac-12
## [29] Atlantic 10 Big 12
## [31] West Coast Atlantic 10
## [33] Conference USA Pac-12
## [35] Summit Big 12
## [37] AAC Mid-American
## [39] Atlantic Coast Big East
## [41] Pac-12 Southeastern
## [43] Atlantic 10 Pac-12
## [45] Big 12 Ivy
## [47] Big East Big Ten
## [49] Southeastern Big Ten
## [51] Big 12 Southland
## [53] AAC Atlantic Coast
## [55] Atlantic Coast Big Ten
## [57] OVC Horizon
## [59] Metro Atlantic Metro Atlantic
## [61] West Coast Conference USA
## [63] Pac-12 Conference USA
## [65] Colonial Mid-American
## [67] West Coast Big East
## [69] Big East Big Ten
## [71] Western Athletic Missouri Valley
## [73] Conference USA Southeastern
## [75] Atlantic 10 Sun Belt
## [77] Southeastern Atlantic Coast
## [79] Atlantic Coast Pac-12
## [81] Atlantic Sun Mountain West
## [83] Southeastern Patriot
## [85] Missouri Valley Mid-American
## [87] Mid-American Big 12
## [89] Atlantic 10 Sun Belt
## [91] Horizon Big East
## [93] Mid-American Southeastern
## [95] Colonial Metro Atlantic
## [97] OVC Atlantic 10
## [99] Mid-Eastern Pac-12
## [101] Big Ten America East
## [103] Conference USA Pac-12
## [105] Metro Atlantic Mid-American
## [107] Big West Mountain West
## [109] Missouri Valley Atlantic Coast
## [111] Big West Southland
## [113] Atlantic Coast Big Ten
## [115] Summit Mountain West
## [117] Mountain West Patriot
## [119] Colonial Southeastern
## [121] Big 12 Ivy
## [123] Big Ten Southeastern
## [125] Sun Belt Southern
## [127] Patriot Big East
## [129] Northeast Ivy
## [131] Horizon Colonial
## [133] West Coast Missouri Valley
## [135] Mountain West Atlantic Coast
## [137] Mountain West Summit
## [139] Summit Southeastern
## [141] OVC OVC
## [143] AAC Western Athletic
## [145] Big Ten Atlantic Sun
## [147] Ivy Southeastern
## [149] Big Sky Big East
## [151] Conference USA Conference USA
## [153] Southern Big East
## [155] Atlantic 10 West Coast
## [157] Atlantic Coast Southland
## [159] West Coast Big West
## [161] Mid-American Southern
## [163] Atlantic Sun Atlantic 10
## [165] America East Big West
## [167] Patriot Big South
## [169] Southeastern Conference USA
## [171] Southwestern Big South
## [173] Horizon Missouri Valley
## [175] AAC Pac-12
## [177] Northeast West Coast
## [179] West Coast Sun Belt
## [181] Horizon Mountain West
## [183] Big Sky Horizon
## [185] America East Northeast
## [187] Sun Belt Atlantic 10
## [189] Northeast Atlantic Sun
## [191] Southland AAC
## [193] Metro Atlantic Mid-American
## [195] Conference USA Big South
## [197] Big Sky Metro Atlantic
## [199] Sun Belt Missouri Valley
## [201] Big South Conference USA
## [203] Western Athletic Mid-Eastern
## [205] Big Sky Atlantic Sun
## [207] Atlantic Coast Big West
## [209] Atlantic Sun West Coast
## [211] Big South Big 12
## [213] Mid-American Missouri Valley
## [215] OVC Conference USA
## [217] Mid-American Northeast
## [219] Summit AAC
## [221] Southern Colonial
## [223] Conference USA Pac-12
## [225] Mid-Eastern Atlantic 10
## [227] Mid-Eastern Big Sky
## [229] Ivy Horizon
## [231] Metro Atlantic AAC
## [233] Colonial America East
## [235] Patriot Patriot
## [237] Horizon Western Athletic
## [239] Southwestern Big South
## [241] Big South Colonial
## [243] Southern Sun Belt
## [245] Southeastern Atlantic Coast
## [247] Big Sky Southland
## [249] Metro Atlantic Mountain West
## [251] Western Athletic Big Sky
## [253] Big West Southwestern
## [255] Summit Patriot
## [257] OVC Patriot
## [259] Western Athletic Southland
## [261] Mid-Eastern Southland
## [263] Big Sky Southwestern
## [265] Missouri Valley Ivy
## [267] Western Athletic Big South
## [269] Northeast Conference USA
## [271] Southwestern Southland
## [273] Mid-Eastern Patriot
## [275] Big Sky Conference USA
## [277] Colonial Mountain West
## [279] Western Athletic Sun Belt
## [281] Colonial Atlantic Sun
## [283] America East Ivy
## [285] Sun Belt Big West
## [287] Big Sky Missouri Valley
## [289] Metro Atlantic Summit
## [291] OVC Metro Atlantic
## [293] Northeast Mid-Eastern
## [295] Mid-American Southwestern
## [297] Northeast Northeast
## [299] Conference USA OVC
## [301] Southern Metro Atlantic
## [303] Southern Southwestern
## [305] OVC Conference USA
## [307] Sun Belt Division I Independents
## [309] Mid-American Atlantic Sun
## [311] Big South Western Athletic
## [313] Southern Southwestern
## [315] Horizon Big South
## [317] OVC Southland
## [319] Big West OVC
## [321] Patriot Summit
## [323] Mid-Eastern Atlantic Sun
## [325] America East Big West
## [327] America East Southland
## [329] Mid-Eastern America East
## [331] Mid-Eastern OVC
## [333] Atlantic Sun Ivy
## [335] Southern Mid-Eastern
## [337] Big South Northeast
## [339] Southern America East
## [341] Southland Southwestern
## [343] Mid-Eastern Southland
## [345] Southern Big Sky
## [347] Mid-Eastern Southwestern
## [349] Big South
## 33 Levels: AAC America East Atlantic 10 Atlantic Coast ... Western Athletic
# The number of factor levels is the number of distinct conferences.
length(levels(march$conference))
## [1] 33
#9a. Calculate the difference in rank from previous rank
# diff is negative when the current rank number is lower than the previous
# one and positive when it is higher.
march$diff <- march$rank - march$previous
#9b. Print a count and list of schools that have changed 3 or more places
# abs() keeps moves in either direction. subset() evaluates the condition
# inside `march`, so `diff` below is the column created above (not
# base::diff()); rows where the condition is NA are dropped.
march_subset <- subset(march, abs(diff) >= 3)
# The count asked for in 9b.
nrow(march_subset)
# Full school names (the tibble print below truncates them).
march_subset$school
march_subset
## # A tibble: 9 x 10
## rank previous school conference record road neutral home `non-di`
## <int> <int> <chr> <fct> <chr> <chr> <chr> <chr> <chr>
## 1 8 11 Virginia Atlantic C… 28-6 7-4 6- 0 15-2 0-0
## 2 18 21 Michigan… Big Ten 26-8 7-3 7- 1 12-4 0-0
## 3 32 37 Saint Jo… Atlantic 10 24-9 8-4 5- 1 11-4 0-0
## 4 76 70 Georgia … Sun Belt 25-8 10-6 2- 2 11-0 2-0
## 5 90 98 La.-Lafa… Sun Belt 23-11 6-9 4- 0 10-2 3-0
## 6 196 199 Coastal … Big South 21-12 6-7 0- 0 12-5 3-0
## 7 198 201 Rider Metro Atla… 14-17 6-8 2- 2 6-7 0-0
## 8 201 196 UNC Ashe… Big South 17-15 5-10 1- 3 8-2 3-0
## 9 245 242 Mississi… Southeaste… 14-19 0-10 3- 2 11-7 0-0
## # ... with 1 more variable: diff <int>
# Show the rank change for every school. The `diff` column was already
# computed in step 9a, so it is only printed here, not recomputed.
march$diff
## [1] -1 1 0 0 0 0 -1 -3 2 0 2 -1 1 0 0 0 0 -3 1 1 1 0 0
## [24] 0 0 -1 1 0 0 0 0 -5 1 1 1 1 1 0 0 0 0 0 0 0 0 0
## [47] 0 0 0 0 0 0 0 0 0 -1 1 0 0 0 0 0 0 0 0 0 -2 -1 2
## [70] -1 -2 -1 -1 -1 -1 6 0 0 0 0 -2 1 1 0 -1 1 0 0 0 -8 1 1
## [93] 1 1 1 1 1 1 0 -1 1 -1 0 0 0 0 0 0 0 0 0 -1 1 0 0
## [116] 0 0 0 -1 1 0 0 -1 1 0 0 0 0 0 0 0 0 -1 1 0 0 0 0
## [139] -1 0 0 0 0 -1 1 0 -1 2 -1 -1 2 0 0 0 0 0 0 -1 1 -1 -1
## [162] 2 0 0 0 0 -2 1 1 0 0 0 0 0 -1 1 0 0 -1 1 -1 1 0 0
## [185] 0 0 1 0 0 0 0 0 0 0 0 -3 0 -3 1 -2 5 2 0 0 -1 -1 2
## [208] 0 0 -1 1 1 0 0 -1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [231] -1 1 0 0 0 0 0 0 0 0 0 -2 -2 1 3 0 0 0 0 0 0 0 0
## [254] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 -1 1 0 0 0 0 -1 1
## [277] 0 0 -1 1 0 -1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [300] 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [323] 0 0 -1 1 0 0 0 -1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## [346] 0 0 0 0
# Show the schools that moved 3 or more places. `march_subset` was already
# built in step 9b, so it is only printed here, not rebuilt.
march_subset
## # A tibble: 9 x 10
## rank previous school conference record road neutral home `non-di`
## <int> <int> <chr> <fct> <chr> <chr> <chr> <chr> <chr>
## 1 8 11 Virginia Atlantic C… 28-6 7-4 6- 0 15-2 0-0
## 2 18 21 Michigan… Big Ten 26-8 7-3 7- 1 12-4 0-0
## 3 32 37 Saint Jo… Atlantic 10 24-9 8-4 5- 1 11-4 0-0
## 4 76 70 Georgia … Sun Belt 25-8 10-6 2- 2 11-0 2-0
## 5 90 98 La.-Lafa… Sun Belt 23-11 6-9 4- 0 10-2 3-0
## 6 196 199 Coastal … Big South 21-12 6-7 0- 0 12-5 3-0
## 7 198 201 Rider Metro Atla… 14-17 6-8 2- 2 6-7 0-0
## 8 201 196 UNC Ashe… Big South 17-15 5-10 1- 3 8-2 3-0
## 9 245 242 Mississi… Southeaste… 14-19 0-10 3- 2 11-7 0-0
## # ... with 1 more variable: diff <int>