#Kristen Sosulski
#April 6, 2018
#1.Set working directory
setwd("~/Dropbox/R_Fundamentals")
#2. Importing our data
library(readr)
march <- read_csv("march_madness.csv")
## Parsed with column specification:
## cols(
## RANK = col_integer(),
## PREVIOUS = col_integer(),
## SCHOOL = col_character(),
## CONFERENCE = col_character(),
## RECORD = col_character(),
## ROAD = col_character(),
## NEUTRAL = col_character(),
## HOME = col_character(),
## `NON-DI` = col_character()
## )
#3.Viewing our data
march
## # A tibble: 349 x 9
## RANK PREVIOUS SCHOOL CONFERENCE RECORD ROAD NEUTRAL HOME `NON-DI`
## <int> <int> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 2 Florida Southeaste… 32-2 10-2 5- 0 17-0 0-0
## 2 2 1 Arizona Pac-12 30-4 8-3 4- 1 18-0 0-0
## 3 3 3 Kansas Big 12 24-9 5-6 4- 2 15-1 0-0
## 4 4 4 Wichita… Missouri V… 34-0 12-0 5- 0 16-0 1-0
## 5 5 5 Villano… Big East 28-4 10-2 3- 1 15-1 0-0
## 6 6 6 Wiscons… Big Ten 26-7 8-3 4- 1 14-3 0-0
## 7 7 8 Iowa St. Big 12 26-7 4-6 7- 0 15-1 0-0
## 8 8 11 Virginia Atlantic C… 28-6 7-4 6- 0 15-2 0-0
## 9 9 7 Duke Atlantic C… 26-8 4-5 5- 3 17-0 0-0
## 10 10 10 Creight… Big East 26-7 7-4 3- 3 16-0 0-0
## # ... with 339 more rows
# 4. Print the number of rows and columns (rows first, then columns)
c(nrow(march), ncol(march))
## [1] 349 9
# 5. Print the column names as a character vector
colnames(march)
## [1] "RANK" "PREVIOUS" "SCHOOL" "CONFERENCE" "RECORD"
## [6] "ROAD" "NEUTRAL" "HOME" "NON-DI"
# 6. Convert every column name to lowercase, then print the result to confirm.
#    Only letter case changes, so a name such as "NON-DI" keeps its hyphen and
#    still needs backticks when referenced by name.
colnames(march) <- tolower(colnames(march))
colnames(march)
## [1] "rank" "previous" "school" "conference" "record"
## [6] "road" "neutral" "home" "non-di"
# 7. Explore variable types: str() lists each column with its type and
#    its first few values.
str(object = march)
## Classes 'tbl_df', 'tbl' and 'data.frame': 349 obs. of 9 variables:
## $ rank : int 1 2 3 4 5 6 7 8 9 10 ...
## $ previous : int 2 1 3 4 5 6 8 11 7 10 ...
## $ school : chr "Florida" "Arizona" "Kansas" "Wichita St." ...
## $ conference: chr "Southeastern" "Pac-12" "Big 12" "Missouri Valley" ...
## $ record : chr "32-2" "30-4" "24-9" "34-0" ...
## $ road : chr "10-2" "8-3" "5-6" "12-0" ...
## $ neutral : chr "5- 0" "4- 1" "4- 2" "5- 0" ...
## $ home : chr "17-0" "18-0" "15-1" "16-0" ...
## $ non-di : chr "0-0" "0-0" "0-0" "1-0" ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 9
## .. ..$ RANK : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ PREVIOUS : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ SCHOOL : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ CONFERENCE: list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ RECORD : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ ROAD : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ NEUTRAL : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ HOME : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ NON-DI : list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
# 8. How many different conferences are there?
### Approach 1: cast the column to a factor, so its levels enumerate the
### distinct conferences. This permanently changes the column type in `march`.
march[["conference"]] <- as.factor(march[["conference"]])
# Print the factor: every row's value followed by a summary of the levels.
march[["conference"]]
## [1] Southeastern Pac-12
## [3] Big 12 Missouri Valley
## [5] Big East Big Ten
## [7] Big 12 Atlantic Coast
## [9] Atlantic Coast Big East
## [11] Big Ten Mountain West
## [13] Atlantic 10 Pac-12
## [15] Mountain West Atlantic Coast
## [17] Southeastern Big Ten
## [19] AAC West Coast
## [21] AAC AAC
## [23] Atlantic 10 Big Ten
## [25] Atlantic Coast Big 12
## [27] Atlantic 10 Pac-12
## [29] Atlantic 10 Big 12
## [31] West Coast Atlantic 10
## [33] Conference USA Pac-12
## [35] Summit Big 12
## [37] AAC Mid-American
## [39] Atlantic Coast Big East
## [41] Pac-12 Southeastern
## [43] Atlantic 10 Pac-12
## [45] Big 12 Ivy
## [47] Big East Big Ten
## [49] Southeastern Big Ten
## [51] Big 12 Southland
## [53] AAC Atlantic Coast
## [55] Atlantic Coast Big Ten
## [57] OVC Horizon
## [59] Metro Atlantic Metro Atlantic
## [61] West Coast Conference USA
## [63] Pac-12 Conference USA
## [65] Colonial Mid-American
## [67] West Coast Big East
## [69] Big East Big Ten
## [71] Western Athletic Missouri Valley
## [73] Conference USA Southeastern
## [75] Atlantic 10 Sun Belt
## [77] Southeastern Atlantic Coast
## [79] Atlantic Coast Pac-12
## [81] Atlantic Sun Mountain West
## [83] Southeastern Patriot
## [85] Missouri Valley Mid-American
## [87] Mid-American Big 12
## [89] Atlantic 10 Sun Belt
## [91] Horizon Big East
## [93] Mid-American Southeastern
## [95] Colonial Metro Atlantic
## [97] OVC Atlantic 10
## [99] Mid-Eastern Pac-12
## [101] Big Ten America East
## [103] Conference USA Pac-12
## [105] Metro Atlantic Mid-American
## [107] Big West Mountain West
## [109] Missouri Valley Atlantic Coast
## [111] Big West Southland
## [113] Atlantic Coast Big Ten
## [115] Summit Mountain West
## [117] Mountain West Patriot
## [119] Colonial Southeastern
## [121] Big 12 Ivy
## [123] Big Ten Southeastern
## [125] Sun Belt Southern
## [127] Patriot Big East
## [129] Northeast Ivy
## [131] Horizon Colonial
## [133] West Coast Missouri Valley
## [135] Mountain West Atlantic Coast
## [137] Mountain West Summit
## [139] Summit Southeastern
## [141] OVC OVC
## [143] AAC Western Athletic
## [145] Big Ten Atlantic Sun
## [147] Ivy Southeastern
## [149] Big Sky Big East
## [151] Conference USA Conference USA
## [153] Southern Big East
## [155] Atlantic 10 West Coast
## [157] Atlantic Coast Southland
## [159] West Coast Big West
## [161] Mid-American Southern
## [163] Atlantic Sun Atlantic 10
## [165] America East Big West
## [167] Patriot Big South
## [169] Southeastern Conference USA
## [171] Southwestern Big South
## [173] Horizon Missouri Valley
## [175] AAC Pac-12
## [177] Northeast West Coast
## [179] West Coast Sun Belt
## [181] Horizon Mountain West
## [183] Big Sky Horizon
## [185] America East Northeast
## [187] Sun Belt Atlantic 10
## [189] Northeast Atlantic Sun
## [191] Southland AAC
## [193] Metro Atlantic Mid-American
## [195] Conference USA Big South
## [197] Big Sky Metro Atlantic
## [199] Sun Belt Missouri Valley
## [201] Big South Conference USA
## [203] Western Athletic Mid-Eastern
## [205] Big Sky Atlantic Sun
## [207] Atlantic Coast Big West
## [209] Atlantic Sun West Coast
## [211] Big South Big 12
## [213] Mid-American Missouri Valley
## [215] OVC Conference USA
## [217] Mid-American Northeast
## [219] Summit AAC
## [221] Southern Colonial
## [223] Conference USA Pac-12
## [225] Mid-Eastern Atlantic 10
## [227] Mid-Eastern Big Sky
## [229] Ivy Horizon
## [231] Metro Atlantic AAC
## [233] Colonial America East
## [235] Patriot Patriot
## [237] Horizon Western Athletic
## [239] Southwestern Big South
## [241] Big South Colonial
## [243] Southern Sun Belt
## [245] Southeastern Atlantic Coast
## [247] Big Sky Southland
## [249] Metro Atlantic Mountain West
## [251] Western Athletic Big Sky
## [253] Big West Southwestern
## [255] Summit Patriot
## [257] OVC Patriot
## [259] Western Athletic Southland
## [261] Mid-Eastern Southland
## [263] Big Sky Southwestern
## [265] Missouri Valley Ivy
## [267] Western Athletic Big South
## [269] Northeast Conference USA
## [271] Southwestern Southland
## [273] Mid-Eastern Patriot
## [275] Big Sky Conference USA
## [277] Colonial Mountain West
## [279] Western Athletic Sun Belt
## [281] Colonial Atlantic Sun
## [283] America East Ivy
## [285] Sun Belt Big West
## [287] Big Sky Missouri Valley
## [289] Metro Atlantic Summit
## [291] OVC Metro Atlantic
## [293] Northeast Mid-Eastern
## [295] Mid-American Southwestern
## [297] Northeast Northeast
## [299] Conference USA OVC
## [301] Southern Metro Atlantic
## [303] Southern Southwestern
## [305] OVC Conference USA
## [307] Sun Belt Division I Independents
## [309] Mid-American Atlantic Sun
## [311] Big South Western Athletic
## [313] Southern Southwestern
## [315] Horizon Big South
## [317] OVC Southland
## [319] Big West OVC
## [321] Patriot Summit
## [323] Mid-Eastern Atlantic Sun
## [325] America East Big West
## [327] America East Southland
## [329] Mid-Eastern America East
## [331] Mid-Eastern OVC
## [333] Atlantic Sun Ivy
## [335] Southern Mid-Eastern
## [337] Big South Northeast
## [339] Southern America East
## [341] Southland Southwestern
## [343] Mid-Eastern Southland
## [345] Southern Big Sky
## [347] Mid-Eastern Southwestern
## [349] Big South
## 33 Levels: AAC America East Atlantic 10 Atlantic Coast ... Western Athletic
# Count the conferences by measuring the length of the levels vector ...
length(levels(march[["conference"]]))
## [1] 33
# ... or ask for the number of levels directly (same result).
nlevels(march[["conference"]])
## [1] 33
### Approach 2: list each conference once, in order of first appearance
unique(march[["conference"]])
## [1] Southeastern Pac-12
## [3] Big 12 Missouri Valley
## [5] Big East Big Ten
## [7] Atlantic Coast Mountain West
## [9] Atlantic 10 AAC
## [11] West Coast Conference USA
## [13] Summit Mid-American
## [15] Ivy Southland
## [17] OVC Horizon
## [19] Metro Atlantic Colonial
## [21] Western Athletic Sun Belt
## [23] Atlantic Sun Patriot
## [25] Mid-Eastern America East
## [27] Big West Southern
## [29] Northeast Big Sky
## [31] Big South Southwestern
## [33] Division I Independents
## 33 Levels: AAC America East Atlantic 10 Atlantic Coast ... Western Athletic
# Count the distinct values. Unlike nlevels(), this would count NA as a value
# if the column contained any.
length(unique(march[["conference"]]))
## [1] 33
# 9a. Difference between the current and the previous rank.
#     diff = rank - previous, so a negative value means the rank number
#     decreased and a positive value means it increased.
march$diff <- with(march, rank - previous)
# Print the tibble to confirm that the new `diff` column was added.
march
## # A tibble: 349 x 10
## rank previous school conference record road neutral home `non-di`
## <int> <int> <chr> <fct> <chr> <chr> <chr> <chr> <chr>
## 1 1 2 Florida Southeaste… 32-2 10-2 5- 0 17-0 0-0
## 2 2 1 Arizona Pac-12 30-4 8-3 4- 1 18-0 0-0
## 3 3 3 Kansas Big 12 24-9 5-6 4- 2 15-1 0-0
## 4 4 4 Wichita… Missouri V… 34-0 12-0 5- 0 16-0 1-0
## 5 5 5 Villano… Big East 28-4 10-2 3- 1 15-1 0-0
## 6 6 6 Wiscons… Big Ten 26-7 8-3 4- 1 14-3 0-0
## 7 7 8 Iowa St. Big 12 26-7 4-6 7- 0 15-1 0-0
## 8 8 11 Virginia Atlantic C… 28-6 7-4 6- 0 15-2 0-0
## 9 9 7 Duke Atlantic C… 26-8 4-5 5- 3 17-0 0-0
## 10 10 10 Creight… Big East 26-7 7-4 3- 3 16-0 0-0
## # ... with 339 more rows, and 1 more variable: diff <int>
# 9b. Schools whose rank changed by 3 or more places in either direction.
#     which() keeps only rows where the test is TRUE, so rows with a missing
#     `diff` are dropped, exactly as subset() would drop them.
# NOTE(review): the original heading says "print", but this statement only
# stores the matching rows (all columns) in `march_subset`; confirm that it is
# printed afterwards.
march_subset <- march[which(abs(march$diff) >= 3), , drop = FALSE]