#1 make the Lesson 3 assignment folder the working directory
# NOTE(review): this absolute path only exists on the author's machine;
# anyone else running the script must edit it (or open the folder as a
# project and drop the call).
setwd(dir = "~/NYU/classes/2. R/Assignments/Lesson 3")
#2 load readr and read the march madness CSV from the working directory
# NOTE(review): the str() output recorded in this script shows values such as
# "2-Oct" and "Dec-00" in the win-loss columns, which looks like the records
# were turned into dates before the CSV was saved -- confirm the source file.
library(readr)
march_madness <- read_csv(file = "march_madness.csv")
## Rows: 349 Columns: 9
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (7): SCHOOL, CONFERENCE, RECORD, ROAD, NEUTRAL, HOME, NON-DI
## dbl (2): RANK, PREVIOUS
##
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
#3 open the data in the spreadsheet-style viewer
# View() is a GUI call, so only run it in an interactive session; this keeps
# the script usable when it is run with Rscript or knitted.
if (interactive()) {
  View(march_madness)
}
#4 report the size of the table: number of rows, then number of columns
dim(march_madness)
## [1] 349 9
#5 list the column names exactly as they were imported
colnames(march_madness)
## [1] "RANK" "PREVIOUS" "SCHOOL" "CONFERENCE" "RECORD"
## [6] "ROAD" "NEUTRAL" "HOME" "NON-DI"
#6 convert every column name to lower case, then confirm the change
colnames(march_madness) <- tolower(colnames(march_madness))
colnames(march_madness)
## [1] "rank" "previous" "school" "conference" "record"
## [6] "road" "neutral" "home" "non-di"
#7 inspect the storage type of each variable
str(march_madness)
## spec_tbl_df [349 x 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ rank : num [1:349] 1 2 3 4 5 6 7 8 9 10 ...
## $ previous : num [1:349] 2 1 3 4 5 6 8 11 7 10 ...
## $ school : chr [1:349] "Florida" "Arizona" "Kansas" "Wichita St." ...
## $ conference: chr [1:349] "Southeastern" "Pac-12" "Big 12" "Missouri Valley" ...
## $ record : chr [1:349] "32-2" "30-4" "24-9" "34-0" ...
## $ road : chr [1:349] "2-Oct" "3-Aug" "6-May" "Dec-00" ...
## $ neutral : chr [1:349] "May-00" "1-Apr" "2-Apr" "May-00" ...
## $ home : chr [1:349] "17-0" "18-0" "15-1" "16-0" ...
## $ non-di : chr [1:349] "0-0" "0-0" "0-0" "Jan-00" ...
## - attr(*, "spec")=
## .. cols(
## .. RANK = col_double(),
## .. PREVIOUS = col_double(),
## .. SCHOOL = col_character(),
## .. CONFERENCE = col_character(),
## .. RECORD = col_character(),
## .. ROAD = col_character(),
## .. NEUTRAL = col_character(),
## .. HOME = col_character(),
## .. `NON-DI` = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
#8 query how many different conferences there are
# Store conference as a factor so each distinct conference becomes one level,
# then count the levels directly instead of printing all 349 values.
march_madness$conference <- as.factor(march_madness$conference)
nlevels(march_madness$conference)
## [1] 33
# the conversion changes the column's type only; the table size is unchanged
dim(march_madness)
## [1] 349 9
#9a compute a new column called "diff": current rank minus previous rank
# (negative when the current rank number is lower than the previous one)
march_madness$diff <- march_madness$rank - march_madness$previous
#9b how many teams changed 3 or more places, in either direction
# Standard `[` indexing is used instead of subset(), which is intended for
# interactive use. which() drops rows whose diff is NA, matching what
# subset() did.
mm_subset <- march_madness[which(abs(march_madness$diff) >= 3), ]
mm_subset
## # A tibble: 9 x 10
## rank previous school conference record road neutral home `non-di` diff
## <dbl> <dbl> <chr> <fct> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 8 11 Virginia Atlantic C~ 28-6 4-Jul Jun-00 15-2 0-0 -3
## 2 18 21 Michigan~ Big Ten 26-8 3-Jul 1-Jul 4-Dec 0-0 -3
## 3 32 37 Saint Jo~ Atlantic 10 24-9 4-Aug 1-May 4-Nov 0-0 -5
## 4 76 70 Georgia ~ Sun Belt 25-8 6-Oct 2-Feb Nov-~ Feb-00 6
## 5 90 98 La.-Lafa~ Sun Belt 23-11 9-Jun Apr-00 2-Oct Mar-00 -8
## 6 196 199 Coastal ~ Big South 21-12 7-Jun 0- 0 5-Dec Mar-00 -3
## 7 198 201 Rider Metro Atla~ 14-17 8-Jun 2-Feb 7-Jun 0-0 -3
## 8 201 196 UNC Ashe~ Big South 17-15 10-M~ 3-Jan 2-Aug Mar-00 5
## 9 245 242 Mississi~ Southeaste~ 14-19 0-10 2-Mar 7-Nov 0-0 3
# the number of rows is the number of teams that moved 3 or more places
nrow(mm_subset)
## [1] 9