# 1. Make the Lesson 3 assignment folder the working directory so that the
#    relative file name used below resolves.
setwd("~/NYU/classes/2. R/Assignments/Lesson 3")
# 2. Load readr and read the March Madness CSV into a tibble.
library(readr)
march_madness <- read_csv(file = "march_madness.csv")
## Rows: 349 Columns: 9
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr (7): SCHOOL, CONFERENCE, RECORD, ROAD, NEUTRAL, HOME, NON-DI
## dbl (2): RANK, PREVIOUS
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.
# 3. Open the data in the spreadsheet-style viewer.
# View() is an interactive tool, so only call it when a person is driving the
# session; this keeps the script runnable via Rscript or when knitting.
if (interactive()) {
  View(march_madness)
}
# 4. Report the table's shape as (rows, columns).
dim(march_madness)
## [1] 349   9
# 5. List the column headers exactly as they were read from the file.
colnames(march_madness)
## [1] "RANK"       "PREVIOUS"   "SCHOOL"     "CONFERENCE" "RECORD"    
## [6] "ROAD"       "NEUTRAL"    "HOME"       "NON-DI"
# 6. Normalise every column header to lower case, then print the headers to
#    confirm the change.
colnames(march_madness) <- tolower(colnames(march_madness))
colnames(march_madness)
## [1] "rank"       "previous"   "school"     "conference" "record"    
## [6] "road"       "neutral"    "home"       "non-di"
# 7. Inspect each column's storage type together with a few sample values.
# NOTE(review): road/neutral/home/non-di are read as character, and the
# recorded samples include values such as "2-Oct" and "Dec-00" -- presumably
# win-loss records that a spreadsheet converted to dates before export;
# confirm against the source file before analysing these columns.
utils::str(march_madness)
## spec_tbl_df [349 x 9] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ rank      : num [1:349] 1 2 3 4 5 6 7 8 9 10 ...
##  $ previous  : num [1:349] 2 1 3 4 5 6 8 11 7 10 ...
##  $ school    : chr [1:349] "Florida" "Arizona" "Kansas" "Wichita St." ...
##  $ conference: chr [1:349] "Southeastern" "Pac-12" "Big 12" "Missouri Valley" ...
##  $ record    : chr [1:349] "32-2" "30-4" "24-9" "34-0" ...
##  $ road      : chr [1:349] "2-Oct" "3-Aug" "6-May" "Dec-00" ...
##  $ neutral   : chr [1:349] "May-00" "1-Apr" "2-Apr" "May-00" ...
##  $ home      : chr [1:349] "17-0" "18-0" "15-1" "16-0" ...
##  $ non-di    : chr [1:349] "0-0" "0-0" "0-0" "Jan-00" ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   RANK = col_double(),
##   ..   PREVIOUS = col_double(),
##   ..   SCHOOL = col_character(),
##   ..   CONFERENCE = col_character(),
##   ..   RECORD = col_character(),
##   ..   ROAD = col_character(),
##   ..   NEUTRAL = col_character(),
##   ..   HOME = col_character(),
##   ..   `NON-DI` = col_character()
##   .. )
##  - attr(*, "problems")=<externalptr>
# 8. Query how many different conferences there are.
# Store conference as a factor so that each distinct conference is one level.
march_madness$conference <- as.factor(march_madness$conference)
# Direct answer to the question: the number of distinct conferences.
nlevels(march_madness$conference)
## [1] 33
# Full listing of every team's conference; the "Levels" footer of the printed
# factor repeats the count.
march_madness$conference
##   [1] Southeastern            Pac-12                  Big 12                 
##   [4] Missouri Valley         Big East                Big Ten                
##   [7] Big 12                  Atlantic Coast          Atlantic Coast         
##  [10] Big East                Big Ten                 Mountain West          
##  [13] Atlantic 10             Pac-12                  Mountain West          
##  [16] Atlantic Coast          Southeastern            Big Ten                
##  [19] AAC                     West Coast              AAC                    
##  [22] AAC                     Atlantic 10             Big Ten                
##  [25] Atlantic Coast          Big 12                  Atlantic 10            
##  [28] Pac-12                  Atlantic 10             Big 12                 
##  [31] West Coast              Atlantic 10             Conference USA         
##  [34] Pac-12                  Summit                  Big 12                 
##  [37] AAC                     Mid-American            Atlantic Coast         
##  [40] Big East                Pac-12                  Southeastern           
##  [43] Atlantic 10             Pac-12                  Big 12                 
##  [46] Ivy                     Big East                Big Ten                
##  [49] Southeastern            Big Ten                 Big 12                 
##  [52] Southland               AAC                     Atlantic Coast         
##  [55] Atlantic Coast          Big Ten                 OVC                    
##  [58] Horizon                 Metro Atlantic          Metro Atlantic         
##  [61] West Coast              Conference USA          Pac-12                 
##  [64] Conference USA          Colonial                Mid-American           
##  [67] West Coast              Big East                Big East               
##  [70] Big Ten                 Western Athletic        Missouri Valley        
##  [73] Conference USA          Southeastern            Atlantic 10            
##  [76] Sun Belt                Southeastern            Atlantic Coast         
##  [79] Atlantic Coast          Pac-12                  Atlantic Sun           
##  [82] Mountain West           Southeastern            Patriot                
##  [85] Missouri Valley         Mid-American            Mid-American           
##  [88] Big 12                  Atlantic 10             Sun Belt               
##  [91] Horizon                 Big East                Mid-American           
##  [94] Southeastern            Colonial                Metro Atlantic         
##  [97] OVC                     Atlantic 10             Mid-Eastern            
## [100] Pac-12                  Big Ten                 America East           
## [103] Conference USA          Pac-12                  Metro Atlantic         
## [106] Mid-American            Big West                Mountain West          
## [109] Missouri Valley         Atlantic Coast          Big West               
## [112] Southland               Atlantic Coast          Big Ten                
## [115] Summit                  Mountain West           Mountain West          
## [118] Patriot                 Colonial                Southeastern           
## [121] Big 12                  Ivy                     Big Ten                
## [124] Southeastern            Sun Belt                Southern               
## [127] Patriot                 Big East                Northeast              
## [130] Ivy                     Horizon                 Colonial               
## [133] West Coast              Missouri Valley         Mountain West          
## [136] Atlantic Coast          Mountain West           Summit                 
## [139] Summit                  Southeastern            OVC                    
## [142] OVC                     AAC                     Western Athletic       
## [145] Big Ten                 Atlantic Sun            Ivy                    
## [148] Southeastern            Big Sky                 Big East               
## [151] Conference USA          Conference USA          Southern               
## [154] Big East                Atlantic 10             West Coast             
## [157] Atlantic Coast          Southland               West Coast             
## [160] Big West                Mid-American            Southern               
## [163] Atlantic Sun            Atlantic 10             America East           
## [166] Big West                Patriot                 Big South              
## [169] Southeastern            Conference USA          Southwestern           
## [172] Big South               Horizon                 Missouri Valley        
## [175] AAC                     Pac-12                  Northeast              
## [178] West Coast              West Coast              Sun Belt               
## [181] Horizon                 Mountain West           Big Sky                
## [184] Horizon                 America East            Northeast              
## [187] Sun Belt                Atlantic 10             Northeast              
## [190] Atlantic Sun            Southland               AAC                    
## [193] Metro Atlantic          Mid-American            Conference USA         
## [196] Big South               Big Sky                 Metro Atlantic         
## [199] Sun Belt                Missouri Valley         Big South              
## [202] Conference USA          Western Athletic        Mid-Eastern            
## [205] Big Sky                 Atlantic Sun            Atlantic Coast         
## [208] Big West                Atlantic Sun            West Coast             
## [211] Big South               Big 12                  Mid-American           
## [214] Missouri Valley         OVC                     Conference USA         
## [217] Mid-American            Northeast               Summit                 
## [220] AAC                     Southern                Colonial               
## [223] Conference USA          Pac-12                  Mid-Eastern            
## [226] Atlantic 10             Mid-Eastern             Big Sky                
## [229] Ivy                     Horizon                 Metro Atlantic         
## [232] AAC                     Colonial                America East           
## [235] Patriot                 Patriot                 Horizon                
## [238] Western Athletic        Southwestern            Big South              
## [241] Big South               Colonial                Southern               
## [244] Sun Belt                Southeastern            Atlantic Coast         
## [247] Big Sky                 Southland               Metro Atlantic         
## [250] Mountain West           Western Athletic        Big Sky                
## [253] Big West                Southwestern            Summit                 
## [256] Patriot                 OVC                     Patriot                
## [259] Western Athletic        Southland               Mid-Eastern            
## [262] Southland               Big Sky                 Southwestern           
## [265] Missouri Valley         Ivy                     Western Athletic       
## [268] Big South               Northeast               Conference USA         
## [271] Southwestern            Southland               Mid-Eastern            
## [274] Patriot                 Big Sky                 Conference USA         
## [277] Colonial                Mountain West           Western Athletic       
## [280] Sun Belt                Colonial                Atlantic Sun           
## [283] America East            Ivy                     Sun Belt               
## [286] Big West                Big Sky                 Missouri Valley        
## [289] Metro Atlantic          Summit                  OVC                    
## [292] Metro Atlantic          Northeast               Mid-Eastern            
## [295] Mid-American            Southwestern            Northeast              
## [298] Northeast               Conference USA          OVC                    
## [301] Southern                Metro Atlantic          Southern               
## [304] Southwestern            OVC                     Conference USA         
## [307] Sun Belt                Division I Independents Mid-American           
## [310] Atlantic Sun            Big South               Western Athletic       
## [313] Southern                Southwestern            Horizon                
## [316] Big South               OVC                     Southland              
## [319] Big West                OVC                     Patriot                
## [322] Summit                  Mid-Eastern             Atlantic Sun           
## [325] America East            Big West                America East           
## [328] Southland               Mid-Eastern             America East           
## [331] Mid-Eastern             OVC                     Atlantic Sun           
## [334] Ivy                     Southern                Mid-Eastern            
## [337] Big South               Northeast               Southern               
## [340] America East            Southland               Southwestern           
## [343] Mid-Eastern             Southland               Southern               
## [346] Big Sky                 Mid-Eastern             Southwestern           
## [349] Big South              
## 33 Levels: AAC America East Atlantic 10 Atlantic Coast Atlantic Sun ... Western Athletic
# Re-check the table's shape after the conference column was converted.
dim(march_madness)
## [1] 349   9
# 9a. Add a "diff" column: current rank minus previous rank.
march_madness$diff <- with(march_madness, rank - previous)
# 9b. Keep only the teams whose rank moved by three or more places in either
#     direction, then display them.
mm_subset <- subset(march_madness, abs(diff) >= 3)
mm_subset
## # A tibble: 9 x 10
##    rank previous school    conference  record road  neutral home  `non-di`  diff
##   <dbl>    <dbl> <chr>     <fct>       <chr>  <chr> <chr>   <chr> <chr>    <dbl>
## 1     8       11 Virginia  Atlantic C~ 28-6   4-Jul Jun-00  15-2  0-0         -3
## 2    18       21 Michigan~ Big Ten     26-8   3-Jul 1-Jul   4-Dec 0-0         -3
## 3    32       37 Saint Jo~ Atlantic 10 24-9   4-Aug 1-May   4-Nov 0-0         -5
## 4    76       70 Georgia ~ Sun Belt    25-8   6-Oct 2-Feb   Nov-~ Feb-00       6
## 5    90       98 La.-Lafa~ Sun Belt    23-11  9-Jun Apr-00  2-Oct Mar-00      -8
## 6   196      199 Coastal ~ Big South   21-12  7-Jun 0- 0    5-Dec Mar-00      -3
## 7   198      201 Rider     Metro Atla~ 14-17  8-Jun 2-Feb   7-Jun 0-0         -3
## 8   201      196 UNC Ashe~ Big South   17-15  10-M~ 3-Jan   2-Aug Mar-00       5
## 9   245      242 Mississi~ Southeaste~ 14-19  0-10  2-Mar   7-Nov 0-0          3
# 9b answer: each row of mm_subset is one team, so the row count is the number
# of teams whose rank moved by three or more places.
nrow(mm_subset)
## [1] 9