library(readr)
library(tidyverse)
library(naniar)
draft_66_pass <- read_csv("draft_66_pass.csv")
## Parsed with column specification:
## cols(
## .default = col_double(),
## draft_team = col_character(),
## name = col_character(),
## college = col_character(),
## nba_season = col_character(),
## nba_tm = col_character(),
## nba_lg = col_character(),
## nba_pos = col_character(),
## nba_g = col_character(),
## nba_gs = col_logical(),
## nba_mip = col_logical(),
## nba_6th_man = col_logical(),
## nba_finals_mvp = col_logical(),
## nba_mvp = col_logical(),
## nba_all_rookie_2nd = col_logical(),
## nba_all_3rd = col_logical(),
## nba_def_mvp = col_logical(),
## nba_def_1st = col_logical(),
## nba_def_2nd = col_logical()
## )
## See spec(...) for full column specifications.
draft_66_fail <- read_csv("draft_66_fail.csv")
## Parsed with column specification:
## cols(
## draft_year = col_double(),
## pick_number = col_double(),
## draft_team = col_character(),
## name = col_character(),
## college = col_character()
## )
draft_66_pass <-select(draft_66_pass, c("nba_g","pick_number","name","draft_team","college",
"nba_pts","nba_trb","nba_ast","nba_blk","nba_stl"))
draft_66_pass %>% arrange(pick_number) %>% vis_miss()
gg_miss_var(draft_66_pass, facet = draft_team)
draft_66_pass <- draft_66_pass %>% fill(name,college)
miss_scan_count(data = draft_66_pass, search = list("N/A","missing","na","NA","n/a"))
## # A tibble: 10 x 2
## Variable n
## <chr> <int>
## 1 nba_g 7
## 2 pick_number 0
## 3 name 0
## 4 draft_team 0
## 5 college 11
## 6 nba_pts 0
## 7 nba_trb 0
## 8 nba_ast 0
## 9 nba_blk 0
## 10 nba_stl 0
draft_66_pass <- replace_with_na(draft_66_pass, replace = list(nba_g = c("N/A","missing","na","NA","n/a")))
miss_scan_count(data = draft_66_pass, search = list("N/A","missing","na","NA","n/a"))
## # A tibble: 10 x 2
## Variable n
## <chr> <int>
## 1 nba_g 0
## 2 pick_number 0
## 3 name 0
## 4 draft_team 0
## 5 college 11
## 6 nba_pts 0
## 7 nba_trb 0
## 8 nba_ast 0
## 9 nba_blk 0
## 10 nba_stl 0
draft_66_pass$nba_g<-as.numeric(draft_66_pass$nba_g) # need to convert this variable to numeric first
draft_66_pass[is.na(draft_66_pass)] <- 0
draft_66_pass <- draft_66_pass %>%
group_by(pick_number,name,draft_team,college) %>%
summarize(nba_g=sum(nba_g),nba_pts=sum(nba_pts),nba_trb=sum(nba_trb),
nba_ast=sum(nba_ast),nba_blk=sum(nba_blk),nba_stl=sum(nba_stl))
## `summarise()` regrouping output by 'pick_number', 'name', 'draft_team' (override with `.groups` argument)
#Now Draft_66_Pass has summary variables
head(draft_66_pass)
## # A tibble: 6 x 10
## # Groups: pick_number, name, draft_team [6]
## pick_number name draft_team college nba_g nba_pts nba_trb nba_ast nba_blk
## <dbl> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 Cazz~ NYK Univer~ 817 12377 3068 1838 33
## 2 2 Dave~ DET Syracu~ 901 18327 3420 5397 89
## 3 3 Clyd~ SFW Vander~ 822 6204 8383 893 84
## 4 4 Lou ~ STL Univer~ 890 17940 3926 2432 98
## 5 5 Jack~ BAL Duke U~ 1002 13922 4885 2121 96
## 6 6 Walt~ CIN Univer~ 692 5282 3457 428 30
## # ... with 1 more variable: nba_stl <dbl>
draft_66_test <- draft_66_pass %>%
inner_join(draft_66_fail, by = c("name" = "name"), suffix = c("_pass", "_fail")) ## no matches
str(draft_66_test)
## tibble [0 x 14] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ pick_number_pass: num(0)
## $ name : chr(0)
## $ draft_team_pass : chr(0)
## $ college_pass : chr(0)
## $ nba_g : num(0)
## $ nba_pts : num(0)
## $ nba_trb : num(0)
## $ nba_ast : num(0)
## $ nba_blk : num(0)
## $ nba_stl : num(0)
## $ draft_year : num(0)
## $ pick_number_fail: num(0)
## $ draft_team_fail : chr(0)
## $ college_fail : chr(0)
## - attr(*, "groups")= tibble [0 x 2] (S3: tbl_df/tbl/data.frame)
## ..$ name : chr(0)
## ..$ .rows: list<int> [1:0]
## list()
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
draft_66_test <- draft_66_pass %>%
left_join(draft_66_fail, by = c("name" = "name"), suffix = c("_pass", "_fail")) ## 25 observations (pass only)
str(draft_66_pass)
## tibble [25 x 10] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ pick_number: num [1:25] 1 2 3 4 5 6 7 8 9 10 ...
## $ name : chr [1:25] "Cazzie Russell" "Dave Bing" "Clyde Lee" "Lou Hudson" ...
## $ draft_team : chr [1:25] "NYK" "DET" "SFW" "STL" ...
## $ college : chr [1:25] "University of Michigan" "Syracuse University" "Vanderbilt University" "University of Minnesota" ...
## $ nba_g : num [1:25] 817 901 822 890 1002 ...
## $ nba_pts : num [1:25] 12377 18327 6204 17940 13922 ...
## $ nba_trb : num [1:25] 3068 3420 8383 3926 4885 ...
## $ nba_ast : num [1:25] 1838 5397 893 2432 2121 ...
## $ nba_blk : num [1:25] 33 89 84 98 96 30 3 46 65 0 ...
## $ nba_stl : num [1:25] 239 483 110 516 246 23 11 178 189 0 ...
## - attr(*, "groups")= tibble [25 x 4] (S3: tbl_df/tbl/data.frame)
## ..$ pick_number: num [1:25] 1 2 3 4 5 6 7 8 9 10 ...
## ..$ name : chr [1:25] "Cazzie Russell" "Dave Bing" "Clyde Lee" "Lou Hudson" ...
## ..$ draft_team : chr [1:25] "NYK" "DET" "SFW" "STL" ...
## ..$ .rows : list<int> [1:25]
## .. ..$ : int 1
## .. ..$ : int 2
## .. ..$ : int 3
## .. ..$ : int 4
## .. ..$ : int 5
## .. ..$ : int 6
## .. ..$ : int 7
## .. ..$ : int 8
## .. ..$ : int 9
## .. ..$ : int 10
## .. ..$ : int 11
## .. ..$ : int 12
## .. ..$ : int 13
## .. ..$ : int 14
## .. ..$ : int 15
## .. ..$ : int 16
## .. ..$ : int 17
## .. ..$ : int 18
## .. ..$ : int 19
## .. ..$ : int 20
## .. ..$ : int 21
## .. ..$ : int 22
## .. ..$ : int 23
## .. ..$ : int 24
## .. ..$ : int 25
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
draft_66_test <- draft_66_pass %>%
right_join(draft_66_fail, by = c("name" = "name"), suffix = c("_pass", "_fail")) ## 35 observations (fail only)
str(draft_66_test)
## tibble [35 x 14] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ pick_number_pass: num [1:35] NA NA NA NA NA NA NA NA NA NA ...
## $ name : chr [1:35] "Jerry Lee Wells" "Leon Clark" "Stew Johnson" "Ollie Darden" ...
## $ draft_team_pass : chr [1:35] NA NA NA NA ...
## $ college_pass : chr [1:35] NA NA NA NA ...
## $ nba_g : num [1:35] NA NA NA NA NA NA NA NA NA NA ...
## $ nba_pts : num [1:35] NA NA NA NA NA NA NA NA NA NA ...
## $ nba_trb : num [1:35] NA NA NA NA NA NA NA NA NA NA ...
## $ nba_ast : num [1:35] NA NA NA NA NA NA NA NA NA NA ...
## $ nba_blk : num [1:35] NA NA NA NA NA NA NA NA NA NA ...
## $ nba_stl : num [1:35] NA NA NA NA NA NA NA NA NA NA ...
## $ draft_year : num [1:35] 1966 1966 1966 1966 1966 ...
## $ pick_number_fail: num [1:35] 16 18 21 22 23 25 28 30 31 32 ...
## $ draft_team_fail : chr [1:35] "CIN" "BOS" "NYK" "DET" ...
## $ college_fail : chr [1:35] "Oklahoma City University" "University of Wyoming" "Murray State University" "University of Michigan" ...
## - attr(*, "groups")= tibble [35 x 2] (S3: tbl_df/tbl/data.frame)
## ..$ name : chr [1:35] "Bob McIntyre" "Carroll Hooser" "Charles Schmaus" "Charlie Hunter" ...
## ..$ .rows: list<int> [1:35]
## .. ..$ : int 12
## .. ..$ : int 27
## .. ..$ : int 14
## .. ..$ : int 33
## .. ..$ : int 6
## .. ..$ : int 19
## .. ..$ : int 8
## .. ..$ : int 7
## .. ..$ : int 26
## .. ..$ : int 13
## .. ..$ : int 30
## .. ..$ : int 10
## .. ..$ : int 1
## .. ..$ : int 28
## .. ..$ : int 15
## .. ..$ : int 20
## .. ..$ : int 21
## .. ..$ : int 32
## .. ..$ : int 25
## .. ..$ : int 9
## .. ..$ : int 2
## .. ..$ : int 29
## .. ..$ : int 35
## .. ..$ : int 4
## .. ..$ : int 34
## .. ..$ : int 22
## .. ..$ : int 16
## .. ..$ : int 23
## .. ..$ : int 5
## .. ..$ : int 11
## .. ..$ : int 31
## .. ..$ : int 3
## .. ..$ : int 24
## .. ..$ : int 18
## .. ..$ : int 17
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
draft_66_test <- draft_66_pass %>%
full_join(draft_66_fail, by = c("name" = "name"), suffix = c("_pass", "_fail")) ## 60 observations (pass + fail)
str(draft_66_test)
## tibble [60 x 14] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ pick_number_pass: num [1:60] 1 2 3 4 5 6 7 8 9 10 ...
## $ name : chr [1:60] "Cazzie Russell" "Dave Bing" "Clyde Lee" "Lou Hudson" ...
## $ draft_team_pass : chr [1:60] "NYK" "DET" "SFW" "STL" ...
## $ college_pass : chr [1:60] "University of Michigan" "Syracuse University" "Vanderbilt University" "University of Minnesota" ...
## $ nba_g : num [1:60] 817 901 822 890 1002 ...
## $ nba_pts : num [1:60] 12377 18327 6204 17940 13922 ...
## $ nba_trb : num [1:60] 3068 3420 8383 3926 4885 ...
## $ nba_ast : num [1:60] 1838 5397 893 2432 2121 ...
## $ nba_blk : num [1:60] 33 89 84 98 96 30 3 46 65 0 ...
## $ nba_stl : num [1:60] 239 483 110 516 246 23 11 178 189 0 ...
## $ draft_year : num [1:60] NA NA NA NA NA NA NA NA NA NA ...
## $ pick_number_fail: num [1:60] NA NA NA NA NA NA NA NA NA NA ...
## $ draft_team_fail : chr [1:60] NA NA NA NA ...
## $ college_fail : chr [1:60] NA NA NA NA ...
## - attr(*, "groups")= tibble [60 x 2] (S3: tbl_df/tbl/data.frame)
## ..$ name : chr [1:60] "Archie Clark" "Bill Melchionni" "Bob McIntyre" "Carroll Hooser" ...
## ..$ .rows: list<int> [1:60]
## .. ..$ : int 23
## .. ..$ : int 17
## .. ..$ : int 37
## .. ..$ : int 52
## .. ..$ : int 1
## .. ..$ : int 39
## .. ..$ : int 58
## .. ..$ : int 3
## .. ..$ : int 2
## .. ..$ : int 10
## .. ..$ : int 31
## .. ..$ : int 44
## .. ..$ : int 14
## .. ..$ : int 22
## .. ..$ : int 12
## .. ..$ : int 33
## .. ..$ : int 18
## .. ..$ : int 32
## .. ..$ : int 51
## .. ..$ : int 38
## .. ..$ : int 16
## .. ..$ : int 11
## .. ..$ : int 5
## .. ..$ : int 55
## .. ..$ : int 35
## .. ..$ : int 7
## .. ..$ : int 26
## .. ..$ : int 8
## .. ..$ : int 53
## .. ..$ : int 20
## .. ..$ : int 40
## .. ..$ : int 13
## .. ..$ : int 45
## .. ..$ : int 21
## .. ..$ : int 46
## .. ..$ : int 24
## .. ..$ : int 57
## .. ..$ : int 25
## .. ..$ : int 50
## .. ..$ : int 34
## .. ..$ : int 27
## .. ..$ : int 54
## .. ..$ : int 4
## .. ..$ : int 9
## .. ..$ : int 60
## .. ..$ : int 15
## .. ..$ : int 29
## .. ..$ : int 59
## .. ..$ : int 47
## .. ..$ : int 41
## .. ..$ : int 48
## .. ..$ : int 30
## .. ..$ : int 36
## .. ..$ : int 56
## .. ..$ : int 28
## .. ..$ : int 49
## .. ..$ : int 43
## .. ..$ : int 19
## .. ..$ : int 6
## .. ..$ : int 42
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
draft_66_test <- draft_66_pass %>%
full_join(draft_66_fail, by = c("name" = "name"), suffix = c("_pass", "_fail")) %>%
filter(college_pass=="University of Utah") ## 1 match (players that made the league from the U. of Utah in '66)
str(draft_66_test)
## tibble [1 x 14] (S3: grouped_df/tbl_df/tbl/data.frame)
## $ pick_number_pass: num 7
## $ name : chr "Jerry Chambers"
## $ draft_team_pass : chr "LAL"
## $ college_pass : chr "University of Utah"
## $ nba_g : num 320
## $ nba_pts : num 2667
## $ nba_trb : num 1032
## $ nba_ast : num 270
## $ nba_blk : num 3
## $ nba_stl : num 11
## $ draft_year : num NA
## $ pick_number_fail: num NA
## $ draft_team_fail : chr NA
## $ college_fail : chr NA
## - attr(*, "groups")= tibble [1 x 2] (S3: tbl_df/tbl/data.frame)
## ..$ name : chr "Jerry Chambers"
## ..$ .rows: list<int> [1:1]
## .. ..$ : int 1
## .. ..@ ptype: int(0)
## ..- attr(*, ".drop")= logi TRUE
draft_66_test <- draft_66_test %>% separate(name,into=c("first_name","last_name"))
draft_66_test <- draft_66_test %>% unite(name,first_name,last_name,sep="|")