library(tidyverse)
## -- Attaching packages -------------------------------------------- tidyverse 1.2.1 --
## √ ggplot2 2.2.1 √ purrr 0.2.4
## √ tibble 1.4.2 √ dplyr 0.7.4
## √ tidyr 0.8.0 √ stringr 1.2.0
## √ readr 1.1.1 √ forcats 0.2.0
## -- Conflicts ----------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Read the season statistics from a CSV file picked interactively in a file
# dialog, then convert the resulting data frame to a tibble.
data2 <- read.csv(file.choose())
data2_tibble <- as_tibble(data2)

# Keep only the 1991 rows and sort them by PTS, largest value first.
new1 <- data2_tibble %>%
  filter(Year == 1991)
new2 <- new1 %>%
  arrange(desc(PTS))

# Top row, second column (Player): the player with the highest PTS in 1991.
new2[1, 2]
## # A tibble: 1 x 1
## Player
## <fct>
## 1 Michael Jordan*
# Rows from the year 2000 onwards, ordered by FT. from highest to lowest.
new3 <- data2_tibble %>%
  filter(Year >= 2000)
new4 <- new3 %>%
  arrange(desc(FT.))

# Top row, second column (Player) of that ordering.
new4[1, 2]
## # A tibble: 1 x 1
## Player
## <fct>
## 1 Drew Barry
# Copy of the full table with the Pos column renamed to Position.
new5 <- data2_tibble %>%
  rename(Position = Pos)
# Show the first ten rows.
new5[seq_len(10), ]
## # A tibble: 10 x 52
## Year Player Position Age Tm G GS MP PER TS. X3PAr
## <int> <fct> <fct> <int> <fct> <int> <int> <int> <dbl> <dbl> <dbl>
## 1 1950 Curly A~ G-F 31 FTW 63 NA NA NA 0.368 NA
## 2 1950 Cliff B~ SG 29 INO 49 NA NA NA 0.435 NA
## 3 1950 Leo Bar~ SF 25 CHS 67 NA NA NA 0.394 NA
## 4 1950 Ed Bart~ F 24 TOT 15 NA NA NA 0.312 NA
## 5 1950 Ed Bart~ F 24 DNN 13 NA NA NA 0.308 NA
## 6 1950 Ed Bart~ F 24 NYK 2 NA NA NA 0.376 NA
## 7 1950 Ralph B~ G 22 INO 60 NA NA NA 0.422 NA
## 8 1950 Gene Be~ G-F 23 TRI 3 NA NA NA 0.275 NA
## 9 1950 Charlie~ F-C 28 TOT 65 NA NA NA 0.346 NA
## 10 1950 Charlie~ F-C 28 FTW 36 NA NA NA 0.362 NA
## # ... with 41 more variables: FTr <dbl>, ORB. <dbl>, DRB. <dbl>,
## # TRB. <dbl>, AST. <dbl>, STL. <dbl>, BLK. <dbl>, TOV. <dbl>,
## # USG. <dbl>, blanl <lgl>, OWS <dbl>, DWS <dbl>, WS <dbl>, WS.48 <dbl>,
## # blank2 <lgl>, OBPM <dbl>, DBPM <dbl>, BPM <dbl>, VORP <dbl>, FG <int>,
## # FGA <int>, FG. <dbl>, X3P <int>, X3PA <int>, X3P. <dbl>, X2P <int>,
## # X2PA <int>, X2P. <dbl>, eFG. <dbl>, FT <int>, FTA <int>, FT. <dbl>,
## # ORB <int>, DRB <int>, TRB <int>, AST <int>, STL <int>, BLK <int>,
## # TOV <int>, PF <int>, PTS <int>
# Split Position into a first and a second position (e.g. "G-F" -> "G", "F").
# Rows holding a single position (e.g. "SG") have no second piece.
# fill = "right" states explicitly that those rows get NA in second_position.
# The values are the same as with the default fill, but the default warns
# ("Expected 2 pieces. Missing pieces filled with `NA` in 24212 rows").
new6 <- separate(
  new5, Position,
  into = c("first_position", "second_position"),
  fill = "right"
)
new6[1:10, ]
## # A tibble: 10 x 53
## Year Player first_position second_position Age Tm G GS
## <int> <fct> <chr> <chr> <int> <fct> <int> <int>
## 1 1950 Curly Arm~ G F 31 FTW 63 NA
## 2 1950 Cliff Bar~ SG <NA> 29 INO 49 NA
## 3 1950 Leo Barnh~ SF <NA> 25 CHS 67 NA
## 4 1950 Ed Bartels F <NA> 24 TOT 15 NA
## 5 1950 Ed Bartels F <NA> 24 DNN 13 NA
## 6 1950 Ed Bartels F <NA> 24 NYK 2 NA
## 7 1950 Ralph Bea~ G <NA> 22 INO 60 NA
## 8 1950 Gene Berce G F 23 TRI 3 NA
## 9 1950 Charlie B~ F C 28 TOT 65 NA
## 10 1950 Charlie B~ F C 28 FTW 36 NA
## # ... with 45 more variables: MP <int>, PER <dbl>, TS. <dbl>, X3PAr <dbl>,
## # FTr <dbl>, ORB. <dbl>, DRB. <dbl>, TRB. <dbl>, AST. <dbl>, STL. <dbl>,
## # BLK. <dbl>, TOV. <dbl>, USG. <dbl>, blanl <lgl>, OWS <dbl>, DWS <dbl>,
## # WS <dbl>, WS.48 <dbl>, blank2 <lgl>, OBPM <dbl>, DBPM <dbl>,
## # BPM <dbl>, VORP <dbl>, FG <int>, FGA <int>, FG. <dbl>, X3P <int>,
## # X3PA <int>, X3P. <dbl>, X2P <int>, X2PA <int>, X2P. <dbl>, eFG. <dbl>,
## # FT <int>, FTA <int>, FT. <dbl>, ORB <int>, DRB <int>, TRB <int>,
## # AST <int>, STL <int>, BLK <int>, TOV <int>, PF <int>, PTS <int>
# Recombine the two position columns into a single "-"-joined column.
# unite() pastes a missing second position in as the text "NA" (turning a
# plain "SG" into "SG-NA"), and its na.rm argument is not available in the
# tidyr 0.8.0 attached at the top of this script. The column is therefore
# rebuilt by hand: rows without a second position keep their first position
# unchanged, all other rows are joined with "-". The result replaces
# first_position in place, which is where unite() would put the new column.
new7 <- new6 %>%
  mutate(
    first_position = if_else(
      is.na(second_position),
      first_position,
      paste(first_position, second_position, sep = "-")
    )
  ) %>%
  select(-second_position) %>%
  rename(position_united = first_position)
new7[1:10, ]
## # A tibble: 10 x 52
## Year Player position_united Age Tm G GS MP PER TS.
## <int> <fct> <chr> <int> <fct> <int> <int> <int> <dbl> <dbl>
## 1 1950 Curly ~ G-F 31 FTW 63 NA NA NA 0.368
## 2 1950 Cliff ~ SG 29 INO 49 NA NA NA 0.435
## 3 1950 Leo Ba~ SF 25 CHS 67 NA NA NA 0.394
## 4 1950 Ed Bar~ F 24 TOT 15 NA NA NA 0.312
## 5 1950 Ed Bar~ F 24 DNN 13 NA NA NA 0.308
## 6 1950 Ed Bar~ F 24 NYK 2 NA NA NA 0.376
## 7 1950 Ralph ~ G 22 INO 60 NA NA NA 0.422
## 8 1950 Gene B~ G-F 23 TRI 3 NA NA NA 0.275
## 9 1950 Charli~ F-C 28 TOT 65 NA NA NA 0.346
## 10 1950 Charli~ F-C 28 FTW 36 NA NA NA 0.362
## # ... with 42 more variables: X3PAr <dbl>, FTr <dbl>, ORB. <dbl>,
## # DRB. <dbl>, TRB. <dbl>, AST. <dbl>, STL. <dbl>, BLK. <dbl>,
## # TOV. <dbl>, USG. <dbl>, blanl <lgl>, OWS <dbl>, DWS <dbl>, WS <dbl>,
## # WS.48 <dbl>, blank2 <lgl>, OBPM <dbl>, DBPM <dbl>, BPM <dbl>,
## # VORP <dbl>, FG <int>, FGA <int>, FG. <dbl>, X3P <int>, X3PA <int>,
## # X3P. <dbl>, X2P <int>, X2PA <int>, X2P. <dbl>, eFG. <dbl>, FT <int>,
## # FTA <int>, FT. <dbl>, ORB <int>, DRB <int>, TRB <int>, AST <int>,
## # STL <int>, BLK <int>, TOV <int>, PF <int>, PTS <int>
# First subset: Year, Pos and every column from Tm through PTS
# (i.e. everything except Player and Age).
dataset1 <- select(data2_tibble, Year, Pos, Tm:PTS)
# Show the first ten rows.
dataset1[seq_len(10), ]
## # A tibble: 10 x 50
## Year Pos Tm G GS MP PER TS. X3PAr FTr ORB. DRB.
## <int> <fct> <fct> <int> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1950 G-F FTW 63 NA NA NA 0.368 NA 0.467 NA NA
## 2 1950 SG INO 49 NA NA NA 0.435 NA 0.387 NA NA
## 3 1950 SF CHS 67 NA NA NA 0.394 NA 0.259 NA NA
## 4 1950 F TOT 15 NA NA NA 0.312 NA 0.395 NA NA
## 5 1950 F DNN 13 NA NA NA 0.308 NA 0.378 NA NA
## 6 1950 F NYK 2 NA NA NA 0.376 NA 0.750 NA NA
## 7 1950 G INO 60 NA NA NA 0.422 NA 0.301 NA NA
## 8 1950 G-F TRI 3 NA NA NA 0.275 NA 0.313 NA NA
## 9 1950 F-C TOT 65 NA NA NA 0.346 NA 0.395 NA NA
## 10 1950 F-C FTW 36 NA NA NA 0.362 NA 0.480 NA NA
## # ... with 38 more variables: TRB. <dbl>, AST. <dbl>, STL. <dbl>,
## # BLK. <dbl>, TOV. <dbl>, USG. <dbl>, blanl <lgl>, OWS <dbl>, DWS <dbl>,
## # WS <dbl>, WS.48 <dbl>, blank2 <lgl>, OBPM <dbl>, DBPM <dbl>,
## # BPM <dbl>, VORP <dbl>, FG <int>, FGA <int>, FG. <dbl>, X3P <int>,
## # X3PA <int>, X3P. <dbl>, X2P <int>, X2PA <int>, X2P. <dbl>, eFG. <dbl>,
## # FT <int>, FTA <int>, FT. <dbl>, ORB <int>, DRB <int>, TRB <int>,
## # AST <int>, STL <int>, BLK <int>, TOV <int>, PF <int>, PTS <int>
# Second subset: only the Year, Player and Age columns.
dataset2 <- select(data2_tibble, Year, Player, Age)
# Show the first ten rows.
dataset2[seq_len(10), ]
## # A tibble: 10 x 3
## Year Player Age
## <int> <fct> <int>
## 1 1950 Curly Armstrong 31
## 2 1950 Cliff Barker 29
## 3 1950 Leo Barnhorst 25
## 4 1950 Ed Bartels 24
## 5 1950 Ed Bartels 24
## 6 1950 Ed Bartels 24
## 7 1950 Ralph Beard 22
## 8 1950 Gene Berce 23
## 9 1950 Charlie Black 28
## 10 1950 Charlie Black 28
# Sequential row id (1, 2, ..., number of rows). It is prepended to both
# subsets so that they can be joined back together on it further down.
# seq_len() also handles an empty table, where seq(1, 0, 1) would stop with
# "wrong sign in 'by' argument".
meigeid <- seq_len(nrow(dataset1))
dataset3 <- data.frame(meigeid, dataset1)
dataset3[1:10, ]
## meigeid Year Pos Tm G GS MP PER TS. X3PAr FTr ORB. DRB. TRB. AST.
## 1 1 1950 G-F FTW 63 NA NA NA 0.368 NA 0.467 NA NA NA NA
## 2 2 1950 SG INO 49 NA NA NA 0.435 NA 0.387 NA NA NA NA
## 3 3 1950 SF CHS 67 NA NA NA 0.394 NA 0.259 NA NA NA NA
## 4 4 1950 F TOT 15 NA NA NA 0.312 NA 0.395 NA NA NA NA
## 5 5 1950 F DNN 13 NA NA NA 0.308 NA 0.378 NA NA NA NA
## 6 6 1950 F NYK 2 NA NA NA 0.376 NA 0.750 NA NA NA NA
## 7 7 1950 G INO 60 NA NA NA 0.422 NA 0.301 NA NA NA NA
## 8 8 1950 G-F TRI 3 NA NA NA 0.275 NA 0.313 NA NA NA NA
## 9 9 1950 F-C TOT 65 NA NA NA 0.346 NA 0.395 NA NA NA NA
## 10 10 1950 F-C FTW 36 NA NA NA 0.362 NA 0.480 NA NA NA NA
## STL. BLK. TOV. USG. blanl OWS DWS WS WS.48 blank2 OBPM DBPM BPM
## 1 NA NA NA NA NA -0.1 3.6 3.5 NA NA NA NA NA
## 2 NA NA NA NA NA 1.6 0.6 2.2 NA NA NA NA NA
## 3 NA NA NA NA NA 0.9 2.8 3.6 NA NA NA NA NA
## 4 NA NA NA NA NA -0.5 -0.1 -0.6 NA NA NA NA NA
## 5 NA NA NA NA NA -0.5 -0.1 -0.6 NA NA NA NA NA
## 6 NA NA NA NA NA 0.0 0.0 0.0 NA NA NA NA NA
## 7 NA NA NA NA NA 3.6 1.2 4.8 NA NA NA NA NA
## 8 NA NA NA NA NA -0.1 0.0 -0.1 NA NA NA NA NA
## 9 NA NA NA NA NA -2.2 5.0 2.8 NA NA NA NA NA
## 10 NA NA NA NA NA -0.7 2.2 1.5 NA NA NA NA NA
## VORP FG FGA FG. X3P X3PA X3P. X2P X2PA X2P. eFG. FT FTA FT. ORB
## 1 NA 144 516 0.279 NA NA NA 144 516 0.279 0.279 170 241 0.705 NA
## 2 NA 102 274 0.372 NA NA NA 102 274 0.372 0.372 75 106 0.708 NA
## 3 NA 174 499 0.349 NA NA NA 174 499 0.349 0.349 90 129 0.698 NA
## 4 NA 22 86 0.256 NA NA NA 22 86 0.256 0.256 19 34 0.559 NA
## 5 NA 21 82 0.256 NA NA NA 21 82 0.256 0.256 17 31 0.548 NA
## 6 NA 1 4 0.250 NA NA NA 1 4 0.250 0.250 2 3 0.667 NA
## 7 NA 340 936 0.363 NA NA NA 340 936 0.363 0.363 215 282 0.762 NA
## 8 NA 5 16 0.313 NA NA NA 5 16 0.313 0.313 0 5 0.000 NA
## 9 NA 226 813 0.278 NA NA NA 226 813 0.278 0.278 209 321 0.651 NA
## 10 NA 125 435 0.287 NA NA NA 125 435 0.287 0.287 132 209 0.632 NA
## DRB TRB AST STL BLK TOV PF PTS
## 1 NA NA 176 NA NA NA 217 458
## 2 NA NA 109 NA NA NA 99 279
## 3 NA NA 140 NA NA NA 192 438
## 4 NA NA 20 NA NA NA 29 63
## 5 NA NA 20 NA NA NA 27 59
## 6 NA NA 0 NA NA NA 2 4
## 7 NA NA 233 NA NA NA 132 895
## 8 NA NA 2 NA NA NA 6 10
## 9 NA NA 163 NA NA NA 273 661
## 10 NA NA 75 NA NA NA 140 382
# Prepend the same row id to the Year/Player/Age subset.
dataset4 <- data.frame(meigeid = meigeid, dataset2)
# Show the first ten rows.
dataset4[seq_len(10), ]
## meigeid Year Player Age
## 1 1 1950 Curly Armstrong 31
## 2 2 1950 Cliff Barker 29
## 3 3 1950 Leo Barnhorst 25
## 4 4 1950 Ed Bartels 24
## 5 5 1950 Ed Bartels 24
## 6 6 1950 Ed Bartels 24
## 7 7 1950 Ralph Beard 22
## 8 8 1950 Gene Berce 23
## 9 9 1950 Charlie Black 28
## 10 10 1950 Charlie Black 28
# Join the two id-tagged subsets back together on the row id.
# Year exists in both inputs but is not part of the join key, so the result
# carries it twice, as Year.x (from dataset3) and Year.y (from dataset4).
datasetjoin <- dataset3 %>%
  left_join(dataset4, by = "meigeid")
# Show the first ten rows.
datasetjoin[seq_len(10), ]
## meigeid Year.x Pos Tm G GS MP PER TS. X3PAr FTr ORB. DRB. TRB.
## 1 1 1950 G-F FTW 63 NA NA NA 0.368 NA 0.467 NA NA NA
## 2 2 1950 SG INO 49 NA NA NA 0.435 NA 0.387 NA NA NA
## 3 3 1950 SF CHS 67 NA NA NA 0.394 NA 0.259 NA NA NA
## 4 4 1950 F TOT 15 NA NA NA 0.312 NA 0.395 NA NA NA
## 5 5 1950 F DNN 13 NA NA NA 0.308 NA 0.378 NA NA NA
## 6 6 1950 F NYK 2 NA NA NA 0.376 NA 0.750 NA NA NA
## 7 7 1950 G INO 60 NA NA NA 0.422 NA 0.301 NA NA NA
## 8 8 1950 G-F TRI 3 NA NA NA 0.275 NA 0.313 NA NA NA
## 9 9 1950 F-C TOT 65 NA NA NA 0.346 NA 0.395 NA NA NA
## 10 10 1950 F-C FTW 36 NA NA NA 0.362 NA 0.480 NA NA NA
## AST. STL. BLK. TOV. USG. blanl OWS DWS WS WS.48 blank2 OBPM DBPM
## 1 NA NA NA NA NA NA -0.1 3.6 3.5 NA NA NA NA
## 2 NA NA NA NA NA NA 1.6 0.6 2.2 NA NA NA NA
## 3 NA NA NA NA NA NA 0.9 2.8 3.6 NA NA NA NA
## 4 NA NA NA NA NA NA -0.5 -0.1 -0.6 NA NA NA NA
## 5 NA NA NA NA NA NA -0.5 -0.1 -0.6 NA NA NA NA
## 6 NA NA NA NA NA NA 0.0 0.0 0.0 NA NA NA NA
## 7 NA NA NA NA NA NA 3.6 1.2 4.8 NA NA NA NA
## 8 NA NA NA NA NA NA -0.1 0.0 -0.1 NA NA NA NA
## 9 NA NA NA NA NA NA -2.2 5.0 2.8 NA NA NA NA
## 10 NA NA NA NA NA NA -0.7 2.2 1.5 NA NA NA NA
## BPM VORP FG FGA FG. X3P X3PA X3P. X2P X2PA X2P. eFG. FT FTA FT.
## 1 NA NA 144 516 0.279 NA NA NA 144 516 0.279 0.279 170 241 0.705
## 2 NA NA 102 274 0.372 NA NA NA 102 274 0.372 0.372 75 106 0.708
## 3 NA NA 174 499 0.349 NA NA NA 174 499 0.349 0.349 90 129 0.698
## 4 NA NA 22 86 0.256 NA NA NA 22 86 0.256 0.256 19 34 0.559
## 5 NA NA 21 82 0.256 NA NA NA 21 82 0.256 0.256 17 31 0.548
## 6 NA NA 1 4 0.250 NA NA NA 1 4 0.250 0.250 2 3 0.667
## 7 NA NA 340 936 0.363 NA NA NA 340 936 0.363 0.363 215 282 0.762
## 8 NA NA 5 16 0.313 NA NA NA 5 16 0.313 0.313 0 5 0.000
## 9 NA NA 226 813 0.278 NA NA NA 226 813 0.278 0.278 209 321 0.651
## 10 NA NA 125 435 0.287 NA NA NA 125 435 0.287 0.287 132 209 0.632
## ORB DRB TRB AST STL BLK TOV PF PTS Year.y Player Age
## 1 NA NA NA 176 NA NA NA 217 458 1950 Curly Armstrong 31
## 2 NA NA NA 109 NA NA NA 99 279 1950 Cliff Barker 29
## 3 NA NA NA 140 NA NA NA 192 438 1950 Leo Barnhorst 25
## 4 NA NA NA 20 NA NA NA 29 63 1950 Ed Bartels 24
## 5 NA NA NA 20 NA NA NA 27 59 1950 Ed Bartels 24
## 6 NA NA NA 0 NA NA NA 2 4 1950 Ed Bartels 24
## 7 NA NA NA 233 NA NA NA 132 895 1950 Ralph Beard 22
## 8 NA NA NA 2 NA NA NA 6 10 1950 Gene Berce 23
## 9 NA NA NA 163 NA NA NA 273 661 1950 Charlie Black 28
## 10 NA NA NA 75 NA NA NA 140 382 1950 Charlie Black 28
# Restrict the table to 1995 and group it by Year and Tm.
dataset1995 <- data2_tibble %>%
  filter(Year == 1995)
datasetgroup <- dataset1995 %>%
  group_by(Year, Tm)
# Show the first ten rows of the grouped table.
datasetgroup[seq_len(10), ]
## # A tibble: 10 x 52
## # Groups: Year, Tm [9]
## Year Player Pos Age Tm G GS MP PER TS. X3PAr
## <int> <fct> <fct> <int> <fct> <int> <int> <int> <dbl> <dbl> <dbl>
## 1 1995 Alaa Ab~ PF 26 TOT 54 0 506 12.6 0.519 0.00900
## 2 1995 Alaa Ab~ PF 26 SAC 51 0 476 14.1 0.540 0.00900
## 3 1995 Alaa Ab~ PF 26 PHI 3 0 30 -12.5 0.0910 0
## 4 1995 Mahmoud~ PG 25 DEN 73 43 2082 17.8 0.543 0.214
## 5 1995 Michael~ PG 32 CHH 29 0 443 17.7 0.583 0.547
## 6 1995 Rafael ~ SF 30 DET 79 16 1776 11.3 0.521 0.142
## 7 1995 Danny A~ SG 35 PHO 74 1 1374 14.3 0.596 0.507
## 8 1995 Victor ~ C 25 GSW 50 29 1237 12.8 0.530 0.0560
## 9 1995 Derrick~ C 22 PHI 64 1 1032 11.6 0.481 0.0160
## 10 1995 Greg An~ PF 30 ATL 51 0 622 12.6 0.547 0
## # ... with 41 more variables: FTr <dbl>, ORB. <dbl>, DRB. <dbl>,
## # TRB. <dbl>, AST. <dbl>, STL. <dbl>, BLK. <dbl>, TOV. <dbl>,
## # USG. <dbl>, blanl <lgl>, OWS <dbl>, DWS <dbl>, WS <dbl>, WS.48 <dbl>,
## # blank2 <lgl>, OBPM <dbl>, DBPM <dbl>, BPM <dbl>, VORP <dbl>, FG <int>,
## # FGA <int>, FG. <dbl>, X3P <int>, X3PA <int>, X3P. <dbl>, X2P <int>,
## # X2PA <int>, X2P. <dbl>, eFG. <dbl>, FT <int>, FTA <int>, FT. <dbl>,
## # ORB <int>, DRB <int>, TRB <int>, AST <int>, STL <int>, BLK <int>,
## # TOV <int>, PF <int>, PTS <int>
# Mean PTS per Year/Tm group, ignoring missing values, stored as yearavg.
# summarise() removes only the innermost grouping level (Tm); the result is
# still grouped by Year.
datasetsum <- datasetgroup %>%
  summarise(yearavg = mean(PTS, na.rm = TRUE))
# Order from the highest mean to the lowest.
datasetfinal <- datasetsum %>%
  arrange(desc(yearavg))
# Show the ten highest means.
datasetfinal[seq_len(10), ]
## # A tibble: 10 x 3
## # Groups: Year [1]
## Year Tm yearavg
## <int> <fct> <dbl>
## 1 1995 SEA 647
## 2 1995 ORL 606
## 3 1995 PHO 605
## 4 1995 DAL 604
## 5 1995 MIL 582
## 6 1995 UTA 582
## 7 1995 MIA 553
## 8 1995 SAS 546
## 9 1995 IND 542
## 10 1995 LAL 538
# Reshape to wide form: one column per Tm value, holding that Tm's yearavg.
datasetspread <- datasetfinal %>%
  spread(key = Tm, value = yearavg)
datasetspread
## # A tibble: 1 x 29
## # Groups: Year [1]
## Year ATL BOS CHH CHI CLE DAL DEN DET GSW HOU IND
## <int> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1995 440 496 516 520 494 604 489 503 482 499 542
## # ... with 17 more variables: LAC <dbl>, LAL <dbl>, MIA <dbl>, MIL <dbl>,
## # MIN <dbl>, NJN <dbl>, NYK <dbl>, ORL <dbl>, PHI <dbl>, PHO <dbl>,
## # POR <dbl>, SAC <dbl>, SAS <dbl>, SEA <dbl>, TOT <dbl>, UTA <dbl>,
## # WSB <dbl>
# Reshape the wide table back to long form: one row per Year/Tm pair with
# its yearavg. Every column except Year is a Tm column, so the columns are
# selected with -Year rather than by spelling out each team code. The
# selection therefore stays correct if the set of teams changes.
datasetgather <- datasetspread %>%
  gather(key = Tm, value = yearavg, -Year)
datasetgather[1:10, ]
## # A tibble: 10 x 3
## # Groups: Year [1]
## Year Tm yearavg
## <int> <chr> <dbl>
## 1 1995 ATL 440
## 2 1995 BOS 496
## 3 1995 CHH 516
## 4 1995 CHI 520
## 5 1995 CLE 494
## 6 1995 DAL 604
## 7 1995 DEN 489
## 8 1995 DET 503
## 9 1995 GSW 482
## 10 1995 HOU 499
# Link to the R Markdown report: http://rpubs.com/Junyu/357505