####進階篇####
# Restore the objects saved in NBA1920.RData into the workspace; the rest of
# the script reads the NBA1920 data frame that this is expected to provide.
load("NBA1920.RData")
#呼叫ggplot2套件
library(ggplot2)
#呼叫tidyverse套件
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.1.0     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# select(): keep only the columns needed for the analysis (column subset).
NBA1920_1 <- NBA1920 %>%
  select(Name, Team, Position, GamesPlayed, TotalPoints)
# Inspect the first five and the last five rows.
head(NBA1920_1, n = 5) # first five rows
##               Name Team Position GamesPlayed TotalPoints
## 1     Steven Adams  OKL        C          63         684
## 2      Bam Adebayo  MIA       PF          72        1146
## 3 Lamarcu Aldridge  SAN        C          53        1001
## 4   Kyle Alexander  MIA       PF           2           2
## 5  Alexander-walke  NOR       SG          47         267
tail(NBA1920_1, n = 5) # last five rows
##             Name Team Position GamesPlayed TotalPoints
## 525   Trae Young  ATL       PG          60        1778
## 526  Cody Zeller  CHA        C          58         642
## 527 Tyler Zeller  SAN        C           1           2
## 528   Ante Zizic  CLE        C          22          96
## 529  Ivica Zubac  LAC        C          72         596
# filter(): keep only the observations of interest (row subset).
# Here: the ten power forwards (Position "PF") with the most total points.
NBA1920_2 <- NBA1920_1 %>%
  filter(Position == "PF") %>%     # keep power forwards only
  arrange(desc(TotalPoints)) %>%   # sort descending by total points
  slice_head(n = 10)               # first ten rows after sorting
# List every player (Name) of one team together with the Position played,
# ordered by Position. The team selected here is "MIL".
NBA1920 %>%
  select(Team, Position, Name) %>%
  filter(Team == "MIL") %>%
  arrange(Position)
##    Team Position            Name
## 1   MIL        C     Brook Lopez
## 2   MIL        C     Robin Lopez
## 3   MIL       PF G Antetokounmpo
## 4   MIL       PF  Ersan Ilyasova
## 5   MIL       PF Marvin Williams
## 6   MIL       PF     D.j. Wilson
## 7   MIL       PG    Eric Bledsoe
## 8   MIL       SF T Antetokounmpo
## 9   MIL       SF Pat Connaughton
## 10  MIL       SF     Kyle Korver
## 11  MIL       SF Wesley Matthews
## 12  MIL       SF Khris Middleton
## 13  MIL       SG  Sterling Brown
## 14  MIL       SG Dont Divincenzo
## 15  MIL       SG     George Hill
## 16  MIL       SG     Frank Mason
# The ten centers (Position "C") with the most games played.
NBA1920 %>%
  select(Name, Position, GamesPlayed) %>%
  filter(Position == "C") %>%
  arrange(desc(GamesPlayed)) %>%   # most games first; ties keep data order
  slice_head(n = 10)
##               Name Position GamesPlayed
## 1     Nikola Jokic        C          73
## 2      Ivica Zubac        C          72
## 3  Jon Valanciunas        C          70
## 4    Dwight Howard        C          69
## 5      Rudy Gobert        C          68
## 6      Brook Lopez        C          68
## 7     Javale Mcgee        C          68
## 8     Kelly Olynyk        C          67
## 9  Hassa Whiteside        C          67
## 10     Robin Lopez        C          66
# mutate(): add a new column.
# First look at the distribution of games played.
summary(NBA1920[["GamesPlayed"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   21.00   48.00   42.18   62.00   74.00
# AttendanceRate is GamesPlayed divided by 82; keep the ten highest rates
# and show only the player, the team and the new column.
NBA1920_3 <- NBA1920 %>%
  mutate(AttendanceRate = GamesPlayed / 82) %>%
  select(Name, Team, AttendanceRate) %>%
  arrange(desc(AttendanceRate)) %>%
  slice_head(n = 10)
NBA1920_3
##               Name Team AttendanceRate
## 1      Maxi Kleber  DAL      0.9024390
## 2    Mikal Bridges  PHO      0.8902439
## 3    Dillon Brooks  MEM      0.8902439
## 4   Justin Holiday  IND      0.8902439
## 5     Nikola Jokic  DEN      0.8902439
## 6     Monte Morris  DEN      0.8902439
## 7  Duncan Robinson  MIA      0.8902439
## 8     Delon Wright  DAL      0.8902439
## 9      Bam Adebayo  MIA      0.8780488
## 10 Harrison Barnes  SAC      0.8780488
# Add FreeThrowsRate (FreeThrowsMade / FreeThrowsAttempted) and list the
# players whose rate is at most 0.5, lowest rate first.
# NOTE(review): this exercise was originally titled "pick the top 10 players",
# but the code keeps everyone at or below the 0.5 threshold -- confirm intent.
NBA1920_31 <- NBA1920 %>%
  mutate(FreeThrowsRate = FreeThrowsMade / FreeThrowsAttempted) %>%
  filter(FreeThrowsRate <= 0.5) %>%   # upper threshold on the rate
  arrange(FreeThrowsRate) %>%
  select(Name, Team, FreeThrowsRate)
dim(NBA1920_31)
## [1] 41  3
NBA1920_31
##                Name Team FreeThrowsRate
## 1        Louis King  DET      0.0000000
## 2      Jeremy Pargo  GSW      0.0000000
## 3         Dean Wade  CLE      0.0000000
## 4       Cory Brewer  SAC      0.3333333
## 5        Tacko Fall  BOS      0.3333333
## 6           Vic Law  ORL      0.3333333
## 7   Kenric Williams  NOR      0.3461538
## 8   Amile Jefferson  ORL      0.3571429
## 9       Moses Brown  POR      0.3750000
## 10   Malcolm Miller  TOR      0.3750000
## 11  Thabo Sefolosha  HOU      0.3750000
## 12    Yuta Watanabe  MEM      0.3750000
## 13   Andre Iguodala  MIA      0.4000000
## 14  T Antetokounmpo  MIL      0.4117647
## 15          Tj Leaf  IND      0.4375000
## 16   Tyson Chandler  HOU      0.4615385
## 17   Jarrett Culver  MIN      0.4615385
## 18     Jakob Poeltl  SAN      0.4651163
## 19     Justin James  SAC      0.4761905
## 20  Justin Anderson  BRO      0.5000000
## 21  K Antetokounmpo  LAL      0.5000000
## 22     Jonah Bolden  PHO      0.5000000
## 23  Jarrel Brantley  UTA      0.5000000
## 24         Ed Davis  UTA      0.5000000
## 25   Melvin Frazier  ORL      0.5000000
## 26       Devon Hall  OKL      0.5000000
## 27       Donta Hall  BRO      0.5000000
## 28      John Henson  DET      0.5000000
## 29  T Horton-tucker  LAL      0.5000000
## 30     John Konchar  MEM      0.5000000
## 31     Josh Magette  ORL      0.5000000
## 32 Luc Mbah_a_moute  HOU      0.5000000
## 33      Adam Mokoka  CHI      0.5000000
## 34        Kz Okpala  MIA      0.5000000
## 35   Garyiii Payton  WAS      0.5000000
## 36     Norvel Pelle  PHI      0.5000000
## 37   Andre Roberson  OKL      0.5000000
## 38     Zhaire Smith  PHI      0.5000000
## 39     Khyri Thomas  DET      0.5000000
## 40  Sinda Thornwell  NOR      0.5000000
## 41  Qu Weatherspoon  SAN      0.5000000
# Frequency table for a categorical variable, written the tidyverse way.
#  summarise(): compute summary statistics
#  group_by(): define the grouping
# One summary per table, each grouped by Position:
#   NBA1920_4 - number of rows per position (column 人數, "head count")
#   NBA1920_5 - sum of TotalRebounds (column 總籃板, "total rebounds")
#   NBA1920_6 - mean of FieldGoalsAttempted
NBA1920_4 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(人數 = n())
NBA1920_5 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(總籃板 = sum(TotalRebounds))
NBA1920_6 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(AvgFieldGoalsAttempted = mean(FieldGoalsAttempted))
NBA1920_4
## # A tibble: 5 x 2
##   Position  人數
##   <fct>    <int>
## 1 C           84
## 2 PF         108
## 3 PG         112
## 4 SF         116
## 5 SG         109
NBA1920_5
## # A tibble: 5 x 2
##   Position 總籃板
##   <fct>     <int>
## 1 C         24594
## 2 PF        22549
## 3 PG        15005
## 4 SF        19312
## 5 SG        13519
NBA1920_6
## # A tibble: 5 x 2
##   Position AvgFieldGoalsAttempted
##   <fct>                     <dbl>
## 1 C                          326.
## 2 PF                         315.
## 3 PG                         412.
## 4 SF                         342.
## 5 SG                         376.
# Compact version: all three per-position summaries in one summarise() call.
# summarise() evaluates its arguments in order, and a summary that reuses an
# existing column name (TotalRebounds) replaces that column for the arguments
# that follow. The average must therefore be taken over FieldGoalsAttempted;
# taking mean(TotalRebounds) here would just repeat the per-position sum.
NBA1920_7 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(
    counts = n(),
    TotalRebounds = sum(TotalRebounds),
    AvgFieldGoalsAttempted = mean(FieldGoalsAttempted)
  )
NBA1920_7
## # A tibble: 5 x 4
##   Position counts TotalRebounds AvgFieldGoalsAttempted
##   <fct>     <int>         <int>                  <dbl>
## 1 C            84         24594                   326.
## 2 PF          108         22549                   315.
## 3 PG          112         15005                   412.
## 4 SF          116         19312                   342.
## 5 SG          109         13519                   376.
# Exercise: add the three-point percentage (ThreesMade / ThreesAttempted,
# stored in column 三分球命中率) and keep the ten highest values.
# top_n() leaves the selected rows in their original order, so the result is
# not sorted by percentage.
NBA1920_three <- NBA1920 %>%
  mutate(三分球命中率 = ThreesMade / ThreesAttempted) %>%
  select(Name, Team, 三分球命中率) %>%
  top_n(10, 三分球命中率)
NBA1920_three
##                Name Team 三分球命中率
## 1      Tony Bradley  UTA    1.0000000
## 2      Drew Eubanks  SAN    1.0000000
## 3     Dusty Hannahs  MEM    0.6666667
## 4     Dwight Howard  LAL    0.6000000
## 5    Keldon Johnson  SAN    0.5909091
## 6      Caleb Martin  CHA    0.5405405
## 7  Johnathan Motley  LAC    1.0000000
## 8     Cameron Payne  PHO    0.5172414
## 9   Justin Robinson  WAS    0.6000000
## 10  Hassa Whiteside  POR    0.5714286
# Team analysis: total personal fouls per team (sum(PersonalFouls)) and mean
# turnovers per team (mean(Turnovers)).
# Total personal fouls per team. The result column 各球隊總犯規次數 means
# "total fouls per team"; a stray trailing character copied from the rebounds
# label was removed from the name.
NBA1920 %>%
  group_by(Team) %>%
  summarise(各球隊總犯規次數 = sum(PersonalFouls))
## # A tibble: 32 x 2
##    Team  各球隊總犯規次數
##    <fct>            <int>
##  1 ATL               1575
##  2 BOS               1550
##  3 BRO               1574
##  4 CHA               1134
##  5 CHI               1417
##  6 CLE               1260
##  7 CLI                180
##  8 DAL               1602
##  9 DEN               1499
## 10 DET               1125
## # … with 22 more rows
# Mean of the Turnovers column within each team.
NBA1920 %>%
  group_by(Team) %>%
  summarise(AVG = mean(Turnovers))
## # A tibble: 32 x 2
##    Team    AVG
##    <fct> <dbl>
##  1 ATL    60.6
##  2 BOS    55.7
##  3 BRO    45.1
##  4 CHA    62.1
##  5 CHI    55.9
##  6 CLE    66.5
##  7 CLI    97  
##  8 DAL    50.9
##  9 DEN    58.3
## 10 DET    45.1
## # … with 22 more rows
# Both team-level summaries in one table: total personal fouls and mean
# turnovers per team. AVG reads Turnovers, so it is unaffected by the
# PersonalFouls summary reusing an existing column name.
NBA_Team <- NBA1920 %>%
  group_by(Team) %>%
  summarise(
    PersonalFouls = sum(PersonalFouls),
    AVG = mean(Turnovers)
  )
NBA_Team
## # A tibble: 32 x 3
##    Team  PersonalFouls   AVG
##    <fct>         <int> <dbl>
##  1 ATL            1575  60.6
##  2 BOS            1550  55.7
##  3 BRO            1574  45.1
##  4 CHA            1134  62.1
##  5 CHI            1417  55.9
##  6 CLE            1260  66.5
##  7 CLI             180  97  
##  8 DAL            1602  50.9
##  9 DEN            1499  58.3
## 10 DET            1125  45.1
## # … with 22 more rows