####進階篇####
# Load the NBA 2019-20 data from NBA1920.RData (relative path, so the file
# must be in the current working directory).
# Presumably this creates the NBA1920 data frame used below -- TODO confirm.
load("NBA1920.RData")
# Attach ggplot2
library(ggplot2)
# Attach the tidyverse packages (provides %>%, select(), filter(), ...)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.1.0     ✓ dplyr   1.0.5
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
# select(): choose the columns to analyse (column subset)
NBA1920_1 <- NBA1920 %>%
  select("Name", "Team", "Position", "GamesPlayed", "TotalPoints")
# Inspect the first 5 rows (the last 5 rows are shown further below)
head(NBA1920_1, n = 5)
##               Name Team Position GamesPlayed TotalPoints
## 1     Steven Adams  OKL        C          63         684
## 2      Bam Adebayo  MIA       PF          72        1146
## 3 Lamarcu Aldridge  SAN        C          53        1001
## 4   Kyle Alexander  MIA       PF           2           2
## 5  Alexander-walke  NOR       SG          47         267
# Inspect the last 5 rows
tail(NBA1920_1, n = 5)
##             Name Team Position GamesPlayed TotalPoints
## 525   Trae Young  ATL       PG          60        1778
## 526  Cody Zeller  CHA        C          58         642
## 527 Tyler Zeller  SAN        C           1           2
## 528   Ante Zizic  CLE        C          22          96
## 529  Ivica Zubac  LAC        C          72         596
# filter(): choose the observations to analyse (row subset)
# Keep power forwards only, order them by total points from highest to
# lowest, and retain the first 10 rows.
NBA1920_2 <- NBA1920_1 %>%
  filter(Position == "PF") %>%
  arrange(desc(TotalPoints)) %>%
  slice(1:10)

# Print the result
NBA1920_2
##               Name Team Position GamesPlayed TotalPoints
## 1  G Antetokounmpo  MIL       PF          63        1857
## 2    Anthony Davis  LAL       PF          62        1618
## 3    Tobias Harris  PHI       PF          72        1410
## 4    Pascal Siakam  TOR       PF          60        1371
## 5  Krist Porzingis  DAL       PF          57        1164
## 6  Danil Gallinari  OKL       PF          62        1160
## 7      Bam Adebayo  MIA       PF          72        1146
## 8    Marcus Morris  CLI       PF          62        1033
## 9       Kevin Love  CLE       PF          56         987
## 10 Carmelo Anthony  POR       PF          58         895
# List every player (Name) of one team together with the on-court Position,
# ordered by Position. The team shown here is "NYK".
NBA1920 %>%
  filter(Team == "NYK") %>%
  select(Team, Position, Name) %>%
  arrange(Position)
##    Team Position            Name
## 1   NYK        C    Bobby Portis
## 2   NYK        C   Julius Randle
## 3   NYK        C Mitche Robinson
## 4   NYK       PF      Taj Gibson
## 5   NYK       PG   Elfrid Payton
## 6   NYK       PG    Dennis Smith
## 7   NYK       SF      Rj Barrett
## 8   NYK       SF Igna Brazdeikis
## 9   NYK       SF  Reggie Bullock
## 10  NYK       SF    Moe Harkless
## 11  NYK       SF      Kevin Knox
## 12  NYK       SG    Kadeem Allen
## 13  NYK       SG  Damyean Dotson
## 14  NYK       SG Wayne Ellington
## 15  NYK       SG Frank Ntilikina
## 16  NYK       SG   Allonzo Trier
# Select the 10 centers (Position "C") with the highest GamesPlayed.
# Fix: the filter previously tested for "PF", copied from the power-forward
# example earlier in the script, which contradicted the task stated here.
# Note: this reassigns NBA1920_2, replacing the earlier top-scorers table.
NBA1920_2 <- NBA1920_1 %>%
  filter(Position == "C") %>%
  arrange(desc(GamesPlayed)) %>% # desc() orders from largest to smallest
  slice(1:10) # keep the first 10 rows

# mutate(): add a new column
# Start with the five-number summary and mean of GamesPlayed.
summary(NBA1920[["GamesPlayed"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   21.00   48.00   42.18   62.00   74.00
# Add AttendanceRate = GamesPlayed / 82, then keep Team, Name and the new
# column for the 10 players with the highest rate.
# NOTE(review): 82 is hard-coded, presumably the length of a full regular
# season; the summary of GamesPlayed shows a maximum of 74 -- confirm that
# 82 is the intended denominator for this data.
NBA1920_3 <- NBA1920 %>%
  mutate(AttendanceRate = GamesPlayed / 82) %>%
  arrange(desc(AttendanceRate)) %>%
  slice(1:10) %>%
  select(Team, Name, AttendanceRate)
# Print the result
NBA1920_3
##    Team            Name AttendanceRate
## 1   DAL     Maxi Kleber      0.9024390
## 2   PHO   Mikal Bridges      0.8902439
## 3   MEM   Dillon Brooks      0.8902439
## 4   IND  Justin Holiday      0.8902439
## 5   DEN    Nikola Jokic      0.8902439
## 6   DEN    Monte Morris      0.8902439
## 7   MIA Duncan Robinson      0.8902439
## 8   DAL    Delon Wright      0.8902439
## 9   MIA     Bam Adebayo      0.8780488
## 10  SAC Harrison Barnes      0.8780488
# Add the free-throw percentage column FreeThrowsRate
# (FreeThrowsMade / FreeThrowsAttempted) and pick 10 players.
# NOTE(review): the original task text asked for the "top 10" players, but
# the rows are sorted in ascending order, so the first 10 rows are the 10
# LOWEST rates, and the final filter keeps only rates <= 0.5. Confirm which
# ranking is intended before changing the sort direction.
NBA1920_4 <- NBA1920 %>%
  mutate(FreeThrowsRate = FreeThrowsMade / FreeThrowsAttempted) %>%
  arrange(FreeThrowsRate) %>%
  slice(1:10) %>%
  select(Team, Name, FreeThrowsRate) %>%
  filter(FreeThrowsRate <= 0.5)
# Rows and columns of the result
dim(NBA1920_4)
## [1] 10  3
# Frequency table of a categorical variable using tidyverse syntax
#   summarise(): compute summary statistics
#   group_by():  define the grouping variable(s)
# Note: NBA1920_4 is reassigned here.

# Number of players per position
NBA1920_4 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(人數 = n())

# Total rebounds per position
NBA1920_5 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(總籃板 = sum(TotalRebounds))

# Mean field-goal attempts per position
NBA1920_6 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(平均FieldGoalsAttempted = mean(FieldGoalsAttempted))

# More concise version: compute all three per-position statistics in a
# single summarise() call.
NBA1920_7 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(
    counts = n(),
    TotalRebounds = sum(TotalRebounds),
    平均FieldGoalsAttempted = mean(FieldGoalsAttempted)
  )

# Exercise: add the three-point percentage (ThreesMade / ThreesAttempted)
# and keep the 10 players with the highest value.
# top_n() selects rows by rank but does not sort them, so the result keeps
# the row order of NBA1920.
NBA1920_three <- NBA1920 %>%
  mutate(三分球命中率 = ThreesMade / ThreesAttempted) %>%
  top_n(10, 三分球命中率) %>%
  select(Team, Name, 三分球命中率)

# Print the result
NBA1920_three
##    Team             Name 三分球命中率
## 1   UTA     Tony Bradley    1.0000000
## 2   SAN     Drew Eubanks    1.0000000
## 3   MEM    Dusty Hannahs    0.6666667
## 4   LAL    Dwight Howard    0.6000000
## 5   SAN   Keldon Johnson    0.5909091
## 6   CHA     Caleb Martin    0.5405405
## 7   LAC Johnathan Motley    1.0000000
## 8   PHO    Cameron Payne    0.5172414
## 9   WAS  Justin Robinson    0.6000000
## 10  POR  Hassa Whiteside    0.5714286
# Team analysis: for every team compute the total number of personal fouls
# (sum of PersonalFouls) and the mean of Turnovers across its players, then
# order the teams by mean turnovers from highest to lowest.
NBA_team <- NBA1920 %>%
  group_by(Team) %>%
  summarise(
    PersonalFouls = sum(PersonalFouls),
    Avg_TurnOver = mean(Turnovers)
  ) %>%
  arrange(desc(Avg_TurnOver))