####進階篇####
# Load the NBA 2019-20 data from the saved workspace file.
# The rest of the script expects this to provide a data frame named `NBA1920`.
load("NBA1920.RData")
#呼叫ggplot2套件
library(ggplot2)
#呼叫tidyverse套件
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# select(): keep only the columns needed for the analysis (column subset)
NBA1920_1 <- NBA1920 %>%
  select(Name, Team, Position, GamesPlayed, TotalPoints)
# Inspect the first 5 rows of the column subset
head(NBA1920_1, n = 5)
## Name Team Position GamesPlayed TotalPoints
## 1 Steven Adams OKL C 63 684
## 2 Bam Adebayo MIA PF 72 1146
## 3 Lamarcu Aldridge SAN C 53 1001
## 4 Kyle Alexander MIA PF 2 2
## 5 Alexander-walke NOR SG 47 267
# Inspect the last 5 rows of the column subset
tail(NBA1920_1, n = 5)
## Name Team Position GamesPlayed TotalPoints
## 525 Trae Young ATL PG 60 1778
## 526 Cody Zeller CHA C 58 642
## 527 Tyler Zeller SAN C 1 2
## 528 Ante Zizic CLE C 22 96
## 529 Ivica Zubac LAC C 72 596
# filter(): keep only the observations of interest (row subset)
# Here: the 10 power forwards (PF) with the most total points, highest first.
NBA1920_2 <- NBA1920_1 %>%
  filter(Position == "PF") %>%
  arrange(desc(TotalPoints)) %>% # desc() sorts from largest to smallest
  slice_head(n = 10)             # keep the first 10 rows after sorting
NBA1920_2
## Name Team Position GamesPlayed TotalPoints
## 1 G Antetokounmpo MIL PF 63 1857
## 2 Anthony Davis LAL PF 62 1618
## 3 Tobias Harris PHI PF 72 1410
## 4 Pascal Siakam TOR PF 60 1371
## 5 Krist Porzingis DAL PF 57 1164
## 6 Danil Gallinari OKL PF 62 1160
## 7 Bam Adebayo MIA PF 72 1146
## 8 Marcus Morris CLI PF 62 1033
## 9 Kevin Love CLE PF 56 987
## 10 Carmelo Anthony POR PF 58 895
# List every player (Name) and court position (Position) for one team,
# ordered by position. The team shown here is "NYK".
NBA1920 %>%
  select(Team, Position, Name) %>%
  filter(Team == "NYK") %>%
  arrange(Position)
## Team Position Name
## 1 NYK C Bobby Portis
## 2 NYK C Julius Randle
## 3 NYK C Mitche Robinson
## 4 NYK PF Taj Gibson
## 5 NYK PG Elfrid Payton
## 6 NYK PG Dennis Smith
## 7 NYK SF Rj Barrett
## 8 NYK SF Igna Brazdeikis
## 9 NYK SF Reggie Bullock
## 10 NYK SF Moe Harkless
## 11 NYK SF Kevin Knox
## 12 NYK SG Kadeem Allen
## 13 NYK SG Damyean Dotson
## 14 NYK SG Wayne Ellington
## 15 NYK SG Frank Ntilikina
## 16 NYK SG Allonzo Trier
# Select the 10 centers (Position "C") with the most games played.
# Fix: the filter previously used "PF" (copied from the power-forward example
# above), which contradicted the stated goal of ranking centers.
# Note: this reuses the name NBA1920_2 and so overwrites the earlier result.
NBA1920_2 <- NBA1920_1 %>%
  filter(Position == "C") %>%
  arrange(desc(GamesPlayed)) %>% # desc() sorts from largest to smallest
  slice_head(n = 10)             # keep the first 10 rows after sorting
# mutate(): 增加新欄位
# Distribution summary (min, quartiles, mean, max) of games played
summary(NBA1920[["GamesPlayed"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 21.00 48.00 42.18 62.00 74.00
# mutate(): add AttendanceRate = GamesPlayed / 82, then keep the 10 players
# with the highest rate (ties keep their original row order).
# NOTE(review): the summary above shows a maximum GamesPlayed of 74, so no
# player can reach a rate of 1.0 -- confirm 82 is the intended denominator.
NBA1920_3 <- NBA1920 %>%
  mutate(AttendanceRate = GamesPlayed / 82) %>%
  select(Team, Name, AttendanceRate) %>%
  arrange(desc(AttendanceRate)) %>%
  slice_head(n = 10)
NBA1920_3
## Team Name AttendanceRate
## 1 DAL Maxi Kleber 0.9024390
## 2 PHO Mikal Bridges 0.8902439
## 3 MEM Dillon Brooks 0.8902439
## 4 IND Justin Holiday 0.8902439
## 5 DEN Nikola Jokic 0.8902439
## 6 DEN Monte Morris 0.8902439
## 7 MIA Duncan Robinson 0.8902439
## 8 DAL Delon Wright 0.8902439
## 9 MIA Bam Adebayo 0.8780488
## 10 SAC Harrison Barnes 0.8780488
# Add a free-throw percentage column (FreeThrowsMade / FreeThrowsAttempted),
# sort it ascending and keep the first 10 rows, i.e. the 10 LOWEST rates.
# Of those 10, only rows with a rate of at most 0.5 are retained.
# NOTE(review): the original comment asked for the "top 10" players, but the
# ascending sort selects the bottom 10 -- confirm which ranking is intended.
# Any player with zero attempts would get NaN (0 / 0); arrange() places
# missing values last and filter() drops them.
NBA1920_4 <- NBA1920 %>%
  mutate(FreeThrowsRate = FreeThrowsMade / FreeThrowsAttempted) %>%
  select(Team, Name, FreeThrowsRate) %>%
  arrange(FreeThrowsRate) %>%
  slice_head(n = 10) %>%
  filter(FreeThrowsRate <= 0.5)
# Check the size of the result (rows, columns)
dim(NBA1920_4)
## [1] 10 3
# Frequency tables and per-group statistics for a categorical variable,
# written with tidyverse verbs:
#   group_by():  declares the grouping variable
#   summarise(): computes one statistic per group
# Note: NBA1920_4 is reassigned here and replaces the free-throw result above.

# Number of players at each position
NBA1920_4 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(人數 = n())

# Total rebounds at each position
NBA1920_5 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(總籃板 = sum(TotalRebounds))

# Mean field-goal attempts at each position
NBA1920_6 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(平均FieldGoalsAttempted = mean(FieldGoalsAttempted))
# More concise: compute all three per-position statistics in one summarise()
NBA1920_7 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(
    counts = n(),
    TotalRebounds = sum(TotalRebounds),
    平均FieldGoalsAttempted = mean(FieldGoalsAttempted)
  )
# Exercise: three-point percentage (ThreesMade / ThreesAttempted), keeping the
# players whose percentage ranks in the top 10.
# Rows tied at the cut-off are all kept, and the original row order is
# preserved (the result is not sorted by percentage).
# Any player with zero attempts would get NaN (0 / 0), ranks as missing, and
# is dropped by filter().
NBA1920_three <- NBA1920 %>%
  mutate(三分球命中率 = ThreesMade / ThreesAttempted) %>%
  filter(min_rank(desc(三分球命中率)) <= 10) %>%
  select(Team, Name, 三分球命中率)
NBA1920_three
## Team Name 三分球命中率
## 1 UTA Tony Bradley 1.0000000
## 2 SAN Drew Eubanks 1.0000000
## 3 MEM Dusty Hannahs 0.6666667
## 4 LAL Dwight Howard 0.6000000
## 5 SAN Keldon Johnson 0.5909091
## 6 CHA Caleb Martin 0.5405405
## 7 LAC Johnathan Motley 1.0000000
## 8 PHO Cameron Payne 0.5172414
## 9 WAS Justin Robinson 0.6000000
## 10 POR Hassa Whiteside 0.5714286
# Team-level analysis: total personal fouls and mean turnovers per team,
# ordered from the highest mean turnovers to the lowest.
NBA_team <- NBA1920 %>%
  group_by(Team) %>%
  summarise(
    PersonalFouls = sum(PersonalFouls),
    Avg_TurnOver = mean(Turnovers)
  ) %>%
  arrange(desc(Avg_TurnOver))