#### Advanced section ####
# Load the NBA 2019-20 data from the RData file; the rest of the script
# works on an object named NBA1920 (presumably restored by this call).
load(file = "NBA1920.RData")
# Attach the ggplot2 package.
library(ggplot2)
# Attach the tidyverse packages (the attach/conflict messages follow below).
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## √ tibble 3.1.0 √ dplyr 1.0.5
## √ tidyr 1.1.3 √ stringr 1.4.0
## √ readr 1.4.0 √ forcats 0.5.1
## √ purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# select(): keep only the columns needed for the analysis (column subset).
NBA1920_1 <- select(
  NBA1920,
  Name, Team, Position, GamesPlayed, TotalPoints
)
# Inspect the first 5 and the last 5 rows (head() here, tail() further down).
head(NBA1920_1, n = 5)
## Name Team Position GamesPlayed TotalPoints
## 1 Steven Adams OKL C 63 684
## 2 Bam Adebayo MIA PF 72 1146
## 3 Lamarcu Aldridge SAN C 53 1001
## 4 Kyle Alexander MIA PF 2 2
## 5 Alexander-walke NOR SG 47 267
# Last 5 rows of the selected columns.
tail(NBA1920_1, n = 5)
## Name Team Position GamesPlayed TotalPoints
## 525 Trae Young ATL PG 60 1778
## 526 Cody Zeller CHA C 58 642
## 527 Tyler Zeller SAN C 1 2
## 528 Ante Zizic CLE C 22 96
## 529 Ivica Zubac LAC C 72 596
# filter(): keep only the observations to analyse (row subset).
# Here: power forwards (Position "PF"), ordered by TotalPoints from highest
# to lowest, keeping the first ten rows.
NBA1920_2 <- NBA1920_1 %>%
  filter(Position == "PF") %>%
  arrange(desc(TotalPoints)) %>%
  slice_head(n = 10)
NBA1920_2
## Name Team Position GamesPlayed TotalPoints
## 1 G Antetokounmpo MIL PF 63 1857
## 2 Anthony Davis LAL PF 62 1618
## 3 Tobias Harris PHI PF 72 1410
## 4 Pascal Siakam TOR PF 60 1371
## 5 Krist Porzingis DAL PF 57 1164
## 6 Danil Gallinari OKL PF 62 1160
## 7 Bam Adebayo MIA PF 72 1146
## 8 Marcus Morris CLI PF 62 1033
## 9 Kevin Love CLE PF 56 987
## 10 Carmelo Anthony POR PF 58 895
# List every player (Name) of one team together with the court position,
# ordered by position. The team code is hard-coded to "NYK".
NBA1920 %>%
  select(Team, Position, Name) %>%
  filter(Team == "NYK") %>%
  arrange(Position)
## Team Position Name
## 1 NYK C Bobby Portis
## 2 NYK C Julius Randle
## 3 NYK C Mitche Robinson
## 4 NYK PF Taj Gibson
## 5 NYK PG Elfrid Payton
## 6 NYK PG Dennis Smith
## 7 NYK SF Rj Barrett
## 8 NYK SF Igna Brazdeikis
## 9 NYK SF Reggie Bullock
## 10 NYK SF Moe Harkless
## 11 NYK SF Kevin Knox
## 12 NYK SG Kadeem Allen
## 13 NYK SG Damyean Dotson
## 14 NYK SG Wayne Ellington
## 15 NYK SG Frank Ntilikina
## 16 NYK SG Allonzo Trier
# Select the top 10 center (C) players with the highest GamesPlayed.
# NOTE(review): the pipeline below never filters on Position, so it ranks
# all players rather than centers only — confirm the intent.
# mutate(): add a new column.
summary(NBA1920[["GamesPlayed"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 21.00 48.00 42.18 62.00 74.00
# AttendanceRate is GamesPlayed divided by 82; the ten highest rates are
# kept together with Team and Name.
# NOTE(review): the summary above reports a maximum of 74 games played, so
# no rate can reach 1 — confirm that 82 is the intended denominator.
NBA1920_3 <- NBA1920 %>%
  mutate(AttendanceRate = GamesPlayed / 82) %>%
  arrange(desc(AttendanceRate)) %>%
  select(Team, Name, AttendanceRate) %>%
  slice_head(n = 10)
NBA1920_3
## Team Name AttendanceRate
## 1 DAL Maxi Kleber 0.9024390
## 2 PHO Mikal Bridges 0.8902439
## 3 MEM Dillon Brooks 0.8902439
## 4 IND Justin Holiday 0.8902439
## 5 DEN Nikola Jokic 0.8902439
## 6 DEN Monte Morris 0.8902439
## 7 MIA Duncan Robinson 0.8902439
## 8 DAL Delon Wright 0.8902439
## 9 MIA Bam Adebayo 0.8780488
## 10 SAC Harrison Barnes 0.8780488
# Add a free-throw percentage column, FreeThrowsRate
# (FreeThrowsMade / FreeThrowsAttempted), and keep the players whose rate is
# at most 0.5, ordered from the lowest rate upwards.
# NOTE(review): this step was described as "select the bottom 10 players",
# but the code applies a 0.5 threshold instead of a row limit (dim() below
# reports 41 rows) — confirm which of the two is intended.
NBA1920_4.1 <- NBA1920 %>%
  mutate(FreeThrowsRate = FreeThrowsMade / FreeThrowsAttempted) %>%
  filter(FreeThrowsRate <= 0.5) %>%
  arrange(FreeThrowsRate) %>%
  select(Team, Name, FreeThrowsRate)
dim(NBA1920_4.1)
## [1] 41 3
# Frequency table of a categorical variable, written with tidyverse verbs.
# group_by(): defines the grouping key.
# summarise(): computes one statistic per group.
# The output column is the number of rows (players) per Position.
NBA1920 %>%
  group_by(Position) %>%
  summarise(人數 = n())
## # A tibble: 5 x 2
## Position 人數
## <fct> <int>
## 1 C 84
## 2 PF 108
## 3 PG 112
## 4 SF 116
## 5 SG 109
# Sum of TotalRebounds within each Position.
NBA1920 %>%
  group_by(Position) %>%
  summarise(總籃板 = sum(TotalRebounds))
## # A tibble: 5 x 2
## Position 總籃板
## <fct> <int>
## 1 C 24594
## 2 PF 22549
## 3 PG 15005
## 4 SF 19312
## 5 SG 13519
# Mean of FieldGoalsAttempted within each Position.
NBA1920 %>%
  group_by(Position) %>%
  summarise(平均FieldGoalsAttempted = mean(FieldGoalsAttempted))
## # A tibble: 5 x 2
## Position 平均FieldGoalsAttempted
## <fct> <dbl>
## 1 C 326.
## 2 PF 315.
## 3 PG 412.
## 4 SF 342.
## 5 SG 376.
# More concise form: compute the row count, the rebound total and the mean
# field-goal attempts per Position in a single summarise() call.
NBA1920_7 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(
    counts = n(),
    TotalRebounds = sum(TotalRebounds),
    Avg_FieldGoalsAttempted = mean(FieldGoalsAttempted)
  )
NBA1920_7
## # A tibble: 5 x 4
## Position counts TotalRebounds Avg_FieldGoalsAttempted
## <fct> <int> <int> <dbl>
## 1 C 84 24594 326.
## 2 PF 108 22549 315.
## 3 PG 112 15005 412.
## 4 SF 116 19312 342.
## 5 SG 109 13519 376.
# Practice: compute the three-point percentage (ThreesMade / ThreesAttempted)
# and keep the ten players with the highest value, with Team and Name.
# slice_max() is used instead of the superseded top_n(): ties are still kept
# (with_ties defaults to TRUE), and the rows now come back ordered from the
# highest percentage to the lowest instead of in input order.
# NOTE(review): no minimum number of attempts is required, so a player with
# a single made attempt ranks at 1.0 — confirm whether that is intended.
NBA1920_three <- NBA1920 %>%
  mutate(三分球命中率 = ThreesMade / ThreesAttempted) %>%
  slice_max(三分球命中率, n = 10) %>%
  select(Team, Name, 三分球命中率)
NBA1920_three
## Team Name 三分球命中率
## 1 UTA Tony Bradley 1.0000000
## 2 SAN Drew Eubanks 1.0000000
## 3 LAC Johnathan Motley 1.0000000
## 4 MEM Dusty Hannahs 0.6666667
## 5 LAL Dwight Howard 0.6000000
## 6 WAS Justin Robinson 0.6000000
## 7 SAN Keldon Johnson 0.5909091
## 8 POR Hassa Whiteside 0.5714286
## 9 CHA Caleb Martin 0.5405405
## 10 PHO Cameron Payne 0.5172414
# Team analysis: total personal fouls (sum(PersonalFouls)) and mean turnovers
# (mean(Turnovers)) per team, ordered by mean turnovers, highest first.
# NOTE(review): the printed result has 32 team codes, and both "CLI" and
# "LAC" occur in this data set — check whether some teams are coded twice.
NBA_team <- NBA1920 %>%
  group_by(Team) %>%
  summarise(
    PersonalFouls = sum(PersonalFouls),
    Avg_TurnOvers = mean(Turnovers)
  ) %>%
  arrange(desc(Avg_TurnOvers))
NBA_team
## # A tibble: 32 x 3
## Team PersonalFouls Avg_TurnOvers
## <fct> <int> <dbl>
## 1 CLI 180 97
## 2 MIL 1486 68.1
## 3 NOR 1524 67.2
## 4 PHI 1608 66.7
## 5 CLE 1260 66.5
## 6 LAL 1529 64
## 7 CHA 1134 62.1
## 8 ATL 1575 60.6
## 9 MEM 1551 59.3
## 10 DEN 1499 58.3
## # ... with 22 more rows