#### Advanced section ####
# Load the NBA 2019-20 data (the code below expects this file to provide `NBA1920`)
load(file = "NBA1920.RData")
# Attach the ggplot2 package
library(ggplot2)
# Attach the tidyverse packages (dplyr verbs and the %>% pipe are used below)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble 3.1.0 ✓ dplyr 1.0.5
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ✓ purrr 0.3.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# select(): keep only the columns to analyse (column subset)
NBA1920_1 <- NBA1920 %>%
  select(Name, Team, Position, GamesPlayed, TotalPoints)
# Inspect the first five rows
head(NBA1920_1, n = 5) # first five rows
## Name Team Position GamesPlayed TotalPoints
## 1 Steven Adams OKL C 63 684
## 2 Bam Adebayo MIA PF 72 1146
## 3 Lamarcu Aldridge SAN C 53 1001
## 4 Kyle Alexander MIA PF 2 2
## 5 Alexander-walke NOR SG 47 267
tail(NBA1920_1, n = 5) # last five rows
## Name Team Position GamesPlayed TotalPoints
## 525 Trae Young ATL PG 60 1778
## 526 Cody Zeller CHA C 58 642
## 527 Tyler Zeller SAN C 1 2
## 528 Ante Zizic CLE C 22 96
## 529 Ivica Zubac LAC C 72 596
# filter(): keep only the observations to analyse (row subset)
# Ten highest-scoring power forwards (Position "PF") by TotalPoints
NBA1920_2 <- NBA1920_1 %>%
  filter(Position == "PF") %>% # keep power forwards only
  arrange(desc(TotalPoints)) %>% # sort descending by total points
  slice(seq_len(10)) # first ten rows of the sorted table
# List every player (Name) of one team together with their Position,
# ordered by position
NBA1920 %>%
  select(Team, Position, Name) %>%
  filter(Team == "MIL") %>%
  arrange(Position)
## Team Position Name
## 1 MIL C Brook Lopez
## 2 MIL C Robin Lopez
## 3 MIL PF G Antetokounmpo
## 4 MIL PF Ersan Ilyasova
## 5 MIL PF Marvin Williams
## 6 MIL PF D.j. Wilson
## 7 MIL PG Eric Bledsoe
## 8 MIL SF T Antetokounmpo
## 9 MIL SF Pat Connaughton
## 10 MIL SF Kyle Korver
## 11 MIL SF Wesley Matthews
## 12 MIL SF Khris Middleton
## 13 MIL SG Sterling Brown
## 14 MIL SG Dont Divincenzo
## 15 MIL SG George Hill
## 16 MIL SG Frank Mason
# Ten centers (Position "C") with the most games played
NBA1920 %>%
  filter(Position == "C") %>% # centers only
  arrange(desc(GamesPlayed)) %>% # most games first; ties keep source order
  select(Name, Position, GamesPlayed) %>%
  slice(seq_len(10))
## Name Position GamesPlayed
## 1 Nikola Jokic C 73
## 2 Ivica Zubac C 72
## 3 Jon Valanciunas C 70
## 4 Dwight Howard C 69
## 5 Rudy Gobert C 68
## 6 Brook Lopez C 68
## 7 Javale Mcgee C 68
## 8 Kelly Olynyk C 67
## 9 Hassa Whiteside C 67
## 10 Robin Lopez C 66
# mutate(): add new columns
# Quartiles, mean and range of GamesPlayed, printed for reference
summary(NBA1920[["GamesPlayed"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 21.00 48.00 42.18 62.00 74.00
# AttendanceRate = GamesPlayed divided by 82; keep the ten highest rates
NBA1920_3 <- NBA1920 %>%
  mutate(AttendanceRate = GamesPlayed / 82) %>%
  select(Name, Team, AttendanceRate) %>%
  arrange(desc(AttendanceRate)) %>%
  slice(seq_len(10))
NBA1920_3
## Name Team AttendanceRate
## 1 Maxi Kleber DAL 0.9024390
## 2 Mikal Bridges PHO 0.8902439
## 3 Dillon Brooks MEM 0.8902439
## 4 Justin Holiday IND 0.8902439
## 5 Nikola Jokic DEN 0.8902439
## 6 Monte Morris DEN 0.8902439
## 7 Duncan Robinson MIA 0.8902439
## 8 Delon Wright DAL 0.8902439
## 9 Bam Adebayo MIA 0.8780488
## 10 Harrison Barnes SAC 0.8780488
# Add the free-throw percentage FreeThrowsRate
# (FreeThrowsMade / FreeThrowsAttempted) and keep the players whose rate is
# at most 0.5, lowest rate first
NBA1920_31 <- NBA1920 %>%
  mutate(FreeThrowsRate = FreeThrowsMade / FreeThrowsAttempted) %>%
  select(Name, Team, FreeThrowsRate) %>%
  filter(FreeThrowsRate <= 0.5) %>% # upper cut-off for the rate
  arrange(FreeThrowsRate)
dim(NBA1920_31)
## [1] 41 3
NBA1920_31
## Name Team FreeThrowsRate
## 1 Louis King DET 0.0000000
## 2 Jeremy Pargo GSW 0.0000000
## 3 Dean Wade CLE 0.0000000
## 4 Cory Brewer SAC 0.3333333
## 5 Tacko Fall BOS 0.3333333
## 6 Vic Law ORL 0.3333333
## 7 Kenric Williams NOR 0.3461538
## 8 Amile Jefferson ORL 0.3571429
## 9 Moses Brown POR 0.3750000
## 10 Malcolm Miller TOR 0.3750000
## 11 Thabo Sefolosha HOU 0.3750000
## 12 Yuta Watanabe MEM 0.3750000
## 13 Andre Iguodala MIA 0.4000000
## 14 T Antetokounmpo MIL 0.4117647
## 15 Tj Leaf IND 0.4375000
## 16 Tyson Chandler HOU 0.4615385
## 17 Jarrett Culver MIN 0.4615385
## 18 Jakob Poeltl SAN 0.4651163
## 19 Justin James SAC 0.4761905
## 20 Justin Anderson BRO 0.5000000
## 21 K Antetokounmpo LAL 0.5000000
## 22 Jonah Bolden PHO 0.5000000
## 23 Jarrel Brantley UTA 0.5000000
## 24 Ed Davis UTA 0.5000000
## 25 Melvin Frazier ORL 0.5000000
## 26 Devon Hall OKL 0.5000000
## 27 Donta Hall BRO 0.5000000
## 28 John Henson DET 0.5000000
## 29 T Horton-tucker LAL 0.5000000
## 30 John Konchar MEM 0.5000000
## 31 Josh Magette ORL 0.5000000
## 32 Luc Mbah_a_moute HOU 0.5000000
## 33 Adam Mokoka CHI 0.5000000
## 34 Kz Okpala MIA 0.5000000
## 35 Garyiii Payton WAS 0.5000000
## 36 Norvel Pelle PHI 0.5000000
## 37 Andre Roberson OKL 0.5000000
## 38 Zhaire Smith PHI 0.5000000
## 39 Khyri Thomas DET 0.5000000
## 40 Sinda Thornwell NOR 0.5000000
## 41 Qu Weatherspoon SAN 0.5000000
# Frequency table of a categorical variable, written with tidyverse verbs
# summarise(): compute summary statistics
# group_by(): define the grouping variable
NBA1920_4 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(人數 = n()) # number of rows per position
NBA1920_5 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(總籃板 = sum(TotalRebounds)) # summed rebounds per position
NBA1920_6 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(AvgFieldGoalsAttempted = mean(FieldGoalsAttempted)) # mean attempts per position
NBA1920_4
## # A tibble: 5 x 2
## Position 人數
## <fct> <int>
## 1 C 84
## 2 PF 108
## 3 PG 112
## 4 SF 116
## 5 SG 109
NBA1920_5
## # A tibble: 5 x 2
## Position 總籃板
## <fct> <int>
## 1 C 24594
## 2 PF 22549
## 3 PG 15005
## 4 SF 19312
## 5 SG 13519
NBA1920_6
## # A tibble: 5 x 2
## Position AvgFieldGoalsAttempted
## <fct> <dbl>
## 1 C 326.
## 2 PF 315.
## 3 PG 412.
## 4 SF 342.
## 5 SG 376.
# Compact version: row count, summed rebounds and mean field-goal attempts
# per position, all computed in a single summarise() call.
NBA1920_7 <- NBA1920 %>%
  group_by(Position) %>%
  summarise(
    counts = n(),
    TotalRebounds = sum(TotalRebounds),
    # Average the FieldGoalsAttempted column. Do not refer to TotalRebounds
    # here: after the line above, that name is the one-value group sum inside
    # this summarise(), so its "mean" would just repeat the total.
    AvgFieldGoalsAttempted = mean(FieldGoalsAttempted)
  )
NBA1920_7
## # A tibble: 5 x 4
## Position counts TotalRebounds AvgFieldGoalsAttempted
## <fct> <int> <int> <dbl>
## 1 C 84 24594 326.
## 2 PF 108 22549 315.
## 3 PG 112 15005 412.
## 4 SF 116 19312 342.
## 5 SG 109 13519 376.
# Exercise: three-point percentage (ThreesMade / ThreesAttempted), top ten.
# slice_max() replaces the superseded top_n() and returns the rows ranked
# from highest to lowest; rows tied with the 10th value are kept
# (with_ties defaults to TRUE), so more than ten rows are possible.
# NOTE(review): no minimum number of attempts is required, so a rate of 1.0
# may come from very few shots -- confirm whether a threshold is wanted.
NBA1920_three <- NBA1920 %>%
  mutate(三分球命中率 = ThreesMade / ThreesAttempted) %>%
  slice_max(三分球命中率, n = 10) %>%
  select(Name, Team, 三分球命中率)
NBA1920_three
## Name Team 三分球命中率
## 1 Tony Bradley UTA 1.0000000
## 2 Drew Eubanks SAN 1.0000000
## 3 Johnathan Motley LAC 1.0000000
## 4 Dusty Hannahs MEM 0.6666667
## 5 Dwight Howard LAL 0.6000000
## 6 Justin Robinson WAS 0.6000000
## 7 Keldon Johnson SAN 0.5909091
## 8 Hassa Whiteside POR 0.5714286
## 9 Caleb Martin CHA 0.5405405
## 10 Cameron Payne PHO 0.5172414
# Team analysis: total personal fouls per team (sum(PersonalFouls)) and the
# mean of Turnovers over each team's rows (mean(Turnovers)).
# The result column is named "各球隊總犯規次數" (total fouls per team); the
# stray trailing "板" copied from the rebound column name has been removed.
NBA1920 %>%
  group_by(Team) %>%
  summarise(各球隊總犯規次數 = sum(PersonalFouls))
## # A tibble: 32 x 2
## Team 各球隊總犯規次數
## <fct> <int>
## 1 ATL 1575
## 2 BOS 1550
## 3 BRO 1574
## 4 CHA 1134
## 5 CHI 1417
## 6 CLE 1260
## 7 CLI 180
## 8 DAL 1602
## 9 DEN 1499
## 10 DET 1125
## # … with 22 more rows
NBA1920 %>%
  group_by(Team) %>%
  summarise(AVG = mean(Turnovers)) # mean of Turnovers over each team's rows
## # A tibble: 32 x 2
## Team AVG
## <fct> <dbl>
## 1 ATL 60.6
## 2 BOS 55.7
## 3 BRO 45.1
## 4 CHA 62.1
## 5 CHI 55.9
## 6 CLE 66.5
## 7 CLI 97
## 8 DAL 50.9
## 9 DEN 58.3
## 10 DET 45.1
## # … with 22 more rows
# Both team-level statistics computed in one summarise() call
NBA_Team <- NBA1920 %>%
  group_by(Team) %>%
  summarise(
    PersonalFouls = sum(PersonalFouls), # summed fouls per team
    AVG = mean(Turnovers) # mean of Turnovers per team
  )
NBA_Team
## # A tibble: 32 x 3
## Team PersonalFouls AVG
## <fct> <int> <dbl>
## 1 ATL 1575 60.6
## 2 BOS 1550 55.7
## 3 BRO 1574 45.1
## 4 CHA 1134 62.1
## 5 CHI 1417 55.9
## 6 CLE 1260 66.5
## 7 CLI 180 97
## 8 DAL 1602 50.9
## 9 DEN 1499 58.3
## 10 DET 1125 45.1
## # … with 22 more rows