# Load the saved workspace; the rest of the script expects it to provide
# the NBA1920 data frame.
load("NBA1920.RData")
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## √ ggplot2 3.3.3 √ purrr 0.3.4
## √ tibble 3.1.0 √ dplyr 1.0.5
## √ tidyr 1.1.3 √ stringr 1.4.0
## √ readr 1.4.0 √ forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggplot2)
# 1. Keep only the Name, Team and Position columns of the NBA data.
df1 <- select(NBA1920, Name, Team, Position)
# 2. Look at the first and the last rows of the NBA data.
# Base R approach: this call previews the first 3 rows only; the actual
# "first five and last five" selection is done with slice() further below.
head(NBA1920, n = 3)
## League Name Team Position GamesPlayed TotalMinutesPlayed
## 1 NBA Steven Adams OKL C 63 1679
## 2 NBA Bam Adebayo MIA PF 72 2415
## 3 NBA Lamarcu Aldridge SAN C 53 1757
## FieldGoalsMade FieldGoalsAttempted ThreesMade ThreesAttempted FreeThrowsMade
## 1 283 478 1 3 117
## 2 440 791 2 14 264
## 3 391 794 61 157 158
## FreeThrowsAttempted OffensiveRebounds TotalRebounds Assists Steals Turnovers
## 1 201 208 582 146 51 95
## 2 382 177 735 368 82 203
## 3 191 104 392 129 36 74
## Blocks PersonalFouls Disqualifications TotalPoints Technicals Ejections
## 1 68 122 1 684 2 0
## 2 93 182 2 1146 0 0
## 3 87 128 1 1001 0 0
## FlagrantFouls GamesStarted
## 1 0 63
## 2 0 72
## 3 0 53
# Base R approach, continued: preview the last 10 rows.
tail(NBA1920, n = 10)
## League Name Team Position GamesPlayed TotalMinutesPlayed
## 520 NBA Justise Winslow MIA SF 11 353
## 521 NBA Christian Wood DET PF 62 1319
## 522 NBA Delon Wright DAL SG 73 1570
## 523 NBA Wright-foreman UTA PG 4 45
## 524 NBA Thaddeus Young CHI PF 64 1594
## 525 NBA Trae Young ATL PG 60 2119
## 526 NBA Cody Zeller CHA C 58 1343
## 527 NBA Tyler Zeller SAN C 1 4
## 528 NBA Ante Zizic CLE C 22 222
## 529 NBA Ivica Zubac LAC C 72 1326
## FieldGoalsMade FieldGoalsAttempted ThreesMade ThreesAttempted
## 520 50 129 6 27
## 521 288 508 54 140
## 522 190 412 47 128
## 523 7 20 2 10
## 524 269 602 79 223
## 525 546 1249 205 568
## 526 251 480 18 75
## 527 1 4 0 0
## 528 41 72 0 0
## 529 236 387 0 2
## FreeThrowsMade FreeThrowsAttempted OffensiveRebounds TotalRebounds Assists
## 520 18 27 16 73 44
## 521 181 243 104 390 60
## 522 77 100 71 281 244
## 523 3 4 0 5 7
## 524 42 72 94 315 117
## 525 481 559 31 255 560
## 526 122 179 161 412 88
## 527 0 0 3 4 0
## 528 14 19 17 65 6
## 529 124 166 199 545 82
## Steals Turnovers Blocks PersonalFouls Disqualifications TotalPoints
## 520 8 24 5 38 0 124
## 521 34 84 55 98 0 811
## 522 84 73 22 93 0 504
## 523 2 3 0 5 0 19
## 524 92 102 23 134 1 659
## 525 65 289 8 104 0 1778
## 526 40 75 27 140 1 642
## 527 0 0 0 0 0 2
## 528 7 10 5 27 0 96
## 529 16 61 66 168 2 596
## Technicals Ejections FlagrantFouls GamesStarted
## 520 0 0 0 5
## 521 1 0 0 12
## 522 0 0 0 6
## 523 0 0 0 0
## 524 4 0 0 16
## 525 7 0 0 60
## 526 0 0 0 39
## 527 0 0 0 0
## 528 0 0 0 0
## 529 1 0 0 70
# Tidyverse approach
nrow(NBA1920) # number of rows
## [1] 529
# First five and last five rows; inside slice(), n() is the row count.
df2 <- NBA1920 %>% slice(c(1:5, (n() - 4):n()))
# Rows 1, 3 and 5 only.
df21 <- NBA1920 %>% slice(1, 3, 5)
# 3. Who are the iron men? Select the records with GamesPlayed >= 65.
# First get a feel for the data with summary statistics and a histogram.
summary(NBA1920[["GamesPlayed"]])
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 21.00 48.00 42.18 62.00 74.00
ggplot(NBA1920, aes(GamesPlayed)) +
  geom_histogram(bins = 20)

# Keep the players who appeared in at least 65 games.
df3 <- filter(NBA1920, GamesPlayed >= 65)
# 4. Sort the NBA data by GamesPlayed from largest to smallest, take the
# first 5 rows and keep only the Name, Team and GamesPlayed columns.
# Ascending order
df41 <- arrange(NBA1920, GamesPlayed)
# Descending order, top five rows, three columns
df42 <- NBA1920 %>%
  arrange(desc(GamesPlayed)) %>%
  slice(1:5) %>%
  select(Name, Team, GamesPlayed)
# 5. Who is the three-point leader? Take the five records with the largest
# ThreesMade values.
# Approach 1: slice_max(), the replacement for the superseded top_n().
# The rows come back sorted by ThreesMade in descending order, and ties for
# the last place are kept, so more than five rows are possible.
df51 <- NBA1920 %>% slice_max(ThreesMade, n = 5)
# Approach 2: sort by ThreesMade in descending order with arrange(), then
# take the first five observations with slice().
df52 <- NBA1920 %>%
  arrange(desc(ThreesMade)) %>%
  slice(1:5)
# 6. Count the players on each team; the 人數 column holds the player count.
df6 <- summarise(group_by(NBA1920, Team), 人數 = n())
# 7. Count the players on each team and sort the teams from the largest
# roster to the smallest. The 人數 column holds the player count.
df7 <- NBA1920 %>%
  group_by(Team) %>%
  summarise(人數 = n()) %>%
  arrange(desc(人數))
# Horizontal bar chart of roster size per team. No fill aesthetic is mapped
# here, so the former scale_fill_brewer() call had no effect and was removed.
ggplot(data = df7, aes(x = Team, y = 人數)) +
  geom_col() +
  coord_flip()

# Sorted bar chart: reorder() ranks the teams by player count so the bars
# appear in order of roster size. No fill aesthetic is mapped here, so the
# former scale_fill_brewer() call had no effect and was removed.
ggplot(data = df7, aes(x = reorder(Team, 人數), y = 人數)) +
  geom_col() +
  coord_flip()

###我是分隔線------------------------------------------
# Exercise 1. Keep the Name, GamesPlayed and GamesStarted columns.
df1.1 <- select(NBA1920, Name, GamesPlayed, GamesStarted)
head(df1.1, n = 5) # show only the first five rows
## Name GamesPlayed GamesStarted
## 1 Steven Adams 63 63
## 2 Bam Adebayo 72 72
## 3 Lamarcu Aldridge 53 53
## 4 Kyle Alexander 2 0
## 5 Alexander-walke 47 1
# Exercise 3. Who are the iron men? Select the records with
# TotalMinutesPlayed >= 2000.
summary(NBA1920$TotalMinutesPlayed)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 281.0 921.0 967.7 1594.0 2559.0
# bins = 30 is the value stat_bin() falls back to when none is given; stating
# it explicitly draws the same histogram without the "Pick better value with
# `binwidth`" message.
ggplot(NBA1920, aes(x = TotalMinutesPlayed)) +
  geom_histogram(bins = 30)

# Keep the players with at least 2000 total minutes played.
df3.3 <- filter(NBA1920, TotalMinutesPlayed >= 2000)
nrow(df3.3) # 55 rows pass the filter
## [1] 55
# Exercise 4. Sort the NBA data by TotalMinutesPlayed from largest to
# smallest and take the first 10 rows. Name, Team and TotalMinutesPlayed are
# kept, plus Position, which the plot below uses as the fill colour.
df4.4 <- NBA1920 %>%
  arrange(desc(TotalMinutesPlayed)) %>%
  slice(1:10) %>%
  select(Name, Team, TotalMinutesPlayed, Position)
# Bar chart of minutes played per player, coloured by position.
ggplot(df4.4, aes(Name, TotalMinutesPlayed, fill = Position)) +
  geom_col()

# The result can also be piped straight into ggplot() without assigning it
# to an object first; here the bars are coloured by team.
NBA1920 %>%
  arrange(desc(TotalMinutesPlayed)) %>%
  slice(1:10) %>%
  select(Name, Team, TotalMinutesPlayed, Position) %>%
  ggplot(aes(Name, TotalMinutesPlayed, fill = Team)) +
  geom_col()

# Exercise 5. Who is the free-throw leader? Take the five records with the
# largest FreeThrowsMade values, keep the Name, Team and FreeThrowsMade
# columns, and plot them.
df5.1 <- NBA1920 %>%
  arrange(desc(FreeThrowsMade)) %>%
  slice(1:5) %>%
  select(Name, Team, FreeThrowsMade)
ggplot(df5.1, aes(Name, FreeThrowsMade, fill = Team)) +
  geom_col()

# Sorted bar chart: reordering on the negated value puts the player with the
# most free throws made first.
ggplot(df5.1, aes(reorder(Name, -FreeThrowsMade), FreeThrowsMade, fill = Team)) +
  geom_col()

# Exercise 6. Count the players at each court position, most common first.
df6.1 <- arrange(
  summarise(group_by(NBA1920, Position), freq = n()),
  desc(freq)
)
# Exercise 7. Count the players at each position within each team.
# freq is the player count. .groups = "drop" returns an ungrouped result and
# avoids the "grouped output" message that summarise() otherwise prints.
df7.2 <- NBA1920 %>%
  group_by(Team, Position) %>%
  summarise(freq = n(), .groups = "drop")
# Horizontal stacked bar chart: one bar per team, split by position and
# coloured with the Set3 brewer palette.
ggplot(df7.2, aes(Team, freq, fill = Position)) +
  geom_col() +
  scale_fill_brewer(palette = "Set3") +
  coord_flip()

# Sorted stacked bar chart: order the teams by their total player count.
# df7.2 has one row per Team/Position pair and reorder() summarises with
# mean() by default, which would rank teams by their average count per
# position; FUN = sum ranks them by the full height of the stacked bar.
ggplot(data = df7.2, aes(x = reorder(Team, freq, FUN = sum), y = freq, fill = Position)) +
  geom_col() +
  scale_fill_brewer(palette = "Set3") +
  coord_flip()

# List all available colour palettes
RColorBrewer::display.brewer.all()
