########tidy資料格式與ggplot2畫圖####
# Alt+-(用於賦值運算符<-)

####安裝套件####
#install.packages("ggplot2")
# install.packages("SportsAnalytics")
# install.packages("titanic")
# install.packages("dplyr")
# install.packages("tidyverse")
# 多行註解 『 Command + Shift + C 』,若是想要取消註解,僅需將段落反白再執行一次Command + Shift + C 即可。

####tidy資料格式常用的函數####
# 選取tibble中的幾個Variables: select()
# 依照位置選取tibble中的Observations: slice()
# 根據條件選取tibble中的Observations: filter()
# 增加或修改tibble的Variables: mutate()
# 依照某個Variable的資料數值大小,排列Observations: arrange()
# 依照某個Variable的資料數值大小,選出前幾筆Observations: top_n()
# 依照某個Variable的資料數值,將Observations分群: group_by()
# 對Observations進行彙整(加總、平均、…): summarise()
# 刪除NA值:filter(欄位名稱!="NA") 或是 filter(!is.na(欄位名稱)) 


####ggplot2畫圖文法####

# 資料來源(data):指定原始資料來源的 data frame。
# 美學對應(aesthetic):指定原始資料與圖形之間的對應關係,例如哪一個變數要當作 x 座標變數,而哪一個要當作 y 座標變數,還有資料繪圖時的樣式等。
# 幾何圖案(geometry):要用什麼幾何圖形繪製資料,例如點、線條、多邊形等。
# 繪圖面(facet):指定如何將資料分散在多張子圖形中繪製,以利互相比較。
# 統計轉換(statistical transformation):指定如何以將資料轉換為各種統計量,例如將連續型資料轉為離散型的類別。
# 座標系統(coordinate system):指定繪圖時所使用的座標系統,除了常見的笛卡兒直角座標系統,也可以使用極坐標或地圖投影(map projection)。
# 主題(theme):控制資料以外的繪圖組件,例如座標軸、說明文字等。


library(ggplot2) ##須先安裝 install.packages("ggplot2")
#qplot()為ggplot2 “Hello, world!”,
#簡單使用qplot(x軸名稱,y軸名稱,data=使用資料)就可畫散佈圖
#library(SportsAnalytics)##須先安裝 install.packages("SportsAnalytics")
#NBA1920<-fetch_NBAPlayerStatistics("19-20") ## 讀入資料

####存取資料與讀取資料####
#存取資料檔(.csv)
#存檔
#write.csv(NBA1920, file = "NBA1920.csv", fileEncoding = "utf-8")
#讀取
#NBA1920_1 <- read.csv( file = "NBA1920.csv", fileEncoding = "utf-8")

#存取資料檔(.RData)
#存檔
# save(NBA1920, file = "NBA1920.RData")
# #讀取
# Restore the objects stored in NBA1920.RData (expected to contain the
# NBA1920 data frame) from the current working directory.
load("NBA1920.RData")

# Inspect the data: list the column names.
names(NBA1920)
##  [1] "League"              "Name"                "Team"               
##  [4] "Position"            "GamesPlayed"         "TotalMinutesPlayed" 
##  [7] "FieldGoalsMade"      "FieldGoalsAttempted" "ThreesMade"         
## [10] "ThreesAttempted"     "FreeThrowsMade"      "FreeThrowsAttempted"
## [13] "OffensiveRebounds"   "TotalRebounds"       "Assists"            
## [16] "Steals"              "Turnovers"           "Blocks"             
## [19] "PersonalFouls"       "Disqualifications"   "TotalPoints"        
## [22] "Technicals"          "Ejections"           "FlagrantFouls"      
## [25] "GamesStarted"
# Compact overview of the structure: type and leading values of each column.
utils::str(NBA1920)
## 'data.frame':    529 obs. of  25 variables:
##  $ League             : Factor w/ 1 level "NBA": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Name               : chr  "Steven Adams" "Bam Adebayo" "Lamarcu Aldridge" "Kyle Alexander" ...
##  $ Team               : Factor w/ 32 levels "ATL","BOS","BRO",..: 23 17 29 17 21 16 3 22 24 3 ...
##  $ Position           : Factor w/ 5 levels "C","PF","PG",..: 1 2 1 2 5 5 2 5 4 4 ...
##  $ GamesPlayed        : int  63 72 53 2 47 38 70 10 18 10 ...
##  $ TotalMinutesPlayed : int  1679 2415 1757 13 590 721 1854 118 381 108 ...
##  $ FieldGoalsMade     : int  283 440 391 1 98 117 302 19 25 10 ...
##  $ FieldGoalsAttempted: int  478 791 794 2 266 251 465 44 86 38 ...
##  $ ThreesMade         : int  1 2 61 0 46 57 0 5 9 6 ...
##  $ ThreesAttempted    : int  3 14 157 0 133 141 6 16 36 29 ...
##  $ FreeThrowsMade     : int  117 264 158 0 25 39 171 7 19 2 ...
##  $ FreeThrowsAttempted: int  201 382 191 0 37 45 270 11 29 4 ...
##  $ OffensiveRebounds  : int  208 177 104 2 9 8 216 2 24 1 ...
##  $ TotalRebounds      : int  582 735 392 3 84 85 671 9 88 21 ...
##  $ Assists            : int  146 368 129 0 89 52 110 21 21 8 ...
##  $ Steals             : int  51 82 36 0 17 12 40 5 18 0 ...
##  $ Turnovers          : int  95 203 74 1 54 33 77 8 17 4 ...
##  $ Blocks             : int  68 93 87 0 8 2 92 2 8 6 ...
##  $ PersonalFouls      : int  122 182 128 1 57 53 162 7 27 13 ...
##  $ Disqualifications  : int  1 2 1 0 0 0 2 0 0 0 ...
##  $ TotalPoints        : int  684 1146 1001 2 267 330 775 50 78 28 ...
##  $ Technicals         : int  2 0 0 0 0 1 1 0 0 0 ...
##  $ Ejections          : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ FlagrantFouls      : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ GamesStarted       : int  63 72 53 0 1 0 64 0 2 1 ...
# Per-column summary of the whole data frame.
summary(object = NBA1920)
##  League        Name                Team     Position  GamesPlayed   
##  NBA:529   Length:529         BRO    : 24   C : 84   Min.   : 1.00  
##            Class :character   HOU    : 20   PF:108   1st Qu.:21.00  
##            Mode  :character   SAC    : 20   PG:112   Median :48.00  
##                               WAS    : 20   SF:116   Mean   :42.18  
##                               DAL    : 19   SG:109   3rd Qu.:62.00  
##                               MIA    : 19            Max.   :74.00  
##                               (Other):407                           
##  TotalMinutesPlayed FieldGoalsMade  FieldGoalsAttempted   ThreesMade    
##  Min.   :   1.0     Min.   :  0.0   Min.   :   0.0      Min.   :  0.00  
##  1st Qu.: 281.0     1st Qu.: 34.0   1st Qu.:  72.0      1st Qu.:  4.00  
##  Median : 921.0     Median :124.0   Median : 277.0      Median : 31.00  
##  Mean   : 967.7     Mean   :163.6   Mean   : 355.6      Mean   : 48.89  
##  3rd Qu.:1594.0     3rd Qu.:252.0   3rd Qu.: 554.0      3rd Qu.: 78.00  
##  Max.   :2559.0     Max.   :685.0   Max.   :1514.0      Max.   :299.00  
##                                                                         
##  ThreesAttempted FreeThrowsMade  FreeThrowsAttempted OffensiveRebounds
##  Min.   :  0.0   Min.   :  0.0   Min.   :  0.00      Min.   :  0.00   
##  1st Qu.: 14.0   1st Qu.: 10.0   1st Qu.: 16.00      1st Qu.:  9.00   
##  Median : 94.0   Median : 40.0   Median : 57.00      Median : 26.00   
##  Mean   :136.6   Mean   : 71.5   Mean   : 92.51      Mean   : 40.35   
##  3rd Qu.:221.0   3rd Qu.: 93.0   3rd Qu.:122.00      3rd Qu.: 57.00   
##  Max.   :843.0   Max.   :692.0   Max.   :800.00      Max.   :257.00   
##                                                                       
##  TotalRebounds      Assists           Steals         Turnovers     
##  Min.   :  0.0   Min.   :  0.00   Min.   :  0.00   Min.   :  0.00  
##  1st Qu.: 49.0   1st Qu.: 18.00   1st Qu.:  7.00   1st Qu.: 14.00  
##  Median :140.0   Median : 60.00   Median : 25.00   Median : 43.00  
##  Mean   :179.5   Mean   : 97.64   Mean   : 30.64   Mean   : 55.72  
##  3rd Qu.:269.0   3rd Qu.:123.00   3rd Qu.: 48.00   3rd Qu.: 80.00  
##  Max.   :919.0   Max.   :684.00   Max.   :125.00   Max.   :308.00  
##                                                                    
##      Blocks       PersonalFouls    Disqualifications  TotalPoints    
##  Min.   :  0.00   Min.   :  0.00   Min.   : 0.0000   Min.   :   0.0  
##  1st Qu.:  4.00   1st Qu.: 31.00   1st Qu.: 0.0000   1st Qu.:  89.0  
##  Median : 11.00   Median : 83.00   Median : 0.0000   Median : 342.0  
##  Mean   : 19.64   Mean   : 83.17   Mean   : 0.5293   Mean   : 447.6  
##  3rd Qu.: 26.00   3rd Qu.:128.00   3rd Qu.: 1.0000   3rd Qu.: 684.0  
##  Max.   :196.00   Max.   :278.00   Max.   :10.0000   Max.   :2335.0  
##                                                                      
##    Technicals       Ejections FlagrantFouls  GamesStarted  
##  Min.   : 0.000   Min.   :0   Min.   :0     Min.   : 0.00  
##  1st Qu.: 0.000   1st Qu.:0   1st Qu.:0     1st Qu.: 0.00  
##  Median : 0.000   Median :0   Median :0     Median : 6.00  
##  Mean   : 1.172   Mean   :0   Mean   :0     Mean   :20.02  
##  3rd Qu.: 2.000   3rd Qu.:0   3rd Qu.:0     3rd Qu.:39.00  
##  Max.   :14.000   Max.   :0   Max.   :0     Max.   :73.00  
## 
####使用tidyverse套件的函數####
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## √ tibble  3.1.0     √ dplyr   1.0.5
## √ tidyr   1.1.3     √ stringr 1.4.0
## √ readr   1.4.0     √ forcats 0.5.1
## √ purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
#問題
# 1. 簡化資料,選取NBA資料中的Name, Team, Position
# 2. 選取NBA資料中前五筆及後五筆
# 3. 誰是鋼鐵人?選取NBA資料中GamesPlayed大於或等於65的紀錄
# 4. 按照GamesPlayed的值,由大到小排序NBA資料
# 5. 誰是三分王?選取NBA資料中ThreesMade值最大的五筆紀錄
# 6. 計算各球隊的球員人數
# 7. 計算各球隊的球員人數,並且依人數由大到小排序
# 8. 查看Team=="NA"和(或)Team=="CLI"的資料
# 9.過濾Team=="NA"和(或)Team=="CLI"的資料
# 10. 找出某個球隊Team的全體球員Name和場上位置Position,並且依照場上位置排序
# 11. 新建一個投籃命中率欄位,命名為FieldGoalPercentage
# 12. 依據TotalPoints,將球員分類
# 13.1 新增FieldGoalPercentage(FieldGoalsMade/FieldGoalsAttempted)
# 13.2 依據Position將資料分組
# 13.3 新增一欄位(avg_fgp),填入FieldGoalPercentage的平均數

#Ctrl+Shift+M(用於管道操作符%>%)

# 1. Keep only the Name, Team and Position columns of the NBA data.
df1 <- select(NBA1920, Name, Team, Position)

# 2. Look at the first and the last rows of the NBA data.
# Base R approach: head() / tail().
head(NBA1920, n = 3)
##   League             Name Team Position GamesPlayed TotalMinutesPlayed
## 1    NBA     Steven Adams  OKL        C          63               1679
## 2    NBA      Bam Adebayo  MIA       PF          72               2415
## 3    NBA Lamarcu Aldridge  SAN        C          53               1757
##   FieldGoalsMade FieldGoalsAttempted ThreesMade ThreesAttempted FreeThrowsMade
## 1            283                 478          1               3            117
## 2            440                 791          2              14            264
## 3            391                 794         61             157            158
##   FreeThrowsAttempted OffensiveRebounds TotalRebounds Assists Steals Turnovers
## 1                 201               208           582     146     51        95
## 2                 382               177           735     368     82       203
## 3                 191               104           392     129     36        74
##   Blocks PersonalFouls Disqualifications TotalPoints Technicals Ejections
## 1     68           122                 1         684          2         0
## 2     93           182                 2        1146          0         0
## 3     87           128                 1        1001          0         0
##   FlagrantFouls GamesStarted
## 1             0           63
## 2             0           72
## 3             0           53
# Last ten rows of the data frame.
tail(NBA1920, n = 10)
##     League            Name Team Position GamesPlayed TotalMinutesPlayed
## 520    NBA Justise Winslow  MIA       SF          11                353
## 521    NBA  Christian Wood  DET       PF          62               1319
## 522    NBA    Delon Wright  DAL       SG          73               1570
## 523    NBA  Wright-foreman  UTA       PG           4                 45
## 524    NBA  Thaddeus Young  CHI       PF          64               1594
## 525    NBA      Trae Young  ATL       PG          60               2119
## 526    NBA     Cody Zeller  CHA        C          58               1343
## 527    NBA    Tyler Zeller  SAN        C           1                  4
## 528    NBA      Ante Zizic  CLE        C          22                222
## 529    NBA     Ivica Zubac  LAC        C          72               1326
##     FieldGoalsMade FieldGoalsAttempted ThreesMade ThreesAttempted
## 520             50                 129          6              27
## 521            288                 508         54             140
## 522            190                 412         47             128
## 523              7                  20          2              10
## 524            269                 602         79             223
## 525            546                1249        205             568
## 526            251                 480         18              75
## 527              1                   4          0               0
## 528             41                  72          0               0
## 529            236                 387          0               2
##     FreeThrowsMade FreeThrowsAttempted OffensiveRebounds TotalRebounds Assists
## 520             18                  27                16            73      44
## 521            181                 243               104           390      60
## 522             77                 100                71           281     244
## 523              3                   4                 0             5       7
## 524             42                  72                94           315     117
## 525            481                 559                31           255     560
## 526            122                 179               161           412      88
## 527              0                   0                 3             4       0
## 528             14                  19                17            65       6
## 529            124                 166               199           545      82
##     Steals Turnovers Blocks PersonalFouls Disqualifications TotalPoints
## 520      8        24      5            38                 0         124
## 521     34        84     55            98                 0         811
## 522     84        73     22            93                 0         504
## 523      2         3      0             5                 0          19
## 524     92       102     23           134                 1         659
## 525     65       289      8           104                 0        1778
## 526     40        75     27           140                 1         642
## 527      0         0      0             0                 0           2
## 528      7        10      5            27                 0          96
## 529     16        61     66           168                 2         596
##     Technicals Ejections FlagrantFouls GamesStarted
## 520          0         0             0            5
## 521          1         0             0           12
## 522          0         0             0            6
## 523          0         0             0            0
## 524          4         0             0           16
## 525          7         0             0           60
## 526          0         0             0           39
## 527          0         0             0            0
## 528          0         0             0            0
## 529          1         0             0           70
# tidyverse approach: slice() selects rows by position.
nrow(NBA1920) # number of rows
## [1] 529
# First five and last five rows; n() is the row count of the piped data.
df2 <- NBA1920 %>%
  slice(c(1:5, (n() - 4):n()))
# Arbitrary positions work as well: rows 1, 3 and 5.
df21 <- NBA1920 %>%
  slice(c(1, 3, 5))
# 3. Who are the iron men? Select the records with GamesPlayed >= 65.
# Start with a summary and a histogram to get a feel for the distribution.
summary(NBA1920[["GamesPlayed"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00   21.00   48.00   42.18   62.00   74.00
ggplot(NBA1920, aes(GamesPlayed)) +
  geom_histogram(bins = 20)

df3 <- filter(NBA1920, GamesPlayed >= 65)

# 4. Sort the NBA data by GamesPlayed, take the first five rows and keep only
#    the Name, Team and GamesPlayed columns.
# Ascending order.
df41 <- arrange(NBA1920, GamesPlayed)

# Descending order, then the first five rows and the three columns.
df42 <- NBA1920 %>%
  arrange(desc(GamesPlayed)) %>%
  slice(1:5) %>%
  select(Name, Team, GamesPlayed)


# 5. Who is the three-point king? Select the five records with the largest
#    ThreesMade.

# Tidy approach 1: top_n() returns the rows holding the five largest values.
# Note that top_n() does not sort the rows it returns.
# NOTE(review): top_n() is superseded in current dplyr; slice_max() is the
# suggested replacement, but it returns the rows sorted.
df51 <- top_n(NBA1920, n = 5, wt = ThreesMade)

# Tidy approach 2: sort by ThreesMade with arrange(), then take the first
# five observations with slice().
df52 <- NBA1920 %>%
  arrange(desc(ThreesMade)) %>%
  slice(1:5)

# 6. Count the players on each team (one row per Team, count in 人數).
df6 <- summarise(group_by(NBA1920, Team), 人數 = n())

# 7. Count the players on each team and sort the teams by head count,
#    largest first.
df7 <- NBA1920 %>%
  group_by(Team) %>%
  summarise(人數 = n()) %>%
  arrange(desc(人數))

# Horizontal bar chart; bars follow the factor-level order of Team, not the
# row order of df7. No fill aesthetic is mapped, so a fill scale would have
# no effect and is left out.
ggplot(data = df7, aes(x = Team, y = 人數)) +
  geom_col() +
  coord_flip()

# Same chart with the bars ordered by head count.
ggplot(data = df7, aes(x = reorder(Team, 人數), y = 人數)) +
  geom_col() +
  coord_flip()

# Exercise 1. Keep the Name, GamesPlayed and GamesStarted columns.
df1.1 <- select(NBA1920, Name, GamesPlayed, GamesStarted)

# Exercise 3. Who are the iron men? Select the records with
# TotalMinutesPlayed >= 2000.
summary(NBA1920$TotalMinutesPlayed)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##     1.0   281.0   921.0   967.7  1594.0  2559.0
# bins is given explicitly (30 is the value stat_bin() falls back to), so the
# plot is unchanged but ggplot2 no longer prints its "Pick better value" message.
ggplot(NBA1920, aes(x = TotalMinutesPlayed)) +
  geom_histogram(bins = 30)

df3.3 <- NBA1920 %>%
  filter(TotalMinutesPlayed >= 2000)
# Exercise 4. Sort the NBA data by TotalMinutesPlayed (largest first), take
# the first ten rows and keep Name, Team and TotalMinutesPlayed. Position is
# kept as well because the chart below uses it as the fill colour.
df4.4 <- NBA1920 %>%
  arrange(desc(TotalMinutesPlayed)) %>%
  slice(1:10) %>%
  select(Name, Team, TotalMinutesPlayed, Position)
ggplot(df4.4, aes(x = Name, y = TotalMinutesPlayed, fill = Position)) +
  geom_col()

# The intermediate object is optional: the pipeline can feed ggplot()
# directly. This version colours the bars by Team.
NBA1920 %>%
  arrange(desc(TotalMinutesPlayed)) %>%
  slice(1:10) %>%
  select(Name, Team, TotalMinutesPlayed, Position) %>%
  ggplot(aes(x = Name, y = TotalMinutesPlayed, fill = Team)) +
  geom_col()

# Exercise 5. Who is the free-throw king? Take the five records with the
# largest FreeThrowsMade, keep Name, Team and FreeThrowsMade, and plot them.
df5.1 <- NBA1920 %>%
  arrange(desc(FreeThrowsMade)) %>%
  slice(1:5) %>%
  select(Name, Team, FreeThrowsMade)
ggplot(df5.1, aes(x = Name, y = FreeThrowsMade, fill = Team)) +
  geom_col()

# Sorted bar chart: the negated value puts the largest bar first.
ggplot(
  df5.1,
  aes(x = reorder(Name, -FreeThrowsMade), y = FreeThrowsMade, fill = Team)
) +
  geom_col()

# Exercise 6. Count the players at each court position, most common first.
df6.1 <- arrange(
  summarise(group_by(NBA1920, Position), freq = n()),
  desc(freq)
)


# Exercise 7. Count the players per team and court position.
# .groups = "drop" returns an ungrouped result instead of one still grouped
# by Team, which also silences the message summarise() would print.
df7.2 <- NBA1920 %>%
  group_by(Team, Position) %>%
  summarise(人數 = n(), .groups = "drop")

# Stacked horizontal bars: one bar per team, segments coloured by position.
ggplot(data = df7.2, aes(x = Team, y = 人數, fill = Position)) +
  geom_col() +
  scale_fill_brewer(palette = "Set3") +
  coord_flip()

# Sorted bar chart. df7.2 has one row per team and position, so the length of
# a stacked bar is the sum of 人數 for that team. reorder() summarises with
# the mean by default; FUN = sum ranks the teams by the actual bar length.
ggplot(
  data = df7.2,
  aes(x = reorder(Team, 人數, FUN = sum), y = 人數, fill = Position)
) +
  geom_col() +
  scale_fill_brewer(palette = "Set3") +
  coord_flip()