Required packages

The library() function is used for loading all the necessary packages. It is assumed that the packages were already installed previously in the current R session by using install.packages() function. The knitr::opts_knit$set was used to set working directory to the Project’s root directory.

library(dplyr)
library(readr)
library(magrittr)
library(knitr)
library(tidyr)
library(Hmisc)
library(outliers)
library(forecast)
library(rmarkdown)
library(webshot)

# set the global root directory to the Project's root directory in setup chunk 
# so that it is recognised in all subsequent chunks
knitr::opts_knit$set(root.dir = rprojroot::find_rstudio_root_file())

Executive Summary

Data

The 2 datasets contain stats of players in NBA in 2017-2018 season. Some of the stats are in nba dataset and some are in nba_extra dataset. As we are only some of the columns, so we have ised the following columns in the datasets:

nba <- read_csv("nba.csv")
nba_extra <- read_csv("nba_extra.csv")

Here is the preview of the datasets by using the head() function.

head(nba)
head(nba_extra)

The dim() function was used to check and show the dimension of the nba data frame. Noted that it consists 664 observations of 29 variables.

dim(nba)
## [1] 664  29

The dim() function was used to check and show the dimension of the nba_extra data frame. Noted that it consists 664 observations of 30 variables.

dim(nba_extra)
## [1] 664  30

Here, we have removed the columns on which we are not going to preprocess our analysis. We have removed some of the columns and selected the columns to be used in our analysis using select() function of dplyr package.

cols_to_rem_nba <- c("PER", "TS%", "FTr", "ORB%", "TRB%", "AST%", "STL%", "BLK%", "TOV%", "USG%", "X20", "X25", "BPM", "OBPM", "DBPM", "WS", "OWS", "WS/48", "VORP")
nba_clean <- select(nba, -cols_to_rem_nba)

cols_to_rem_nba_extra <- c("FG", "FGA", "FG%", "eFG%", "FT", "FTA", "FT%", "ORB", "TRB", "DRB", "PF")
nba_extra_clean <- select(nba_extra, -cols_to_rem_nba_extra)

After cleaning the dataset, the dimensions are as follows:

The first dataset nba, now nba_clean, consists 664 observations of 10 variables.

dim(nba_clean)
## [1] 664  10

The second dataset, nba_extra, now nba_extra_clean, consists 664 observations of 19 variables.

dim(nba_extra_clean)
## [1] 664  19

Tidy & Manipulate Data I

Here, we have joined both the datasets using the left_join() function of dplyr. The name of the joined dataset is nba_left.

nba_left <- left_join(nba_clean, nba_extra_clean)
## Joining, by = c("Rk", "Player", "Pos", "Age", "Tm", "G", "MP")

The dimension of the joined dataset is:

It consists of 664 observations of 22 variables.

dim(nba_left)
## [1] 664  22

Here, we have renamed some of the columns of the dataset as they were not following correct naming convention, namely: - 3PAr as ThreePAr, - 3P as ThreeP, - 3PA as ThreePA, - 3P% as ThreePPercent, - 2P as TwoP, - 2PA as TwoPA, - 2P% as TwoPPercent,

nba_left <- nba_left %>% rename(
  ThreePAr = "3PAr",
  ThreeP = "3P",
  ThreePA = "3PA",
  ThreePPercent = "3P%",
  TwoP = "2P",
  TwoPA = "2PA",
  TwoPPercent = "2P%",
)

Tidy & Manipulate Data II

Moving forward, we are manipulating the Player column in to PlayerName and PlayerId by using the separate() function of dplyr package.

nba_left <- nba_left %>% separate(Player, into = c("PlayerName", "PlayerId"), sep = "\\\\")

Understand

After separating the variable Player into PlayerName and PlayerId, we again check the dimension by using dim() function. Here, we get an extra column which increases the tally to 23 columns and 664 observations.

dim(nba_left)
## [1] 664  23

Here, we are checking the type of every variable in dataset.

lapply(nba_left, typeof)
## $Rk
## [1] "double"
## 
## $PlayerName
## [1] "character"
## 
## $PlayerId
## [1] "character"
## 
## $Pos
## [1] "character"
## 
## $Age
## [1] "double"
## 
## $Tm
## [1] "character"
## 
## $G
## [1] "double"
## 
## $MP
## [1] "double"
## 
## $ThreePAr
## [1] "double"
## 
## $`DRB%`
## [1] "double"
## 
## $DWS
## [1] "double"
## 
## $GS
## [1] "double"
## 
## $ThreeP
## [1] "double"
## 
## $ThreePA
## [1] "double"
## 
## $ThreePPercent
## [1] "double"
## 
## $TwoP
## [1] "double"
## 
## $TwoPA
## [1] "double"
## 
## $TwoPPercent
## [1] "double"
## 
## $AST
## [1] "double"
## 
## $STL
## [1] "double"
## 
## $BLK
## [1] "double"
## 
## $TOV
## [1] "double"
## 
## $PTS
## [1] "double"

Here, we have changed the type of variable from double to integer.

nba_left <- nba_left %>% mutate(
  Rk = as.integer(Rk),
  Age = as.integer(Age),
  G = as.integer(G),
  MP = as.integer(MP),
  GS = as.integer(GS),
  ThreeP = as.integer(ThreeP),
  ThreePA = as.integer(ThreePA),
  TwoP = as.integer(TwoP),
  TwoPA = as.integer(TwoPA),
  AST = as.integer(AST),
  STL = as.integer(STL),
  BLK = as.integer(BLK),
  TOV = as.integer(TOV),
  PTS = as.integer(PTS)
)

Further, we have factored the variable Pos into 7 levels.

nba_left <- nba_left %>% mutate(
  Pos = factor(Pos, levels = c("SG", "PF", "C", "PG", "SF", "SF-FG", "PG-SG"))
)
 is.factor(nba_left$Pos)
## [1] TRUE

Checking the type of all the variables in the dataset after changing the type above. We have used lapply() function of base package.

lapply(nba_left, typeof)
## $Rk
## [1] "integer"
## 
## $PlayerName
## [1] "character"
## 
## $PlayerId
## [1] "character"
## 
## $Pos
## [1] "integer"
## 
## $Age
## [1] "integer"
## 
## $Tm
## [1] "character"
## 
## $G
## [1] "integer"
## 
## $MP
## [1] "integer"
## 
## $ThreePAr
## [1] "double"
## 
## $`DRB%`
## [1] "double"
## 
## $DWS
## [1] "double"
## 
## $GS
## [1] "integer"
## 
## $ThreeP
## [1] "integer"
## 
## $ThreePA
## [1] "integer"
## 
## $ThreePPercent
## [1] "double"
## 
## $TwoP
## [1] "integer"
## 
## $TwoPA
## [1] "integer"
## 
## $TwoPPercent
## [1] "double"
## 
## $AST
## [1] "integer"
## 
## $STL
## [1] "integer"
## 
## $BLK
## [1] "integer"
## 
## $TOV
## [1] "integer"
## 
## $PTS
## [1] "integer"

Scan I

Following that we have checked if there are any null values in the dataset variables. We have checked all the columns and found that three columns have na values as shown below.

which(is.na(nba_left$ThreePAr))
## [1] 379 405 429 603
which(is.na(nba_left$ThreePPercent))
##  [1]   6  15  23  24  25  52  64 102 113 128 133 134 150 156 158 164 184
## [18] 215 218 278 284 315 316 326 334 349 351 379 385 386 387 391 405 424
## [35] 425 426 427 428 429 431 458 469 471 475 480 481 482 487 498 501 503
## [52] 516 524 531 564 573 589 603 618 635 648 649 650 651 663
which(is.na(nba_left$TwoPPercent))
##  [1]  71  83  89  97 173 174 175 251 254 379 394 405 408 422 429 571 572
## [18] 603

Further exploring the dataset, we found that the NA values were because of the incorrect division(0/0). So we have imputed the values for those rows in the variables TwoPPercent and ThreePPercent by 0. For ThreePAr, we have replaced NA values by mean of the values.

nba_left <- nba_left %>% mutate(
  ThreePAr = ifelse(is.na(ThreePAr),
                    mean(ThreePAr, na.rm = TRUE),
                    ThreePAr),
  ThreePPercent = ifelse(is.na(ThreePPercent),
                         0,
                         ThreePPercent),
  TwoPPercent = ifelse(is.na(TwoPPercent),
                             0,
                             TwoPPercent)
)

Checking if the imputation went right.

which(is.na(nba_left$ThreePAr))
## integer(0)
which(is.na(nba_left$ThreePPercent))
## integer(0)
which(is.na(nba_left$TwoPPercent))
## integer(0)

Scan II

This is the capping function which we will be using for capping.

cap <- function(x){
  quantiles <- quantile( x, c(.05, 0.25, 0.75, .95 ) )
  x[ x < quantiles[2] - 1.5*IQR(x) ] <- quantiles[1]
  x[ x > quantiles[3] + 1.5*IQR(x) ] <- quantiles[4]
  x }
age <- nba_left %>% 
  summarise(Min = min(Age, na.rm = TRUE),
            Q1 = quantile(Age, na.rm = TRUE)[2],
            Median = median(Age, na.rm = TRUE),
            Mean = mean(Age, na.rm = TRUE),
            Q3 = quantile(Age, na.rm = TRUE)[4],
            Max = max(Age, na.rm = TRUE),
            SD = sd(Age, na.rm = TRUE),
            IQR = IQR(Age, na.rm = TRUE))

age
lb_Age <- age$Q1 - (1.5 * age$IQR)
ub_Age <- age$Q3 + (1.5 * age$IQR)
lb_Age
## 25% 
##  14
ub_Age
## 75% 
##  38
which(nba_left$Age < lb_Age)
## integer(0)
which(nba_left$Age > ub_Age) 
## [1] 106 214 461 582
cap_age <-na.omit(nba_left$Age)

cap(cap_age) %>% boxplot()

threepar <- nba_left %>% 
  summarise(Min = min(ThreePAr, na.rm = TRUE),
            Q1 = quantile(ThreePAr, na.rm = TRUE)[2],
            Median = median(ThreePAr, na.rm = TRUE),
            Mean = mean(ThreePAr, na.rm = TRUE),
            Q3 = quantile(ThreePAr, na.rm = TRUE)[4],
            Max = max(ThreePAr, na.rm = TRUE),
            SD = sd(ThreePAr, na.rm = TRUE),
            IQR = IQR(ThreePAr, na.rm = TRUE))

threepar
lb_ThreePAr <- threepar$Q1 - (1.5 * threepar$IQR)
ub_ThreePAr <- threepar$Q3 + (1.5 * threepar$IQR)
lb_ThreePAr
##    25% 
## -0.321
ub_ThreePAr
##   75% 
## 0.969
which(nba_left$ThreePAr < lb_ThreePAr)
## integer(0)
which(nba_left$ThreePAr > ub_ThreePAr)
##  [1]  71  83  89  97 173 174 175 251 254 394 408 422 571 572
cap_threepar <-na.omit(nba_left$ThreePAr)

cap(cap_threepar) %>% boxplot()

drb <- nba_left %>% 
  summarise(Min = min(`DRB%`, na.rm = TRUE),
            Q1 = quantile(`DRB%`, na.rm = TRUE)[2],
            Median = median(`DRB%`, na.rm = TRUE),
            Mean = mean(`DRB%`, na.rm = TRUE),
            Q3 = quantile(`DRB%`, na.rm = TRUE)[4],
            Max = max(`DRB%`, na.rm = TRUE),
            SD = sd(`DRB%`, na.rm = TRUE),
            IQR = IQR(`DRB%`, na.rm = TRUE))

drb
lb_drb <- drb$Q1 - (1.5 * drb$IQR)
ub_drb <- drb$Q3 + (1.5 * drb$IQR)
lb_drb
##  25% 
## -3.2
ub_drb
##  75% 
## 32.2
which(nba_left$`DRB%` < lb_drb)
## integer(0)
which(nba_left$`DRB%` > ub_drb)
##  [1]  71  89 164 169 191 260 281 334 387 391 573 631
cap_drb <-na.omit(nba_left$`DRB%`)

cap(cap_drb) %>% boxplot()

dws <- nba_left %>% 
  summarise(Min = min(DWS, na.rm = TRUE),
            Q1 = quantile(DWS, na.rm = TRUE)[2],
            Median = median(DWS, na.rm = TRUE),
            Mean = mean(DWS, na.rm = TRUE),
            Q3 = quantile(DWS, na.rm = TRUE)[4],
            Max = max(DWS, na.rm = TRUE),
            SD = sd(DWS, na.rm = TRUE),
            IQR = IQR(DWS, na.rm = TRUE))

dws
lb_dws <- dws$Q1 - (1.5 * dws$IQR)
ub_dws <- dws$Q3 + (1.5 * dws$IQR)
lb_dws
## 25% 
##  -2
ub_dws
## 75% 
## 3.6
which(nba_left$DWS < lb_dws)
## integer(0)
which(nba_left$DWS > ub_dws)
##  [1]  13  16 104 136 149 169 178 210 215 233 279 281 290 421 463 473 539
## [18] 555 575 626
cap_dws <-na.omit(nba_left$DWS)

cap(cap_dws) %>% boxplot()

threep <- nba_left %>% 
  summarise(Min = min(ThreeP, na.rm = TRUE),
            Q1 = quantile(ThreeP, na.rm = TRUE)[2],
            Median = median(ThreeP, na.rm = TRUE),
            Mean = mean(ThreeP, na.rm = TRUE),
            Q3 = quantile(ThreeP, na.rm = TRUE)[4],
            Max = max(ThreeP, na.rm = TRUE),
            SD = sd(ThreeP, na.rm = TRUE),
            IQR = IQR(ThreeP, na.rm = TRUE))

threep
lb_threep <- threep$Q1 - (1.5 * threep$IQR)
ub_threep <- threep$Q3 + (1.5 * threep$IQR)
lb_threep
## 25% 
## -92
ub_threep
## 75% 
## 158
which(nba_left$ThreeP < lb_threep)
## integer(0)
which(nba_left$ThreeP > ub_threep)
##  [1]  17  20  41 100 136 137 147 148 172 177 210 217 233 241 263 271 290
## [18] 293 348 350 370 377 396 413 421 446 473 520 588 591 613 616 638
cap_threept <-na.omit(nba_left$ThreeP)

cap(cap_threept) %>% boxplot()

threepa <- nba_left %>% 
  summarise(Min = min(ThreePA, na.rm = TRUE),
            Q1 = quantile(ThreePA, na.rm = TRUE)[2],
            Median = median(ThreePA, na.rm = TRUE),
            Mean = mean(ThreePA, na.rm = TRUE),
            Q3 = quantile(ThreePA, na.rm = TRUE)[4],
            Max = max(ThreePA, na.rm = TRUE),
            SD = sd(ThreePA, na.rm = TRUE),
            IQR = IQR(ThreePA, na.rm = TRUE))

threepa
lb_threepa <- threepa$Q1 - (1.5 * threepa$IQR)
ub_threepa <- threepa$Q3 + (1.5 * threepa$IQR)
lb_threepa
##      25% 
## -266.375
ub_threepa
##     75% 
## 462.625
which(nba_left$ThreePA < lb_threepa)
## integer(0)
which(nba_left$ThreePA > ub_threepa)
##  [1]  17  41 136 137 147 177 210 217 233 290 370 377 396 421 588 613 638
cap_threepa <-na.omit(nba_left$ThreePA)

threepa_cap<-cap(cap_threepa)
twopt <- nba_left %>% 
  summarise(Min = min(TwoP, na.rm = TRUE),
            Q1 = quantile(TwoP, na.rm = TRUE)[2],
            Median = median(TwoP, na.rm = TRUE),
            Mean = mean(TwoP, na.rm = TRUE),
            Q3 = quantile(TwoP, na.rm = TRUE)[4],
            Max = max(TwoP, na.rm = TRUE),
            SD = sd(TwoP, na.rm = TRUE),
            IQR = IQR(TwoP, na.rm = TRUE))

twopt
lb_twopt <- twopt$Q1 - (1.5 * twopt$IQR)
ub_twopt <- twopt$Q3 + (1.5 * twopt$IQR)
lb_twopt
##      25% 
## -234.375
ub_twopt
##     75% 
## 430.625
which(nba_left$TwoP < lb_twopt)
## integer(0)
which(nba_left$TwoP > ub_twopt)
##  [1]   3   7  16  41 104 149 157 169 172 178 270 281 304 396 412 463 473
## [18] 517 555 592 619 626 632
cap_twopt <-na.omit(nba_left$TwoP)

twopt_cap<-cap(cap_twopt)
twopa <- nba_left %>% 
  summarise(Min = min(TwoPA, na.rm = TRUE),
            Q1 = quantile(TwoPA, na.rm = TRUE)[2],
            Median = median(TwoPA, na.rm = TRUE),
            Mean = mean(TwoPA, na.rm = TRUE),
            Q3 = quantile(TwoPA, na.rm = TRUE)[4],
            Max = max(TwoPA, na.rm = TRUE),
            SD = sd(TwoPA, na.rm = TRUE),
            IQR = IQR(TwoPA, na.rm = TRUE))

twopa
lb_twopa <- twopa$Q1 - (1.5 * twopa$IQR)
ub_twopa <- twopa$Q3 + (1.5 * twopa$IQR)
lb_twopa
##      25% 
## -459.375
ub_twopa
##     75% 
## 853.625
which(nba_left$TwoPA < lb_twopa)
## integer(0)
which(nba_left$TwoPA> ub_twopa)
##  [1]   7  16  35  41 149 157 169 270 281 304 396 412 463 473 517 545 555
## [18] 592 619 626 632
cap_twopa <-na.omit(nba_left$TwoPA)

twopa_cap<-cap(cap_twopa) 
twopper <- nba_left %>% 
  summarise(Min = min(TwoPPercent, na.rm = TRUE),
            Q1 = quantile(TwoPPercent, na.rm = TRUE)[2],
            Median = median(TwoPPercent, na.rm = TRUE),
            Mean = mean(TwoPPercent, na.rm = TRUE),
            Q3 = quantile(TwoPPercent, na.rm = TRUE)[4],
            Max = max(TwoPPercent, na.rm = TRUE),
            SD = sd(TwoPPercent, na.rm = TRUE),
            IQR = IQR(TwoPPercent, na.rm = TRUE))

twopper
lb_twopper <- twopper$Q1 - (1.5 * twopper$IQR)
ub_twopper <- twopper$Q3 + (1.5 * twopper$IQR)
lb_twopper
##   25% 
## 0.275
ub_twopper
##   75% 
## 0.707
which(nba_left$TwoPPercent < lb_twopper)
##  [1]  29  71  83  89  97 102 120 123 132 166 167 168 173 174 175 251 254
## [18] 272 278 307 331 346 351 379 394 398 405 408 422 428 429 444 499 554
## [35] 564 571 572 578 603 604 617 633 648
which(nba_left$TwoPPercent> ub_twopper) 
##  [1]  19 184 212 250 253 328 406 415 435 460 482 490 512 515 524 528 573
## [18] 640 663
cap_twopper <-na.omit(nba_left$TwoPPercent)

twopper_cap<-cap(cap_twopper) 
ast <- nba_left %>% 
  summarise(Min = min(AST, na.rm = TRUE),
            Q1 = quantile(AST, na.rm = TRUE)[2],
            Median = median(AST, na.rm = TRUE),
            Mean = mean(AST, na.rm = TRUE),
            Q3 = quantile(AST, na.rm = TRUE)[4],
            Max = max(AST, na.rm = TRUE),
            SD = sd(AST, na.rm = TRUE),
            IQR = IQR(AST, na.rm = TRUE))

ast
lb_ast <- ast$Q1 - (1.5 * ast$IQR)
ub_ast <- ast$Q3 + (1.5 * ast$IQR)
lb_ast
##  25% 
## -160
ub_ast
## 75% 
## 298
which(nba_left$AST < lb_ast)
## integer(0)
which(nba_left$AST> ub_ast) 
##  [1]  16  33  34  36  37  41  58  60 127 147 157 160 165 171 172 199 207
## [18] 221 224 228 233 270 279 290 293 296 304 325 366 370 377 397 412 473
## [35] 489 492 533 539 545 555 560 561 577 597 613 614 626 638
cap_ast <-na.omit(nba_left$AST)

ast_cap<-cap(cap_ast)
stl <- nba_left %>% 
  summarise(Min = min(STL, na.rm = TRUE),
            Q1 = quantile(STL, na.rm = TRUE)[2],
            Median = median(STL, na.rm = TRUE),
            Mean = mean(STL, na.rm = TRUE),
            Q3 = quantile(STL, na.rm = TRUE)[4],
            Max = max(STL, na.rm = TRUE),
            SD = sd(STL, na.rm = TRUE),
            IQR = IQR(STL, na.rm = TRUE))

stl
lb_stl <- stl$Q1 - (1.5 * stl$IQR)
ub_stl <- stl$Q3 + (1.5 * stl$IQR)
lb_stl
## 25% 
## -58
ub_stl
## 75% 
## 110
which(nba_left$STL < lb_stl)
## integer(0)
which(nba_left$STL> ub_stl) 
##  [1]  13  58  60  94 136 149 169 210 233 239 270 304 412 421 473 506 525
## [18] 539 555 626 657
cap_stl <-na.omit(nba_left$STL)

stl_cap<-cap(cap_stl) 
blk <- nba_left %>% 
  summarise(Min = min(BLK, na.rm = TRUE),
            Q1 = quantile(BLK, na.rm = TRUE)[2],
            Median = median(BLK, na.rm = TRUE),
            Mean = mean(BLK, na.rm = TRUE),
            Q3 = quantile(BLK, na.rm = TRUE)[4],
            Max = max(BLK, na.rm = TRUE),
            SD = sd(BLK, na.rm = TRUE),
            IQR = IQR(BLK, na.rm = TRUE))

blk
lb_blk <- blk$Q1 - (1.5 * blk$IQR)
ub_blk <- blk$Q3 + (1.5 * blk$IQR)
lb_blk
##   25% 
## -32.5
ub_blk
##  75% 
## 59.5
which(nba_left$BLK < lb_blk)
## integer(0)
which(nba_left$BLK> ub_blk) 
##  [1]   3   7   8  13  16  54 104 110 115 124 135 136 149 169 172 178 188
## [18] 207 208 215 218 220 223 224 256 270 279 281 285 304 324 325 334 362
## [35] 374 410 463 467 502 504 508 525 555 592 595 596 598 608 625 631
cap_blk <-na.omit(nba_left$BLK)

blk_cap<-cap(cap_blk)
tov <- nba_left %>% 
  summarise(Min = min(TOV, na.rm = TRUE),
            Q1 = quantile(TOV, na.rm = TRUE)[2],
            Median = median(TOV, na.rm = TRUE),
            Mean = mean(TOV, na.rm = TRUE),
            Q3 = quantile(TOV, na.rm = TRUE)[4],
            Max = max(TOV, na.rm = TRUE),
            SD = sd(TOV, na.rm = TRUE),
            IQR = IQR(TOV, na.rm = TRUE))

tov
lb_tov <- tov$Q1 - (1.5 * tov$IQR)
ub_tov <- tov$Q3 + (1.5 * tov$IQR)
lb_tov
##  25% 
## -109
ub_tov
## 75% 
## 203
which(nba_left$TOV < lb_tov)
## integer(0)
which(nba_left$TOV> ub_tov) 
##  [1]  16  41  58  60 135 172 178 210 233 270 281 304 325 370 421 473 517
## [18] 539 555 626 638
cap_tov <-na.omit(nba_left$TOV)

tov_cap<-cap(cap_tov)
pts <- nba_left %>% 
  summarise(Min = min(PTS, na.rm = TRUE),
            Q1 = quantile(PTS, na.rm = TRUE)[2],
            Median = median(PTS, na.rm = TRUE),
            Mean = mean(PTS, na.rm = TRUE),
            Q3 = quantile(PTS, na.rm = TRUE)[4],
            Max = max(PTS, na.rm = TRUE),
            SD = sd(PTS, na.rm = TRUE),
            IQR = IQR(PTS, na.rm = TRUE))

pts
lb_pts <- pts$Q1 - (1.5 * pts$IQR)
ub_pts <- pts$Q3 + (1.5 * pts$IQR)
lb_pts
##      25% 
## -853.375
ub_pts
##      75% 
## 1579.625
which(nba_left$PTS < lb_pts)
## integer(0)
which(nba_left$PTS> ub_pts) 
##  [1]   7  16  41 149 157 172 210 233 304 370 396 412 421 473 592 613 626
## [18] 638
cap_pts <-na.omit(nba_left$PTS)

pts_cap<-cap(cap_pts)

Transform

When the DRB% histogram is plotted we can see that the graph is right skewed and hence we have used the BoxCox transformation so that the graph can be transformed to a normal distribution. The lambda value generated for this graph is 0.3893152 from which we can see the transformed histogram.

box<- BoxCox(nba_clean$`DRB%`,lambda = "auto")

hist(box)

References

-[1] Dolgun, A 2019, lecture notes, MATH2349, RMIT University, http://rare-phoenix-161610.appspot.com/secured/index.html

-[2] (How to separate by backslash) https://github.com/STAT545-UBC/Discussion/issues/394