Setup

library(dplyr)
library(readr)
library(tidyr)
library(knitr)
library(magrittr)

Locate Data

This data set contains information related to e-sports, retrieved from Kaggle.

https://www.kaggle.com/rankirsh/esports-earnings/

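Variables include: `Game`, `ReleaseDate`, `Genre`, `TotalEarnings`, `OnlineEarnings`, `TotalPlayers` and `TotalTournaments`.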

Read/Import Data

# Read the e-sports CSV (the path is relative to the working directory),
# suppressing readr's column-type message, then preview the first six rows.
Esports <- read_csv(file = "GeneralEsportData.csv", show_col_types = FALSE)
head(Esports, n = 6L)

Data description

This data set contains information related to e-sports.

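Variables include: `Game` (character), `ReleaseDate` (numeric), `Genre` (character), and `TotalEarnings`, `OnlineEarnings`, `TotalPlayers` and `TotalTournaments` (all numeric).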

Source:

Ran, K, 2021, Esports Earnings 1998 - 2021, Kaggle, viewed 17 August 2021, <https://www.kaggle.com/rankirsh/esports-earnings/>

Inspect dataset and variables

# Check dimensions of the data frame (rows, then columns).
# dim() is the generic and dispatches to the data-frame method by itself, so
# the S3 method does not need to be called by its full name.
dim(Esports)
## [1] 535   7
# Class of Variable "Game"
class(Esports$Game)
## [1] "character"
# Class of Variable "ReleaseDate"
class(Esports$ReleaseDate)
## [1] "numeric"
# Class of Variable "Genre"
class(Esports$Genre)
## [1] "character"
# Class of Variable "TotalEarnings"
class(Esports$TotalEarnings)
## [1] "numeric"
# Class of Variable "OnlineEarnings"
class(Esports$OnlineEarnings)
## [1] "numeric"
# Class of Variable "TotalPlayers"
class(Esports$TotalPlayers)
## [1] "numeric"
# Class of Variable "TotalTournaments"
class(Esports$TotalTournaments)
## [1] "numeric"
# Check column names in the data frame
colnames(Esports)
## [1] "Game"             "ReleaseDate"      "Genre"            "TotalEarnings"   
## [5] "OnlineEarnings"   "TotalPlayers"     "TotalTournaments"

Tidy data

This data set conforms to the tidy data principles, so the data frame is already in tidy format.

Summary statistics

# Summarise one numeric column of a data frame within each Genre.
#
# data:  a data frame that has a Genre column (here, Esports).
# value: unquoted name of the numeric column to summarise.
# na.rm: forwarded to every statistic. The default FALSE reproduces calling
#        the statistics with no extra arguments, so a missing value in a
#        genre makes that genre's statistics NA.
# Returns one row per Genre with the columns mean, median, min, max and sd.
summarise_by_genre <- function(data, value, na.rm = FALSE) {
  data %>%
    group_by(Genre) %>%
    summarize(
      mean = mean({{ value }}, na.rm = na.rm),
      median = median({{ value }}, na.rm = na.rm),
      min = min({{ value }}, na.rm = na.rm),
      max = max({{ value }}, na.rm = na.rm),
      sd = sd({{ value }}, na.rm = na.rm)
    )
}

# Summary Stats of Total Earnings, grouped by Genre
summarise_by_genre(Esports, TotalEarnings)
# Summary Stats of Online Earnings, grouped by Genre
summarise_by_genre(Esports, OnlineEarnings)
# Summary Stats of Total Players, grouped by Genre
summarise_by_genre(Esports, TotalPlayers)
# Summary Stats of Total Tournaments, grouped by Genre
summarise_by_genre(Esports, TotalTournaments)

Create a list

# Genre lookup stored as an unnamed two-element list: element 1 holds the
# genre labels and element 2 the matching integer IDs.
genre_labels <- c(
  "Battle Royale", "Collectible Card Game", "Fighting Game",
  "First-Person Shooter", "Multiplayer Online Battle Arena", "Puzzle Game",
  "Racing", "Role-Playing Game", "Sports", "Strategy", "Third-Person Shooter"
)
# IDs are derived from the label vector so the two elements always have the
# same length, instead of hard-coding the range 1:11 separately.
mylist <- list(genre_labels, seq_along(genre_labels))
mylist
## [[1]]
##  [1] "Battle Royale"                   "Collectible Card Game"          
##  [3] "Fighting Game"                   "First-Person Shooter"           
##  [5] "Multiplayer Online Battle Arena" "Puzzle Game"                    
##  [7] "Racing"                          "Role-Playing Game"              
##  [9] "Sports"                          "Strategy"                       
## [11] "Third-Person Shooter"           
## 
## [[2]]
##  [1]  1  2  3  4  5  6  7  8  9 10 11

Join the list

# Turn the genre lookup list into a two-column data frame, naming the columns
# at construction time rather than renaming auto-generated names afterwards.
mydf <- data.frame(Genre = mylist[[1]], GenreID = mylist[[2]])

# Attach GenreID to every game. The key is stated explicitly, so the join
# cannot silently pick up another shared column name and no "Joining, by"
# message is printed. Plain assignment replaces the %<>% compound pipe so the
# overwrite of Esports is visible.
Esports <- Esports %>%
  left_join(mydf, by = "Genre")
Esports

Subsetting I

# Take the first ten observations (all columns) and coerce them to a matrix.
# A matrix holds a single type, so the mixed columns all end up as character,
# as the "chr" in the str() output below shows.
tenobs <- Esports[seq_len(10), ]
my_mat <- as.matrix(tenobs)
str(my_mat)
##  chr [1:10, 1:8] "Age of Empires" "Age of Empires II" "Age of Empires III" ...
##  - attr(*, "dimnames")=List of 2
##   ..$ : NULL
##   ..$ : chr [1:8] "Game" "ReleaseDate" "Genre" "TotalEarnings" ...

Subsetting II

# Keep the first and last variables of Esports. The last column is located
# with ncol() rather than the hard-coded position 8, so the selection still
# means "first and last" if the number of columns changes.
twovar <- Esports[, c(1, ncol(Esports))]
# Store the subset in the working directory as an .RData file.
save(twovar, file = "First and Last Variable.RData")

Create a new Data Frame

# First score vector and the matching grade labels. The factor levels are
# listed explicitly from "Fail" up to "High Distinction" so they are not
# sorted alphabetically; the factor itself is unordered (see str() below).
Score1st <- c(98, 72, 65, 51, 6)
Grade <- factor(c("High Distinction", "Distinction", "Credit", "Pass", "Fail"),
                levels = c("Fail", "Pass", "Credit", "Distinction", "High Distinction"))
Results <- data.frame(Score1st, Grade)
Results
str(Results)
## 'data.frame':    5 obs. of  2 variables:
##  $ Score1st: num  98 72 65 51 6
##  $ Grade   : Factor w/ 5 levels "Fail","Pass",..: 5 4 3 2 1
# Append a second score vector as a new column.
Score2nd <- c(99, 73, 63, 52, 41)
Results2 <- cbind(Results, Score2nd)
Results2
str(Results2)
## 'data.frame':    5 obs. of  3 variables:
##  $ Score1st: num  98 72 65 51 6
##  $ Grade   : Factor w/ 5 levels "Fail","Pass",..: 5 4 3 2 1
##  $ Score2nd: num  99 73 63 52 41
# dim() is the generic and dispatches to the data-frame method by itself.
dim(Results2)
## [1] 5 3

Create another Data Frame

Create another data frame that shares a common variable (`Grade`) with the data frame created in step 11 (`Results2`, built in the previous section).

# Build a second data frame that shares the Grade variable with Results2,
# then combine the two.
StudentName <- c("Jessica", "Henry", "Peter", "Samantha", "Jeff")
Results3 <- data.frame(StudentName, Grade)
Results3
# Grade is the only column the two data frames share. Naming it as the key
# makes the join explicit and avoids the "Joining, by" message.
Results4 <- left_join(Results2, Results3, by = "Grade")
# Put the student name first. Columns are selected by name rather than by
# position, so the reorder does not depend on the join's column order.
Results5 <- Results4[, c("StudentName", "Grade", "Score1st", "Score2nd")]
Results5