This project grew out of my interest in both the FIFA video game and practicing R. I am a beginner in R, so any comments or advice would be highly appreciated.
DATA COLLECTION FROM FUTWIZ.COM
# Loading needed packages:
library(rvest)
library(tidyverse)
# Checking the method for getting data on a single results page
specific_link <- "https://www.futwiz.com/en/fifa19/career-mode/players?page=811"
my_xpath <- '//*[@id="panel"]/div[5]/div/div[2]/table'
# Read the page, pick the node(s) matched by the XPath, parse them as a table,
# then drop the first column and the first row of the resulting data frame.
df811 <- specific_link %>%
  read_html() %>%
  html_nodes(xpath = my_xpath) %>%
  html_table() %>%
  as.data.frame() %>%
  select(-1) %>%
  slice(-1)
# Print the first rows to eyeball the result
head(df811)
# Create a vector of all links: the unnumbered landing page first, followed by
# the numbered pages 1..811. paste0() is vectorised over the page numbers, so
# the whole vector is built in one call instead of being grown with c() on
# every loop iteration.
fifa19_links <- c(
  "https://www.futwiz.com/en/fifa19/career-mode/players",
  paste0(
    "https://www.futwiz.com/en/fifa19/career-mode/players?page=",
    seq_len(811)
  )
)
# Create a function to extract data from a link
#
# Reads the page at `link`, selects the nodes matching `xpath`, parses them
# with html_table() and coerces the result to a data frame.
#
# Arguments:
#   link  - URL of the page to read.
#   xpath - XPath expression locating the table. Defaults to the global
#           `my_xpath`; the default is evaluated lazily, at call time.
#   delay - number of seconds to pause after the page has been read
#           (default 1), so that consecutive calls are spaced out.
#
# Returns: the data frame built from the matched node(s).
get_infor <- function(link, xpath = my_xpath, delay = 1) {
  df_infor <- link %>%
    read_html() %>%
    html_nodes(xpath = xpath) %>%
    html_table() %>%
    as.data.frame()
  # Pause before handing the result back to the caller
  Sys.sleep(delay)
  df_infor
}
# Getting data
# Apply get_infor() to every link; the result is a list with one element per link
list_fifa19 <- lapply(fifa19_links, get_infor)
# Row-bind the list elements into a single data frame
df_fifa19 <- do.call("rbind", list_fifa19)
# Save the raw scrape to disk
write.csv(
  df_fifa19,
  "C:\\Users\\Thinh Dao\\Desktop\\Mini R Project 7.7.19\\data_codes\\fifa19.scv"
)
DATA PROCESSING
# Start the processing stage from an empty workspace and reload the raw file
rm(list = ls())
library(tidyverse)
df_fifa19 <- read_csv("C:\\Users\\Thinh Dao\\Desktop\\Mini R Project 7.7.19\\data_codes\\fifa19.scv")
# Take the values of the first row (reshaped to key/value form) as the column
# names, skipping the value that belongs to the first column
vec_name <- pull(gather(slice(df_fifa19, 1)), 2)[-1]
# Remove the first column
df_fifa19 <- select(df_fifa19, -1)
# Rename the data frame with the names collected above
names(df_fifa19) <- vec_name
# Remove the title rows (rows whose OVR cell is the literal text "OVR")
df_fifa19 <- filter(df_fifa19, OVR != "OVR")
# Check how many values in the Player column are duplicates
sum(duplicated(df_fifa19$Player))
# Keep only the first occurrence of each Player value
df_fifa19 <- df_fifa19[!duplicated(df_fifa19$Player), ]
# Extract player name, club name and league from the Player column.
#
# A Player cell is split once on "\r\n" into the player name and a
# "club | league" remainder; the remainder is then split once on "|".
# str_split_fixed() always returns a matrix with exactly two columns (one row
# per input string, padding with "" when a separator is absent), so a cell
# with a missing or extra separator can no longer shift the values of every
# following row, which the previous unlist() + matrix(ncol = 2) approach did.
df_name <- df_fifa19 %>%
  select(Player) %>%
  mutate(
    # First piece: the player name
    Player_name = str_squish(str_split_fixed(Player, "\r\n", n = 2)[, 1]),
    # Second piece: "club | league", with any remaining line breaks removed
    Club_League = str_split_fixed(Player, "\r\n", n = 2)[, 2] %>%
      str_replace_all("\r\n", "") %>%
      str_squish(),
    # Text before the first "|" is the club name
    Club = str_squish(str_split_fixed(Club_League, "\\|", n = 2)[, 1]),
    # Text after the first "|" is the league; drop any stray "|" left over
    League = str_split_fixed(Club_League, "\\|", n = 2)[, 2] %>%
      str_replace_all("\\|", "") %>%
      str_squish()
  )
# Attach the extracted name/club/league columns to the main table
df_fifa19_final <- right_join(df_fifa19, df_name, by = "Player")
head(df_fifa19_final)
# Convert some columns from character to numeric. lapply() always returns one
# numeric vector per column, whereas sapply() changes its return shape with
# the number of rows (matrix, plain vector or list).
x <- c("OVR", "POT", "GROW", "Age", "Contract", "S/M", "W/F")
df_fifa19_final[x] <- lapply(df_fifa19_final[x], as.numeric)
# Give the awkwardly named columns syntactic names
df_fifa19_final <- df_fifa19_final %>%
  rename(
    Total_Stats = `Total Stats`,
    Skill_move = `S/M`,
    Weak_foot = `W/F`,
    W_Rs = `W/Rs`
  )
# Convert Total_Stats from character to numeric after removing the commas
df_fifa19_final$Total_Stats <- df_fifa19_final$Total_Stats %>%
  str_replace_all(",", "") %>%
  as.numeric()
# Save the tidy table to disk
write.csv(
  df_fifa19_final,
  "C:\\Users\\Thinh Dao\\Desktop\\Mini R Project 7.7.19\\data_codes\\fifa19_tidy.scv"
)
VISUALISATION
Number of players in FIFA 19 classified by position and foot preference
# Start the visualisation stage from an empty workspace and load the tidy file
rm(list = ls())
library(tidyverse)
my_df <- read_csv("C:\\Users\\Thinh Dao\\Desktop\\Mini R Project 7.7.19\\data_codes\\fifa19_tidy.scv")
# Collapse the detailed positions into four groups; every position that is not
# listed as a defender, midfielder or goalkeeper falls through to "ATT"
df1 <- mutate(
  my_df,
  POS_group = case_when(
    POS %in% c("CB", "LB", "RB", "LWB", "RWB") ~ "DEF",
    POS %in% c("CM", "CDM", "CAM", "LM", "RM") ~ "MID",
    POS == "GK" ~ "GK",
    TRUE ~ "ATT"
  )
)
# Build the plotting table: one row per position group and foot, with the
# left-footed counts negated so they extend to the left of zero in the chart.
dffull_plot <- df1 %>%
  count(POS_group, Foot) %>%
  # fill = 0L: a group/foot combination with no players counts as 0 instead of
  # NA, which would otherwise turn Total (and the ordering below) into NA
  spread(Foot, n, fill = 0L) %>%
  mutate(Total = Left + Right) %>%
  # Order the groups by total size and freeze that order as factor levels
  arrange(Total) %>%
  mutate(POS_group = factor(POS_group, levels = POS_group)) %>%
  mutate(Left = -1 * Left) %>%
  # Drop the helper column by name rather than by position
  select(-Total) %>%
  gather(key = "Foot", value = "Number", -POS_group)
# Diverging bar chart of player counts per position group: negative values
# (left-footed) extend one way, positive values (right-footed) the other.
ggplot(dffull_plot, aes(POS_group, Number, fill = Foot)) +
  geom_col(position = "Stack", alpha = 0.7, width = 0.7) +
  # Value labels: negative counts are shown as their absolute value
  geom_text(
    aes(label = -1 * Number),
    data = filter(dffull_plot, Number < 0),
    hjust = 1.1, color = "yellow", size = 7
  ) +
  geom_text(
    aes(label = Number),
    data = filter(dffull_plot, Number > 0),
    hjust = -0.1, color = "cyan", size = 7
  ) +
  coord_flip() +
  # Axis labels are the absolute values of the breaks
  scale_y_continuous(
    breaks = seq(-2200, 6000, 200),
    limits = c(-2500, 6200),
    labels = c(seq(2200, 0, -200), seq(200, 6000, 200))
  ) +
  scale_fill_manual(
    values = c("yellow", "cyan"),
    name = "",
    labels = c("Left footed", "Right footed")
  ) +
  theme_minimal() +
  # All theme tweaks in a single call; no element is set more than once
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_text(face = "bold", color = "white", size = 19),
    plot.background = element_rect(fill = "grey30"),
    legend.text = element_text(face = "bold.italic", color = "grey100", size = 15),
    legend.position = c(0.9, 0.20),
    legend.key.size = unit(1.5, "cm"),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    plot.title = element_text(size = 29, color = "gray100"),
    plot.subtitle = element_text(size = 17, color = "gray80", face = "italic"),
    plot.caption = element_text(size = 15, color = "gray80", face = "italic")
  ) +
  labs(
    x = NULL, y = NULL,
    subtitle = "Total number of players classified by foot preference \n Created by: Thinh Dao",
    title = "FIFA 19 CAREER MODE PLAYERS",
    caption = "Data Source: https://www.futwiz.com/en/fifa19/career-mode"
  )
Number of players in 6 European super leagues classified by position and foot preference
# Build the plotting table for the six selected leagues: one row per league,
# position group and foot, with the left-footed counts negated so they extend
# to the left of zero in the chart.
df6leagues_plot <- df1 %>%
  filter(League %in% c("ENG 1", "ITA 1", "FRA 1", "ESP 1", "GER 1", "NED 1")) %>%
  count(League, POS_group, Foot) %>%
  # fill = 0L: a league/group/foot combination with no players counts as 0
  # instead of NA, which would otherwise turn Total (and the ordering) into NA
  spread(Foot, n, fill = 0L) %>%
  mutate(Total = Left + Right) %>%
  # Order rows by total and use the order of first appearance as factor levels
  arrange(Total) %>%
  mutate(POS_group = factor(POS_group, levels = unique(POS_group))) %>%
  mutate(Left = -1 * Left) %>%
  # Drop the helper column by name rather than by position
  select(-Total) %>%
  gather(key = "Foot", value = "Number", -POS_group, -League)
# Facet titles: maps each league code found in the League column to the name
# displayed on the plot ("Serie A" was previously misspelled as "Seri A").
my_label <- c(
  "NED 1" = "Eredivisie", "GER 1" = "Bundesliga",
  "ITA 1" = "Serie A", "ESP 1" = "La Liga",
  "ENG 1" = "Premier League", "FRA 1" = "Ligue 1"
)
# Diverging bar chart per league: one facet per league, negative values
# (left-footed) extend one way, positive values (right-footed) the other.
ggplot(df6leagues_plot, aes(POS_group, Number, fill = Foot)) +
  geom_col(position = "Stack", alpha = 0.7, width = 0.7) +
  coord_flip() +
  # One panel per league, titled with the names from my_label
  facet_wrap(. ~ League, labeller = labeller(League = my_label)) +
  scale_fill_manual(
    values = c("yellow", "cyan"),
    name = "",
    labels = c("Left footed", "Right footed")
  ) +
  # Axis labels are the absolute values of the breaks
  scale_y_continuous(
    breaks = seq(-80, 180, 20),
    limits = c(-80, 200),
    labels = c(seq(80, 0, -20), seq(20, 180, 20))
  ) +
  theme_minimal() +
  # All theme tweaks in a single call; no element is set more than once
  theme(
    panel.grid.major.x = element_line(linetype = "dotted", color = "grey80"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    plot.background = element_rect(fill = "grey30"),
    axis.text.x = element_text(color = "grey80", size = 13),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_text(face = "bold", color = "white", size = 19),
    legend.text = element_text(face = "bold.italic", color = "grey100", size = 15),
    legend.position = "bottom",
    legend.key.size = unit(1, "cm"),
    strip.text = element_text(face = "bold", color = "grey100", size = 17),
    plot.margin = unit(c(2, 1, 1, 1), "cm"),
    plot.title = element_text(size = 29, color = "gray100"),
    plot.subtitle = element_text(size = 17, color = "gray80", face = "italic"),
    plot.caption = element_text(size = 15, color = "gray80", face = "italic")
  ) +
  labs(
    x = NULL, y = NULL,
    subtitle = "Total number of players classified by foot preference in 6 European supper leagues \n Created by: Thinh Dao",
    title = "FIFA 19 CAREER MODE PLAYERS",
    caption = "Data Source: https://www.futwiz.com/en/fifa19/career-mode"
  )