This project grew out of my interest in both the FIFA video game and practicing R. I am a beginner in R, so any comments or advice would be highly appreciated.

DATA COLLECTION FROM FUTWIZ.COM

# Loading needed packages: 
library(rvest)
library(tidyverse)

# Checking method for getting data 

# Try the extraction on one page first, to confirm that the XPath below
# selects a table before every page is scraped.
specific_link <- "https://www.futwiz.com/en/fifa19/career-mode/players?page=811"

# XPath of the table node on a listing page
my_xpath <- '//*[@id="panel"]/div[5]/div/div[2]/table'

df811 <- specific_link %>%
  read_html() %>%
  html_nodes(xpath = my_xpath) %>%
  html_table() %>%
  as.data.frame()

# Drop the first column and the first row
# (presumably an empty image column and the title row -- confirm on the page)
df811 <- df811 %>%
  select(-1) %>%
  slice(-1)

head(df811)

# Create a vector of all links

# Build the URL of every listing page with one vectorised paste0() call
# instead of growing a vector inside a loop (each append copies the vector).
base_link <- "https://www.futwiz.com/en/fifa19/career-mode/players"
last_page <- 811L

# The un-paginated landing page comes first, followed by ?page=1 ... ?page=811
fifa19_links <- c(
  base_link,
  paste0(base_link, "?page=", seq_len(last_page))
)
  
# Create a function to extract data from a link

# Download one listing page and return the selected table as a data frame.
#
# Arguments:
#   link  - URL of the page to read.
#   xpath - XPath of the table node(s) to extract. Defaults to the global
#           `my_xpath`, so existing calls of the form get_infor(link) behave
#           exactly as before.
#   delay - Seconds to pause after the request (default 1), so consecutive
#           calls do not hit the site back-to-back.
#
# Returns:
#   The result of html_table() on the matched nodes, coerced with
#   as.data.frame().
get_infor <- function(link, xpath = my_xpath, delay = 1) {
  df_infor <- link %>%
    read_html() %>%
    html_nodes(xpath = xpath) %>%
    html_table() %>%
    as.data.frame()

  # Pause only after the page has been fetched and parsed
  Sys.sleep(delay)

  df_infor
}

# Getting data

# Scrape every link; the result is a list holding one data frame per page
list_fifa19 <- lapply(fifa19_links, get_infor)

# Stack the per-page data frames into a single data frame
df_fifa19 <- do.call(rbind, list_fifa19)

# Save the raw scrape (write.csv also writes the row names as a first column)
write.csv(
  df_fifa19,
  "C:\\Users\\Thinh Dao\\Desktop\\Mini R Project 7.7.19\\data_codes\\fifa19.scv"
)

DATA PROCESSING

rm(list = ls()) # Clear the workspace before this stage

library(tidyverse)

# Reload the raw scrape saved by the data collection stage
df_fifa19 <- read_csv("C:\\Users\\Thinh Dao\\Desktop\\Mini R Project 7.7.19\\data_codes\\fifa19.scv")

# Take the values of the first row, leaving out the first one; these become
# the column names
vec_name <- df_fifa19 %>%
  slice(1) %>%
  gather() %>%
  pull(2) %>%
  .[-1]

# Drop the first column, then apply the names collected above
df_fifa19 <- select(df_fifa19, -1)
names(df_fifa19) <- vec_name

# Remove the rows that only repeat the column titles
df_fifa19 <- filter(df_fifa19, OVR != "OVR")

# Report how many Player values are duplicates
sum(duplicated(df_fifa19$Player))

# Keep only the first occurrence of each Player value
df_fifa19 <- df_fifa19[!duplicated(df_fifa19$Player), ]

# Extract player name, club name and League from Column Player

# Split the Player column into player name, club and league, saved as df_name.
#
# The code treats each Player value as "<name>\r\n<club> | <league>":
#   1. split once on "\r\n"  -> Player_name and Club_League
#   2. split once on "|"     -> Club and League
#
# str_split_fixed() always returns exactly two pieces per row (padding with ""
# when the separator is missing). The earlier str_split() + unlist() +
# matrix(ncol = 2) approach would shift every following row whenever one
# value lacked a separator or contained an extra one.
df_name <- df_fifa19 %>%
  select(Player) %>% # select by name; the later join is also keyed on Player
  mutate(
    Player_name = str_squish(str_split_fixed(Player, "\r\n", n = 2)[, 1]),
    Club_League = str_split_fixed(Player, "\r\n", n = 2)[, 2] %>%
      str_replace_all("\r\n", "") %>% # remove any further line breaks
      str_squish(),
    Club = str_squish(str_split_fixed(Club_League, "\\|", n = 2)[, 1]),
    League = str_split_fixed(Club_League, "\\|", n = 2)[, 2] %>%
      str_replace_all("\\|", "") %>% # remove any further pipe characters
      str_squish()
  )

# Attach the extracted name / club / league columns to the player data
df_fifa19_final <- right_join(df_fifa19, df_name, by = "Player")

df_fifa19_final %>% head()

# Columns that hold numbers but are still stored as character
x <- c("OVR", "POT", "GROW", "Age", "Contract", "S/M", "W/F")

# lapply() always returns one vector per column. sapply() would return a
# matrix, a plain vector or a list depending on the number of rows.
df_fifa19_final[x] <- lapply(df_fifa19_final[x], as.numeric)

# Give the awkward column names syntactic equivalents
df_fifa19_final <- df_fifa19_final %>%
  rename(
    Total_Stats = `Total Stats`,
    Skill_move = `S/M`,
    Weak_foot = `W/F`,
    W_Rs = `W/Rs`
  )

# Total_Stats: remove the commas, then convert from character to numeric
df_fifa19_final$Total_Stats <- df_fifa19_final$Total_Stats %>%
  str_replace_all(",", "") %>%
  as.numeric()

# Save the tidy data; the visualisation stage reads this same file name
write.csv(
  df_fifa19_final,
  "C:\\Users\\Thinh Dao\\Desktop\\Mini R Project 7.7.19\\data_codes\\fifa19_tidy.scv"
)

VISUALISATION

Number of players in FIFA 19 classified by position and foot preference

rm(list = ls()) # Clear the workspace before this stage

library(tidyverse)

# Load the tidy data saved by the data processing stage
my_df <- read_csv("C:\\Users\\Thinh Dao\\Desktop\\Mini R Project 7.7.19\\data_codes\\fifa19_tidy.scv")

# Collapse the detailed positions into four groups; any position that is not
# listed explicitly ends up in "ATT"
df1 <- mutate(
  my_df,
  POS_group = case_when(
    POS == "GK" ~ "GK",
    POS %in% c("CB", "LB", "RB", "LWB", "RWB") ~ "DEF",
    POS %in% c("CM", "CDM", "CAM", "LM", "RM") ~ "MID",
    TRUE ~ "ATT"
  )
)

# Count players per position group and preferred foot, shaped for a diverging
# bar chart: one row per (POS_group, Foot), with left-footed counts negated so
# they are drawn to the left of zero.
dffull_plot <- df1 %>%
  count(POS_group, Foot) %>%
  # fill = 0L: a missing group/foot combination counts as zero, not NA
  spread(Foot, n, fill = 0L) %>%
  mutate(Total = Left + Right) %>%
  arrange(Total) %>%
  mutate(
    # Fix the factor levels in ascending order of Total so the bars are sorted
    POS_group = factor(POS_group, levels = POS_group),
    Left = -1 * Left
  ) %>%
  # Total was only needed for ordering; drop it by name, not by position
  select(-Total) %>%
  gather(key = "Foot", value = "Number", Left, Right)
  
# Diverging horizontal bar chart: negative Number values (left-footed) extend
# to the left of zero, positive values (right-footed) to the right.
ggplot(dffull_plot, aes(x = POS_group, y = Number, fill = Foot)) +
  geom_col(position = "Stack", alpha = 0.7, width = 0.7) +
  # Label the left-hand bars, flipping the sign back to a positive count
  geom_text(
    aes(label = -1 * Number),
    data = filter(dffull_plot, Number < 0),
    hjust = 1.1, color = "yellow", size = 7
  ) +
  # Label the right-hand bars
  geom_text(
    aes(label = Number),
    data = filter(dffull_plot, Number > 0),
    hjust = -0.1, color = "cyan", size = 7
  ) +
  coord_flip() +
  scale_y_continuous(
    breaks = seq(-2200, 6000, 200),
    limits = c(-2500, 6200),
    labels = c(seq(2200, 0, -200), seq(200, 6000, 200))
  ) +
  scale_fill_manual(
    values = c("yellow", "cyan"),
    name = "",
    labels = c("Left footed", "Right footed")
  ) +
  labs(
    x = NULL, y = NULL,
    title = "FIFA 19 CAREER MODE PLAYERS",
    subtitle = "Total number of players classified by foot preference \n Created by: Thinh Dao",
    caption = "Data Source: https://www.futwiz.com/en/fifa19/career-mode"
  ) +
  theme_minimal() +
  theme(
    # No grid lines at all
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    # Hide the count axis; the counts are printed next to the bars instead
    axis.text.x = element_blank(),
    axis.title.x = element_blank(),
    axis.ticks.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_text(face = "bold", color = "white", size = 19),
    # Dark background with light text
    plot.background = element_rect(fill = "grey30"),
    legend.text = element_text(face = "bold.italic", color = "grey100", size = 15),
    legend.position = c(0.9, 0.20),
    legend.key.size = unit(1.5, "cm"),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    plot.title = element_text(size = 29, color = "gray100"),
    plot.subtitle = element_text(size = 17, color = "gray80", face = "italic"),
    plot.caption = element_text(size = 15, color = "gray80", face = "italic")
  )

Number of players in 6 European super leagues classified by position and foot preference

# Same diverging-bar layout as the overall chart, but counted per league for
# six selected leagues: one row per (League, POS_group, Foot), with
# left-footed counts negated.
df6leagues_plot <- df1 %>%
  filter(League %in% c("ENG 1", "ITA 1", "FRA 1", "ESP 1", "GER 1", "NED 1")) %>%
  count(League, POS_group, Foot) %>%
  # fill = 0L: a league/position with no players of one foot would otherwise
  # give NA for Total, and ggplot would drop that bar
  spread(Foot, n, fill = 0L) %>%
  mutate(Total = Left + Right) %>%
  arrange(Total) %>%
  mutate(
    # Level order follows the first appearance of each group after sorting
    POS_group = factor(POS_group, levels = unique(POS_group)),
    Left = -1 * Left
  ) %>%
  # Total was only needed for ordering; drop it by name, not by position
  select(-Total) %>%
  gather(key = "Foot", value = "Number", Left, Right)

# Facet titles: map the league codes used in the data to display names
my_label <- c("NED 1" = "Eredivisie", "GER 1" = "Bundesliga",
              "ITA 1" = "Serie A",  "ESP 1" = "La Liga",
              "ENG 1" = "Premier League", "FRA 1" = "Ligue 1")

# One diverging bar chart per league: negative Number values (left-footed)
# extend to the left of zero, positive values (right-footed) to the right.
df6leagues_plot %>%
  ggplot(aes(POS_group, Number, fill = Foot)) +
  geom_col(position = "Stack", alpha = 0.7, width = 0.7) +
  coord_flip() +
  facet_wrap(. ~ League, labeller = labeller(League = my_label)) +
  scale_fill_manual(
    values = c("yellow", "cyan"),
    name = "",
    labels = c("Left footed", "Right footed")
  ) +
  scale_y_continuous(
    breaks = seq(-80, 180, 20),
    limits = c(-80, 200),
    # Show absolute counts on both sides; labels are derived from the breaks
    # so the two can never get out of sync
    labels = abs
  ) +
  labs(
    x = NULL, y = NULL,
    title = "FIFA 19 CAREER MODE PLAYERS",
    subtitle = "Total number of players classified by foot preference in 6 European super leagues \n Created by: Thinh Dao",
    caption = "Data Source: https://www.futwiz.com/en/fifa19/career-mode"
  ) +
  theme_minimal() +
  theme(
    # Only dotted vertical guide lines at the major breaks
    panel.grid.major.x = element_line(linetype = "dotted", color = "grey80"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    # Dark background with light text
    plot.background = element_rect(fill = "grey30"),
    axis.text.x = element_text(color = "grey80", size = 13),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_text(face = "bold", color = "white", size = 19),
    legend.text = element_text(face = "bold.italic", color = "grey100", size = 15),
    legend.position = "bottom",
    legend.key.size = unit(1, "cm"),
    strip.text = element_text(face = "bold", color = "grey100", size = 17),
    plot.margin = unit(c(2, 1, 1, 1), "cm"),
    plot.title = element_text(size = 29, color = "gray100"),
    plot.subtitle = element_text(size = 17, color = "gray80", face = "italic"),
    plot.caption = element_text(size = 15, color = "gray80", face = "italic")
  )