## R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown, see <http://rmarkdown.rstudio.com>.

When you click the **Knit** button, a document will be generated that includes both content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

## Including Plots

You can also embed plots, for example:

Note that the `echo = FALSE` parameter was added to the code chunk to prevent printing of the R code that generated the plot.

# Inspect the starting working directory and its contents.
getwd()
list.files()

# Project root. NOTE(review): this is a machine-specific absolute path; the
# script will only run as-is on a machine where this directory exists.
path <- "/Users/wahabo/Desktop"

# Point both knitr (for subsequent chunks) and the current session at the
# project root, then confirm the move and list the data folder.
knitr::opts_knit$set(root.dir = path)
setwd(path)
getwd()
list.files("data/")

library(tidyverse)
library(lsa)
library(magrittr)
library(dplyr)

# Raw player table: semicolon-separated CSV read as UTF-8.
data <- read.csv(
  file = "data/FBREF_BigPlayers_2223.csv",
  sep = ";",
  encoding = "UTF-8"
)

#' Load a player table and keep the players of a given position
#'
#' Reads a semicolon-separated CSV, optionally restricts it to one or more
#' competitions, then filters on the `Pos` column.
#'
#' @param file Path to the CSV file (passed to `read.csv()`).
#' @param encoding Encoding passed to `read.csv()`.
#' @param position Position code to keep (e.g. "FW").
#' @param competition Either the single string "ALL" (no competition filter)
#'   or a character vector of values matched against the `Competition` column.
#' @param primary If `TRUE`, keep rows whose `Pos` starts with `position`
#'   (first two characters); otherwise keep rows whose `Pos` matches
#'   `position` as a regular expression anywhere in the string.
#' @return The filtered data frame.
select_players <- function(file, encoding, position, competition, primary) {
  data <- read.csv(file, sep = ";", encoding = encoding)

  # Guard on length so that a vector of competitions reaches the %in% branch
  # instead of failing in the scalar `if` condition.
  keep_all <- length(competition) == 1L && competition == "ALL"
  if (keep_all) {
    data_players <- data
  } else {
    data_players <- data %>%
      filter(Competition %in% competition)
  }

  if (primary) {
    cat("We keep the players whose main position is:", position)
    data_players <- data_players %>%
      filter(substr(Pos, 1, 2) == position)
  } else {
    cat("We keep the players whose position is:", position)
    data_players <- data_players %>%
      filter(grepl(position, Pos))
  }

  data_players
}

# Players from every competition whose primary position is forward.
df_forwards <- select_players(
  file = "data/FBREF_BigPlayers_2223.csv",
  encoding = "UTF-8",
  position = "FW",
  competition = "ALL",
  primary = TRUE
)

# Show which position strings survived, then keep only the pure "FW" rows
# (drops any combined position strings that start with "FW").
unique(df_forwards$Pos)
df_forwards <- df_forwards %>%
  filter(Pos == "FW")

head(df_forwards)

#' Keep players with enough minutes and below an age limit
#'
#' A player is kept when `Min` exceeds `pct_min_minutes` percent of
#' `90 * MP_Squad` (rounded) and `Age` is at most `age_max`.
#' NOTE(review): `MP_Squad` is presumably the number of matches played by the
#' player's squad -- confirm against the data dictionary.
#'
#' @param data Data frame with columns `Min`, `MP_Squad`, `Age`, `Player`,
#'   `Squad` and every column named in `metrics`.
#' @param metrics Character vector of metric column names to keep.
#' @param pct_min_minutes Minimum share of available minutes, in percent.
#' @param age_max Maximum age (inclusive).
#' @return A data frame with `Player`, `Squad` and the metric columns, with
#'   row names renumbered from 1.
filter_players <- function(data, metrics, pct_min_minutes, age_max) {
  data_filter <- data %>%
    filter(
      Min > round((pct_min_minutes * 90 * MP_Squad) / 100),
      Age <= age_max
    ) %>%
    # all_of() makes it explicit that `metrics` is an external character
    # vector, not a column of `data`.
    select(all_of(c("Player", "Squad", metrics)))

  # seq_len() keeps this valid when no player passes the filters.
  rownames(data_filter) <- seq_len(nrow(data_filter))
  data_filter
}

# Metric columns used for scoring and similarity.
list_metrics <- c(
  "Gls", "xG", "Gls.90", "xG.90",
  "SCA.90", "GCA.90", "Touches.90", "AerialW.90"
)

# Forwards aged 27 or younger who played more than 50% of the available
# minutes.
df_forwards_filter <- filter_players(
  data = df_forwards,
  metrics = list_metrics,
  pct_min_minutes = 50,
  age_max = 27
)

# Print any player name that appears more than once.
cat(
  "Duplicated players:",
  df_forwards_filter[duplicated(df_forwards_filter$Player), ]$Player
)

# Human-readable metric names (xG keeps its original name).
df_forwards_rename <- df_forwards_filter %>%
  rename(
    "Goals" = "Gls",
    "Goals per 90" = "Gls.90",
    "xG per 90" = "xG.90",
    "SCA per 90" = "SCA.90",
    "GCA per 90" = "GCA.90",
    "Touches per 90" = "Touches.90",
    "Aerials won per 90" = "AerialW.90"
  )
head(df_forwards_rename)

#' Min-max rescale a numeric vector to [0, 1]
#'
#' @param x A numeric vector.
#' @param na.rm If `TRUE` (default), missing values are ignored when the
#'   minimum and maximum are computed; they stay `NA` in the result.
#'   If `FALSE`, any `NA` in `x` makes the whole result `NA`.
#' @return A numeric vector the same length as `x`. When all non-missing
#'   values are equal the range is zero and the result is `NaN`.
normalize <- function(x, na.rm = TRUE) {
  # Honour na.rm: previously the argument was accepted but never used.
  bounds <- range(x, na.rm = na.rm)
  (x - bounds[1]) / (bounds[2] - bounds[1])
}

# Copy of the renamed table whose metric columns (3 onwards; columns 1-2 are
# Player and Squad) are min-max normalised.
# NOTE(review): data.frame() is called with its default check.names, so the
# renamed columns containing spaces may come out with dotted names -- confirm
# with colnames(df_forwards_norm).
df_forwards_norm <- data.frame(df_forwards_rename)
for (i in 3:ncol(df_forwards_rename)) {
  df_forwards_norm[[i]] <- normalize(df_forwards_rename[[i]])
}

summary(df_forwards_norm)

#' Weighted score per row and top-n ranking
#'
#' Multiplies each metric column by its weight, sums the weighted metrics per
#' row, scales the sum by 10, rounds to 3 decimals and returns the `n`
#' highest-scoring rows. The sum of the weights is printed with `cat()`.
#'
#' @param data Data frame whose columns from `ind_metric` to the last column
#'   are numeric metrics.
#' @param weights Numeric vector with one weight per metric column, in column
#'   order.
#' @param ind_metric Index of the first metric column.
#' @param columns_return Names of the identifying columns to keep in the
#'   result.
#' @param n Number of rows to return.
#' @return A data frame with `columns_return` plus `Final Score`, sorted by
#'   decreasing score, row names renumbered from 1, at most `n` rows.
calc_scoring <- function(data, weights, ind_metric, columns_return, n) {
  metric_idx <- ind_metric:ncol(data)
  # A weight vector of the wrong length would silently give NA or misaligned
  # scores, so fail early instead.
  stopifnot(length(weights) == length(metric_idx))

  cat("Weights sum:", sum(weights))

  # drop = FALSE keeps a single metric column as a one-column table so that
  # rowSums() still works.
  weighted <- sweep(
    as.matrix(data[, metric_idx, drop = FALSE]), 2, weights, `*`
  )
  data$`Final Score` <- round(10 * rowSums(weighted), 3)

  ranking <- order(-data$`Final Score`)
  data <- data[ranking, c(columns_return, "Final Score"), drop = FALSE]
  rownames(data) <- seq_len(nrow(data))

  # head() returns every row when n exceeds nrow(data) instead of padding
  # the result with NA rows.
  head(data, n)
}

colnames(df_forwards_norm)

# Top 15 forwards by weighted score; metrics start at column 3
# (after Player and Squad).
df_score_forwards <- calc_scoring(
  data = df_forwards_norm,
  weights = c(0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1),
  ind_metric = 3,
  columns_return = c("Player"),
  n = 15
)

df_score_forwards

# Drop Squad: from here on the table holds Player plus the metric columns.
df_forwards_norm <- subset(df_forwards_norm, select = -c(Squad))

df_forwards_norm

#' Player similarity tool (stub)
#'
#' Currently only fixes the random seed to 123; none of the arguments are
#' read by the body.
#'
#' @param sample,data,player,metrics,metrics_rename,distance,n Not used by
#'   the current body.
#' @return `NULL`, invisibly (the value of `set.seed()`).
similarity_tool <- function(sample,
                            data,
                            player,
                            metrics,
                            metrics_rename,
                            distance,
                            n) {
  set.seed(seed = 123)
}

ncol(df_forwards_norm)

# One weight per metric column, in column order.
w <- c(0.2, 0.2, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1)

df_forward <- as.data.frame(df_forwards_norm)

# Squad was removed from df_forwards_norm, so column 1 is Player and the
# metrics start at column 2. Starting the loop at 3 left the first metric
# unweighted and shifted every weight by one position.
stopifnot(length(w) == ncol(df_forward) - 1)
for (i in 2:ncol(df_forward)) {
  df_forward[[i]] <- df_forward[[i]] * w[i - 1]
}

# Overall score: sum of the weighted metrics per player.
df_forward$Scoring <- rowSums(df_forward %>% select(-Player))

df_forward_sort <- df_forward %>%
  select(Player, Scoring) %>%
  arrange(desc(Scoring))

# Reference player for the similarity search.
player <- "Erling Haaland"

# Standardise every numeric column and label rows by player name.
# NOTE(review): the Scoring column is part of the features here -- confirm
# that this is intended.
data_final_norm <- scale(df_forward %>% select(-Player))
rownames(data_final_norm) <- df_forward$Player

distance <- "cosine"

# lsa::cosine() compares columns, so players must be in columns.
players_df <- t(data_final_norm)

sim_cosine <- cosine(players_df)

# Similarity of every player to the reference player.
player_sim <- sim_cosine[, player]

df_sim <- as.data.frame(player_sim)
colnames(df_sim) <- "Similarity"

# Rescale the similarities to a 0-100 range.
df_sim$Similarity <- normalize(df_sim$Similarity)

df_sim$Similarity <- 100 * df_sim$Similarity

df_sim$Player <- df_forward$Player
final_df <- df_sim %>%
  arrange(desc(Similarity))

library(magrittr)
library(tinytex)