The Data

library(tidyverse)


# Read the raw data set, keeping text columns as character (not factor).
deneme <- read.csv(
  "C:/Users/emretoros/OneDrive/Makale/futbol/instat/gk1.csv",
  stringsAsFactors = FALSE
)

# Keep the first seven columns (the non-numeric ones) in a separate object.
z <- select(deneme, 1:7)

# Disabled step, kept for reference: convert columns 8:37 to numeric and
# bind them back onto `z`.
# w <- q %>%
#   select(8:37) %>%
#   mutate_if(is.character, as.numeric)
# deneme <- cbind(z, w)

# Replace every missing value in the data frame with 0.
deneme <- replace(deneme, is.na(deneme), 0)

# Clean the variable names with janitor.
library(janitor)
deneme <- clean_names(deneme)

The Factor Analysis

# Rename the player column to `Name`, then move `club_point` so that it sits
# directly after `national_team`.
deneme <- deneme %>%
  rename(Name = i_name) %>%
  relocate(club_point, .after = national_team)

With sjPlot (sjt.fa) to check the stats

# Variables entering the sjPlot factor analysis: columns 9 to 38.
fa <- select(deneme, 9:38)

# Print the factor-analysis table for the selected variables.
library(sjPlot)
sjt.fa(fa)
## Parallel analysis suggests that the number of factors =  6  and the number of components =  NA
Factor Analysis
  Factor 1 Factor 2 Factor 3 Factor 4 Factor 5 Factor 6
in_stat_index 0.05 0.61 -0.00 0.05 0.19 -0.08
age 0.05 0.00 -0.08 -0.06 0.66 -0.06
height -0.13 0.05 0.10 -0.03 0.77 0.09
weight -0.07 -0.08 0.05 0.04 0.83 0.03
matches_played_total -0.04 1.07 -0.05 -0.10 -0.13 0.01
minutes_played_average_per_match -0.13 0.10 0.11 -0.01 -0.05 0.32
clean_sheets_total 0.03 0.64 -0.27 0.24 -0.14 -0.17
goals_conceded_average_per_match 0.27 0.10 0.78 -0.60 -0.02 0.12
shots_saved 0.08 -0.24 -0.00 1.05 -0.01 0.03
supersaves_average_per_match -0.04 0.01 0.69 0.14 0.06 -0.03
penalties_saved 0.02 0.22 0.04 0.00 0.04 -0.10
goalkeeper_interceptions 0.14 0.29 -0.07 0.16 0.08 0.13
passes_average_per_match 0.26 -0.06 0.05 0.10 -0.07 0.75
opponents_shots_on_target_average_per_match 0.06 0.02 0.94 -0.01 -0.01 0.11
good_interception_of_goalkeeper_average_per_match 0.05 0.02 0.03 0.10 0.01 0.48
accurate_passes 1.13 -0.08 -0.06 -0.08 -0.11 -0.04
key_passes_accuracy -0.05 0.26 -0.03 -0.03 -0.02 0.10
foot_passes_from_open_play_average_per_match 0.08 -0.15 -0.12 -0.01 0.13 0.56
accurate_foot_passes_from_open_play 0.47 0.21 -0.14 -0.11 0.11 0.06
hand_passes_accurate 0.72 -0.06 0.11 0.14 -0.03 0.10
set_piece_passes_accurate 0.95 -0.11 -0.03 0.05 -0.11 0.04
short_passes_accurate 0.51 0.18 0.06 0.09 -0.05 0.01
mid_range_passes_accurate 0.92 -0.05 0.02 0.02 -0.01 0.01
accurate_long_passes 0.70 -0.03 0.08 0.04 0.04 -0.05
d_lose_range_shots_saved 0.05 0.14 0.12 0.40 -0.10 -0.07
mid_range_shots_saved 0.03 -0.02 0.06 0.62 -0.03 0.10
x_of_long_range_shots_saved 0.20 0.02 0.20 0.44 0.12 -0.04
jumping_saves_average_per_match -0.06 -0.09 0.65 0.21 0.02 -0.04
stopped_shots -0.05 0.16 -0.14 0.42 0.04 0.13
x_g_conceded_total -0.08 0.98 0.23 -0.16 -0.04 0.06
Cronbach’s α 0.88 0.16 0.80 0.71 0.59 0.49

With factanal

# Fit a six-factor model with factanal on every column except the first
# seven, and compute regression-method factor scores for each row.
# NOTE(review): this input keeps column 8 (club_point), which the sjt.fa step
# (columns 9:38) leaves out, and club_point is later used to weight the factor
# scores - confirm that including it here is intended.
res <- factanal(
  x = deneme[, -c(1, 2, 3, 4, 5, 6, 7)],
  factors = 6,
  scores = "regression"
)

# View the fitted model (uniquenesses, loadings, fit test).
print(res)
## 
## Call:
## factanal(x = deneme[, -c(1, 2, 3, 4, 5, 6, 7)], factors = 6,     scores = "regression")
## 
## Uniquenesses:
##                                        club_point 
##                                             0.909 
##                                     in_stat_index 
##                                             0.488 
##                                               age 
##                                             0.536 
##                                            height 
##                                             0.465 
##                                            weight 
##                                             0.364 
##                              matches_played_total 
##                                             0.018 
##                  minutes_played_average_per_match 
##                                             0.859 
##                                clean_sheets_total 
##                                             0.391 
##                  goals_conceded_average_per_match 
##                                             0.123 
##                                       shots_saved 
##                                             0.005 
##                      supersaves_average_per_match 
##                                             0.498 
##                                   penalties_saved 
##                                             0.938 
##                          goalkeeper_interceptions 
##                                             0.679 
##                          passes_average_per_match 
##                                             0.241 
##       opponents_shots_on_target_average_per_match 
##                                             0.030 
## good_interception_of_goalkeeper_average_per_match 
##                                             0.699 
##                                   accurate_passes 
##                                             0.006 
##                               key_passes_accuracy 
##                                             0.929 
##      foot_passes_from_open_play_average_per_match 
##                                             0.656 
##               accurate_foot_passes_from_open_play 
##                                             0.653 
##                              hand_passes_accurate 
##                                             0.270 
##                         set_piece_passes_accurate 
##                                             0.172 
##                             short_passes_accurate 
##                                             0.558 
##                         mid_range_passes_accurate 
##                                             0.168 
##                              accurate_long_passes 
##                                             0.456 
##                          d_lose_range_shots_saved 
##                                             0.723 
##                             mid_range_shots_saved 
##                                             0.551 
##                       x_of_long_range_shots_saved 
##                                             0.529 
##                   jumping_saves_average_per_match 
##                                             0.545 
##                                     stopped_shots 
##                                             0.699 
##                                x_g_conceded_total 
##                                             0.141 
## 
## Loadings:
##                                                   Factor1 Factor2 Factor3
## club_point                                         0.239                 
## in_stat_index                                      0.198   0.617         
## age                                                0.150          -0.140 
## height                                                     0.115         
## weight                                             0.131                 
## matches_played_total                                       0.976         
## minutes_played_average_per_match                           0.103   0.135 
## clean_sheets_total                                 0.134   0.653  -0.233 
## goals_conceded_average_per_match                   0.170           0.789 
## shots_saved                                        0.385                 
## supersaves_average_per_match                       0.103           0.681 
## penalties_saved                                            0.213         
## goalkeeper_interceptions                           0.259   0.370         
## passes_average_per_match                           0.389           0.190 
## opponents_shots_on_target_average_per_match        0.175           0.939 
## good_interception_of_goalkeeper_average_per_match  0.164   0.110         
## accurate_passes                                    0.970                 
## key_passes_accuracy                                        0.241         
## foot_passes_from_open_play_average_per_match       0.155                 
## accurate_foot_passes_from_open_play                0.453   0.261         
## hand_passes_accurate                               0.725   0.105   0.233 
## set_piece_passes_accurate                          0.851           0.117 
## short_passes_accurate                              0.531   0.267   0.148 
## mid_range_passes_accurate                          0.845           0.150 
## accurate_long_passes                               0.668           0.170 
## d_lose_range_shots_saved                           0.186   0.239   0.153 
## mid_range_shots_saved                              0.255   0.176   0.115 
## x_of_long_range_shots_saved                        0.379   0.186   0.239 
## jumping_saves_average_per_match                                    0.640 
## stopped_shots                                      0.139   0.286  -0.105 
## x_g_conceded_total                                         0.877   0.236 
##                                                   Factor4 Factor5 Factor6
## club_point                                                        -0.145 
## in_stat_index                                      0.179   0.246         
## age                                                        0.645         
## height                                                     0.713         
## weight                                                     0.784         
## matches_played_total                                               0.101 
## minutes_played_average_per_match                                   0.325 
## clean_sheets_total                                 0.311          -0.114 
## goals_conceded_average_per_match                  -0.413           0.223 
## shots_saved                                        0.910                 
## supersaves_average_per_match                       0.157                 
## penalties_saved                                                          
## goalkeeper_interceptions                           0.240   0.137   0.201 
## passes_average_per_match                           0.170           0.730 
## opponents_shots_on_target_average_per_match                        0.216 
## good_interception_of_goalkeeper_average_per_match  0.128           0.486 
## accurate_passes                                    0.153           0.117 
## key_passes_accuracy                                                0.110 
## foot_passes_from_open_play_average_per_match               0.136   0.544 
## accurate_foot_passes_from_open_play                        0.205   0.151 
## hand_passes_accurate                               0.283           0.228 
## set_piece_passes_accurate                          0.229           0.185 
## short_passes_accurate                              0.224           0.122 
## mid_range_passes_accurate                          0.210   0.132   0.156 
## accurate_long_passes                               0.192   0.140         
## d_lose_range_shots_saved                           0.396                 
## mid_range_shots_saved                              0.566           0.136 
## x_of_long_range_shots_saved                        0.460   0.145         
## jumping_saves_average_per_match                    0.191                 
## stopped_shots                                      0.395           0.166 
## x_g_conceded_total                                                 0.165 
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## SS loadings      4.775   3.215   2.847   2.446   1.790   1.629
## Proportion Var   0.154   0.104   0.092   0.079   0.058   0.053
## Cumulative Var   0.154   0.258   0.350   0.428   0.486   0.539
## 
## Test of the hypothesis that 6 factors are sufficient.
## The chi square statistic is 1105.84 on 294 degrees of freedom.
## The p-value is 2.3e-94
# Look at the loadings, sorted by factor.
# loadings() only extracts the loadings matrix and ignores extra arguments;
# sorting is an option of its print method, so `sort = TRUE` must be passed
# to print() to take effect.
print(loadings(res), sort = TRUE)
## 
## Loadings:
##                                                   Factor1 Factor2 Factor3
## club_point                                         0.239                 
## in_stat_index                                      0.198   0.617         
## age                                                0.150          -0.140 
## height                                                     0.115         
## weight                                             0.131                 
## matches_played_total                                       0.976         
## minutes_played_average_per_match                           0.103   0.135 
## clean_sheets_total                                 0.134   0.653  -0.233 
## goals_conceded_average_per_match                   0.170           0.789 
## shots_saved                                        0.385                 
## supersaves_average_per_match                       0.103           0.681 
## penalties_saved                                            0.213         
## goalkeeper_interceptions                           0.259   0.370         
## passes_average_per_match                           0.389           0.190 
## opponents_shots_on_target_average_per_match        0.175           0.939 
## good_interception_of_goalkeeper_average_per_match  0.164   0.110         
## accurate_passes                                    0.970                 
## key_passes_accuracy                                        0.241         
## foot_passes_from_open_play_average_per_match       0.155                 
## accurate_foot_passes_from_open_play                0.453   0.261         
## hand_passes_accurate                               0.725   0.105   0.233 
## set_piece_passes_accurate                          0.851           0.117 
## short_passes_accurate                              0.531   0.267   0.148 
## mid_range_passes_accurate                          0.845           0.150 
## accurate_long_passes                               0.668           0.170 
## d_lose_range_shots_saved                           0.186   0.239   0.153 
## mid_range_shots_saved                              0.255   0.176   0.115 
## x_of_long_range_shots_saved                        0.379   0.186   0.239 
## jumping_saves_average_per_match                                    0.640 
## stopped_shots                                      0.139   0.286  -0.105 
## x_g_conceded_total                                         0.877   0.236 
##                                                   Factor4 Factor5 Factor6
## club_point                                                        -0.145 
## in_stat_index                                      0.179   0.246         
## age                                                        0.645         
## height                                                     0.713         
## weight                                                     0.784         
## matches_played_total                                               0.101 
## minutes_played_average_per_match                                   0.325 
## clean_sheets_total                                 0.311          -0.114 
## goals_conceded_average_per_match                  -0.413           0.223 
## shots_saved                                        0.910                 
## supersaves_average_per_match                       0.157                 
## penalties_saved                                                          
## goalkeeper_interceptions                           0.240   0.137   0.201 
## passes_average_per_match                           0.170           0.730 
## opponents_shots_on_target_average_per_match                        0.216 
## good_interception_of_goalkeeper_average_per_match  0.128           0.486 
## accurate_passes                                    0.153           0.117 
## key_passes_accuracy                                                0.110 
## foot_passes_from_open_play_average_per_match               0.136   0.544 
## accurate_foot_passes_from_open_play                        0.205   0.151 
## hand_passes_accurate                               0.283           0.228 
## set_piece_passes_accurate                          0.229           0.185 
## short_passes_accurate                              0.224           0.122 
## mid_range_passes_accurate                          0.210   0.132   0.156 
## accurate_long_passes                               0.192   0.140         
## d_lose_range_shots_saved                           0.396                 
## mid_range_shots_saved                              0.566           0.136 
## x_of_long_range_shots_saved                        0.460   0.145         
## jumping_saves_average_per_match                    0.191                 
## stopped_shots                                      0.395           0.166 
## x_g_conceded_total                                                 0.165 
## 
##                Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## SS loadings      4.775   3.215   2.847   2.446   1.790   1.629
## Proportion Var   0.154   0.104   0.092   0.079   0.058   0.053
## Cumulative Var   0.154   0.258   0.350   0.428   0.486   0.539
# Look at the uniqueness of each variable from the fitted model.
res[["uniquenesses"]]
##                                        club_point 
##                                       0.909269332 
##                                     in_stat_index 
##                                       0.487731433 
##                                               age 
##                                       0.535777137 
##                                            height 
##                                       0.464880026 
##                                            weight 
##                                       0.364160319 
##                              matches_played_total 
##                                       0.017729927 
##                  minutes_played_average_per_match 
##                                       0.858529486 
##                                clean_sheets_total 
##                                       0.391137384 
##                  goals_conceded_average_per_match 
##                                       0.122653211 
##                                       shots_saved 
##                                       0.005000000 
##                      supersaves_average_per_match 
##                                       0.497739384 
##                                   penalties_saved 
##                                       0.938324993 
##                          goalkeeper_interceptions 
##                                       0.678724412 
##                          passes_average_per_match 
##                                       0.241063087 
##       opponents_shots_on_target_average_per_match 
##                                       0.030146460 
## good_interception_of_goalkeeper_average_per_match 
##                                       0.699253910 
##                                   accurate_passes 
##                                       0.005749569 
##                               key_passes_accuracy 
##                                       0.929259972 
##      foot_passes_from_open_play_average_per_match 
##                                       0.656017070 
##               accurate_foot_passes_from_open_play 
##                                       0.653467534 
##                              hand_passes_accurate 
##                                       0.269957747 
##                         set_piece_passes_accurate 
##                                       0.171510424 
##                             short_passes_accurate 
##                                       0.558023678 
##                         mid_range_passes_accurate 
##                                       0.167586267 
##                              accurate_long_passes 
##                                       0.455648795 
##                          d_lose_range_shots_saved 
##                                       0.723092436 
##                             mid_range_shots_saved 
##                                       0.551177257 
##                       x_of_long_range_shots_saved 
##                                       0.529456861 
##                   jumping_saves_average_per_match 
##                                       0.545416063 
##                                     stopped_shots 
##                                       0.698931848 
##                                x_g_conceded_total 
##                                       0.141365084

##Creating the new data

# Extract the factor scores (one column per factor) from the fitted model.
coefs <- as.data.frame(res$scores)

# Bind the factor scores onto the cleaned data, column-wise.
gk <- cbind(deneme, coefs)

# Create the weighted factor variables: each FactorN score multiplied by
# club_point and stored as FactorN_wf. across() replaces the superseded
# mutate_at() and produces the same column names in the same order.
gk <- gk %>%
  mutate(across(Factor1:Factor6, list(wf = ~ .x * club_point)))

# The analysis with unweighted coefs

# Check example of similar players to Volkan Babacan ---- UNWEIGHTED
# Similarity = 1 - (sum of absolute factor-score differences to the reference
# player) / (largest such sum in the data), so the reference player scores 1
# and the most distant player scores 0.
score_cols <- paste0("Factor", 1:6)
ref_row <- which(gk[["Name"]] == "Volkan Babacan")
# Fail loudly instead of letting sweep() recycle a wrong-length reference
# vector when the name matches no row or more than one row.
stopifnot(length(ref_row) == 1L)
ref_scores <- unlist(gk[ref_row, score_cols])
l1_dist <- rowSums(abs(sweep(gk[, score_cols], 2, ref_scores)))
Similarity <- 1 - l1_dist / max(l1_dist)

SimilarityResult <- cbind.data.frame(
  Name = gk[, 1],
  club = gk[, 2],
  league = gk[, 3],
  Similarity
)

# Order results from most to least similar.
SimilarityResult <- SimilarityResult[order(-SimilarityResult$Similarity), ]

# Look at the most similar players (at most ten rows).
head(SimilarityResult, 10)
##                 Name           club                     league Similarity
## 276   Volkan Babacan       Istanbul Turkish Turkcell Super Lig  1.0000000
## 39     Ralf Fahrmann     Schalke 04          German Bundesliga  0.9367448
## 181      Ivan Villar          Celta   Spanish Primera Division  0.9269147
## 315 Dusan Melicharek          Malmo        Swedish Allsvenskan  0.9235908
## 145     Sven Ulreich         Bayern          German Bundesliga  0.9005396
## 263     Erwin Mulder     Heerenveen           Dutch Eredivisie  0.8973590
## 61          Tiago Sa Sporting Braga            Portuguese Liga  0.8945471
## 160   Gaetan Poussin       Bordeaux             French Ligue 1  0.8867010
## 320      Yohann Pele      Marseille             French Ligue 1  0.8847592
## 271  Daniele Padelli          Inter              Italy Serie A  0.8812565

# The analysis with coefs weighted by club strength

# Check example of similar players to Volkan Babacan ---- WEIGHTED
# Same similarity measure as the unweighted analysis, computed on the
# club_point-weighted factor columns (Factor1_wf ... Factor6_wf):
# 1 - (sum of absolute differences to the reference player) / (largest sum).
score_cols_w <- paste0("Factor", 1:6, "_wf")
ref_row_w <- which(gk[["Name"]] == "Volkan Babacan")
# Fail loudly instead of letting sweep() recycle a wrong-length reference
# vector when the name matches no row or more than one row.
stopifnot(length(ref_row_w) == 1L)
ref_scores_w <- unlist(gk[ref_row_w, score_cols_w])
l1_dist_w <- rowSums(abs(sweep(gk[, score_cols_w], 2, ref_scores_w)))
Similarity_w <- 1 - l1_dist_w / max(l1_dist_w)

SimilarityResult_w <- cbind.data.frame(
  Name = gk[, 1],
  club = gk[, 2],
  league = gk[, 3],
  Similarity_w
)

# Order results from most to least similar.
SimilarityResult_w <-
  SimilarityResult_w[order(-SimilarityResult_w$Similarity_w), ]

# Look at the most similar players (at most ten rows).
head(SimilarityResult_w, 10)
##                 Name           club                     league Similarity_w
## 276   Volkan Babacan       Istanbul Turkish Turkcell Super Lig    1.0000000
## 181      Ivan Villar          Celta   Spanish Primera Division    0.9261653
## 39     Ralf Fahrmann     Schalke 04          German Bundesliga    0.9165547
## 142             Neto      Barcelona   Spanish Primera Division    0.9134809
## 315 Dusan Melicharek          Malmo        Swedish Allsvenskan    0.9110077
## 61          Tiago Sa Sporting Braga            Portuguese Liga    0.8863322
## 160   Gaetan Poussin       Bordeaux             French Ligue 1    0.8717047
## 271  Daniele Padelli          Inter              Italy Serie A    0.8627830
## 320      Yohann Pele      Marseille             French Ligue 1    0.8572126
## 119        Jed Steer    Aston Villa    Barclays Premier League    0.8500897