# Packages ----
# All packages are attached up front (same order as before) so a missing
# dependency fails immediately instead of halfway through the script.
library(tidyverse)
library(janitor)
library(sjPlot)

# Data ----
# NOTE: absolute, machine-specific path; adjust when running elsewhere.
deneme <- read.csv(
  "C:/Users/emretoros/OneDrive/Makale/futbol/instat/gk1.csv",
  stringsAsFactors = FALSE
)

# Separate the first seven columns (the script treats these as the
# non-numeric part of the data). `z` is not referenced again in the visible
# part of this script; it is kept so interactive use is unaffected.
z <- deneme %>%
  select(1:7)

# Replace every missing value with 0 before any modelling.
deneme[is.na(deneme)] <- 0

# Clean variable names with janitor, rename the player column to `Name`,
# and move `club_point` directly after `national_team`.
deneme <- deneme %>%
  clean_names() %>%
  rename(Name = i_name) %>%
  relocate(club_point, .after = national_team)

# Variables used for the exploratory factor-analysis table: columns 9 to 38
# of the cleaned data (positional, so it depends on the relocate() above).
fa <- deneme %>%
  select(9:38)

# Factor-analysis table.
# NOTE(review): newer sjPlot releases appear to expose this as tab_fa();
# switch if sjt.fa() is unavailable in the installed version - confirm.
sjt.fa(fa)
## Parallel analysis suggests that the number of factors = 6 and the number of components = NA
| | Factor 1 | Factor 2 | Factor 3 | Factor 4 | Factor 5 | Factor 6 |
|---|---|---|---|---|---|---|
| in_stat_index | 0.05 | 0.61 | -0.00 | 0.05 | 0.19 | -0.08 |
| age | 0.05 | 0.00 | -0.08 | -0.06 | 0.66 | -0.06 |
| height | -0.13 | 0.05 | 0.10 | -0.03 | 0.77 | 0.09 |
| weight | -0.07 | -0.08 | 0.05 | 0.04 | 0.83 | 0.03 |
| matches_played_total | -0.04 | 1.07 | -0.05 | -0.10 | -0.13 | 0.01 |
| minutes_played_average_per_match | -0.13 | 0.10 | 0.11 | -0.01 | -0.05 | 0.32 |
| clean_sheets_total | 0.03 | 0.64 | -0.27 | 0.24 | -0.14 | -0.17 |
| goals_conceded_average_per_match | 0.27 | 0.10 | 0.78 | -0.60 | -0.02 | 0.12 |
| shots_saved | 0.08 | -0.24 | -0.00 | 1.05 | -0.01 | 0.03 |
| supersaves_average_per_match | -0.04 | 0.01 | 0.69 | 0.14 | 0.06 | -0.03 |
| penalties_saved | 0.02 | 0.22 | 0.04 | 0.00 | 0.04 | -0.10 |
| goalkeeper_interceptions | 0.14 | 0.29 | -0.07 | 0.16 | 0.08 | 0.13 |
| passes_average_per_match | 0.26 | -0.06 | 0.05 | 0.10 | -0.07 | 0.75 |
| opponents_shots_on_target_average_per_match | 0.06 | 0.02 | 0.94 | -0.01 | -0.01 | 0.11 |
| good_interception_of_goalkeeper_average_per_match | 0.05 | 0.02 | 0.03 | 0.10 | 0.01 | 0.48 |
| accurate_passes | 1.13 | -0.08 | -0.06 | -0.08 | -0.11 | -0.04 |
| key_passes_accuracy | -0.05 | 0.26 | -0.03 | -0.03 | -0.02 | 0.10 |
| foot_passes_from_open_play_average_per_match | 0.08 | -0.15 | -0.12 | -0.01 | 0.13 | 0.56 |
| accurate_foot_passes_from_open_play | 0.47 | 0.21 | -0.14 | -0.11 | 0.11 | 0.06 |
| hand_passes_accurate | 0.72 | -0.06 | 0.11 | 0.14 | -0.03 | 0.10 |
| set_piece_passes_accurate | 0.95 | -0.11 | -0.03 | 0.05 | -0.11 | 0.04 |
| short_passes_accurate | 0.51 | 0.18 | 0.06 | 0.09 | -0.05 | 0.01 |
| mid_range_passes_accurate | 0.92 | -0.05 | 0.02 | 0.02 | -0.01 | 0.01 |
| accurate_long_passes | 0.70 | -0.03 | 0.08 | 0.04 | 0.04 | -0.05 |
| d_lose_range_shots_saved | 0.05 | 0.14 | 0.12 | 0.40 | -0.10 | -0.07 |
| mid_range_shots_saved | 0.03 | -0.02 | 0.06 | 0.62 | -0.03 | 0.10 |
| x_of_long_range_shots_saved | 0.20 | 0.02 | 0.20 | 0.44 | 0.12 | -0.04 |
| jumping_saves_average_per_match | -0.06 | -0.09 | 0.65 | 0.21 | 0.02 | -0.04 |
| stopped_shots | -0.05 | 0.16 | -0.14 | 0.42 | 0.04 | 0.13 |
| x_g_conceded_total | -0.08 | 0.98 | 0.23 | -0.16 | -0.04 | 0.06 |
| Cronbach’s α | 0.88 | 0.16 | 0.80 | 0.71 | 0.59 | 0.49 |
# Maximum-likelihood factor analysis (stats::factanal) with six factors,
# fitted on every column except the first seven; regression-method factor
# scores are requested so they can be extracted from `res$scores` later.
# NOTE(review): this selection keeps `club_point` (column 8), whereas `fa`
# above is built from columns 9:38 only - confirm that this is intended.
res <- factanal(
  x = deneme[, -c(1, 2, 3, 4, 5, 6, 7)],
  factors = 6,
  scores = "regression"
)
# Show the fitted model (uniquenesses, loadings, fit test)
print(res)
##
## Call:
## factanal(x = deneme[, -c(1, 2, 3, 4, 5, 6, 7)], factors = 6, scores = "regression")
##
## Uniquenesses:
## club_point
## 0.909
## in_stat_index
## 0.488
## age
## 0.536
## height
## 0.465
## weight
## 0.364
## matches_played_total
## 0.018
## minutes_played_average_per_match
## 0.859
## clean_sheets_total
## 0.391
## goals_conceded_average_per_match
## 0.123
## shots_saved
## 0.005
## supersaves_average_per_match
## 0.498
## penalties_saved
## 0.938
## goalkeeper_interceptions
## 0.679
## passes_average_per_match
## 0.241
## opponents_shots_on_target_average_per_match
## 0.030
## good_interception_of_goalkeeper_average_per_match
## 0.699
## accurate_passes
## 0.006
## key_passes_accuracy
## 0.929
## foot_passes_from_open_play_average_per_match
## 0.656
## accurate_foot_passes_from_open_play
## 0.653
## hand_passes_accurate
## 0.270
## set_piece_passes_accurate
## 0.172
## short_passes_accurate
## 0.558
## mid_range_passes_accurate
## 0.168
## accurate_long_passes
## 0.456
## d_lose_range_shots_saved
## 0.723
## mid_range_shots_saved
## 0.551
## x_of_long_range_shots_saved
## 0.529
## jumping_saves_average_per_match
## 0.545
## stopped_shots
## 0.699
## x_g_conceded_total
## 0.141
##
## Loadings:
## Factor1 Factor2 Factor3
## club_point 0.239
## in_stat_index 0.198 0.617
## age 0.150 -0.140
## height 0.115
## weight 0.131
## matches_played_total 0.976
## minutes_played_average_per_match 0.103 0.135
## clean_sheets_total 0.134 0.653 -0.233
## goals_conceded_average_per_match 0.170 0.789
## shots_saved 0.385
## supersaves_average_per_match 0.103 0.681
## penalties_saved 0.213
## goalkeeper_interceptions 0.259 0.370
## passes_average_per_match 0.389 0.190
## opponents_shots_on_target_average_per_match 0.175 0.939
## good_interception_of_goalkeeper_average_per_match 0.164 0.110
## accurate_passes 0.970
## key_passes_accuracy 0.241
## foot_passes_from_open_play_average_per_match 0.155
## accurate_foot_passes_from_open_play 0.453 0.261
## hand_passes_accurate 0.725 0.105 0.233
## set_piece_passes_accurate 0.851 0.117
## short_passes_accurate 0.531 0.267 0.148
## mid_range_passes_accurate 0.845 0.150
## accurate_long_passes 0.668 0.170
## d_lose_range_shots_saved 0.186 0.239 0.153
## mid_range_shots_saved 0.255 0.176 0.115
## x_of_long_range_shots_saved 0.379 0.186 0.239
## jumping_saves_average_per_match 0.640
## stopped_shots 0.139 0.286 -0.105
## x_g_conceded_total 0.877 0.236
## Factor4 Factor5 Factor6
## club_point -0.145
## in_stat_index 0.179 0.246
## age 0.645
## height 0.713
## weight 0.784
## matches_played_total 0.101
## minutes_played_average_per_match 0.325
## clean_sheets_total 0.311 -0.114
## goals_conceded_average_per_match -0.413 0.223
## shots_saved 0.910
## supersaves_average_per_match 0.157
## penalties_saved
## goalkeeper_interceptions 0.240 0.137 0.201
## passes_average_per_match 0.170 0.730
## opponents_shots_on_target_average_per_match 0.216
## good_interception_of_goalkeeper_average_per_match 0.128 0.486
## accurate_passes 0.153 0.117
## key_passes_accuracy 0.110
## foot_passes_from_open_play_average_per_match 0.136 0.544
## accurate_foot_passes_from_open_play 0.205 0.151
## hand_passes_accurate 0.283 0.228
## set_piece_passes_accurate 0.229 0.185
## short_passes_accurate 0.224 0.122
## mid_range_passes_accurate 0.210 0.132 0.156
## accurate_long_passes 0.192 0.140
## d_lose_range_shots_saved 0.396
## mid_range_shots_saved 0.566 0.136
## x_of_long_range_shots_saved 0.460 0.145
## jumping_saves_average_per_match 0.191
## stopped_shots 0.395 0.166
## x_g_conceded_total 0.165
##
## Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## SS loadings 4.775 3.215 2.847 2.446 1.790 1.629
## Proportion Var 0.154 0.104 0.092 0.079 0.058 0.053
## Cumulative Var 0.154 0.258 0.350 0.428 0.486 0.539
##
## Test of the hypothesis that 6 factors are sufficient.
## The chi square statistic is 1105.84 on 294 degrees of freedom.
## The p-value is 2.3e-94
# Look at the loadings, sorted.
# `sort` is an argument of the print method for loadings, not of loadings()
# itself (which silently swallows it through `...`), so it has to be passed
# to print(). With sort = TRUE, variables loading above 0.5 on a factor are
# grouped by that factor instead of being listed in data order.
# NOTE: the printout recorded below was produced by the earlier call, where
# `sort` was ignored, so it is still in the original variable order.
print(loadings(res), sort = TRUE)
##
## Loadings:
## Factor1 Factor2 Factor3
## club_point 0.239
## in_stat_index 0.198 0.617
## age 0.150 -0.140
## height 0.115
## weight 0.131
## matches_played_total 0.976
## minutes_played_average_per_match 0.103 0.135
## clean_sheets_total 0.134 0.653 -0.233
## goals_conceded_average_per_match 0.170 0.789
## shots_saved 0.385
## supersaves_average_per_match 0.103 0.681
## penalties_saved 0.213
## goalkeeper_interceptions 0.259 0.370
## passes_average_per_match 0.389 0.190
## opponents_shots_on_target_average_per_match 0.175 0.939
## good_interception_of_goalkeeper_average_per_match 0.164 0.110
## accurate_passes 0.970
## key_passes_accuracy 0.241
## foot_passes_from_open_play_average_per_match 0.155
## accurate_foot_passes_from_open_play 0.453 0.261
## hand_passes_accurate 0.725 0.105 0.233
## set_piece_passes_accurate 0.851 0.117
## short_passes_accurate 0.531 0.267 0.148
## mid_range_passes_accurate 0.845 0.150
## accurate_long_passes 0.668 0.170
## d_lose_range_shots_saved 0.186 0.239 0.153
## mid_range_shots_saved 0.255 0.176 0.115
## x_of_long_range_shots_saved 0.379 0.186 0.239
## jumping_saves_average_per_match 0.640
## stopped_shots 0.139 0.286 -0.105
## x_g_conceded_total 0.877 0.236
## Factor4 Factor5 Factor6
## club_point -0.145
## in_stat_index 0.179 0.246
## age 0.645
## height 0.713
## weight 0.784
## matches_played_total 0.101
## minutes_played_average_per_match 0.325
## clean_sheets_total 0.311 -0.114
## goals_conceded_average_per_match -0.413 0.223
## shots_saved 0.910
## supersaves_average_per_match 0.157
## penalties_saved
## goalkeeper_interceptions 0.240 0.137 0.201
## passes_average_per_match 0.170 0.730
## opponents_shots_on_target_average_per_match 0.216
## good_interception_of_goalkeeper_average_per_match 0.128 0.486
## accurate_passes 0.153 0.117
## key_passes_accuracy 0.110
## foot_passes_from_open_play_average_per_match 0.136 0.544
## accurate_foot_passes_from_open_play 0.205 0.151
## hand_passes_accurate 0.283 0.228
## set_piece_passes_accurate 0.229 0.185
## short_passes_accurate 0.224 0.122
## mid_range_passes_accurate 0.210 0.132 0.156
## accurate_long_passes 0.192 0.140
## d_lose_range_shots_saved 0.396
## mid_range_shots_saved 0.566 0.136
## x_of_long_range_shots_saved 0.460 0.145
## jumping_saves_average_per_match 0.191
## stopped_shots 0.395 0.166
## x_g_conceded_total 0.165
##
## Factor1 Factor2 Factor3 Factor4 Factor5 Factor6
## SS loadings 4.775 3.215 2.847 2.446 1.790 1.629
## Proportion Var 0.154 0.104 0.092 0.079 0.058 0.053
## Cumulative Var 0.154 0.258 0.350 0.428 0.486 0.539
# Uniqueness of each variable at full precision (the model printout above
# rounds these to three decimals).
print(res[["uniquenesses"]])
## club_point
## 0.909269332
## in_stat_index
## 0.487731433
## age
## 0.535777137
## height
## 0.464880026
## weight
## 0.364160319
## matches_played_total
## 0.017729927
## minutes_played_average_per_match
## 0.858529486
## clean_sheets_total
## 0.391137384
## goals_conceded_average_per_match
## 0.122653211
## shots_saved
## 0.005000000
## supersaves_average_per_match
## 0.497739384
## penalties_saved
## 0.938324993
## goalkeeper_interceptions
## 0.678724412
## passes_average_per_match
## 0.241063087
## opponents_shots_on_target_average_per_match
## 0.030146460
## good_interception_of_goalkeeper_average_per_match
## 0.699253910
## accurate_passes
## 0.005749569
## key_passes_accuracy
## 0.929259972
## foot_passes_from_open_play_average_per_match
## 0.656017070
## accurate_foot_passes_from_open_play
## 0.653467534
## hand_passes_accurate
## 0.269957747
## set_piece_passes_accurate
## 0.171510424
## short_passes_accurate
## 0.558023678
## mid_range_passes_accurate
## 0.167586267
## accurate_long_passes
## 0.455648795
## d_lose_range_shots_saved
## 0.723092436
## mid_range_shots_saved
## 0.551177257
## x_of_long_range_shots_saved
## 0.529456861
## jumping_saves_average_per_match
## 0.545416063
## stopped_shots
## 0.698931848
## x_g_conceded_total
## 0.141365084
## Creating the new data
# Extract the factor scores computed by factanal (columns Factor1..Factor6).
# Despite the name, `coefs` holds per-row scores, not model coefficients.
coefs <- as.data.frame(res$scores)
# Append the factor scores to the cleaned data, row for row.
gk <- cbind(deneme, coefs)
# Create the weighted factor variables: each factor score multiplied by the
# row's `club_point`, stored as Factor1_wf ... Factor6_wf. across() replaces
# the superseded mutate_at() and produces the same column names and order.
gk <- gk %>%
  mutate(across(Factor1:Factor6, list(wf = ~ .x * club_point)))
# Similarity search on the factor scores ----

# Similarity of every row of `data` to one reference player.
#
# data:        data frame with a `Name` column and the numeric score columns.
# player:      value of `Name` identifying the reference player; it must
#              match exactly one row.
# factor_cols: names of the score columns to compare on.
#
# Returns one value per row: 1 minus the sum of absolute differences to the
# reference player's scores, divided by the largest such sum. The reference
# player therefore gets 1 and the most distant row gets 0.
# Stops with an error when `player` matches zero or several rows, because the
# reference score vector would then have the wrong length.
player_similarity <- function(data, player, factor_cols) {
  target_row <- which(data[["Name"]] == player)
  if (length(target_row) != 1L) {
    stop(
      "Expected exactly one row with Name == \"", player, "\", found ",
      length(target_row), ".",
      call. = FALSE
    )
  }
  scores <- as.matrix(data[, factor_cols, drop = FALSE])
  # Subtract the reference player's scores from every row, column by column.
  distance <- rowSums(abs(sweep(scores, 2, scores[target_row, ])))
  1 - distance / max(distance)
}

# Score columns are selected by name rather than by position (39:44 / 45:50)
# so the analysis does not break if columns are added or reordered.
target_player <- "Volkan Babacan"
unweighted_cols <- paste0("Factor", 1:6)
weighted_cols <- paste0("Factor", 1:6, "_wf")

#The Analysis with unweighted coefs
#Check example of similar players to Volkan Babacan ---- UNWEIGHTED
Similarity <- player_similarity(gk, target_player, unweighted_cols)
SimilarityResult <- cbind.data.frame(Name = gk[, 1], club = gk[, 2], league = gk[, 3], Similarity)
#Order results (most similar first)
SimilarityResult <- SimilarityResult[order(-SimilarityResult$Similarity), ]
#Look at most similar players (head() avoids NA rows if fewer than 10 exist)
head(SimilarityResult, 10)
## Name club league Similarity
## 276 Volkan Babacan Istanbul Turkish Turkcell Super Lig 1.0000000
## 39 Ralf Fahrmann Schalke 04 German Bundesliga 0.9367448
## 181 Ivan Villar Celta Spanish Primera Division 0.9269147
## 315 Dusan Melicharek Malmo Swedish Allsvenskan 0.9235908
## 145 Sven Ulreich Bayern German Bundesliga 0.9005396
## 263 Erwin Mulder Heerenveen Dutch Eredivisie 0.8973590
## 61 Tiago Sa Sporting Braga Portuguese Liga 0.8945471
## 160 Gaetan Poussin Bordeaux French Ligue 1 0.8867010
## 320 Yohann Pele Marseille French Ligue 1 0.8847592
## 271 Daniele Padelli Inter Italy Serie A 0.8812565
#The Analysis with coefs weighted by club strength
#Check example of similar players to Volkan Babacan ---- WEIGHTED
Similarity_w <- player_similarity(gk, target_player, weighted_cols)
SimilarityResult_w <- cbind.data.frame(Name = gk[, 1], club = gk[, 2], league = gk[, 3], Similarity_w)
#Order results (most similar first)
SimilarityResult_w <- SimilarityResult_w[order(-SimilarityResult_w$Similarity_w), ]
#Look at most similar players (head() avoids NA rows if fewer than 10 exist)
head(SimilarityResult_w, 10)
## Name club league Similarity_w
## 276 Volkan Babacan Istanbul Turkish Turkcell Super Lig 1.0000000
## 181 Ivan Villar Celta Spanish Primera Division 0.9261653
## 39 Ralf Fahrmann Schalke 04 German Bundesliga 0.9165547
## 142 Neto Barcelona Spanish Primera Division 0.9134809
## 315 Dusan Melicharek Malmo Swedish Allsvenskan 0.9110077
## 61 Tiago Sa Sporting Braga Portuguese Liga 0.8863322
## 160 Gaetan Poussin Bordeaux French Ligue 1 0.8717047
## 271 Daniele Padelli Inter Italy Serie A 0.8627830
## 320 Yohann Pele Marseille French Ligue 1 0.8572126
## 119 Jed Steer Aston Villa Barclays Premier League 0.8500897