This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.

# Project setup: point the session at the data folder and load the packages
# used by the analysis.
project_dir <- "C:/Users/aziza/Documents/Data Mining"
# Only switch directory when that folder exists, so the notebook does not
# abort on another machine (files are then resolved against the current
# working directory).
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Install only the packages that are missing. Calling install.packages() on a
# package that is already loaded fails with "Updating loaded packages".
required_packages <- c("dplyr", "ggplot2", "factoextra", "caret")
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

library(dplyr)
library(ggplot2)
library(factoextra)
library(caret)

Importation des données :

# Load the player table from the CSV file in the working directory.
data <- read.csv(file = "fifa_players.csv")

Description des variables :

# Compact overview of the table: dimensions, column types and first values.
str(data)
'data.frame':   17954 obs. of  51 variables:
 $ name                         : chr  "L. Messi" "C. Eriksen" "P. Pogba" "L. Insigne" ...
 $ full_name                    : chr  "Lionel Andrés Messi Cuccittini" "Christian  Dannemann Eriksen" "Paul Pogba" "Lorenzo Insigne" ...
 $ birth_date                   : chr  "6/24/1987" "2/14/1992" "3/15/1993" "6/4/1991" ...
 $ age                          : int  31 27 25 27 27 27 20 30 32 32 ...
 $ height_cm                    : num  170 155 190 163 188 ...
 $ weight_kgs                   : num  72.1 76.2 83.9 59 88.9 92.1 73 69.9 92.1 77.1 ...
 $ positions                    : chr  "CF,RW,ST" "CAM,RM,CM" "CM,CAM" "LW,ST" ...
 $ nationality                  : chr  "Argentina" "Denmark" "France" "Italy" ...
 $ overall_rating               : int  94 88 88 88 88 88 88 89 89 89 ...
 $ potential                    : int  94 89 91 88 91 90 95 89 89 89 ...
 $ value_euro                   : int  110500000 69500000 73000000 62000000 60000000 59500000 81000000 64500000 38000000 60000000 ...
 $ wage_euro                    : int  565000 205000 255000 165000 135000 215000 100000 300000 130000 200000 ...
 $ preferred_foot               : chr  "Left" "Right" "Right" "Right" ...
 $ international_reputation.1.5.: int  5 3 4 3 3 3 3 4 5 4 ...
 $ weak_foot.1.5.               : int  4 5 4 4 3 3 4 4 4 4 ...
 $ skill_moves.1.5.             : int  4 4 5 4 2 2 5 4 1 3 ...
 $ body_type                    : chr  "Messi" "Lean" "Normal" "Normal" ...
 $ release_clause_euro          : int  226500000 133800000 144200000 105400000 106500000 114500000 166100000 119300000 62700000 111000000 ...
 $ national_team                : chr  "Argentina" "Denmark" "France" "Italy" ...
 $ national_rating              : int  82 78 84 83 NA 81 84 82 85 81 ...
 $ national_team_position       : chr  "RF" "CAM" "RDM" "LW" ...
 $ national_jersey_number       : int  10 10 6 10 NA 4 10 11 1 21 ...
 $ crossing                     : int  86 88 80 86 30 53 77 70 15 70 ...
 $ finishing                    : int  95 81 75 77 22 52 88 93 13 89 ...
 $ heading_accuracy             : int  70 52 75 56 83 83 77 77 25 89 ...
 $ short_passing                : int  92 91 86 85 68 79 82 81 55 78 ...
 $ volleys                      : int  86 80 85 74 14 45 78 85 11 90 ...
 $ dribbling                    : int  97 84 87 90 69 70 90 89 30 80 ...
 $ curve                        : int  93 86 85 87 28 60 77 82 14 77 ...
 $ freekick_accuracy            : int  94 87 82 77 28 70 63 73 11 76 ...
 $ long_passing                 : int  89 89 90 78 60 81 73 64 59 52 ...
 $ ball_control                 : int  96 91 90 93 63 76 91 89 46 82 ...
 $ acceleration                 : int  91 76 71 94 70 74 96 88 54 75 ...
 $ sprint_speed                 : int  86 73 79 86 75 77 96 80 60 76 ...
 $ agility                      : int  93 80 76 94 50 61 92 86 51 77 ...
 $ reactions                    : int  95 88 82 83 82 87 87 90 84 91 ...
 $ balance                      : int  95 81 66 93 40 49 83 91 35 59 ...
 $ shot_power                   : int  85 84 90 75 55 81 79 88 25 87 ...
 $ jumping                      : int  68 50 83 53 81 88 75 81 77 88 ...
 $ stamina                      : int  72 92 88 75 75 75 83 76 43 92 ...
 $ strength                     : int  66 58 87 44 94 92 71 73 80 78 ...
 $ long_shots                   : int  94 89 82 84 15 64 78 83 16 79 ...
 $ aggression                   : int  48 46 78 34 87 82 62 65 29 84 ...
 $ interceptions                : int  22 56 64 26 88 88 38 24 30 48 ...
 $ positioning                  : int  94 84 82 83 24 41 88 92 12 93 ...
 $ vision                       : int  94 91 88 87 49 60 82 83 70 77 ...
 $ penalties                    : int  75 67 82 61 33 62 70 83 47 85 ...
 $ composure                    : int  96 88 87 83 80 87 86 90 70 82 ...
 $ marking                      : int  33 59 63 51 91 90 34 30 17 52 ...
 $ standing_tackle              : int  28 57 67 24 88 89 34 20 10 45 ...
 $ sliding_tackle               : int  26 22 67 22 87 84 32 12 11 39 ...
# Distinct raw values of the `positions` column (each value is a
# comma-separated list of position codes).
mod.pos <- unique(data[["positions"]])
print(mod.pos)
  [1] "CF,RW,ST"      "CAM,RM,CM"     "CM,CAM"        "LW,ST"         "CB"            "RW,ST,RM"     
  [7] "ST"            "GK"            "CDM,CM"        "CF,ST"         "RW,ST"         "CAM,RW"       
 [13] "CDM"           "CM,CDM"        "LB"            "CM,CAM,CDM"    "CAM,CM,LW"     "CAM,CM,RM"    
 [19] "LW,ST,LM"      "CAM,CM"        "CAM"           "LW,RW"         "RW,LW"         "CM,LM"        
 [25] "CM"            "RM,RW"         "LM,RM,CAM,LW"  "LM"            "RW,RM"         "LM,ST,RM"     
 [31] "RB"            "LM,RM,LW,RW"   "LM,CAM,RM"     "RM,LM,ST"      "RW"            "LM,ST,RM,LW"  
 [37] "CM,CDM,CAM"    "CM,CDM,CB"     "CB,CDM"        "CF,ST,CAM"     "CDM,CB"        "RB,RWB"       
 [43] "RM"            "LM,CAM"        "CM,LW"         "LB,LWB,LM"     "ST,LW"         "LM,LB,CM"     
 [49] "RB,RM"         "RM,LM,CM"      "LM,RM,CAM"     "RM,RW,ST"      "LW,LM,RW"      "LB,LM"        
 [55] "ST,LW,LM"      "RM,CM"         "CM,RM"         "CB,RB"         "RM,RW,LM"      "LM,RM"        
 [61] "CDM,CB,CM"     "CAM,CDM"       "LB,CDM"        "CAM,RM,LM"     "RM,CAM,CM"     "LWB"          
 [67] "CDM,CM,LM"     "CDM,CM,RM"     "CDM,CM,CB"     "CAM,LW"        "RM,LM"         "ST,CAM"       
 [73] "LM,CM"         "ST,CF,CAM,LM"  "LWB,LB,CB,LM"  "RB,LB"         "LW"            "CAM,ST"       
 [79] "LM,RM,LW"      "LWB,LM,LB"     "ST,CF,LM,CAM"  "LM,LW,ST"      "LB,LWB"        "RWB,RB,RM"    
 [85] "ST,RW"         "CAM,CM,LM"     "RB,RW"         "RM,CAM,LM"     "ST,RM"         "RM,ST"        
 [91] "CM,LM,RM"      "CM,CAM,LM"     "RW,RWB"        "LM,RB"         "RM,LM,RW,CAM"  "ST,RM,RWB,LM" 
 [97] "LB,RB"         "CM,CDM,RM"     "LM,LW"         "CAM,ST,RM"     "ST,CF"         "CB,LB"        
[103] "RWB,RM"        "LM,CM,LW"      "CF,LW"         "CAM,CM,RW"     "CAM,LM,RW"     "ST,RW,LW,CF"  
[109] "CM,RM,RB"      "RM,CM,LM"      "LW,RM"         "CB,RB,LB"      "CAM,CF,ST"     "CAM,CF"       
[115] "CAM,CM,ST"     "RB,CB"         "ST,LM,RM"      "CM,RM,LM"      "LWB,LB,LM,CDM" "RM,RW,CM,CAM" 
[121] "LB,RB,RWB"     "RB,RWB,RM"     "CAM,RM"        "LWB,LB,LM"     "CM,LM,CAM"     "CAM,LM,RM,RW" 
[127] "ST,LM"         "RM,CAM"        "CAM,LM,RM"     "LW,RW,CAM"     "CF,ST,LW"      "LM,RW"        
[133] "RW,LW,LM"      "RWB,LB"        "LM,ST"         "RB,RM,LB,LM"   "LWB,LM"        "LW,CF"        
[139] "ST,RW,LW,CAM"  "CAM,CDM,RM"    "LWB,LB,RB,RWB" "CM,RW,CAM"     "RB,CM"         "CM,CDM,CAM,RM"
[145] "RM,RB"         "CM,LM,CDM"     "LM,CM,CAM"     "LM,RM,CM"      "RM,LM,RW"      "LM,RM,RW"     
[151] "RW,LW,ST"      "LW,RW,RM"      "LWB,LB"        "CDM,RB"        "RW,RM,CAM"     "CB,LB,RB"     
[157] "CM,CAM,ST"     "CF,CAM,ST"     "RW,CAM,RM"     "CDM,LB,CM"     "CAM,LM,CM"     "LB,LWB,CDM"   
[163] "RM,RB,RW"      "RM,CAM,LM,CM"  "RB,RWB,LB"     "CAM,CM,CDM"    "LM,RM,ST,LB"   "CDM,CM,CAM"   
[169] "CF,CAM"        "RM,LM,CAM"     "RW,LW,RM"      "LW,CAM"        "RB,CB,RWB,RM"  "ST,LW,RW"     
[175] "CAM,RW,ST"     "ST,CAM,RW,LW"  "LB,CB,LM"      "CM,LM,LB"      "RB,LB,RWB"     "ST,RM,LM"     
[181] "LB,CB"         "ST,CAM,CF"     "CDM,CAM,CM,LM" "CAM,LM"        "LB,CM,LWB"     "RW,CAM"       
[187] "LM,CM,RM,CAM"  "CAM,RW,LW"     "RM,LM,RW,RWB"  "CDM,RB,RM"     "LW,LM,RM,RW"   "ST,RW,LW"     
[193] "RB,CB,CDM"     "CAM,CF,RW,RM"  "LM,CF,CAM"     "ST,RW,CF"      "CM,RB"         "CF,ST,CAM,LM" 
[199] "RM,LM,CM,CDM"  "ST,RM,LM,CAM"  "LM,LW,CAM"     "CAM,LB,CM"     "RM,RW,ST,CAM"  "LM,LB"        
[205] "LW,CM,CAM"     "LW,LM"         "RM,LM,CAM,ST"  "LW,ST,RW"      "LM,LWB,LB,LW"  "LW,LM,CF"     
[211] "CAM,CM,RWB"    "CAM,CF,CM,RM"  "LM,CAM,LW"     "LM,RM,ST"      "RB,CDM,CM"     "CM,CDM,LM"    
[217] "LW,LM,RW,CAM"  "LM,LW,RM"      "RM,LW,CAM"     "CM,LB"         "CM,CDM,LW"     "CM,CAM,LM,CDM"
[223] "ST,LM,CAM"     "CF,LM,ST,CAM"  "RB,CB,RM"      "CAM,RB"        "RB,CB,RWB"     "LB,LW"        
[229] "CDM,RM,RB"     "RB,CB,LB"      "LM,CAM,RM,CF"  "ST,RM,CAM,LM"  "LM,RM,CM,CAM"  "RB,RM,RWB"    
[235] "LB,CB,LWB"     "CB,RB,CDM"     "RM,ST,RW"      "CM,CF"         "CB,CDM,CM"     "CDM,CAM"      
[241] "LM,LW,RW"      "LM,CF"         "LM,LW,RM,ST"   "LB,LM,LWB"     "ST,LM,RW,RM"   "RW,RM,CM"     
[247] "RM,RW,LW"      "CAM,RM,LM,CM"  "CAM,CM,RM,LM"  "RB,LB,CDM,CM"  "RB,LB,CB"      "CM,RM,CDM"    
[253] "LW,RW,LM"      "ST,CF,CAM"     "CM,LB,CDM"     "RW,CM"         "CDM,CAM,CM"    "LM,RM,CF"     
[259] "LW,CAM,ST"     "RB,LB,RM"      "RB,LB,CDM"     "RB,CDM"        "CDM,CM,CB,LM"  "ST,RM,CAM"    
[265] "CM,RM,CAM"     "CB,LB,CDM"     "RB,CM,RM"      "LB,CB,CDM"     "RM,RWB"        "CAM,LB,CM,RM" 
[271] "CB,CM"         "RM,CM,RB"      "CAM,LM,ST"     "LB,LWB,CB,LM"  "RM,CAM,ST"     "LB,RB,RM"     
[277] "RWB,RB,LWB"    "RM,RWB,LM,CAM" "RWB,CB"        "LM,CM,ST"      "CM,CB,CDM"     "RW,LW,CM"     
[283] "CDM,CM,CAM,RM" "RW,RM,LW,ST"   "CAM,LM,RM,LB"  "LB,RB,LM"      "RM,ST,LM"      "ST,RW,CAM"    
[289] "LM,ST,CAM"     "RW,CAM,LW"     "RB,RM,LB"      "RB,LM"         "RB,RW,LW"      "CM,RW"        
[295] "LB,LW,RW"      "CDM,RB,CM"     "LM,CAM,CM"     "RM,RW,LM,LW"   "LM,LWB,CM"     "RW,RM,LM,ST"  
[301] "CAM,LW,RB"     "CM,LW,RW,LM"   "ST,CF,RW"      "ST,RW,CF,RM"   "RB,CM,CDM"     "LW,LB,RW,LM"  
[307] "LM,ST,LW"      "LB,LM,RM"      "CAM,CM,RM,ST"  "RW,LB"         "CAM,RM,RW"     "CDM,LWB"      
[313] "CM,CAM,CF"     "CAM,ST,LM"     "CM,CDM,CAM,ST" "CDM,LM,CAM,CM" "ST,CAM,LM"     "CF,ST,CAM,LW" 
[319] "RM,LM,RW,LW"   "CB,RB,LB,CDM"  "RB,RM,LM"      "ST,LW,CAM"     "CDM,RM,CAM"    "CB,CDM,RB"    
[325] "LM,LWB"        "RB,LB,LWB"     "CAM,CM,LM,RM"  "LM,CAM,RW,CF"  "CAM,ST,CM"     "LM,CM,RM"     
[331] "LM,ST,CM"      "RM,RB,CB"      "LB,CDM,CM"     "LB,RB,CB"      "LW,RW,CM,RB"   "RW,RM,CAM,ST" 
[337] "RM,RWB,ST"     "LM,CAM,RM,CDM" "CM,CDM,CAM,LM" "CB,RB,CM"      "CAM,CDM,CM"    "RM,CAM,CM,LM" 
[343] "LB,LM,LW"      "ST,CF,RM"      "RB,LM,RM"      "RW,LM,CAM"     "LB,CB,LM,CM"   "RB,RM,CB"     
[349] "CF,RW,CM"      "LB,LM,RB"      "RWB,RB"        "ST,LM,LW"      "LB,CM"         "LB,LM,ST"     
[355] "CM,CDM,RB,RWB" "LB,RWB,LWB"    "RM,RB,LB"      "LW,RW,ST"      "RB,CDM,CB,LB"  "LM,RM,CM,CDM" 
[361] "LM,CAM,CDM"    "RM,LM,CF,CAM"  "CDM,RM,CM"     "CB,LWB,LM"     "RM,ST,LM,RW"   "RM,LM,RB,CF"  
[367] "CF,LM"         "RM,RB,RWB"     "RB,LB,LWB,RWB" "RM,RWB,RB"     "CM,LW,LWB"     "LW,RB,LB"     
[373] "LB,RM"         "LW,LM,ST,RM"   "RM,ST,CAM"     "CM,ST"         "LM,RW,LW"      "CAM,CM,CF"    
[379] "CM,LM,LW,CAM"  "LW,CAM,RW"     "ST,CF,LW"      "RM,CM,ST,LM"   "ST,RW,RM"      "LW,LWB,LB,RW" 
[385] "RWB,RM,RB,LB"  "RW,CAM,ST"     "RWB,RB,RM,CM"  "ST,RM,RW"      "RM,LW,CAM,ST"  "CM,CAM,CDM,RM"
[391] "CDM,CM,RB,CB"  "RB,CDM,LB"     "RWB,RB,RW"     "RWB"           "CM,CDM,LB"     "ST,CM"        
[397] "RB,LB,CM"      "RM,LM,RB"      "CM,CAM,LW"     "CF,ST,RM,CAM"  "RM,RB,LM"      "RM,LW"        
[403] "ST,LM,CF"      "CF"            "RB,RW,CAM,LW"  "LWB,LM,LB,RWB" "RWB,RM,RB"     "CM,CAM,RM"    
[409] "LWB,RWB"       "CM,ST,RM"      "CAM,CF,CM"     "RW,RM,LM"      "LW,RM,RW"      "RW,CM,CAM,LW" 
[415] "LM,RM,CAM,ST"  "CM,RM,LM,CAM"  "CDM,LB"        "RB,LW"         "RW,RB"         "ST,LW,RW,LM"  
[421] "CDM,CM,RB"     "CAM,LB"        "CM,ST,RW"      "LB,LM,CM"      "CM,CDM,RB"     "CF,ST,RW"     
[427] "RB,RM,LB,RWB"  "LW,CM"         "RWB,LWB"       "CAM,LM,RM,ST"  "RM,LM,CF"      "LB,CB,RB"     
[433] "CB,LB,LM"      "RM,RW,CAM"     "LB,RW,LW"      "RB,LM,LB,RM"   "LM,CAM,LB,LW"  "RB,LB,RWB,LWB"
[439] "LW,RW,LB,CM"   "CM,LM,CB"      "LM,RM,LB"      "LB,RB,CDM"     "CB,RB,RWB"     "CDM,LM"       
[445] "LM,LW,CM"      "RM,CF"         "CB,CDM,CM,CAM" "LM,CM,CDM,LB"  "LWB,LW"        "LM,RM,RB"     
[451] "RWB,RB,CDM"    "RM,LM,LWB"     "LB,CAM,LM"     "LW,LM,RM"      "LB,CDM,CB"     "LB,RB,LM,RM"  
[457] "LB,CB,CM"      "CB,LB,CM"      "LW,LM,ST"      "LW,LB"         "ST,CAM,RW"     "CDM,CB,RB"    
[463] "CM,CF,RB"      "CM,CDM,CB,RB"  "CM,RWB,CDM"    "RB,RM,CDM"     "CAM,LM,LB"     "CM,CB"        
[469] "RB,RW,RM"      "LM,LB,CB"      "CAM,RW,CM"     "LB,LWB,CAM,LM" "ST,LW,CDM"     "ST,RW,LM"     
[475] "CB,LWB"        "RM,RB,CAM,CM"  "RM,CF,LM"      "RW,LW,CAM"     "ST,LM,LWB"     "CAM,RW,RWB"   
[481] "LB,LW,ST"      "RB,RWB,CB"     "CM,CAM,RW"     "CB,CDM,LB"     "RM,LM,ST,CAM"  "ST,RM,LW"     
[487] "RM,RB,CDM"     "CM,RB,RM"      "LM,CAM,LB"     "CDM,RM"        "LB,LWB,CB"     "RB,RWB,CDM"   
[493] "CF,RM,CM"      "LM,CM,LB"      "CM,RWB,RM"     "LWB,LB,CB"     "RB,CB,LB,CDM"  "CF,RM,LM"     
[499] "RM,CAM,CF,LB"  "RM,RW,RWB"     "CDM,CM,RB,LM"  "LM,RM,LB,RB"   "RW,ST,CM"      "RB,RWB,RM,RW" 
[505] "CDM,CM,LB"     "CAM,LW,RW"     "LB,RB,LW"      "CAM,RW,LW,CF"  "CAM,RM,RWB"    "CAM,ST,CF"    
[511] "RW,LW,CAM,CM"  "LM,LB,LW"      "CAM,LM,LW"     "CM,CDM,RWB"    "CB,CM,CDM"     "ST,CB"        
[517] "LM,RM,LWB,CM"  "LM,LB,ST"      "LW,CM,RW"      "LW,CAM,CM,RB"  "RB,CAM"        "LM,LW,CF"     
[523] "CF,RW"         "CM,RWB"        "RB,ST"         "CB,ST"         "CF,CM,ST"      "LB,LM,CB"     
[529] "CDM,CM,CF"     "ST,LM,LW,RW"   "CM,LB,RM"      "LM,CM,RM,RB"   "RM,RW,RB,RWB"  "RW,LM"        
[535] "RW,RM,CF"      "CM,RM,LW"      "CAM,LW,CF"     "ST,LM,RM,CF"   "CAM,RM,CM,ST"  "LB,LM,LWB,CB" 
[541] "RM,CB,RB,RWB"  "LM,LWB,RM,RWB" "CM,CAM,RM,CF"  "CM,LM,LB,CF"   "RM,LM,LB"      "LM,ST,LB"     
[547] "RM,CAM,CF"     "CB,RWB,RB"     "RW,LW,RM,ST"   "LW,RW,CM"      "RM,CAM,LM,CDM" "LW,RB"        
[553] "LB,LWB,CM"     "CDM,RB,CB"     "LM,CDM,LB"     "LB,LW,LWB"     "LB,RB,CM"      "RB,RM,RW"     
[559] "CF,CAM,RM"     "RB,CDM,CM,RM"  "RWB,LM,LWB,RB" "CAM,RW,RM"     "RM,RB,CM"      "CB,LB,LWB"    
[565] "CDM,LB,LM,CB"  "ST,CAM,RM"     "LWB,CB"        "CB,CM,RWB"     "LM,LB,CDM,CM"  "CB,RWB"       
[571] "CM,ST,LM"      "RWB,LW,RW"     "CM,CB,CAM"     "CDM,RB,LB"     "LB,LM,CAM"     "RM,LM,CAM,CM" 
[577] "LM,RM,LWB"     "CDM,CB,LB"     "LB,CM,RB"      "RW,ST,LW,CF"   "CDM,CM,CB,ST"  "LM,LW,RM,RW"  
[583] "RW,CM,CAM"     "LM,ST,RM,CAM"  "RW,ST,LW"      "LWB,LB,CDM"    "ST,CAM,LM,CM"  "CAM,LM,LWB,LB"
[589] "CB,ST,CM"      "CAM,CF,ST,RM"  "RM,RB,ST"      "RWB,LM,ST"     "CM,LWB,LB"     "RB,ST,RM"     
[595] "CAM,RM,ST"     "LM,CDM"        "CM,CDM,LM,LB"  "CM,CB,LB"      "RB,CDM,RM"     "RW,RM,LW,LM"  
[601] "RM,LM,LW"      "CDM,RM,CM,CAM" "CB,RM"         "LM,LB,LWB"     "LM,LW,CDM,LWB" "RM,CB,RB"     
[607] "LW,RW,CF,ST"   "ST,LM,RW"      "LB,CM,LM"      "CAM,RM,CDM"    "RM,RW,CF"      "CF,RM"        
[613] "CM,CDM,CAM,CF" "LB,RB,RM,CDM"  "CF,ST,CM"      "RM,LM,RB,CM"   "LM,RM,ST,CAM"  "ST,RM,RW,LM"  
[619] "CAM,CF,LM"     "RB,CDM,RWB"    "LM,ST,CF"      "CF,RM,CAM"     "LW,LM,CAM"     "RM,ST,LM,LW"  
[625] "ST,RM,LM,LW"   "CM,LW,LM,CAM"  "ST,RM,RW,LW"   "RB,CDM,CB"     "LW,LM,RW,RM"   "CB,LM,LB,LWB" 
[631] "RM,RWB,LM"     "ST,RB"         "CM,LM,RM,CAM"  "RB,CB,CM,CDM"  "RM,LW,LM"      "CB,RB,RM"     
[637] "RW,CF"         "CF,CAM,LW"     "RW,ST,CAM,RM"  "RM,CM,CDM"     "LM,LB,RM"      "LW,LM,ST,LWB" 
[643] "CAM,CF,RM"     "CAM,RM,ST,LM"  "CM,LB,LM"      "LM,CM,CDM,RM"  "RW,RM,LB,CM"   "CM,RW,RM"     
[649] "LB,CB,CDM,RB"  "CM,RM,CDM,LM"  "CM,RW,LM"      "CM,RB,CDM,LB"  "CM,LWB"        "RWB,RM,LM"    
[655] "CAM,RM,CM,LM"  "RB,CDM,CB,CM"  "ST,LW,CM"      "RB,CDM,RM,LB"  "RW,RM,CM,CAM"  "LM,RM,RW,CM"  
[661] "CDM,RM,LM"     "LW,RW,CF"      "CAM,RM,RB"     "CF,ST,RW,CAM"  "LM,RM,ST,CF"   "LW,RW,CAM,ST" 
[667] "RW,CF,LW"      "CF,CM"         "CDM,RM,LM,RB"  "CM,RW,LW"      "RM,CM,RB,RWB"  "CF,CM,LW"     
[673] "LB,CB,CDM,LM"  "RM,LW,RW"      "CAM,ST,CF,LM"  "CDM,LB,CM,RB"  "RB,RM,ST"      "LB,RB,RM,LM"  
[679] "RM,LM,RB,LB"   "CDM,CM,LM,CAM" "CDM,CM,RM,RB"  "CM,CAM,RM,CDM" "LM,LW,RW,ST"   "LB,LM,LWB,RM" 
[685] "CAM,LM,CDM"    "CAM,ST,RM,LM"  "LM,CAM,ST"     "LB,LM,RB,CM"   "CAM,CDM,CM,RM" "CAM,RM,LM,ST" 
[691] "LM,CM,RM,CDM"  "LW,CAM,LM"     "CF,CAM,LM"     "LWB,RWB,LM,RM" "LM,LW,RW,RM"   "RM,CAM,LM,ST" 
[697] "RM,CF,CAM"     "LW,LM,RM,CAM"  "LM,LWB,LB"     "CM,RM,CDM,RB"  "CB,CDM,CAM"    "LW,RM,ST,LM"  
[703] "ST,LM,RM,CM"   "CM,RB,CDM"     "LB,CB,LM,LWB"  "RM,CM,LM,CAM"  "RW,RM,ST"      "CB,LM,LB"     
[709] "LW,RW,CAM,LM"  "CM,CDM,RM,LM"  "RM,ST,LM,CAM"  "ST,RM,LM,CF"   "RB,RM,LM,CM"   "CAM,LW,ST"    
[715] "CAM,CM,LM,ST"  "LM,LWB,LW"     "CM,RM,LM,CDM"  "CM,RB,LB"      "RM,LM,LW,RW"   "LW,CAM,RW,RM" 
[721] "LM,RM,CAM,CM"  "LM,RM,LW,CM"   "CB,CM,CDM,CAM" "LB,LM,CDM"     "RB,CB,LB,RWB"  "LWB,LB,RB"    
[727] "LM,LB,RB"      "RB,RWB,LWB,LB" "CAM,LW,CM"     "LWB,LM,CB,RWB" "LM,CM,RM,ST"   "ST,CF,LM,RW"  
[733] "RW,CAM,CF"     "RM,RW,CAM,ST"  "RWB,RB,CDM,RM" "LW,LM,CAM,ST"  "CM,RB,CAM"     "CM,ST,CAM"    
[739] "LW,RW,ST,RM"   "LM,RM,CM,ST"   "CF,LW,RW"      "CM,RM,LB"      "LW,LM,CAM,CF"  "CB,RB,CAM"    
[745] "LM,ST,CAM,LW"  "LM,CAM,CM,CF"  "RB,RW,LB"      "RM,LM,RW,LWB"  "RM,RW,RB,LM"   "CDM,RB,CB,CM" 
[751] "LM,ST,RW"      "RM,LM,RW,ST"   "LM,LW,RM,RB"   "CAM,RW,CF,RM"  "LB,CM,LW"      "ST,LW,LM,CF"  
[757] "RM,LM,CDM,CM"  "LW,CAM,RW,LM"  "LM,RM,LW,ST"   "RW,CAM,CM"     "ST,CAM,LW,RW"  "LW,LB,CM,LM"  
[763] "LM,LB,CAM"     "CAM,LW,LM,ST"  "CB,LWB,LB"     "CAM,RW,CF"     "CB,CM,CAM"     "RM,RW,ST,LM"  
[769] "LW,CF,RM,RW"   "RW,CM,RM"      "CAM,CM,CF,RM"  "CM,CF,LW"      "CDM,CM,CB,RM"  "CF,ST,LM,CAM" 
[775] "CM,RM,LB,RB"   "RM,CM,RW,CAM"  "RM,RB,CDM,CM"  "CAM,LM,ST,CM"  "CAM,LM,CM,CF"  "RM,CM,CAM,RB" 
[781] "LB,RB,LM,CM"   "RW,RM,ST,LW"   "RM,CM,LM,LB"   "RW,LM,RM,ST"   "RM,LM,CAM,RW"  "LB,LWB,LM,CB" 
[787] "CAM,RM,CF"     "CAM,CM,RM,CF"  "CM,RM,RWB"     "LM,ST,RB"      "ST,CAM,RM,LM"  "RM,LM,RWB,RB" 
[793] "RW,CAM,RM,CF"  "CM,LM,RB,RM"   "LB,RM,LM"      "RM,LM,RWB"     "RM,CM,CDM,RWB" "CAM,CDM,RM,CM"
[799] "RW,LM,RM"      "CAM,LW,LM,CM"  "RW,CAM,ST,LW"  "LM,CDM,CM,RM"  "RB,LB,ST"      "LM,LB,CDM"    
[805] "CDM,RWB"       "CAM,ST,LW"     "CAM,CF,RW"     "RM,CAM,RW"     "ST,RWB,RM"     "LM,CM,LWB"    
[811] "CF,ST,LM"      "CM,RB,CB"      "CAM,LM,ST,RM"  "ST,RW,LM,LW"   "RB,LB,LM"      "RM,RB,LM,LB"  
[817] "LM,RM,RWB"     "CF,CAM,ST,LW"  "CM,LM,CDM,CAM" "ST,CAM,LM,CF"  "CAM,CM,LM,CDM" "LM,LW,LB"     
[823] "CAM,LM,CM,ST"  "RM,CM,RW"      "CAM,LM,RM,CM"  "CAM,LM,CF"     "RW,RM,CF,CAM"  "CM,RM,CDM,CAM"
[829] "RM,RW,CM"      "CAM,CM,CDM,RM" "CAM,ST,LM,RM"  "CF,CAM,CM,ST"  "CAM,LM,LW,CM"  "LM,RM,ST,LW"  
[835] "LW,CAM,LM,CM"  "ST,CM,RB"      "RB,LW,LB,RW"   "CM,CDM,LWB"    "ST,LW,RW,CAM"  "RB,RM,CAM,LM" 
[841] "RB,CB,CM"      "RW,ST,CAM"     "CM,CAM,LM,RM"  "CDM,CAM,LM,LB" "CDM,LWB,LB"    "LWB,CM,LM"    
[847] "LW,LB,LM"      "RM,CF,LM,CAM"  "RWB,LWB,LB,RB" "CAM,CM,CF,CDM" "CM,LW,RW"      "LW,RM,LM,RW"  
[853] "LW,LM,CM"      "LM,CAM,CF,RM"  "LWB,LM,LB,CM"  "CM,CAM,RM,LM"  "LW,ST,LM,CAM"  "RM,RWB,RW,CM" 
[859] "CAM,ST,CF,CM"  "CAM,ST,CDM"    "RM,CM,LM,RB"   "RB,LB,LM,RM"   "ST,LW,RM"      "CAM,LM,RM,CF" 
[865] "RB,RWB,RM,LWB" "CDM,CM,RB,RM"  "RM,LWB"        "CM,RM,CF"      "RB,RW,CM"      "RWB,RM,RB,LWB"
[871] "CF,RW,RM,LW"   "RW,ST,RM,LW"   "LW,CF,ST"      "LB,LW,CM"      "CM,LM,CAM,CDM" "CM,CAM,CDM,LM"
[877] "RW,LW,CAM,ST"  "CM,ST,CAM,CF"  "CAM,LW,CM,RM"  "CF,ST,LM,RM"   "CF,CAM,CM"     "CF,LM,RM"     
[883] "CAM,CM,RW,LW"  "CAM,CM,RM,RW"  "RB,RM,CM"      "RW,LW,CF"      "CF,LW,ST"      "CF,LW,CAM"    
[889] "CAM,RM,RW,CF"  "CAM,CM,RW,RM" 
# Individual position codes: split every comma-separated entry, flatten the
# pieces into a single vector and keep each code once.
positions.list <- unique(
  unlist(
    strsplit(as.character(data[["positions"]]), split = ",")
  )
)
print(positions.list)
 [1] "CF"  "RW"  "ST"  "CAM" "RM"  "CM"  "LW"  "CB"  "GK"  "CDM" "LB"  "LM"  "RB"  "RWB" "LWB"
  1. Goalkeeper — GK: Goalkeeper
  2. Defense — CB: Center Back, LB: Left Back, RB: Right Back, LWB: Left Wing Back, RWB: Right Wing Back
  3. Midfield — CDM: Central Defensive Midfielder, CM: Central Midfielder, CAM: Central Attacking Midfielder, RM: Right Midfielder, LM: Left Midfielder
  4. Attack — CF: Center Forward, ST: Striker, LW: Left Winger, RW: Right Winger
# Build one 0/1 indicator per position group from the comma-separated
# `positions` column. A player is flagged for a group when at least one of
# the player's position codes belongs to that group.
#
# The column is split once and the result is shared by the four indicators.
# vapply() guarantees a plain integer vector with one value per player.
position_codes <- strsplit(as.character(data$positions), ",", fixed = TRUE)

# Returns 1L for every player holding at least one code from `group`,
# 0L otherwise.
flag_positions <- function(group) {
  vapply(
    position_codes,
    function(codes) as.integer(any(codes %in% group)),
    integer(1)
  )
}

data$is_goalkeeper <- flag_positions("GK")
data$is_defense <- flag_positions(c("CB", "LB", "RB", "LWB", "RWB"))
data$is_midfield <- flag_positions(c("CDM", "CM", "CAM", "RM", "LM"))
data$is_attack <- flag_positions(c("CF", "ST", "LW", "RW"))
# Remove the raw `positions` column from the table.
data[["positions"]] <- NULL
# Inspect the distinct labels used for the national-team position.
print(unique(data[["national_team_position"]]))
 [1] "RF"  "CAM" "RDM" "LW"  ""    "LCB" "RM"  "SUB" "GK"  "LS"  "CDM" "LF"  "RCB" "LB"  "ST"  "LAM" "RW"  "CB" 
[19] "LCM" "CM"  "LM"  "RB"  "RCM" "LDM" "RWB" "LWB" "RS"  "RES" "CF"  "RAM"
# Drop the national-team descriptors and the player identification columns
# in a single step; every other column is kept in its original order.
data <- data[
  !(names(data) %in% c(
    "national_team_position",
    "national_team",
    "birth_date",
    "full_name",
    "name"
  ))
]
# Count the rows in which every column is missing (NA) or blank.
# The blank test is evaluated column by column with vectorised operations
# instead of apply(data, 1, ...), which first converts the whole data frame
# to a character matrix and then calls the function once per row.
# `TRUE | NA` is TRUE, so NA cells are counted as blank and the mask never
# contains NA.
nb_lignes_vides <- sum(
  Reduce(
    `&`,
    lapply(data, function(col) is.na(col) | trimws(col) == "")
  )
)

# Print the result
cat("Nombre de lignes vides :", nb_lignes_vides, "\n")
Nombre de lignes vides : 0 
# Count the rows that have at least one missing (NA) or blank cell.
# The blank test is evaluated column by column with vectorised operations
# instead of apply(data, 1, ...), which first converts the whole data frame
# to a character matrix and then calls the function once per row.
# `TRUE | NA` is TRUE, so NA cells are counted as blank and the mask never
# contains NA.
nb_lignes_avec_vide <- sum(
  Reduce(
    `|`,
    lapply(data, function(col) is.na(col) | trimws(col) == "")
  )
)

# Print the result
cat("Nombre de lignes avec au moins une colonne vide :", nb_lignes_avec_vide, "\n")
Nombre de lignes avec au moins une colonne vide : 17165 
# Percentage of missing values in each column; NA, the empty string and
# whitespace-only strings all count as missing.
pourcentage_manquants <- vapply(
  data,
  function(col) {
    is_missing <- is.na(col) | col == "" | trimws(col) == ""
    100 * mean(is_missing)
  },
  numeric(1)
)

# Order the columns from the highest to the lowest percentage.
pourcentage_manquants_tries <- sort(pourcentage_manquants, decreasing = TRUE)

# Print the result
cat("Pourcentage de valeurs manquantes par colonne (trié) :\n")
Pourcentage de valeurs manquantes par colonne (trié) :
# Show the per-column percentages, sorted from highest to lowest.
print(pourcentage_manquants_tries)
              national_rating        national_jersey_number           release_clause_euro 
                    95.226690                     95.226690                     10.231703 
                   value_euro                     wage_euro                           age 
                     1.420296                      1.370168                      0.000000 
                    height_cm                    weight_kgs                   nationality 
                     0.000000                      0.000000                      0.000000 
               overall_rating                     potential                preferred_foot 
                     0.000000                      0.000000                      0.000000 
international_reputation.1.5.                weak_foot.1.5.              skill_moves.1.5. 
                     0.000000                      0.000000                      0.000000 
                    body_type                      crossing                     finishing 
                     0.000000                      0.000000                      0.000000 
             heading_accuracy                 short_passing                       volleys 
                     0.000000                      0.000000                      0.000000 
                    dribbling                         curve             freekick_accuracy 
                     0.000000                      0.000000                      0.000000 
                 long_passing                  ball_control                  acceleration 
                     0.000000                      0.000000                      0.000000 
                 sprint_speed                       agility                     reactions 
                     0.000000                      0.000000                      0.000000 
                      balance                    shot_power                       jumping 
                     0.000000                      0.000000                      0.000000 
                      stamina                      strength                    long_shots 
                     0.000000                      0.000000                      0.000000 
                   aggression                 interceptions                   positioning 
                     0.000000                      0.000000                      0.000000 
                       vision                     penalties                     composure 
                     0.000000                      0.000000                      0.000000 
                      marking               standing_tackle                sliding_tackle 
                     0.000000                      0.000000                      0.000000 
                is_goalkeeper                    is_defense                   is_midfield 
                     0.000000                      0.000000                      0.000000 
                    is_attack 
                     0.000000 
# Discard three more columns, then split the remaining variables into a
# numeric table (data.num) and a qualitative table (data.quali).
data <- data[
  !(names(data) %in% c(
    "international_reputation.1.5.",
    "national_jersey_number",
    "release_clause_euro"
  ))
]

data.num <- data[vapply(data, is.numeric, logical(1))]
data.quali <- data[!vapply(data, is.numeric, logical(1))]

# The position indicators are stored as numbers but are treated as
# qualitative: copy them into data.quali and remove them from data.num.
for (flag in c("is_goalkeeper", "is_defense", "is_midfield", "is_attack")) {
  data.quali[[flag]] <- data[[flag]]
  data.num[[flag]] <- NULL
}

Nous allons commencer par l’étude des variables numériques :

# One histogram per numeric variable, titled and labelled with its name.
# seq_along() yields an empty sequence when data.num has no columns, whereas
# seq(1, length(data.num), 1) raises an error in that case.
for (idx in seq_along(data.num)) {
  var_name <- names(data.num)[idx]
  hist(
    data.num[[idx]],
    xlab = var_name,
    col = rainbow(10),
    main = var_name
  )
}

# One boxplot per numeric variable, titled and labelled with its name.
# seq_along() yields an empty sequence when data.num has no columns, whereas
# seq(1, length(data.num), 1) raises an error in that case.
for (idx in seq_along(data.num)) {
  var_name <- names(data.num)[idx]
  boxplot(
    data.num[[idx]],
    xlab = var_name,
    col = "cyan",
    main = var_name
  )
}

Pour wage_euro et value_euro : il y a plusieurs outliers. Pour strength, nous avons une population équitablement distribuée avec la présence d’outliers. Pour penalties, on remarque que la population est équitablement répartie autour de la moyenne et qu’il n’y a pas d’outliers.

On va essayer de voir les corrélations entre ces variables numériques.

library(corrplot)
# Correlation matrix of the numeric variables.
# value_euro, wage_euro and national_rating contain missing values; with
# cor()'s default use = "everything", every coefficient involving one of them
# is NA. Pairwise-complete observations compute each coefficient from the
# rows where both variables are present.
M <- cor(data.num, use = "pairwise.complete.obs")
M
                          age    height_cm   weight_kgs overall_rating    potential value_euro wage_euro
age                1.00000000  0.059579475  0.233819426     0.46152922 -0.259578566         NA        NA
height_cm          0.05957947  1.000000000  0.495320359     0.03807987  0.008525838         NA        NA
weight_kgs         0.23381943  0.495320359  1.000000000     0.15052093 -0.015098950         NA        NA
overall_rating     0.46152922  0.038079873  0.150520928     1.00000000  0.647249240         NA        NA
potential         -0.25957857  0.008525838 -0.015098950     0.64724924  1.000000000         NA        NA
value_euro                 NA           NA           NA             NA           NA          1        NA
wage_euro                  NA           NA           NA             NA           NA         NA         1
weak_foot.1.5.     0.06110803 -0.130463702 -0.128875336     0.21639350  0.167735495         NA        NA
skill_moves.1.5.   0.02968110 -0.320140714 -0.353601120     0.41871802  0.356241084         NA        NA
national_rating            NA           NA           NA             NA           NA         NA        NA
crossing           0.13335725 -0.388804511 -0.394145351     0.39834317  0.244438236         NA        NA
finishing          0.07476055 -0.296793996 -0.294710244     0.34233114  0.247552068         NA        NA
heading_accuracy   0.15172315 -0.043058013  0.027697604     0.34767032  0.204058724         NA        NA
short_passing      0.13715078 -0.290321276 -0.291826210     0.50681904  0.368105526         NA        NA
volleys            0.14775211 -0.279701660 -0.263662471     0.39692210  0.255953677         NA        NA
dribbling          0.01633362 -0.382180531 -0.414171729     0.37871796  0.315429352         NA        NA
curve              0.14762754 -0.345927472 -0.347161017     0.42389039  0.278243214         NA        NA
freekick_accuracy  0.19600330 -0.316407282 -0.305299273     0.40087831  0.230355260         NA        NA
long_passing       0.18501807 -0.262812586 -0.260839826     0.48999766  0.322550167         NA        NA
ball_control       0.09155228 -0.328643104 -0.339170217     0.46433631  0.350661625         NA        NA
acceleration      -0.15166450 -0.406415045 -0.481583486     0.20485306  0.239457057         NA        NA
sprint_speed      -0.14426512 -0.350073549 -0.413237762     0.21844262  0.241553385         NA        NA
agility           -0.01317678 -0.438373386 -0.533049744     0.27526028  0.225621322         NA        NA
reactions          0.46342071 -0.019999998  0.082796362     0.85576710  0.505011408         NA        NA
balance           -0.08360953 -0.532395628 -0.663188478     0.11466715  0.143428690         NA        NA
                  weak_foot.1.5. skill_moves.1.5. national_rating   crossing     finishing heading_accuracy
age                   0.06110803       0.02968110              NA  0.1333573  0.0747605504       0.15172315
height_cm            -0.13046370      -0.32014071              NA -0.3888045 -0.2967939961      -0.04305801
weight_kgs           -0.12887534      -0.35360112              NA -0.3941454 -0.2947102440       0.02769760
overall_rating        0.21639350       0.41871802              NA  0.3983432  0.3423311424       0.34767032
potential             0.16773550       0.35624108              NA  0.2444382  0.2475520682       0.20405872
value_euro                    NA               NA              NA         NA            NA               NA
wage_euro                     NA               NA              NA         NA            NA               NA
weak_foot.1.5.        1.00000000       0.34528383              NA  0.3116387  0.3696626010       0.19234650
skill_moves.1.5.      0.34528383       1.00000000              NA  0.7464664  0.7484922901       0.45478345
national_rating               NA               NA               1         NA            NA               NA
crossing              0.31163871       0.74646640              NA  1.0000000  0.6641825951       0.48400582
finishing             0.36966260       0.74849229              NA  0.6641826  1.0000000000       0.48281325
heading_accuracy      0.19234650       0.45478345              NA  0.4840058  0.4828132465       1.00000000
short_passing         0.32876561       0.73554364              NA  0.8124045  0.6718369584       0.65362141
volleys               0.36528831       0.74922958              NA  0.6965110  0.8864689171       0.51625955
dribbling             0.35881127       0.84182734              NA  0.8609119  0.8277433090       0.56487810
curve                 0.35036054       0.77566582              NA  0.8377860  0.7656688766       0.45339583
freekick_accuracy     0.33357491       0.70483242              NA  0.7644982  0.7048280938       0.42091877
long_passing          0.28441514       0.62929786              NA  0.7600857  0.5254918645       0.52724108
ball_control          0.36185036       0.82126837              NA  0.8452110  0.7945573616       0.66932993
acceleration          0.27120125       0.66058759              NA  0.6779973  0.6167960811       0.35517422
sprint_speed          0.25930360       0.63366261              NA  0.6562626  0.6033525903       0.40461454
agility               0.30774521       0.68756220              NA  0.7025163  0.6515510463       0.28176027
reactions             0.20426802       0.38537376              NA  0.3975736  0.3420252447       0.33754128
balance               0.25648653       0.58437059              NA  0.6252702  0.5327378012       0.18801036
                  short_passing     volleys   dribbling       curve freekick_accuracy long_passing ball_control
age                   0.1371508  0.14775211  0.01633362  0.14762754       0.196003299    0.1850181   0.09155228
height_cm            -0.2903213 -0.27970166 -0.38218053 -0.34592747      -0.316407282   -0.2628126  -0.32864310
weight_kgs           -0.2918262 -0.26366247 -0.41417173 -0.34716102      -0.305299273   -0.2608398  -0.33917022
overall_rating        0.5068190  0.39692210  0.37871796  0.42389039       0.400878312    0.4899977   0.46433631
potential             0.3681055  0.25595368  0.31542935  0.27824321       0.230355260    0.3225502   0.35066162
value_euro                   NA          NA          NA          NA                NA           NA           NA
wage_euro                    NA          NA          NA          NA                NA           NA           NA
weak_foot.1.5.        0.3287656  0.36528831  0.35881127  0.35036054       0.333574910    0.2844151   0.36185036
skill_moves.1.5.      0.7355436  0.74922958  0.84182734  0.77566582       0.704832423    0.6292979   0.82126837
national_rating              NA          NA          NA          NA                NA           NA           NA
crossing              0.8124045  0.69651102  0.86091186  0.83778599       0.764498156    0.7600857   0.84521097
finishing             0.6718370  0.88646892  0.82774331  0.76566888       0.704828094    0.5254919   0.79455736
heading_accuracy      0.6536214  0.51625955  0.56487810  0.45339583       0.420918766    0.5272411   0.66932993
short_passing         1.0000000  0.70500073  0.84926489  0.77946797       0.739337850    0.8987303   0.91492205
volleys               0.7050007  1.00000000  0.81211167  0.81077209       0.754154485    0.5791321   0.79905208
dribbling             0.8492649  0.81211167  1.00000000  0.84613569       0.757183926    0.7298700   0.94155615
curve                 0.7794680  0.81077209  0.84613569  1.00000000       0.862054392    0.7163587   0.83378148
freekick_accuracy     0.7393378  0.75415448  0.75718393  0.86205439       1.000000000    0.7075056   0.76396485
long_passing          0.8987303  0.57913213  0.72987004  0.71635875       0.707505569    1.0000000   0.79595861
ball_control          0.9149221  0.79905208  0.94155615  0.83378148       0.763964852    0.7959586   1.00000000
acceleration          0.5820243  0.58269747  0.75934826  0.61681071       0.507970193    0.4576578   0.68987616
sprint_speed          0.5734883  0.56833676  0.73848869  0.58924118       0.478210858    0.4440151   0.67949276
agility               0.6217308  0.63103091  0.77008723  0.68640432       0.594473617    0.5313295   0.71238704
reactions             0.4916754  0.40088751  0.37938656  0.42011253       0.401976103    0.4711807   0.45203202
balance               0.5419388  0.52084928  0.66894019  0.59455209       0.526996011    0.4700364   0.60901322
                  acceleration sprint_speed     agility   reactions     balance shot_power      jumping    stamina
age                 -0.1516645  -0.14426512 -0.01317678  0.46342071 -0.08360953  0.1588746  0.183815869  0.1038039
height_cm           -0.4064150  -0.35007355 -0.43837339 -0.02000000 -0.53239563 -0.2399605 -0.052169438 -0.2547940
weight_kgs          -0.4815835  -0.41323776 -0.53304974  0.08279636 -0.66318848 -0.1984279  0.005740647 -0.2325543
overall_rating       0.2048531   0.21844262  0.27526028  0.85576710  0.11466715  0.4453064  0.271007648  0.3687488
potential            0.2394571   0.24155339  0.22562132  0.50501141  0.14342869  0.2891574  0.106848778  0.1991511
value_euro                  NA           NA          NA          NA          NA         NA           NA         NA
wage_euro                   NA           NA          NA          NA          NA         NA           NA         NA
weak_foot.1.5.       0.2712012   0.25930360  0.30774521  0.20426802  0.25648653  0.3414199  0.071491165  0.2414905
skill_moves.1.5.     0.6605876   0.63366261  0.68756220  0.38537376  0.58437059  0.7237465  0.109956451  0.5829703
national_rating             NA           NA          NA          NA          NA         NA           NA         NA
crossing             0.6779973   0.65626257  0.70251633  0.39757356  0.62527025  0.7139692  0.143083334  0.6858946
finishing            0.6167961   0.60335259  0.65155105  0.34202524  0.53273780  0.8207395  0.102385336  0.5272932
heading_accuracy     0.3551742   0.40461454  0.28176027  0.33754128  0.18801036  0.6175126  0.390032646  0.6503556
short_passing        0.5820243   0.57348825  0.62173078  0.49167538  0.54193880  0.7797748  0.210346580  0.7304599
volleys              0.5826975   0.56833676  0.63103091  0.40088751  0.52084928  0.8359407  0.132197020  0.5426657
dribbling            0.7593483   0.73848869  0.77008723  0.37938656  0.66894019  0.8095024  0.152883728  0.7027476
curve                0.6168107   0.58924118  0.68640432  0.42011253  0.59455209  0.7973994  0.115951492  0.6051306
freekick_accuracy    0.5079702   0.47821086  0.59447362  0.40197610  0.52699601  0.7606529  0.086728246  0.5517808
long_passing         0.4576578   0.44401514  0.53132951  0.47118069  0.47003639  0.6826800  0.166372682  0.6490225
ball_control         0.6898762   0.67949276  0.71238704  0.45203202  0.60901322  0.8355716  0.207110082  0.7432019
acceleration         1.0000000   0.92615050  0.81660282  0.20085136  0.71648518  0.5530209  0.225820617  0.6269849
sprint_speed         0.9261505   1.00000000  0.77020047  0.20417015  0.65156155  0.5572879  0.242447334  0.6394081
agility              0.8166028   0.77020047  1.00000000  0.28797619  0.77470410  0.5843563  0.222428688  0.5856175
reactions            0.2008514   0.20417015  0.28797619  1.00000000  0.16185568  0.4260197  0.264821950  0.3793778
balance              0.7164852   0.65156155  0.77470410  0.16185568  1.00000000  0.4717666  0.197026987  0.4930764
                       strength  long_shots  aggression interceptions positioning      vision   penalties
age                0.3396713146  0.16465481  0.26485867   0.198130657  0.08551035  0.19320089  0.14792392
height_cm          0.3187690659 -0.30560129 -0.07399600  -0.078730189 -0.35337973 -0.28725398 -0.27213427
weight_kgs         0.6104311847 -0.27855190  0.02192552  -0.034607195 -0.35528055 -0.28223447 -0.25145263
overall_rating     0.3607251725  0.42945805  0.39707198   0.322030319  0.36211586  0.50681180  0.35090157
potential          0.0779175561  0.26532562  0.17000485   0.151945768  0.24767728  0.34817942  0.22674561
value_euro                   NA          NA          NA            NA          NA          NA          NA
wage_euro                    NA          NA          NA            NA          NA          NA          NA
weak_foot.1.5.    -0.0036093204  0.36559889  0.13703500   0.056743576  0.35532646  0.34368382  0.33757258
skill_moves.1.5.  -0.0334149733  0.75705356  0.36034168   0.221912376  0.78661749  0.67608914  0.69588882
national_rating              NA          NA          NA            NA          NA          NA          NA
crossing          -0.0195133014  0.74921606  0.48627748   0.438164348  0.79143162  0.68786352  0.65516580
finishing         -0.0006046457  0.88008050  0.26214175  -0.003939154  0.89004927  0.70286511  0.84050403
heading_accuracy   0.4902487257  0.51668376  0.70535601   0.559925566  0.54409651  0.28600261  0.56213520
short_passing      0.1476783896  0.76916678  0.62210761   0.549949797  0.76614512  0.71543180  0.68478314
volleys            0.0401962768  0.87120133  0.34428780   0.101064250  0.85166103  0.70291947  0.83426307
dribbling         -0.0213919946  0.84613562  0.45966370   0.312198474  0.90165604  0.73133090  0.77329295
curve             -0.0247818936  0.83906793  0.41168283   0.284055066  0.81769468  0.74875462  0.75682205
freekick_accuracy -0.0068103937  0.80845170  0.40588679   0.303410596  0.73608806  0.71861384  0.74031185
long_passing       0.1287097929  0.67849368  0.59963676   0.602028141  0.62590776  0.70160812  0.55443861
ball_control       0.0992848641  0.83984148  0.56352917   0.429608582  0.86949244  0.72066544  0.77506448
acceleration      -0.1533685301  0.58940409  0.27347353   0.175684176  0.69457627  0.46950231  0.54456389
sprint_speed      -0.0698021696  0.57124628  0.30406484   0.189405554  0.67787601  0.43860229  0.53282682
agility           -0.2198615982  0.65162944  0.25910140   0.155746873  0.71505226  0.60115750  0.57334112
reactions          0.3009674412  0.43224500  0.41019741   0.344903214  0.39355449  0.51213952  0.35634492
balance           -0.3773102555  0.54020194  0.20206645   0.167352072  0.60377760  0.49619018  0.49080402
                    composure     marking standing_tackle sliding_tackle
age                0.39471684  0.14672473      0.11836161     0.10300828
height_cm         -0.11184925 -0.08978321     -0.08145237    -0.08609258
weight_kgs        -0.04453798 -0.05755933     -0.05631799    -0.06596499
overall_rating     0.72927827  0.29253832      0.25448066     0.22546619
potential          0.43411329  0.16299879      0.14392661     0.12975464
value_euro                 NA          NA              NA             NA
wage_euro                  NA          NA              NA             NA
weak_foot.1.5.     0.28556032  0.06700224      0.04571589     0.02784522
skill_moves.1.5.   0.59859513  0.25560083      0.22553172     0.19343774
national_rating            NA          NA              NA             NA
crossing           0.58972576  0.45740058      0.44057767     0.42240711
finishing          0.55052001  0.04205804     -0.01342694    -0.05143236
heading_accuracy   0.52557791  0.59314046      0.57301458     0.54645721
short_passing      0.70134094  0.57018650      0.54890710     0.51676133
volleys            0.60704824  0.13556991      0.08776972     0.05063970
dribbling          0.61382137  0.35333331      0.31957915     0.29228908
curve              0.62864390  0.30379909      0.27425040     0.24599100
freekick_accuracy  0.59569324  0.31073081      0.28908937     0.25801118
long_passing       0.66165070  0.59837689      0.59346445     0.56859354
ball_control       0.69047311  0.46621358      0.43095903     0.39856091
acceleration       0.36893191  0.21944821      0.18859012     0.18191434
sprint_speed       0.37400932  0.23720147      0.20545134     0.19855442
agility            0.45070741  0.18698461      0.14914548     0.13596240
reactions          0.69089393  0.29536045      0.26287910     0.23599235
balance            0.33170817  0.19737004      0.17273552     0.17005972
 [ getOption("max.print") est atteint -- 14 lignes omises ]

On remarque des coefficients de corrélation élevés entre plusieurs variables.

# Correlation matrix of the numeric variables, drawn as a circle plot.
# Pairwise deletion is used instead of "complete.obs": list-wise deletion keeps
# only the rows with no NA in any column, and because national_rating is
# missing for about 95% of the players it would compute every coefficient on
# the small subset of players that have a national rating.
corr_matrix <- cor(data.num, use = "pairwise.complete.obs")
corrplot(corr_matrix, method = "circle", tl.cex = 0.7, tl.col = "black")

Maintenant, on transforme les variables qualitatives en factors :

# Convert each qualitative column to a factor, then let summary() tabulate
# the level counts of every column.
colonnes_quali <- c(
  "nationality", "preferred_foot", "body_type",
  "is_goalkeeper", "is_defense", "is_midfield", "is_attack"
)
data.quali[colonnes_quali] <- lapply(data.quali[colonnes_quali], as.factor)
summary(data.quali)
    nationality    preferred_foot      body_type     is_goalkeeper is_defense is_midfield is_attack
 England  : 1658   Left : 4173    Normal    :10393   0:15889       0:11383    0:9458      0:13621  
 Germany  : 1199   Right:13781    Lean      : 6468   1: 2065       1: 6571    1:8496      1: 4333  
 Spain    : 1070                  Stocky    : 1086                                                 
 France   :  925                  Akinfenwa :    1                                                 
 Argentina:  904                  C. Ronaldo:    1                                                 
 Brazil   :  832                  Courtois  :    1                                                 
 (Other)  :11366                  (Other)   :    4                                                 

On remarque que la variable body_type contient des valeurs erronées qui n’ont pas de vraie signification (par exemple des noms de joueurs à la place d’un type de morphologie). Il faudra donc supprimer ces lignes pour ne pas influencer notre analyse :

# The three body types that carry real counts in the summary of data.quali;
# every other value of body_type appears to be a one-off, player-specific label.
types_valides <- c("Normal", "Lean", "Stocky")

# Values to exclude, derived from the data so that all invalid labels are
# caught, including those grouped under "(Other)" in the summary (the former
# hard-coded list named only "Akinfenwa", "C. Ronaldo" and "Courtois").
valeurs_a_exclure <- setdiff(unique(data$body_type), types_valides)

# Keep only the rows whose body type is not one of the excluded values.
df_filtre <- subset(data, !(body_type %in% valeurs_a_exclure))

# Display the result
print(df_filtre)
NA
# Distribution check: horizontal bar chart of the 20 most frequent
# nationalities in df_filtre.
top_nationalites <- df_filtre %>%
  count(nationality, sort = TRUE) %>%
  top_n(20)

ggplot(top_nationalites, aes(x = reorder(nationality, n), y = n)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  theme_minimal() +
  ggtitle("Top 20 des nationalités les plus représentées")
Selecting by n

# Percentage of missing entries in each column of df_filtre. An entry counts
# as missing when it is NA, an empty string, or whitespace only.
pourcentage_manquants <- sapply(df_filtre, function(valeurs) {
  est_manquant <- is.na(valeurs) | valeurs == "" | trimws(valeurs) == ""
  100 * mean(est_manquant)
})

# Order the columns from the most to the least incomplete.
pourcentage_manquants_tries <- sort(pourcentage_manquants, decreasing = TRUE)

# Print a header line ahead of the sorted percentages.
cat("Pourcentage de valeurs manquantes par colonne (trié) :\n")
Pourcentage de valeurs manquantes par colonne (trié) :
print(pourcentage_manquants_tries)
  national_rating        value_euro         wage_euro               age         height_cm        weight_kgs 
        95.237034          1.420534          1.370397          0.000000          0.000000          0.000000 
      nationality    overall_rating         potential    preferred_foot    weak_foot.1.5.  skill_moves.1.5. 
         0.000000          0.000000          0.000000          0.000000          0.000000          0.000000 
        body_type          crossing         finishing  heading_accuracy     short_passing           volleys 
         0.000000          0.000000          0.000000          0.000000          0.000000          0.000000 
        dribbling             curve freekick_accuracy      long_passing      ball_control      acceleration 
         0.000000          0.000000          0.000000          0.000000          0.000000          0.000000 
     sprint_speed           agility         reactions           balance        shot_power           jumping 
         0.000000          0.000000          0.000000          0.000000          0.000000          0.000000 
          stamina          strength        long_shots        aggression     interceptions       positioning 
         0.000000          0.000000          0.000000          0.000000          0.000000          0.000000 
           vision         penalties         composure           marking   standing_tackle    sliding_tackle 
         0.000000          0.000000          0.000000          0.000000          0.000000          0.000000 
    is_goalkeeper        is_defense       is_midfield         is_attack 
         0.000000          0.000000          0.000000          0.000000 
# Drop national_rating, then keep only the rows where both value_euro and
# wage_euro are filled in (neither NA nor an empty string).
df_filtre$national_rating <- NULL
valeur_renseignee <- !(is.na(df_filtre$value_euro) |
  df_filtre$value_euro == "")
salaire_renseigne <- !(is.na(df_filtre$wage_euro) |
  df_filtre$wage_euro == "")
df_final <- df_filtre[valeur_renseignee & salaire_renseigne, ]
# Percentage of missing entries in each column of df_final. An entry counts
# as missing when it is NA, an empty string, or whitespace only.
pourcentage_manquants <- sapply(df_final, function(valeurs) {
  est_manquant <- is.na(valeurs) | valeurs == "" | trimws(valeurs) == ""
  100 * mean(est_manquant)
})

# Order the columns from the most to the least incomplete.
pourcentage_manquants_tries <- sort(pourcentage_manquants, decreasing = TRUE)

# Print a header line ahead of the sorted percentages.
cat("Pourcentage de valeurs manquantes par colonne (trié) :\n")
Pourcentage de valeurs manquantes par colonne (trié) :
print(pourcentage_manquants_tries)
              age         height_cm        weight_kgs       nationality    overall_rating         potential 
                0                 0                 0                 0                 0                 0 
       value_euro         wage_euro    preferred_foot    weak_foot.1.5.  skill_moves.1.5.         body_type 
                0                 0                 0                 0                 0                 0 
         crossing         finishing  heading_accuracy     short_passing           volleys         dribbling 
                0                 0                 0                 0                 0                 0 
            curve freekick_accuracy      long_passing      ball_control      acceleration      sprint_speed 
                0                 0                 0                 0                 0                 0 
          agility         reactions           balance        shot_power           jumping           stamina 
                0                 0                 0                 0                 0                 0 
         strength        long_shots        aggression     interceptions       positioning            vision 
                0                 0                 0                 0                 0                 0 
        penalties         composure           marking   standing_tackle    sliding_tackle     is_goalkeeper 
                0                 0                 0                 0                 0                 0 
       is_defense       is_midfield         is_attack 
                0                 0                 0 
# Keep the numeric columns of df_final only, then standardise them with
# scale() (default centring and scaling).
nums <- Filter(is.numeric, df_final)
df_scaled <- scale(nums)
# ================== UNSUPERVISED ANALYSIS (CLUSTERING) ==================

# K-MEANS
# Fix the RNG seed before the randomised steps that follow.
set.seed(123)
# "wss" plot used to pick the number of clusters.
fviz_nbclust(x = df_scaled, FUNcluster = kmeans, method = "wss")
install.packages("caret")
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:

https://cran.rstudio.com/bin/windows/Rtools/
Warning in install.packages :
  le package ‘caret’ est en cours d'utilisation et ne sera pas installé

# Fit k-means with 4 centres and 25 random starts on the standardised data.
kmeans_result <- kmeans(x = df_scaled, centers = 4, nstart = 25)
# Store the cluster assignment of each row as a factor column of df_final.
df_final[["Cluster_KMeans"]] <- as.factor(kmeans_result[["cluster"]])
# Plot the resulting clusters.
fviz_cluster(object = kmeans_result, data = df_scaled) +
  ggtitle("Clusters K-Means")


# PRINCIPAL COMPONENT ANALYSIS (PCA)
# df_scaled has already been passed through scale(); centring and scaling are
# requested again in this call.
pca_result <- prcomp(x = df_scaled, scale. = TRUE, center = TRUE)
# Explained variance of each component.
summary(pca_result)
Importance of components:
                          PC1    PC2     PC3     PC4     PC5     PC6     PC7     PC8     PC9    PC10   PC11
Standard deviation     4.2979 2.4441 1.94081 1.48314 1.33192 1.13563 0.91860 0.89256 0.84671 0.80892 0.7530
Proportion of Variance 0.4398 0.1422 0.08968 0.05237 0.04224 0.03071 0.02009 0.01897 0.01707 0.01558 0.0135
Cumulative Proportion  0.4398 0.5820 0.67173 0.72410 0.76634 0.79705 0.81714 0.83611 0.85318 0.86876 0.8823
                          PC12    PC13    PC14    PC15    PC16   PC17    PC18    PC19    PC20    PC21    PC22
Standard deviation     0.67309 0.56745 0.55197 0.52505 0.51127 0.5020 0.48358 0.47674 0.46491 0.44768 0.44582
Proportion of Variance 0.01079 0.00767 0.00725 0.00656 0.00622 0.0060 0.00557 0.00541 0.00515 0.00477 0.00473
Cumulative Proportion  0.89304 0.90071 0.90796 0.91453 0.92075 0.9267 0.93232 0.93773 0.94288 0.94765 0.95238
                          PC23    PC24    PC25    PC26    PC27    PC28    PC29    PC30    PC31   PC32   PC33
Standard deviation     0.43447 0.41679 0.40870 0.40157 0.37441 0.36515 0.35812 0.33980 0.33235 0.3175 0.2970
Proportion of Variance 0.00449 0.00414 0.00398 0.00384 0.00334 0.00317 0.00305 0.00275 0.00263 0.0024 0.0021
Cumulative Proportion  0.95688 0.96101 0.96499 0.96883 0.97217 0.97534 0.97840 0.98114 0.98377 0.9862 0.9883
                          PC34    PC35    PC36   PC37    PC38    PC39    PC40   PC41    PC42
Standard deviation     0.28132 0.26304 0.25480 0.2507 0.24809 0.24141 0.20122 0.1837 0.14924
Proportion of Variance 0.00188 0.00165 0.00155 0.0015 0.00147 0.00139 0.00096 0.0008 0.00053
Cumulative Proportion  0.99016 0.99181 0.99335 0.9949 0.99631 0.99770 0.99867 0.9995 1.00000
# Plot of the variance explained by the components.
fviz_eig(pca_result)

# Individuals map coloured by cos2, on a blue-to-red gradient.
# Individuals are drawn as points only: with one text label per player and
# repel = TRUE, ggrepel would have to place thousands of labels, which is
# very slow and yields an unreadable plot.
fviz_pca_ind(pca_result,
  geom = "point",
  col.ind = "cos2",
  gradient.cols = c("blue", "red")
) + ggtitle("PCA des joueurs FIFA")

À partir de 4 clusters, l'inertie intra-classe ne diminue plus que lentement : c'est le « coude » de la courbe. Il est donc judicieux de travailler avec 4 clusters.

library(FactoMineR)
library(factoextra)
# Biplot of the PCA: individuals and variables on the same plane.
# Only the variables are labelled (label = "var"); labelling every player as
# well would ask ggrepel to place thousands of texts, which is very slow and
# buries the variable arrows.
fviz_pca_biplot(pca_result,
  label = "var",    # text labels for the variables only
  repel = TRUE,     # keep the variable labels from overlapping
  col.var = "blue", # colour of the variables
  col.ind = "red"   # colour of the individuals
)

Les deux premières composantes principales expliquent 58.2% de la variance. On remarque bien que le fait d’être gardien de but est négativement corrélé avec des caractéristiques comme les passes (short_passing et long_passing), crossing et value_euro, ce qui est logique. La première composante est davantage expliquée par les caractéristiques techniques des joueurs, alors que la deuxième l’est par les caractéristiques physiques.

LS0tDQp0aXRsZTogIkZvb3RiYWxsIERhdGEgTWluaW5nIFByb2plY3QiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpUaGlzIGlzIGFuIFtSIE1hcmtkb3duXShodHRwOi8vcm1hcmtkb3duLnJzdHVkaW8uY29tKSBOb3RlYm9vay4gV2hlbiB5b3UgZXhlY3V0ZSBjb2RlIHdpdGhpbiB0aGUgbm90ZWJvb2ssIHRoZSByZXN1bHRzIGFwcGVhciBiZW5lYXRoIHRoZSBjb2RlLiANCg0KVHJ5IGV4ZWN1dGluZyB0aGlzIGNodW5rIGJ5IGNsaWNraW5nIHRoZSAqUnVuKiBidXR0b24gd2l0aGluIHRoZSBjaHVuayBvciBieSBwbGFjaW5nIHlvdXIgY3Vyc29yIGluc2lkZSBpdCBhbmQgcHJlc3NpbmcgKkN0cmwrU2hpZnQrRW50ZXIqLiANCg0KYGBge3J9DQpzZXR3ZCgiQzovVXNlcnMvYXppemEvRG9jdW1lbnRzL0RhdGEgTWluaW5nIikNCmBgYA0KYGBge3J9DQppbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpDQpsaWJyYXJ5KGRwbHlyKQ0KaW5zdGFsbC5wYWNrYWdlcygiZ2dwbG90MiIpDQpsaWJyYXJ5KGdncGxvdDIpDQppbnN0YWxsLnBhY2thZ2VzKCJmYWN0b2V4dHJhIikNCmxpYnJhcnkoZmFjdG9leHRyYSkNCmluc3RhbGwucGFja2FnZXMoImNhcmV0IikNCmxpYnJhcnkoImNhcmV0IikNCg0KYGBgDQoNCkltcG9ydGF0aW9uIGRlcyBkb25uw6llcyA6IA0KDQpgYGB7cn0NCmRhdGEgPSByZWFkLmNzdigiZmlmYV9wbGF5ZXJzLmNzdiIpDQpgYGANCg0KRGVzY3JpcHRpb24gZGVzIHZhcmlhYmxlcyA6IA0KDQpgYGB7cn0NCnN0cihkYXRhKQ0KYGBgDQoNCg0KYGBge3J9DQptb2QucG9zID0gdW5pcXVlKGRhdGEkcG9zaXRpb25zKQ0KbW9kLnBvcw0KYGBgDQpgYGB7cn0NCnBvc2l0aW9ucy5saXN0IDwtIHVuaXF1ZSh1bmxpc3Qoc3Ryc3BsaXQoYXMuY2hhcmFjdGVyKGRhdGEkcG9zaXRpb25zKSwgIiwiKSkpDQpwb3NpdGlvbnMubGlzdA0KYGBgDQoxLiBHb2Fsa2VlcGVyDQpHSzogR29hbGtlZXBlcg0KMi4gRGVmZW5zZQ0KQ0I6IENlbnRlciBCYWNrDQpMQjogTGVmdCBCYWNrDQpSQjogUmlnaHQgQmFjaw0KTFdCOiBMZWZ0IFdpbmcgQmFjaw0KUldCOiBSaWdodCBXaW5nIEJhY2sNCjMuIE1pZGZpZWxkDQpDRE06IENlbnRyYWwgRGVmZW5zaXZlIE1pZGZpZWxkZXINCkNNOiBDZW50cmFsIE1pZGZpZWxkZXINCkNBTTogQ2VudHJhbCBBdHRhY2tpbmcgTWlkZmllbGRlcg0KUk06IFJpZ2h0IE1pZGZpZWxkZXINCkxNOiBMZWZ0IE1pZGZpZWxkZXINCjQuIEF0dGFjaw0KQ0Y6IENlbnRlciBGb3J3YXJkDQpTVDogU3RyaWtlcg0KTFc6IExlZnQgV2luZ2VyDQpSVzogUmlnaHQgV2luZ2VyDQoNCmBgYHtyfQ0KZGF0YSRpc19nb2Fsa2VlcGVyIDwtIHNhcHBseShkYXRhJHBvc2l0aW9ucywgZnVuY3Rpb24oeCkgYXMuaW50ZWdlcigiR0siICVpbiUgdW5saXN0KHN0cnNwbGl0KHgsICIsIikpKSkNCg0KZGF0YSRpc19kZWZlbnNlIDwtIHNhcHBseShkYXRhJHBvc2l0aW9ucywgZnVuY3Rpb24oeCkgYXMuaW50ZWdlcihhbnkodW5saXN0KHN0cnNw
bGl0KHgsICIsIikpICVpbiUgYygiQ0IiLCAiTEIiLCAiUkIiLCAiTFdCIiwgIlJXQiIpKSkpDQoNCmRhdGEkaXNfbWlkZmllbGQgPC0gc2FwcGx5KGRhdGEkcG9zaXRpb25zLCBmdW5jdGlvbih4KSBhcy5pbnRlZ2VyKGFueSh1bmxpc3Qoc3Ryc3BsaXQoeCwgIiwiKSkgJWluJSBjKCJDRE0iLCAiQ00iLCAiQ0FNIiwgIlJNIiwgIkxNIikpKSkNCg0KZGF0YSRpc19hdHRhY2sgPC0gc2FwcGx5KGRhdGEkcG9zaXRpb25zLCBmdW5jdGlvbih4KSBhcy5pbnRlZ2VyKGFueSh1bmxpc3Qoc3Ryc3BsaXQoeCwgIiwiKSkgJWluJSBjKCJDRiIsICJTVCIsICJMVyIsICJSVyIpKSkpDQoNCmBgYA0KDQoNCmBgYHtyfQ0KZGF0YSRwb3NpdGlvbnMgPSBOVUxMDQpgYGANCg0KDQpgYGB7cn0NCnVuaXF1ZShkYXRhJG5hdGlvbmFsX3RlYW1fcG9zaXRpb24pDQpgYGANCmBgYHtyfQ0KZGF0YSRuYXRpb25hbF90ZWFtX3Bvc2l0aW9uID0gTlVMTA0KYGBgDQoNCg0KDQoNCmBgYHtyfQ0KZGF0YSRuYXRpb25hbF90ZWFtID0gTlVMTA0KZGF0YSRiaXJ0aF9kYXRlID0gTlVMTA0KZGF0YSRmdWxsX25hbWUgPSBOVUxMDQpkYXRhJG5hbWUgPSBOVUxMDQpgYGANCiANCg0KDQpgYGB7cn0NCm5iX2xpZ25lc192aWRlcyA8LSBzdW0oYXBwbHkoZGF0YSwgMSwgZnVuY3Rpb24ocm93KSB7DQogIGFsbChpcy5uYShyb3cpIHwgcm93ID09ICIiIHwgdHJpbXdzKHJvdykgPT0gIiIpDQp9KSkNCg0KIyBBZmZpY2hlciBsZSByw6lzdWx0YXQNCmNhdCgiTm9tYnJlIGRlIGxpZ25lcyB2aWRlcyA6IiwgbmJfbGlnbmVzX3ZpZGVzLCAiXG4iKQ0KYGBgDQpgYGB7cn0NCm5iX2xpZ25lc19hdmVjX3ZpZGUgPC0gc3VtKGFwcGx5KGRhdGEsIDEsIGZ1bmN0aW9uKHJvdykgew0KICBhbnkoaXMubmEocm93KSB8IHJvdyA9PSAiIiB8IHRyaW13cyhyb3cpID09ICIiKQ0KfSkpDQoNCiMgQWZmaWNoZXIgbGUgcsOpc3VsdGF0DQpjYXQoIk5vbWJyZSBkZSBsaWduZXMgYXZlYyBhdSBtb2lucyB1bmUgY29sb25uZSB2aWRlIDoiLCBuYl9saWduZXNfYXZlY192aWRlLCAiXG4iKQ0KYGBgDQoNCmBgYHtyfQ0KIyBDYWxjdWxlciBsZSBwb3VyY2VudGFnZSBkZSB2YWxldXJzIG1hbnF1YW50ZXMgcGFyIGNvbG9ubmUNCnBvdXJjZW50YWdlX21hbnF1YW50cyA8LSBzYXBwbHkoZGF0YSwgZnVuY3Rpb24oY29sKSB7DQogIG1lYW4oaXMubmEoY29sKSB8IGNvbCA9PSAiIiB8IHRyaW13cyhjb2wpID09ICIiKSAqIDEwMA0KfSkNCg0KIyBUcmllciBsZXMgY29sb25uZXMgcGFyIHBvdXJjZW50YWdlIGRlIHZhbGV1cnMgbWFucXVhbnRlcyAoZHUgcGx1cyBncmFuZCBhdSBwbHVzIHBldGl0KQ0KcG91cmNlbnRhZ2VfbWFucXVhbnRzX3RyaWVzIDwtIHNvcnQocG91cmNlbnRhZ2VfbWFucXVhbnRzLCBkZWNyZWFzaW5nID0gVFJVRSkNCg0KIyBBZmZpY2hlciBsZSByw6lzdWx0YXQNCmNhdCgiUG91cmNlbnRhZ2UgZGUgdmFsZXVycyBtYW5xdWFudGVzIHBhciBjb2xvbm5lICh0cmnDqSkgOlxuIikNCnByaW50
KHBvdXJjZW50YWdlX21hbnF1YW50c190cmllcykNCmBgYA0KDQpgYGB7cn0NCmRhdGEkaW50ZXJuYXRpb25hbF9yZXB1dGF0aW9uLjEuNS49TlVMTA0KZGF0YSRuYXRpb25hbF9qZXJzZXlfbnVtYmVyPU5VTEwNCmRhdGEkcmVsZWFzZV9jbGF1c2VfZXVybz1OVUxMDQpgYGANCg0KYGBge3J9DQpkYXRhLm51bSA9IGRhdGFbc2FwcGx5KGRhdGEsaXMubnVtZXJpYyk9PTFdDQpkYXRhLnF1YWxpID0gZGF0YVtzYXBwbHkoZGF0YSxpcy5udW1lcmljKT09MF0NCmBgYA0KDQpgYGB7cn0NCmRhdGEucXVhbGkkaXNfZ29hbGtlZXBlciA8LSBkYXRhJGlzX2dvYWxrZWVwZXINCg0KZGF0YS5xdWFsaSRpc19kZWZlbnNlIDwtIGRhdGEkaXNfZGVmZW5zZQ0KDQpkYXRhLnF1YWxpJGlzX21pZGZpZWxkIDwtIGRhdGEkaXNfbWlkZmllbGQNCg0KZGF0YS5xdWFsaSRpc19hdHRhY2sgPC0gZGF0YSRpc19hdHRhY2sNCg0KYGBgDQoNCmBgYHtyfQ0KZGF0YS5udW0kaXNfZ29hbGtlZXBlciA8LSBOVUxMDQoNCmRhdGEubnVtJGlzX2RlZmVuc2UgPC0gTlVMTA0KDQpkYXRhLm51bSRpc19taWRmaWVsZCA8LSBOVUxMDQoNCmRhdGEubnVtJGlzX2F0dGFjayA8LSBOVUxMDQoNCmBgYA0KDQpOb3VzIGFsbG9ucyBjb21tZW5jZXIgcGFyIGwnw6l0dWRlIGRlcyB2YXJpYWJsZXMgbnVtw6lyaXF1ZXMgOiANCg0KYGBge3J9DQpmb3IgKHggaW4gc2VxKDEsbGVuZ3RoKGRhdGEubnVtKSwxKSkgaGlzdChkYXRhLm51bVsseF0sDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgeGxhYiA9IG5hbWVzKGRhdGEubnVtW3hdKSwNCiAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICBjb2wgPSByYWluYm93KDEwKSwgbWFpbiA9IG5hbWVzKGRhdGEubnVtW3hdKSkNCmBgYA0KDQoNCg0KDQoNCmBgYHtyfQ0KZm9yICh4IGluIHNlcSgxLGxlbmd0aChkYXRhLm51bSksMSkpIGJveHBsb3QoZGF0YS5udW1bLHhdLA0KICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgIHhsYWIgPSBuYW1lcyhkYXRhLm51bVt4XSksDQogICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgICAgY29sID0gImN5YW4iLCBtYWluID0gbmFtZXMoZGF0YS5udW1beF0pKQ0KYGBgDQpQb3VyIFdhZ2VfZXVybyBldCB2YWx1ZV9ldXJvIDogSWwgeSBhIHBsdXNpZXVycyBvdXRsaWVycy4gUG91ciBzdHJlbmd0aCBub3VzIGF2b25zIHVuZSBwb3B1bGF0aW9uIMOpcXVpdGFibGVtZW50IGRpc3RyaWJ1w6kgYXZlYyBsYSBwcsOpc2VuY2UgZCdvdXRsaWVycy4gUG91ciBQZW5hbHRpZXMsIG9uIHJlbWFycXVlIHF1ZSBsYSBwb3B1bGF0aW9uIGVzdCDDqXF1aXRhYmxlbWVudCByw6lwYXJ0aWUgYXV0b3VyIGRlIGxhIG1veWVubmUgZXQgaWwgbicgeSBhIHBhcyBkJ291dGxpZXJzLiANCg0KT24gdmEgZXNzYXllciBkZSB2b2lyIGxlcyBjb3Jyw6lsYXRpb25zIGVudHJlIGNlcyB2YXJpYWJsZXMgbnVtw6lyaXF1ZXMuIA0KDQpgYGB7cn0NCmxpYnJhcnkoY29ycnBs
b3QpDQpNIDwtIGNvcihkYXRhLm51bSkNCk0NCmBgYA0KT24gcmVtYXJxdWUgZGVzIGNvZWZmaWNpZW50cyBkZSBjb3Jyw6lsYXRpb24gw6lsZXbDqXMgZW50cmUgcGx1c2lldXJzIHZhcmlhYmxlcy4NCg0KYGBge3J9DQpjb3JyX21hdHJpeCA8LSBjb3IoZGF0YS5udW0sIHVzZT0iY29tcGxldGUub2JzIikNCmNvcnJwbG90KGNvcnJfbWF0cml4LCBtZXRob2Q9ImNpcmNsZSIsIHRsLmNleD0wLjcsIHRsLmNvbD0iYmxhY2siKQ0KYGBgDQpNYWludGVuYW50LCBvbiB0cmFuc2Zvcm1lIGxlcyB2YXJpYWJsZXMgcXVhbGl0YXRpdmVzIGVuIGZhY3RvcnMgOiANCg0KYGBge3J9DQpkYXRhLnF1YWxpJG5hdGlvbmFsaXR5ID0gYXMuZmFjdG9yKGRhdGEucXVhbGkkbmF0aW9uYWxpdHkpDQpkYXRhLnF1YWxpJHByZWZlcnJlZF9mb290ID0gYXMuZmFjdG9yKGRhdGEucXVhbGkkcHJlZmVycmVkX2Zvb3QpDQpkYXRhLnF1YWxpJGJvZHlfdHlwZSA9IGFzLmZhY3RvcihkYXRhLnF1YWxpJGJvZHlfdHlwZSkNCmRhdGEucXVhbGkkaXNfZ29hbGtlZXBlciA9IGFzLmZhY3RvcihkYXRhLnF1YWxpJGlzX2dvYWxrZWVwZXIpDQpkYXRhLnF1YWxpJGlzX2RlZmVuc2UgPSBhcy5mYWN0b3IoZGF0YS5xdWFsaSRpc19kZWZlbnNlKQ0KZGF0YS5xdWFsaSRpc19taWRmaWVsZCA9IGFzLmZhY3RvcihkYXRhLnF1YWxpJGlzX21pZGZpZWxkKQ0KZGF0YS5xdWFsaSRpc19hdHRhY2sgPSBhcy5mYWN0b3IoZGF0YS5xdWFsaSRpc19hdHRhY2spDQpgYGANCmBgYHtyfQ0Kc3VtbWFyeShkYXRhLnF1YWxpKQ0KYGBgDQpPbiByZW1hcnF1ZSBxdWUgZGFucyBib2R5IHR5cGUgbm91cyBhdm9ucyBkZXMgdmFsZXVycyBlcnJvbsOpcyBxdWkgbidvbnQgcGFzIGRlIHZyYWkgc2lnbmlmaWNhdGlvbi4gSWwgZmF1ZHJhIGRvbmMgc3VwcHJpbWVyIGNlcyBsaWduZXMgcG91ciBuZSBwYXMgaW5mbHVlbmNlciBub3RyZSBhbmFseXNlIDogDQoNCmBgYHtyfQ0KIyBMaXN0ZSBkZSB2YWxldXJzIMOgIGV4Y2x1cmUNCnZhbGV1cnNfYV9leGNsdXJlIDwtIGMoIkFraW5mZW53YSIsICJDLiBSb25hbGRvIiwgIkNvdXJ0b2lzIikNCg0KIyBGaWx0cmVyIGxlcyBkb25uw6llcyBlbiBleGNsdWFudCBjZXMgdmFsZXVycw0KZGZfZmlsdHJlIDwtIHN1YnNldChkYXRhLCAhKGJvZHlfdHlwZSAlaW4lIHZhbGV1cnNfYV9leGNsdXJlKSkNCg0KIyBBZmZpY2hlciBsZSByw6lzdWx0YXQNCnByaW50KGRmX2ZpbHRyZSkNCg0KYGBgDQoNCmBgYHtyfQ0KI1bDqXJpZmljYXRpb24gZGUgbGEgZGlzdHJpYnV0aW9uIGRlcyBjYXTDqWdvcmllcw0KZGZfZmlsdHJlICU+JQ0KICBjb3VudChuYXRpb25hbGl0eSwgc29ydCA9IFRSVUUpICU+JQ0KICB0b3BfbigyMCkgJT4lDQogIGdncGxvdChhZXMoeD1yZW9yZGVyKG5hdGlvbmFsaXR5LCBuKSwgeT1uKSkgKw0KICBnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIsIGZpbGw9InN0ZWVsYmx1ZSIpICsNCiAgY29vcmRfZmxpcCgpICsNCiAgdGhlbWVfbWluaW1hbCgpICsN
CiAgZ2d0aXRsZSgiVG9wIDIwIGRlcyBuYXRpb25hbGl0w6lzIGxlcyBwbHVzIHJlcHLDqXNlbnTDqWVzIikNCmBgYA0KYGBge3J9DQojIENhbGN1bGVyIGxlIHBvdXJjZW50YWdlIGRlIHZhbGV1cnMgbWFucXVhbnRlcyBwYXIgY29sb25uZQ0KcG91cmNlbnRhZ2VfbWFucXVhbnRzIDwtIHNhcHBseShkZl9maWx0cmUsIGZ1bmN0aW9uKGNvbCkgew0KICBtZWFuKGlzLm5hKGNvbCkgfCBjb2wgPT0gIiIgfCB0cmltd3MoY29sKSA9PSAiIikgKiAxMDANCn0pDQoNCiMgVHJpZXIgbGVzIGNvbG9ubmVzIHBhciBwb3VyY2VudGFnZSBkZSB2YWxldXJzIG1hbnF1YW50ZXMgKGR1IHBsdXMgZ3JhbmQgYXUgcGx1cyBwZXRpdCkNCnBvdXJjZW50YWdlX21hbnF1YW50c190cmllcyA8LSBzb3J0KHBvdXJjZW50YWdlX21hbnF1YW50cywgZGVjcmVhc2luZyA9IFRSVUUpDQoNCiMgQWZmaWNoZXIgbGUgcsOpc3VsdGF0DQpjYXQoIlBvdXJjZW50YWdlIGRlIHZhbGV1cnMgbWFucXVhbnRlcyBwYXIgY29sb25uZSAodHJpw6kpIDpcbiIpDQpwcmludChwb3VyY2VudGFnZV9tYW5xdWFudHNfdHJpZXMpDQpgYGANCg0KYGBge3J9DQpkZl9maWx0cmUkbmF0aW9uYWxfcmF0aW5nID0gTlVMTA0KYGBgDQpgYGB7cn0NCmRmX2ZpbmFsIDwtIGRmX2ZpbHRyZVshKGRmX2ZpbHRyZSR2YWx1ZV9ldXJvID09ICIiIHwgaXMubmEoZGZfZmlsdHJlJHZhbHVlX2V1cm8pIHwgZGZfZmlsdHJlJHdhZ2VfZXVybyA9PSAiIiB8IGlzLm5hKGRmX2ZpbHRyZSR3YWdlX2V1cm8pKSwgXQ0KYGBgDQoNCg0KDQpgYGB7cn0NCiMgQ2FsY3VsZXIgbGUgcG91cmNlbnRhZ2UgZGUgdmFsZXVycyBtYW5xdWFudGVzIHBhciBjb2xvbm5lDQpwb3VyY2VudGFnZV9tYW5xdWFudHMgPC0gc2FwcGx5KGRmX2ZpbmFsLCBmdW5jdGlvbihjb2wpIHsNCiAgbWVhbihpcy5uYShjb2wpIHwgY29sID09ICIiIHwgdHJpbXdzKGNvbCkgPT0gIiIpICogMTAwDQp9KQ0KDQojIFRyaWVyIGxlcyBjb2xvbm5lcyBwYXIgcG91cmNlbnRhZ2UgZGUgdmFsZXVycyBtYW5xdWFudGVzIChkdSBwbHVzIGdyYW5kIGF1IHBsdXMgcGV0aXQpDQpwb3VyY2VudGFnZV9tYW5xdWFudHNfdHJpZXMgPC0gc29ydChwb3VyY2VudGFnZV9tYW5xdWFudHMsIGRlY3JlYXNpbmcgPSBUUlVFKQ0KDQojIEFmZmljaGVyIGxlIHLDqXN1bHRhdA0KY2F0KCJQb3VyY2VudGFnZSBkZSB2YWxldXJzIG1hbnF1YW50ZXMgcGFyIGNvbG9ubmUgKHRyacOpKSA6XG4iKQ0KcHJpbnQocG91cmNlbnRhZ2VfbWFucXVhbnRzX3RyaWVzKQ0KYGBgDQpgYGB7cn0NCm51bXMgPSBkZl9maW5hbFtzYXBwbHkoZGZfZmluYWwsaXMubnVtZXJpYyk9PTFdDQpkZl9zY2FsZWQgPC0gc2NhbGUobnVtcykNCmBgYA0KDQpgYGB7cn0NCiMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyBBTkFMWVNFIE5PTiBTVVBFUlZJU8OJRSAoQ0xVU1RFUklORykgIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIyMjIw0KDQojIE3DiVRIT0RFIEstTUVBTlMNCnNl
dC5zZWVkKDEyMykNCmZ2aXpfbmJjbHVzdChkZl9zY2FsZWQsIGttZWFucywgbWV0aG9kID0gIndzcyIpICMgRMOpdGVybWluYXRpb24gZHUgbm9tYnJlIG9wdGltYWwgZGUgY2x1c3RlcnMNCmttZWFuc19yZXN1bHQgPC0ga21lYW5zKGRmX3NjYWxlZCwgY2VudGVycyA9IDQsIG5zdGFydCA9IDI1KQ0KZGZfZmluYWwkQ2x1c3Rlcl9LTWVhbnMgPC0gYXMuZmFjdG9yKGttZWFuc19yZXN1bHQkY2x1c3RlcikNCmZ2aXpfY2x1c3RlcihrbWVhbnNfcmVzdWx0LCBkYXRhID0gZGZfc2NhbGVkKSArIGdndGl0bGUoIkNsdXN0ZXJzIEstTWVhbnMiKQ0KDQojIEFOQUxZU0UgRU4gQ09NUE9TQU5URVMgUFJJTkNJUEFMRVMgKFBDQSkNCnBjYV9yZXN1bHQgPC0gcHJjb21wKGRmX3NjYWxlZCwgY2VudGVyID0gVFJVRSwgc2NhbGUuID0gVFJVRSkNCnN1bW1hcnkocGNhX3Jlc3VsdCkgIyBWYXJpYW5jZSBleHBsaXF1w6llDQpmdml6X2VpZyhwY2FfcmVzdWx0KSAjIEdyYXBoaXF1ZSBkZXMgdmFyaWFuY2VzIGV4cGxpcXXDqWVzDQpmdml6X3BjYV9pbmQocGNhX3Jlc3VsdCwgY29sLmluZCA9ICJjb3MyIiwgZ3JhZGllbnQuY29scyA9IGMoImJsdWUiLCAicmVkIiksIHJlcGVsID0gVFJVRSkgKyBnZ3RpdGxlKCJQQ0EgZGVzIGpvdWV1cnMgRklGQSIpDQoNCmBgYA0KQXUgYm91dCBkZSA0IGNsdXN0ZXJzIG9uIGNvbW1lbmNlIMOgIGRpbWludWVyIGxlbnRlbWVudC4gRG9uYyBpbCBlc3QganVkaWNpZXV4IGRlIHRyYXZhaWxsZXIgc3VyIDQgY2x1c3RlcnMuDQoNCg0KYGBge3J9DQpsaWJyYXJ5KEZhY3RvTWluZVIpDQpsaWJyYXJ5KGZhY3RvZXh0cmEpDQpgYGANCg0KYGBge3J9DQojIEFmZmljaGFnZSBkdSBiaXBsb3QNCmZ2aXpfcGNhX2JpcGxvdChwY2FfcmVzdWx0LCANCiAgICAgICAgICAgICAgICByZXBlbCA9IFRSVUUsICAgDQogICAgICAgICAgICAgICAgY29sLnZhciA9ICJibHVlIiwgIyBDb3VsZXVyIGRlcyB2YXJpYWJsZXMNCiAgICAgICAgICAgICAgICBjb2wuaW5kID0gInJlZCIgICAjIENvdWxldXIgZGVzIGluZGl2aWR1cw0KKQ0KYGBgDQoNCkxlcyBkZXV4IHByZW1pw6hyZXMgY29tcG9zYW50ZXMgcHJpbmNpcGFsZXMgZXhwbGlxdWVudCA1OC4yJSBkZSBsYSB2YXJpYW5jZS4gT24gcmVtYXJxdWUgYmllbiBxdWUgbGUgZmFpdCBkJ8OqdHJlIGdhcmRpZW4gZGUgYnV0IGVzdCBuw6lnYXRpdmVtZW50IGNvcnLDqWzDqSBhdmVjIGRlcyBjYXJhY3TDqXJpc3RpcXVlcyBjb21tZSBwYXNzaW5ncyAoc2hvcnQgZXQgbG9uZyksIGNyb3NzaW5nIGV0IHZhbHVlX2V1cm8gY2UgcXVpIGVzdCBsb2dpcXVlLg0KTGEgXnJlbWnDqHJlIGNvbXBvc2FudGUgZXN0IHBsdXMgZXhwbGlxdcOpZSBwYXIgZGVzIGNhcmFjdMOpcmlzdGlxdWVzIHRlY2huaXF1ZXMgZHUgam91ZXVycyBhbG9ycyBxdWUgbGEgZGV1eGnDqG1lIHBhciBsZXMgY2FyYWN0w6lyaXN0aXF1ZXMgcGh5c2lxdWUuIA0K