This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
# Make the project folder the working directory so that the relative file
# name passed to read.csv() further down resolves.
# NOTE(review): this absolute path only exists on the author's machine;
# consider opening the notebook from an RStudio project instead - confirm.
setwd("C:/Users/aziza/Documents/Data Mining")
install.packages("dplyr")
Error in install.packages : Updating loaded packages
library(dplyr)
install.packages("ggplot2")
Error in install.packages : Updating loaded packages
library(ggplot2)
install.packages("factoextra")
Error in install.packages : Updating loaded packages
library(factoextra)
install.packages("caret")
Error in install.packages : Updating loaded packages
library("caret")
Importation des données :
# Load the FIFA players table from the CSV file in the working directory.
data <- read.csv(file = "fifa_players.csv")
Description des variables :
# Compact overview of every column: name, type and first values.
utils::str(object = data)
'data.frame': 17954 obs. of 51 variables:
$ name : chr "L. Messi" "C. Eriksen" "P. Pogba" "L. Insigne" ...
$ full_name : chr "Lionel Andrés Messi Cuccittini" "Christian Dannemann Eriksen" "Paul Pogba" "Lorenzo Insigne" ...
$ birth_date : chr "6/24/1987" "2/14/1992" "3/15/1993" "6/4/1991" ...
$ age : int 31 27 25 27 27 27 20 30 32 32 ...
$ height_cm : num 170 155 190 163 188 ...
$ weight_kgs : num 72.1 76.2 83.9 59 88.9 92.1 73 69.9 92.1 77.1 ...
$ positions : chr "CF,RW,ST" "CAM,RM,CM" "CM,CAM" "LW,ST" ...
$ nationality : chr "Argentina" "Denmark" "France" "Italy" ...
$ overall_rating : int 94 88 88 88 88 88 88 89 89 89 ...
$ potential : int 94 89 91 88 91 90 95 89 89 89 ...
$ value_euro : int 110500000 69500000 73000000 62000000 60000000 59500000 81000000 64500000 38000000 60000000 ...
$ wage_euro : int 565000 205000 255000 165000 135000 215000 100000 300000 130000 200000 ...
$ preferred_foot : chr "Left" "Right" "Right" "Right" ...
$ international_reputation.1.5.: int 5 3 4 3 3 3 3 4 5 4 ...
$ weak_foot.1.5. : int 4 5 4 4 3 3 4 4 4 4 ...
$ skill_moves.1.5. : int 4 4 5 4 2 2 5 4 1 3 ...
$ body_type : chr "Messi" "Lean" "Normal" "Normal" ...
$ release_clause_euro : int 226500000 133800000 144200000 105400000 106500000 114500000 166100000 119300000 62700000 111000000 ...
$ national_team : chr "Argentina" "Denmark" "France" "Italy" ...
$ national_rating : int 82 78 84 83 NA 81 84 82 85 81 ...
$ national_team_position : chr "RF" "CAM" "RDM" "LW" ...
$ national_jersey_number : int 10 10 6 10 NA 4 10 11 1 21 ...
$ crossing : int 86 88 80 86 30 53 77 70 15 70 ...
$ finishing : int 95 81 75 77 22 52 88 93 13 89 ...
$ heading_accuracy : int 70 52 75 56 83 83 77 77 25 89 ...
$ short_passing : int 92 91 86 85 68 79 82 81 55 78 ...
$ volleys : int 86 80 85 74 14 45 78 85 11 90 ...
$ dribbling : int 97 84 87 90 69 70 90 89 30 80 ...
$ curve : int 93 86 85 87 28 60 77 82 14 77 ...
$ freekick_accuracy : int 94 87 82 77 28 70 63 73 11 76 ...
$ long_passing : int 89 89 90 78 60 81 73 64 59 52 ...
$ ball_control : int 96 91 90 93 63 76 91 89 46 82 ...
$ acceleration : int 91 76 71 94 70 74 96 88 54 75 ...
$ sprint_speed : int 86 73 79 86 75 77 96 80 60 76 ...
$ agility : int 93 80 76 94 50 61 92 86 51 77 ...
$ reactions : int 95 88 82 83 82 87 87 90 84 91 ...
$ balance : int 95 81 66 93 40 49 83 91 35 59 ...
$ shot_power : int 85 84 90 75 55 81 79 88 25 87 ...
$ jumping : int 68 50 83 53 81 88 75 81 77 88 ...
$ stamina : int 72 92 88 75 75 75 83 76 43 92 ...
$ strength : int 66 58 87 44 94 92 71 73 80 78 ...
$ long_shots : int 94 89 82 84 15 64 78 83 16 79 ...
$ aggression : int 48 46 78 34 87 82 62 65 29 84 ...
$ interceptions : int 22 56 64 26 88 88 38 24 30 48 ...
$ positioning : int 94 84 82 83 24 41 88 92 12 93 ...
$ vision : int 94 91 88 87 49 60 82 83 70 77 ...
$ penalties : int 75 67 82 61 33 62 70 83 47 85 ...
$ composure : int 96 88 87 83 80 87 86 90 70 82 ...
$ marking : int 33 59 63 51 91 90 34 30 17 52 ...
$ standing_tackle : int 28 57 67 24 88 89 34 20 10 45 ...
$ sliding_tackle : int 26 22 67 22 87 84 32 12 11 39 ...
# Distinct raw values of the comma-separated `positions` column.
mod.pos <- unique(data[["positions"]])
print(mod.pos)
[1] "CF,RW,ST" "CAM,RM,CM" "CM,CAM" "LW,ST" "CB" "RW,ST,RM"
[7] "ST" "GK" "CDM,CM" "CF,ST" "RW,ST" "CAM,RW"
[13] "CDM" "CM,CDM" "LB" "CM,CAM,CDM" "CAM,CM,LW" "CAM,CM,RM"
[19] "LW,ST,LM" "CAM,CM" "CAM" "LW,RW" "RW,LW" "CM,LM"
[25] "CM" "RM,RW" "LM,RM,CAM,LW" "LM" "RW,RM" "LM,ST,RM"
[31] "RB" "LM,RM,LW,RW" "LM,CAM,RM" "RM,LM,ST" "RW" "LM,ST,RM,LW"
[37] "CM,CDM,CAM" "CM,CDM,CB" "CB,CDM" "CF,ST,CAM" "CDM,CB" "RB,RWB"
[43] "RM" "LM,CAM" "CM,LW" "LB,LWB,LM" "ST,LW" "LM,LB,CM"
[49] "RB,RM" "RM,LM,CM" "LM,RM,CAM" "RM,RW,ST" "LW,LM,RW" "LB,LM"
[55] "ST,LW,LM" "RM,CM" "CM,RM" "CB,RB" "RM,RW,LM" "LM,RM"
[61] "CDM,CB,CM" "CAM,CDM" "LB,CDM" "CAM,RM,LM" "RM,CAM,CM" "LWB"
[67] "CDM,CM,LM" "CDM,CM,RM" "CDM,CM,CB" "CAM,LW" "RM,LM" "ST,CAM"
[73] "LM,CM" "ST,CF,CAM,LM" "LWB,LB,CB,LM" "RB,LB" "LW" "CAM,ST"
[79] "LM,RM,LW" "LWB,LM,LB" "ST,CF,LM,CAM" "LM,LW,ST" "LB,LWB" "RWB,RB,RM"
[85] "ST,RW" "CAM,CM,LM" "RB,RW" "RM,CAM,LM" "ST,RM" "RM,ST"
[91] "CM,LM,RM" "CM,CAM,LM" "RW,RWB" "LM,RB" "RM,LM,RW,CAM" "ST,RM,RWB,LM"
[97] "LB,RB" "CM,CDM,RM" "LM,LW" "CAM,ST,RM" "ST,CF" "CB,LB"
[103] "RWB,RM" "LM,CM,LW" "CF,LW" "CAM,CM,RW" "CAM,LM,RW" "ST,RW,LW,CF"
[109] "CM,RM,RB" "RM,CM,LM" "LW,RM" "CB,RB,LB" "CAM,CF,ST" "CAM,CF"
[115] "CAM,CM,ST" "RB,CB" "ST,LM,RM" "CM,RM,LM" "LWB,LB,LM,CDM" "RM,RW,CM,CAM"
[121] "LB,RB,RWB" "RB,RWB,RM" "CAM,RM" "LWB,LB,LM" "CM,LM,CAM" "CAM,LM,RM,RW"
[127] "ST,LM" "RM,CAM" "CAM,LM,RM" "LW,RW,CAM" "CF,ST,LW" "LM,RW"
[133] "RW,LW,LM" "RWB,LB" "LM,ST" "RB,RM,LB,LM" "LWB,LM" "LW,CF"
[139] "ST,RW,LW,CAM" "CAM,CDM,RM" "LWB,LB,RB,RWB" "CM,RW,CAM" "RB,CM" "CM,CDM,CAM,RM"
[145] "RM,RB" "CM,LM,CDM" "LM,CM,CAM" "LM,RM,CM" "RM,LM,RW" "LM,RM,RW"
[151] "RW,LW,ST" "LW,RW,RM" "LWB,LB" "CDM,RB" "RW,RM,CAM" "CB,LB,RB"
[157] "CM,CAM,ST" "CF,CAM,ST" "RW,CAM,RM" "CDM,LB,CM" "CAM,LM,CM" "LB,LWB,CDM"
[163] "RM,RB,RW" "RM,CAM,LM,CM" "RB,RWB,LB" "CAM,CM,CDM" "LM,RM,ST,LB" "CDM,CM,CAM"
[169] "CF,CAM" "RM,LM,CAM" "RW,LW,RM" "LW,CAM" "RB,CB,RWB,RM" "ST,LW,RW"
[175] "CAM,RW,ST" "ST,CAM,RW,LW" "LB,CB,LM" "CM,LM,LB" "RB,LB,RWB" "ST,RM,LM"
[181] "LB,CB" "ST,CAM,CF" "CDM,CAM,CM,LM" "CAM,LM" "LB,CM,LWB" "RW,CAM"
[187] "LM,CM,RM,CAM" "CAM,RW,LW" "RM,LM,RW,RWB" "CDM,RB,RM" "LW,LM,RM,RW" "ST,RW,LW"
[193] "RB,CB,CDM" "CAM,CF,RW,RM" "LM,CF,CAM" "ST,RW,CF" "CM,RB" "CF,ST,CAM,LM"
[199] "RM,LM,CM,CDM" "ST,RM,LM,CAM" "LM,LW,CAM" "CAM,LB,CM" "RM,RW,ST,CAM" "LM,LB"
[205] "LW,CM,CAM" "LW,LM" "RM,LM,CAM,ST" "LW,ST,RW" "LM,LWB,LB,LW" "LW,LM,CF"
[211] "CAM,CM,RWB" "CAM,CF,CM,RM" "LM,CAM,LW" "LM,RM,ST" "RB,CDM,CM" "CM,CDM,LM"
[217] "LW,LM,RW,CAM" "LM,LW,RM" "RM,LW,CAM" "CM,LB" "CM,CDM,LW" "CM,CAM,LM,CDM"
[223] "ST,LM,CAM" "CF,LM,ST,CAM" "RB,CB,RM" "CAM,RB" "RB,CB,RWB" "LB,LW"
[229] "CDM,RM,RB" "RB,CB,LB" "LM,CAM,RM,CF" "ST,RM,CAM,LM" "LM,RM,CM,CAM" "RB,RM,RWB"
[235] "LB,CB,LWB" "CB,RB,CDM" "RM,ST,RW" "CM,CF" "CB,CDM,CM" "CDM,CAM"
[241] "LM,LW,RW" "LM,CF" "LM,LW,RM,ST" "LB,LM,LWB" "ST,LM,RW,RM" "RW,RM,CM"
[247] "RM,RW,LW" "CAM,RM,LM,CM" "CAM,CM,RM,LM" "RB,LB,CDM,CM" "RB,LB,CB" "CM,RM,CDM"
[253] "LW,RW,LM" "ST,CF,CAM" "CM,LB,CDM" "RW,CM" "CDM,CAM,CM" "LM,RM,CF"
[259] "LW,CAM,ST" "RB,LB,RM" "RB,LB,CDM" "RB,CDM" "CDM,CM,CB,LM" "ST,RM,CAM"
[265] "CM,RM,CAM" "CB,LB,CDM" "RB,CM,RM" "LB,CB,CDM" "RM,RWB" "CAM,LB,CM,RM"
[271] "CB,CM" "RM,CM,RB" "CAM,LM,ST" "LB,LWB,CB,LM" "RM,CAM,ST" "LB,RB,RM"
[277] "RWB,RB,LWB" "RM,RWB,LM,CAM" "RWB,CB" "LM,CM,ST" "CM,CB,CDM" "RW,LW,CM"
[283] "CDM,CM,CAM,RM" "RW,RM,LW,ST" "CAM,LM,RM,LB" "LB,RB,LM" "RM,ST,LM" "ST,RW,CAM"
[289] "LM,ST,CAM" "RW,CAM,LW" "RB,RM,LB" "RB,LM" "RB,RW,LW" "CM,RW"
[295] "LB,LW,RW" "CDM,RB,CM" "LM,CAM,CM" "RM,RW,LM,LW" "LM,LWB,CM" "RW,RM,LM,ST"
[301] "CAM,LW,RB" "CM,LW,RW,LM" "ST,CF,RW" "ST,RW,CF,RM" "RB,CM,CDM" "LW,LB,RW,LM"
[307] "LM,ST,LW" "LB,LM,RM" "CAM,CM,RM,ST" "RW,LB" "CAM,RM,RW" "CDM,LWB"
[313] "CM,CAM,CF" "CAM,ST,LM" "CM,CDM,CAM,ST" "CDM,LM,CAM,CM" "ST,CAM,LM" "CF,ST,CAM,LW"
[319] "RM,LM,RW,LW" "CB,RB,LB,CDM" "RB,RM,LM" "ST,LW,CAM" "CDM,RM,CAM" "CB,CDM,RB"
[325] "LM,LWB" "RB,LB,LWB" "CAM,CM,LM,RM" "LM,CAM,RW,CF" "CAM,ST,CM" "LM,CM,RM"
[331] "LM,ST,CM" "RM,RB,CB" "LB,CDM,CM" "LB,RB,CB" "LW,RW,CM,RB" "RW,RM,CAM,ST"
[337] "RM,RWB,ST" "LM,CAM,RM,CDM" "CM,CDM,CAM,LM" "CB,RB,CM" "CAM,CDM,CM" "RM,CAM,CM,LM"
[343] "LB,LM,LW" "ST,CF,RM" "RB,LM,RM" "RW,LM,CAM" "LB,CB,LM,CM" "RB,RM,CB"
[349] "CF,RW,CM" "LB,LM,RB" "RWB,RB" "ST,LM,LW" "LB,CM" "LB,LM,ST"
[355] "CM,CDM,RB,RWB" "LB,RWB,LWB" "RM,RB,LB" "LW,RW,ST" "RB,CDM,CB,LB" "LM,RM,CM,CDM"
[361] "LM,CAM,CDM" "RM,LM,CF,CAM" "CDM,RM,CM" "CB,LWB,LM" "RM,ST,LM,RW" "RM,LM,RB,CF"
[367] "CF,LM" "RM,RB,RWB" "RB,LB,LWB,RWB" "RM,RWB,RB" "CM,LW,LWB" "LW,RB,LB"
[373] "LB,RM" "LW,LM,ST,RM" "RM,ST,CAM" "CM,ST" "LM,RW,LW" "CAM,CM,CF"
[379] "CM,LM,LW,CAM" "LW,CAM,RW" "ST,CF,LW" "RM,CM,ST,LM" "ST,RW,RM" "LW,LWB,LB,RW"
[385] "RWB,RM,RB,LB" "RW,CAM,ST" "RWB,RB,RM,CM" "ST,RM,RW" "RM,LW,CAM,ST" "CM,CAM,CDM,RM"
[391] "CDM,CM,RB,CB" "RB,CDM,LB" "RWB,RB,RW" "RWB" "CM,CDM,LB" "ST,CM"
[397] "RB,LB,CM" "RM,LM,RB" "CM,CAM,LW" "CF,ST,RM,CAM" "RM,RB,LM" "RM,LW"
[403] "ST,LM,CF" "CF" "RB,RW,CAM,LW" "LWB,LM,LB,RWB" "RWB,RM,RB" "CM,CAM,RM"
[409] "LWB,RWB" "CM,ST,RM" "CAM,CF,CM" "RW,RM,LM" "LW,RM,RW" "RW,CM,CAM,LW"
[415] "LM,RM,CAM,ST" "CM,RM,LM,CAM" "CDM,LB" "RB,LW" "RW,RB" "ST,LW,RW,LM"
[421] "CDM,CM,RB" "CAM,LB" "CM,ST,RW" "LB,LM,CM" "CM,CDM,RB" "CF,ST,RW"
[427] "RB,RM,LB,RWB" "LW,CM" "RWB,LWB" "CAM,LM,RM,ST" "RM,LM,CF" "LB,CB,RB"
[433] "CB,LB,LM" "RM,RW,CAM" "LB,RW,LW" "RB,LM,LB,RM" "LM,CAM,LB,LW" "RB,LB,RWB,LWB"
[439] "LW,RW,LB,CM" "CM,LM,CB" "LM,RM,LB" "LB,RB,CDM" "CB,RB,RWB" "CDM,LM"
[445] "LM,LW,CM" "RM,CF" "CB,CDM,CM,CAM" "LM,CM,CDM,LB" "LWB,LW" "LM,RM,RB"
[451] "RWB,RB,CDM" "RM,LM,LWB" "LB,CAM,LM" "LW,LM,RM" "LB,CDM,CB" "LB,RB,LM,RM"
[457] "LB,CB,CM" "CB,LB,CM" "LW,LM,ST" "LW,LB" "ST,CAM,RW" "CDM,CB,RB"
[463] "CM,CF,RB" "CM,CDM,CB,RB" "CM,RWB,CDM" "RB,RM,CDM" "CAM,LM,LB" "CM,CB"
[469] "RB,RW,RM" "LM,LB,CB" "CAM,RW,CM" "LB,LWB,CAM,LM" "ST,LW,CDM" "ST,RW,LM"
[475] "CB,LWB" "RM,RB,CAM,CM" "RM,CF,LM" "RW,LW,CAM" "ST,LM,LWB" "CAM,RW,RWB"
[481] "LB,LW,ST" "RB,RWB,CB" "CM,CAM,RW" "CB,CDM,LB" "RM,LM,ST,CAM" "ST,RM,LW"
[487] "RM,RB,CDM" "CM,RB,RM" "LM,CAM,LB" "CDM,RM" "LB,LWB,CB" "RB,RWB,CDM"
[493] "CF,RM,CM" "LM,CM,LB" "CM,RWB,RM" "LWB,LB,CB" "RB,CB,LB,CDM" "CF,RM,LM"
[499] "RM,CAM,CF,LB" "RM,RW,RWB" "CDM,CM,RB,LM" "LM,RM,LB,RB" "RW,ST,CM" "RB,RWB,RM,RW"
[505] "CDM,CM,LB" "CAM,LW,RW" "LB,RB,LW" "CAM,RW,LW,CF" "CAM,RM,RWB" "CAM,ST,CF"
[511] "RW,LW,CAM,CM" "LM,LB,LW" "CAM,LM,LW" "CM,CDM,RWB" "CB,CM,CDM" "ST,CB"
[517] "LM,RM,LWB,CM" "LM,LB,ST" "LW,CM,RW" "LW,CAM,CM,RB" "RB,CAM" "LM,LW,CF"
[523] "CF,RW" "CM,RWB" "RB,ST" "CB,ST" "CF,CM,ST" "LB,LM,CB"
[529] "CDM,CM,CF" "ST,LM,LW,RW" "CM,LB,RM" "LM,CM,RM,RB" "RM,RW,RB,RWB" "RW,LM"
[535] "RW,RM,CF" "CM,RM,LW" "CAM,LW,CF" "ST,LM,RM,CF" "CAM,RM,CM,ST" "LB,LM,LWB,CB"
[541] "RM,CB,RB,RWB" "LM,LWB,RM,RWB" "CM,CAM,RM,CF" "CM,LM,LB,CF" "RM,LM,LB" "LM,ST,LB"
[547] "RM,CAM,CF" "CB,RWB,RB" "RW,LW,RM,ST" "LW,RW,CM" "RM,CAM,LM,CDM" "LW,RB"
[553] "LB,LWB,CM" "CDM,RB,CB" "LM,CDM,LB" "LB,LW,LWB" "LB,RB,CM" "RB,RM,RW"
[559] "CF,CAM,RM" "RB,CDM,CM,RM" "RWB,LM,LWB,RB" "CAM,RW,RM" "RM,RB,CM" "CB,LB,LWB"
[565] "CDM,LB,LM,CB" "ST,CAM,RM" "LWB,CB" "CB,CM,RWB" "LM,LB,CDM,CM" "CB,RWB"
[571] "CM,ST,LM" "RWB,LW,RW" "CM,CB,CAM" "CDM,RB,LB" "LB,LM,CAM" "RM,LM,CAM,CM"
[577] "LM,RM,LWB" "CDM,CB,LB" "LB,CM,RB" "RW,ST,LW,CF" "CDM,CM,CB,ST" "LM,LW,RM,RW"
[583] "RW,CM,CAM" "LM,ST,RM,CAM" "RW,ST,LW" "LWB,LB,CDM" "ST,CAM,LM,CM" "CAM,LM,LWB,LB"
[589] "CB,ST,CM" "CAM,CF,ST,RM" "RM,RB,ST" "RWB,LM,ST" "CM,LWB,LB" "RB,ST,RM"
[595] "CAM,RM,ST" "LM,CDM" "CM,CDM,LM,LB" "CM,CB,LB" "RB,CDM,RM" "RW,RM,LW,LM"
[601] "RM,LM,LW" "CDM,RM,CM,CAM" "CB,RM" "LM,LB,LWB" "LM,LW,CDM,LWB" "RM,CB,RB"
[607] "LW,RW,CF,ST" "ST,LM,RW" "LB,CM,LM" "CAM,RM,CDM" "RM,RW,CF" "CF,RM"
[613] "CM,CDM,CAM,CF" "LB,RB,RM,CDM" "CF,ST,CM" "RM,LM,RB,CM" "LM,RM,ST,CAM" "ST,RM,RW,LM"
[619] "CAM,CF,LM" "RB,CDM,RWB" "LM,ST,CF" "CF,RM,CAM" "LW,LM,CAM" "RM,ST,LM,LW"
[625] "ST,RM,LM,LW" "CM,LW,LM,CAM" "ST,RM,RW,LW" "RB,CDM,CB" "LW,LM,RW,RM" "CB,LM,LB,LWB"
[631] "RM,RWB,LM" "ST,RB" "CM,LM,RM,CAM" "RB,CB,CM,CDM" "RM,LW,LM" "CB,RB,RM"
[637] "RW,CF" "CF,CAM,LW" "RW,ST,CAM,RM" "RM,CM,CDM" "LM,LB,RM" "LW,LM,ST,LWB"
[643] "CAM,CF,RM" "CAM,RM,ST,LM" "CM,LB,LM" "LM,CM,CDM,RM" "RW,RM,LB,CM" "CM,RW,RM"
[649] "LB,CB,CDM,RB" "CM,RM,CDM,LM" "CM,RW,LM" "CM,RB,CDM,LB" "CM,LWB" "RWB,RM,LM"
[655] "CAM,RM,CM,LM" "RB,CDM,CB,CM" "ST,LW,CM" "RB,CDM,RM,LB" "RW,RM,CM,CAM" "LM,RM,RW,CM"
[661] "CDM,RM,LM" "LW,RW,CF" "CAM,RM,RB" "CF,ST,RW,CAM" "LM,RM,ST,CF" "LW,RW,CAM,ST"
[667] "RW,CF,LW" "CF,CM" "CDM,RM,LM,RB" "CM,RW,LW" "RM,CM,RB,RWB" "CF,CM,LW"
[673] "LB,CB,CDM,LM" "RM,LW,RW" "CAM,ST,CF,LM" "CDM,LB,CM,RB" "RB,RM,ST" "LB,RB,RM,LM"
[679] "RM,LM,RB,LB" "CDM,CM,LM,CAM" "CDM,CM,RM,RB" "CM,CAM,RM,CDM" "LM,LW,RW,ST" "LB,LM,LWB,RM"
[685] "CAM,LM,CDM" "CAM,ST,RM,LM" "LM,CAM,ST" "LB,LM,RB,CM" "CAM,CDM,CM,RM" "CAM,RM,LM,ST"
[691] "LM,CM,RM,CDM" "LW,CAM,LM" "CF,CAM,LM" "LWB,RWB,LM,RM" "LM,LW,RW,RM" "RM,CAM,LM,ST"
[697] "RM,CF,CAM" "LW,LM,RM,CAM" "LM,LWB,LB" "CM,RM,CDM,RB" "CB,CDM,CAM" "LW,RM,ST,LM"
[703] "ST,LM,RM,CM" "CM,RB,CDM" "LB,CB,LM,LWB" "RM,CM,LM,CAM" "RW,RM,ST" "CB,LM,LB"
[709] "LW,RW,CAM,LM" "CM,CDM,RM,LM" "RM,ST,LM,CAM" "ST,RM,LM,CF" "RB,RM,LM,CM" "CAM,LW,ST"
[715] "CAM,CM,LM,ST" "LM,LWB,LW" "CM,RM,LM,CDM" "CM,RB,LB" "RM,LM,LW,RW" "LW,CAM,RW,RM"
[721] "LM,RM,CAM,CM" "LM,RM,LW,CM" "CB,CM,CDM,CAM" "LB,LM,CDM" "RB,CB,LB,RWB" "LWB,LB,RB"
[727] "LM,LB,RB" "RB,RWB,LWB,LB" "CAM,LW,CM" "LWB,LM,CB,RWB" "LM,CM,RM,ST" "ST,CF,LM,RW"
[733] "RW,CAM,CF" "RM,RW,CAM,ST" "RWB,RB,CDM,RM" "LW,LM,CAM,ST" "CM,RB,CAM" "CM,ST,CAM"
[739] "LW,RW,ST,RM" "LM,RM,CM,ST" "CF,LW,RW" "CM,RM,LB" "LW,LM,CAM,CF" "CB,RB,CAM"
[745] "LM,ST,CAM,LW" "LM,CAM,CM,CF" "RB,RW,LB" "RM,LM,RW,LWB" "RM,RW,RB,LM" "CDM,RB,CB,CM"
[751] "LM,ST,RW" "RM,LM,RW,ST" "LM,LW,RM,RB" "CAM,RW,CF,RM" "LB,CM,LW" "ST,LW,LM,CF"
[757] "RM,LM,CDM,CM" "LW,CAM,RW,LM" "LM,RM,LW,ST" "RW,CAM,CM" "ST,CAM,LW,RW" "LW,LB,CM,LM"
[763] "LM,LB,CAM" "CAM,LW,LM,ST" "CB,LWB,LB" "CAM,RW,CF" "CB,CM,CAM" "RM,RW,ST,LM"
[769] "LW,CF,RM,RW" "RW,CM,RM" "CAM,CM,CF,RM" "CM,CF,LW" "CDM,CM,CB,RM" "CF,ST,LM,CAM"
[775] "CM,RM,LB,RB" "RM,CM,RW,CAM" "RM,RB,CDM,CM" "CAM,LM,ST,CM" "CAM,LM,CM,CF" "RM,CM,CAM,RB"
[781] "LB,RB,LM,CM" "RW,RM,ST,LW" "RM,CM,LM,LB" "RW,LM,RM,ST" "RM,LM,CAM,RW" "LB,LWB,LM,CB"
[787] "CAM,RM,CF" "CAM,CM,RM,CF" "CM,RM,RWB" "LM,ST,RB" "ST,CAM,RM,LM" "RM,LM,RWB,RB"
[793] "RW,CAM,RM,CF" "CM,LM,RB,RM" "LB,RM,LM" "RM,LM,RWB" "RM,CM,CDM,RWB" "CAM,CDM,RM,CM"
[799] "RW,LM,RM" "CAM,LW,LM,CM" "RW,CAM,ST,LW" "LM,CDM,CM,RM" "RB,LB,ST" "LM,LB,CDM"
[805] "CDM,RWB" "CAM,ST,LW" "CAM,CF,RW" "RM,CAM,RW" "ST,RWB,RM" "LM,CM,LWB"
[811] "CF,ST,LM" "CM,RB,CB" "CAM,LM,ST,RM" "ST,RW,LM,LW" "RB,LB,LM" "RM,RB,LM,LB"
[817] "LM,RM,RWB" "CF,CAM,ST,LW" "CM,LM,CDM,CAM" "ST,CAM,LM,CF" "CAM,CM,LM,CDM" "LM,LW,LB"
[823] "CAM,LM,CM,ST" "RM,CM,RW" "CAM,LM,RM,CM" "CAM,LM,CF" "RW,RM,CF,CAM" "CM,RM,CDM,CAM"
[829] "RM,RW,CM" "CAM,CM,CDM,RM" "CAM,ST,LM,RM" "CF,CAM,CM,ST" "CAM,LM,LW,CM" "LM,RM,ST,LW"
[835] "LW,CAM,LM,CM" "ST,CM,RB" "RB,LW,LB,RW" "CM,CDM,LWB" "ST,LW,RW,CAM" "RB,RM,CAM,LM"
[841] "RB,CB,CM" "RW,ST,CAM" "CM,CAM,LM,RM" "CDM,CAM,LM,LB" "CDM,LWB,LB" "LWB,CM,LM"
[847] "LW,LB,LM" "RM,CF,LM,CAM" "RWB,LWB,LB,RB" "CAM,CM,CF,CDM" "CM,LW,RW" "LW,RM,LM,RW"
[853] "LW,LM,CM" "LM,CAM,CF,RM" "LWB,LM,LB,CM" "CM,CAM,RM,LM" "LW,ST,LM,CAM" "RM,RWB,RW,CM"
[859] "CAM,ST,CF,CM" "CAM,ST,CDM" "RM,CM,LM,RB" "RB,LB,LM,RM" "ST,LW,RM" "CAM,LM,RM,CF"
[865] "RB,RWB,RM,LWB" "CDM,CM,RB,RM" "RM,LWB" "CM,RM,CF" "RB,RW,CM" "RWB,RM,RB,LWB"
[871] "CF,RW,RM,LW" "RW,ST,RM,LW" "LW,CF,ST" "LB,LW,CM" "CM,LM,CAM,CDM" "CM,CAM,CDM,LM"
[877] "RW,LW,CAM,ST" "CM,ST,CAM,CF" "CAM,LW,CM,RM" "CF,ST,LM,RM" "CF,CAM,CM" "CF,LM,RM"
[883] "CAM,CM,RW,LW" "CAM,CM,RM,RW" "RB,RM,CM" "RW,LW,CF" "CF,LW,ST" "CF,LW,CAM"
[889] "CAM,RM,RW,CF" "CAM,CM,RW,RM"
# Split every `positions` string on commas and keep each individual position
# code once.
positions.list <- unique(
  unlist(
    strsplit(as.character(data[["positions"]]), split = ",")
  )
)
print(positions.list)
[1] "CF" "RW" "ST" "CAM" "RM" "CM" "LW" "CB" "GK" "CDM" "LB" "LM" "RB" "RWB" "LWB"
# Flag goalkeepers: 1L when "GK" is one of the player's comma-separated
# positions, 0L otherwise.
# vapply() (rather than sapply()) guarantees exactly one integer per player,
# whatever the input looks like.
data$is_goalkeeper <- vapply(
  data$positions,
  function(pos) as.integer("GK" %in% strsplit(pos, ",")[[1]]),
  integer(1)
)
install.packages("ggplot2")
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Warning in install.packages :
le package ‘ggplot2’ est en cours d'utilisation et ne sera pas installé
install.packages("factoextra")
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Warning in install.packages :
le package ‘factoextra’ est en cours d'utilisation et ne sera pas installé
# Flag defenders: 1L when at least one of the player's comma-separated
# positions is a defensive position code, 0L otherwise.
# vapply() (rather than sapply()) guarantees exactly one integer per player.
defense_positions <- c("CB", "LB", "RB", "LWB", "RWB")
data$is_defense <- vapply(
  data$positions,
  function(pos) as.integer(any(strsplit(pos, ",")[[1]] %in% defense_positions)),
  integer(1)
)
install.packages("dplyr")
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Warning in install.packages :
le package ‘dplyr’ est en cours d'utilisation et ne sera pas installé
# Return, for each comma-separated position string, 1L when at least one of
# its position codes belongs to `wanted` and 0L otherwise.
# vapply() guarantees exactly one integer per element of `positions`.
flag_positions <- function(positions, wanted) {
  vapply(
    positions,
    function(pos) as.integer(any(strsplit(pos, ",")[[1]] %in% wanted)),
    integer(1)
  )
}

# Midfield and attack indicators, built with the same rule.
data$is_midfield <- flag_positions(
  data$positions,
  c("CDM", "CM", "CAM", "RM", "LM")
)
data$is_attack <- flag_positions(
  data$positions,
  c("CF", "ST", "LW", "RW")
)
# The raw `positions` column is removed from the data set here.
data[["positions"]] <- NULL
# Distinct values taken by the national-team position column.
print(unique(data[["national_team_position"]]))
[1] "RF" "CAM" "RDM" "LW" "" "LCB" "RM" "SUB" "GK" "LS" "CDM" "LF" "RCB" "LB" "ST" "LAM" "RW" "CB"
[19] "LCM" "CM" "LM" "RB" "RCM" "LDM" "RWB" "LWB" "RS" "RES" "CF" "RAM"
# Remove the national-team descriptors and the player identification columns
# in one step by keeping every column that is not listed.
colonnes_a_supprimer <- c(
  "national_team_position",
  "national_team",
  "birth_date",
  "full_name",
  "name"
)
data <- data[setdiff(names(data), colonnes_a_supprimer)]
# Count the rows in which every column is empty. A cell is empty when it is
# NA or contains only whitespace once converted to text.
cellule_vide <- function(col) is.na(col) | trimws(as.character(col)) == ""
# Work column by column and combine with `&`: this avoids apply(), which would
# first coerce the whole data frame to a character matrix.
lignes_entierement_vides <- Reduce(`&`, lapply(data, cellule_vide))
nb_lignes_vides <- sum(lignes_entierement_vides)
# Display the result
cat("Nombre de lignes vides :", nb_lignes_vides, "\n")
Nombre de lignes vides : 0
# Count the rows that have at least one empty column. A cell is empty when it
# is NA or contains only whitespace once converted to text.
cellule_vide <- function(col) is.na(col) | trimws(as.character(col)) == ""
# Work column by column and combine with `|`: this avoids apply(), which would
# first coerce the whole data frame to a character matrix.
lignes_avec_vide <- Reduce(`|`, lapply(data, cellule_vide))
nb_lignes_avec_vide <- sum(lignes_avec_vide)
# Display the result
cat("Nombre de lignes avec au moins une colonne vide :", nb_lignes_avec_vide, "\n")
Nombre de lignes avec au moins une colonne vide : 17165
# Percentage of missing values per column. A value is missing when it is NA
# or contains only whitespace once converted to text.
# vapply() guarantees one numeric percentage per column.
pourcentage_manquants <- vapply(
  data,
  function(col) mean(is.na(col) | trimws(as.character(col)) == "") * 100,
  numeric(1)
)
# Sort the columns from the highest to the lowest percentage of missing values
pourcentage_manquants_tries <- sort(pourcentage_manquants, decreasing = TRUE)
# Display the result
cat("Pourcentage de valeurs manquantes par colonne (trié) :\n")
Pourcentage de valeurs manquantes par colonne (trié) :
# Show the sorted percentages of missing values.
print(x = pourcentage_manquants_tries)
national_rating national_jersey_number release_clause_euro
95.226690 95.226690 10.231703
value_euro wage_euro age
1.420296 1.370168 0.000000
height_cm weight_kgs nationality
0.000000 0.000000 0.000000
overall_rating potential preferred_foot
0.000000 0.000000 0.000000
international_reputation.1.5. weak_foot.1.5. skill_moves.1.5.
0.000000 0.000000 0.000000
body_type crossing finishing
0.000000 0.000000 0.000000
heading_accuracy short_passing volleys
0.000000 0.000000 0.000000
dribbling curve freekick_accuracy
0.000000 0.000000 0.000000
long_passing ball_control acceleration
0.000000 0.000000 0.000000
sprint_speed agility reactions
0.000000 0.000000 0.000000
balance shot_power jumping
0.000000 0.000000 0.000000
stamina strength long_shots
0.000000 0.000000 0.000000
aggression interceptions positioning
0.000000 0.000000 0.000000
vision penalties composure
0.000000 0.000000 0.000000
marking standing_tackle sliding_tackle
0.000000 0.000000 0.000000
is_goalkeeper is_defense is_midfield
0.000000 0.000000 0.000000
is_attack
0.000000
# Drop three columns from the working data set.
# NOTE(review): the missing-value table printed above reports 0% missing for
# international_reputation.1.5. and about 95% for national_rating, which is
# kept here - confirm that this selection is the intended one.
colonnes_a_retirer <- c(
  "international_reputation.1.5.",
  "national_jersey_number",
  "release_clause_euro"
)
data <- data[setdiff(names(data), colonnes_a_retirer)]

# Split the columns into a numeric table and a qualitative table.
# The four position indicators are stored as integers but are categorical, so
# they go with the qualitative variables and are kept out of the numeric ones.
indicateurs_poste <- c("is_goalkeeper", "is_defense", "is_midfield", "is_attack")
est_numerique <- vapply(data, is.numeric, logical(1))
est_indicateur <- names(data) %in% indicateurs_poste
data.num <- data[est_numerique & !est_indicateur]
data.quali <- data[!est_numerique | est_indicateur]
Nous allons commencer par l’étude des variables numériques :
# One histogram per numeric variable, titled and labelled with the column
# name. Iterating over names(data.num) simply does nothing when the table has
# no columns, whereas seq(1, 0, 1) raises an error.
for (nom_variable in names(data.num)) {
  hist(
    data.num[[nom_variable]],
    xlab = nom_variable,
    col = rainbow(10),
    main = nom_variable
  )
}

# One boxplot per numeric variable, titled and labelled the same way.
for (nom_variable in names(data.num)) {
  boxplot(
    data.num[[nom_variable]],
    xlab = nom_variable,
    col = "cyan",
    main = nom_variable
  )
}
Pour wage_euro et value_euro : il y a plusieurs outliers. Pour strength, nous avons une population équitablement distribuée avec la présence d’outliers. Pour penalties, on remarque que la population est équitablement répartie autour de la moyenne et il n’y a pas d’outliers.
On va essayer de voir les corrélations entre ces variables numériques.
library(corrplot)
# Correlation matrix of the numeric variables with cor()'s default handling of
# missing values: any pair involving a column that contains NA comes out as NA.
M <- cor(x = data.num)
print(M)
age height_cm weight_kgs overall_rating potential value_euro wage_euro
age 1.00000000 0.059579475 0.233819426 0.46152922 -0.259578566 NA NA
height_cm 0.05957947 1.000000000 0.495320359 0.03807987 0.008525838 NA NA
weight_kgs 0.23381943 0.495320359 1.000000000 0.15052093 -0.015098950 NA NA
overall_rating 0.46152922 0.038079873 0.150520928 1.00000000 0.647249240 NA NA
potential -0.25957857 0.008525838 -0.015098950 0.64724924 1.000000000 NA NA
value_euro NA NA NA NA NA 1 NA
wage_euro NA NA NA NA NA NA 1
weak_foot.1.5. 0.06110803 -0.130463702 -0.128875336 0.21639350 0.167735495 NA NA
skill_moves.1.5. 0.02968110 -0.320140714 -0.353601120 0.41871802 0.356241084 NA NA
national_rating NA NA NA NA NA NA NA
crossing 0.13335725 -0.388804511 -0.394145351 0.39834317 0.244438236 NA NA
finishing 0.07476055 -0.296793996 -0.294710244 0.34233114 0.247552068 NA NA
heading_accuracy 0.15172315 -0.043058013 0.027697604 0.34767032 0.204058724 NA NA
short_passing 0.13715078 -0.290321276 -0.291826210 0.50681904 0.368105526 NA NA
volleys 0.14775211 -0.279701660 -0.263662471 0.39692210 0.255953677 NA NA
dribbling 0.01633362 -0.382180531 -0.414171729 0.37871796 0.315429352 NA NA
curve 0.14762754 -0.345927472 -0.347161017 0.42389039 0.278243214 NA NA
freekick_accuracy 0.19600330 -0.316407282 -0.305299273 0.40087831 0.230355260 NA NA
long_passing 0.18501807 -0.262812586 -0.260839826 0.48999766 0.322550167 NA NA
ball_control 0.09155228 -0.328643104 -0.339170217 0.46433631 0.350661625 NA NA
acceleration -0.15166450 -0.406415045 -0.481583486 0.20485306 0.239457057 NA NA
sprint_speed -0.14426512 -0.350073549 -0.413237762 0.21844262 0.241553385 NA NA
agility -0.01317678 -0.438373386 -0.533049744 0.27526028 0.225621322 NA NA
reactions 0.46342071 -0.019999998 0.082796362 0.85576710 0.505011408 NA NA
balance -0.08360953 -0.532395628 -0.663188478 0.11466715 0.143428690 NA NA
weak_foot.1.5. skill_moves.1.5. national_rating crossing finishing heading_accuracy
age 0.06110803 0.02968110 NA 0.1333573 0.0747605504 0.15172315
height_cm -0.13046370 -0.32014071 NA -0.3888045 -0.2967939961 -0.04305801
weight_kgs -0.12887534 -0.35360112 NA -0.3941454 -0.2947102440 0.02769760
overall_rating 0.21639350 0.41871802 NA 0.3983432 0.3423311424 0.34767032
potential 0.16773550 0.35624108 NA 0.2444382 0.2475520682 0.20405872
value_euro NA NA NA NA NA NA
wage_euro NA NA NA NA NA NA
weak_foot.1.5. 1.00000000 0.34528383 NA 0.3116387 0.3696626010 0.19234650
skill_moves.1.5. 0.34528383 1.00000000 NA 0.7464664 0.7484922901 0.45478345
national_rating NA NA 1 NA NA NA
crossing 0.31163871 0.74646640 NA 1.0000000 0.6641825951 0.48400582
finishing 0.36966260 0.74849229 NA 0.6641826 1.0000000000 0.48281325
heading_accuracy 0.19234650 0.45478345 NA 0.4840058 0.4828132465 1.00000000
short_passing 0.32876561 0.73554364 NA 0.8124045 0.6718369584 0.65362141
volleys 0.36528831 0.74922958 NA 0.6965110 0.8864689171 0.51625955
dribbling 0.35881127 0.84182734 NA 0.8609119 0.8277433090 0.56487810
curve 0.35036054 0.77566582 NA 0.8377860 0.7656688766 0.45339583
freekick_accuracy 0.33357491 0.70483242 NA 0.7644982 0.7048280938 0.42091877
long_passing 0.28441514 0.62929786 NA 0.7600857 0.5254918645 0.52724108
ball_control 0.36185036 0.82126837 NA 0.8452110 0.7945573616 0.66932993
acceleration 0.27120125 0.66058759 NA 0.6779973 0.6167960811 0.35517422
sprint_speed 0.25930360 0.63366261 NA 0.6562626 0.6033525903 0.40461454
agility 0.30774521 0.68756220 NA 0.7025163 0.6515510463 0.28176027
reactions 0.20426802 0.38537376 NA 0.3975736 0.3420252447 0.33754128
balance 0.25648653 0.58437059 NA 0.6252702 0.5327378012 0.18801036
short_passing volleys dribbling curve freekick_accuracy long_passing ball_control
age 0.1371508 0.14775211 0.01633362 0.14762754 0.196003299 0.1850181 0.09155228
height_cm -0.2903213 -0.27970166 -0.38218053 -0.34592747 -0.316407282 -0.2628126 -0.32864310
weight_kgs -0.2918262 -0.26366247 -0.41417173 -0.34716102 -0.305299273 -0.2608398 -0.33917022
overall_rating 0.5068190 0.39692210 0.37871796 0.42389039 0.400878312 0.4899977 0.46433631
potential 0.3681055 0.25595368 0.31542935 0.27824321 0.230355260 0.3225502 0.35066162
value_euro NA NA NA NA NA NA NA
wage_euro NA NA NA NA NA NA NA
weak_foot.1.5. 0.3287656 0.36528831 0.35881127 0.35036054 0.333574910 0.2844151 0.36185036
skill_moves.1.5. 0.7355436 0.74922958 0.84182734 0.77566582 0.704832423 0.6292979 0.82126837
national_rating NA NA NA NA NA NA NA
crossing 0.8124045 0.69651102 0.86091186 0.83778599 0.764498156 0.7600857 0.84521097
finishing 0.6718370 0.88646892 0.82774331 0.76566888 0.704828094 0.5254919 0.79455736
heading_accuracy 0.6536214 0.51625955 0.56487810 0.45339583 0.420918766 0.5272411 0.66932993
short_passing 1.0000000 0.70500073 0.84926489 0.77946797 0.739337850 0.8987303 0.91492205
volleys 0.7050007 1.00000000 0.81211167 0.81077209 0.754154485 0.5791321 0.79905208
dribbling 0.8492649 0.81211167 1.00000000 0.84613569 0.757183926 0.7298700 0.94155615
curve 0.7794680 0.81077209 0.84613569 1.00000000 0.862054392 0.7163587 0.83378148
freekick_accuracy 0.7393378 0.75415448 0.75718393 0.86205439 1.000000000 0.7075056 0.76396485
long_passing 0.8987303 0.57913213 0.72987004 0.71635875 0.707505569 1.0000000 0.79595861
ball_control 0.9149221 0.79905208 0.94155615 0.83378148 0.763964852 0.7959586 1.00000000
acceleration 0.5820243 0.58269747 0.75934826 0.61681071 0.507970193 0.4576578 0.68987616
sprint_speed 0.5734883 0.56833676 0.73848869 0.58924118 0.478210858 0.4440151 0.67949276
agility 0.6217308 0.63103091 0.77008723 0.68640432 0.594473617 0.5313295 0.71238704
reactions 0.4916754 0.40088751 0.37938656 0.42011253 0.401976103 0.4711807 0.45203202
balance 0.5419388 0.52084928 0.66894019 0.59455209 0.526996011 0.4700364 0.60901322
acceleration sprint_speed agility reactions balance shot_power jumping stamina
age -0.1516645 -0.14426512 -0.01317678 0.46342071 -0.08360953 0.1588746 0.183815869 0.1038039
height_cm -0.4064150 -0.35007355 -0.43837339 -0.02000000 -0.53239563 -0.2399605 -0.052169438 -0.2547940
weight_kgs -0.4815835 -0.41323776 -0.53304974 0.08279636 -0.66318848 -0.1984279 0.005740647 -0.2325543
overall_rating 0.2048531 0.21844262 0.27526028 0.85576710 0.11466715 0.4453064 0.271007648 0.3687488
potential 0.2394571 0.24155339 0.22562132 0.50501141 0.14342869 0.2891574 0.106848778 0.1991511
value_euro NA NA NA NA NA NA NA NA
wage_euro NA NA NA NA NA NA NA NA
weak_foot.1.5. 0.2712012 0.25930360 0.30774521 0.20426802 0.25648653 0.3414199 0.071491165 0.2414905
skill_moves.1.5. 0.6605876 0.63366261 0.68756220 0.38537376 0.58437059 0.7237465 0.109956451 0.5829703
national_rating NA NA NA NA NA NA NA NA
crossing 0.6779973 0.65626257 0.70251633 0.39757356 0.62527025 0.7139692 0.143083334 0.6858946
finishing 0.6167961 0.60335259 0.65155105 0.34202524 0.53273780 0.8207395 0.102385336 0.5272932
heading_accuracy 0.3551742 0.40461454 0.28176027 0.33754128 0.18801036 0.6175126 0.390032646 0.6503556
short_passing 0.5820243 0.57348825 0.62173078 0.49167538 0.54193880 0.7797748 0.210346580 0.7304599
volleys 0.5826975 0.56833676 0.63103091 0.40088751 0.52084928 0.8359407 0.132197020 0.5426657
dribbling 0.7593483 0.73848869 0.77008723 0.37938656 0.66894019 0.8095024 0.152883728 0.7027476
curve 0.6168107 0.58924118 0.68640432 0.42011253 0.59455209 0.7973994 0.115951492 0.6051306
freekick_accuracy 0.5079702 0.47821086 0.59447362 0.40197610 0.52699601 0.7606529 0.086728246 0.5517808
long_passing 0.4576578 0.44401514 0.53132951 0.47118069 0.47003639 0.6826800 0.166372682 0.6490225
ball_control 0.6898762 0.67949276 0.71238704 0.45203202 0.60901322 0.8355716 0.207110082 0.7432019
acceleration 1.0000000 0.92615050 0.81660282 0.20085136 0.71648518 0.5530209 0.225820617 0.6269849
sprint_speed 0.9261505 1.00000000 0.77020047 0.20417015 0.65156155 0.5572879 0.242447334 0.6394081
agility 0.8166028 0.77020047 1.00000000 0.28797619 0.77470410 0.5843563 0.222428688 0.5856175
reactions 0.2008514 0.20417015 0.28797619 1.00000000 0.16185568 0.4260197 0.264821950 0.3793778
balance 0.7164852 0.65156155 0.77470410 0.16185568 1.00000000 0.4717666 0.197026987 0.4930764
strength long_shots aggression interceptions positioning vision penalties
age 0.3396713146 0.16465481 0.26485867 0.198130657 0.08551035 0.19320089 0.14792392
height_cm 0.3187690659 -0.30560129 -0.07399600 -0.078730189 -0.35337973 -0.28725398 -0.27213427
weight_kgs 0.6104311847 -0.27855190 0.02192552 -0.034607195 -0.35528055 -0.28223447 -0.25145263
overall_rating 0.3607251725 0.42945805 0.39707198 0.322030319 0.36211586 0.50681180 0.35090157
potential 0.0779175561 0.26532562 0.17000485 0.151945768 0.24767728 0.34817942 0.22674561
value_euro NA NA NA NA NA NA NA
wage_euro NA NA NA NA NA NA NA
weak_foot.1.5. -0.0036093204 0.36559889 0.13703500 0.056743576 0.35532646 0.34368382 0.33757258
skill_moves.1.5. -0.0334149733 0.75705356 0.36034168 0.221912376 0.78661749 0.67608914 0.69588882
national_rating NA NA NA NA NA NA NA
crossing -0.0195133014 0.74921606 0.48627748 0.438164348 0.79143162 0.68786352 0.65516580
finishing -0.0006046457 0.88008050 0.26214175 -0.003939154 0.89004927 0.70286511 0.84050403
heading_accuracy 0.4902487257 0.51668376 0.70535601 0.559925566 0.54409651 0.28600261 0.56213520
short_passing 0.1476783896 0.76916678 0.62210761 0.549949797 0.76614512 0.71543180 0.68478314
volleys 0.0401962768 0.87120133 0.34428780 0.101064250 0.85166103 0.70291947 0.83426307
dribbling -0.0213919946 0.84613562 0.45966370 0.312198474 0.90165604 0.73133090 0.77329295
curve -0.0247818936 0.83906793 0.41168283 0.284055066 0.81769468 0.74875462 0.75682205
freekick_accuracy -0.0068103937 0.80845170 0.40588679 0.303410596 0.73608806 0.71861384 0.74031185
long_passing 0.1287097929 0.67849368 0.59963676 0.602028141 0.62590776 0.70160812 0.55443861
ball_control 0.0992848641 0.83984148 0.56352917 0.429608582 0.86949244 0.72066544 0.77506448
acceleration -0.1533685301 0.58940409 0.27347353 0.175684176 0.69457627 0.46950231 0.54456389
sprint_speed -0.0698021696 0.57124628 0.30406484 0.189405554 0.67787601 0.43860229 0.53282682
agility -0.2198615982 0.65162944 0.25910140 0.155746873 0.71505226 0.60115750 0.57334112
reactions 0.3009674412 0.43224500 0.41019741 0.344903214 0.39355449 0.51213952 0.35634492
balance -0.3773102555 0.54020194 0.20206645 0.167352072 0.60377760 0.49619018 0.49080402
composure marking standing_tackle sliding_tackle
age 0.39471684 0.14672473 0.11836161 0.10300828
height_cm -0.11184925 -0.08978321 -0.08145237 -0.08609258
weight_kgs -0.04453798 -0.05755933 -0.05631799 -0.06596499
overall_rating 0.72927827 0.29253832 0.25448066 0.22546619
potential 0.43411329 0.16299879 0.14392661 0.12975464
value_euro NA NA NA NA
wage_euro NA NA NA NA
weak_foot.1.5. 0.28556032 0.06700224 0.04571589 0.02784522
skill_moves.1.5. 0.59859513 0.25560083 0.22553172 0.19343774
national_rating NA NA NA NA
crossing 0.58972576 0.45740058 0.44057767 0.42240711
finishing 0.55052001 0.04205804 -0.01342694 -0.05143236
heading_accuracy 0.52557791 0.59314046 0.57301458 0.54645721
short_passing 0.70134094 0.57018650 0.54890710 0.51676133
volleys 0.60704824 0.13556991 0.08776972 0.05063970
dribbling 0.61382137 0.35333331 0.31957915 0.29228908
curve 0.62864390 0.30379909 0.27425040 0.24599100
freekick_accuracy 0.59569324 0.31073081 0.28908937 0.25801118
long_passing 0.66165070 0.59837689 0.59346445 0.56859354
ball_control 0.69047311 0.46621358 0.43095903 0.39856091
acceleration 0.36893191 0.21944821 0.18859012 0.18191434
sprint_speed 0.37400932 0.23720147 0.20545134 0.19855442
agility 0.45070741 0.18698461 0.14914548 0.13596240
reactions 0.69089393 0.29536045 0.26287910 0.23599235
balance 0.33170817 0.19737004 0.17273552 0.17005972
[ getOption("max.print") est atteint -- 14 lignes omises ]
On remarque des coefficients de corrélation élevés entre plusieurs variables.
# Correlation matrix computed on the rows that have no missing value in any
# numeric column, then drawn as a circle plot.
# NOTE(review): national_rating is still part of data.num and is about 95%
# missing according to the table printed earlier, so at most about 5% of the
# players contribute to these correlations - confirm this is intended.
corr_matrix <- cor(x = data.num, use = "complete.obs")
corrplot(corr = corr_matrix, method = "circle", tl.col = "black", tl.cex = 0.7)
Maintenant, on transforme les variables qualitatives en factors :
# Convert every qualitative column to a factor in one pass, then summarise
# the resulting table.
colonnes_facteurs <- c(
  "nationality",
  "preferred_foot",
  "body_type",
  "is_goalkeeper",
  "is_defense",
  "is_midfield",
  "is_attack"
)
data.quali[colonnes_facteurs] <- lapply(data.quali[colonnes_facteurs], as.factor)
print(summary(data.quali))
nationality preferred_foot body_type is_goalkeeper is_defense is_midfield is_attack
England : 1658 Left : 4173 Normal :10393 0:15889 0:11383 0:9458 0:13621
Germany : 1199 Right:13781 Lean : 6468 1: 2065 1: 6571 1:8496 1: 4333
Spain : 1070 Stocky : 1086
France : 925 Akinfenwa : 1
Argentina: 904 C. Ronaldo: 1
Brazil : 832 Courtois : 1
(Other) :11366 (Other) : 4
On remarque que dans body_type nous avons des valeurs erronées qui n’ont pas de vraie signification. Il faudra donc supprimer ces lignes pour ne pas influencer notre analyse :
# Body types that carry a real meaning, as shown by summary(data.quali).
types_corporels_valides <- c("Normal", "Lean", "Stocky")
# List of values to exclude: every other body type present in the data.
# The hand-written list of three names missed the remaining erroneous values
# (str(data) shows "Messi" as a body type, and summary() reports 4 more values
# under "(Other)"). NA is left out of the list so that missing body types are
# handled exactly as before.
types_observes <- unique(data$body_type[!is.na(data$body_type)])
valeurs_a_exclure <- setdiff(types_observes, types_corporels_valides)
# Filter the data by excluding these values
df_filtre <- subset(data, !(body_type %in% valeurs_a_exclure))
# Display the result
print(df_filtre)
NA
# Check the distribution of categories: bar chart of the 20 most represented
# nationalities. slice_max() names the ranking column explicitly, whereas
# top_n(20) is superseded and silently picks the last column ("Selecting by n").
df_filtre %>%
  count(nationality, sort = TRUE) %>%
  slice_max(n, n = 20) %>%
  ggplot(aes(x = reorder(nationality, n), y = n)) +
  geom_col(fill = "steelblue") +  # geom_col() is geom_bar(stat = "identity")
  coord_flip() +
  theme_minimal() +
  ggtitle("Top 20 des nationalités les plus représentées")
Selecting by n
# Compute the percentage of missing values per column.
# A value counts as missing when it is NA or blank once surrounding whitespace
# is stripped; trimws(col) == "" already covers the plain "" case.
# vapply() guarantees a named numeric vector whatever the input looks like.
pourcentage_manquants <- vapply(
  df_filtre,
  function(col) mean(is.na(col) | trimws(col) == "") * 100,
  numeric(1)
)
# Sort the columns by percentage of missing values (largest first)
pourcentage_manquants_tries <- sort(pourcentage_manquants, decreasing = TRUE)
# Display the result
cat("Pourcentage de valeurs manquantes par colonne (trié) :\n")
Pourcentage de valeurs manquantes par colonne (trié) :
# Print the per-column missing-value percentages, sorted in decreasing order
print(pourcentage_manquants_tries)
national_rating value_euro wage_euro age height_cm weight_kgs
95.237034 1.420534 1.370397 0.000000 0.000000 0.000000
nationality overall_rating potential preferred_foot weak_foot.1.5. skill_moves.1.5.
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
body_type crossing finishing heading_accuracy short_passing volleys
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
dribbling curve freekick_accuracy long_passing ball_control acceleration
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
sprint_speed agility reactions balance shot_power jumping
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
stamina strength long_shots aggression interceptions positioning
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
vision penalties composure marking standing_tackle sliding_tackle
0.000000 0.000000 0.000000 0.000000 0.000000 0.000000
is_goalkeeper is_defense is_midfield is_attack
0.000000 0.000000 0.000000 0.000000
# Drop the national_rating column
df_filtre[["national_rating"]] <- NULL
# Keep only the rows where both value_euro and wage_euro are present
# (neither NA nor an empty string)
df_final <- df_filtre[
  with(
    df_filtre,
    !(is.na(value_euro) | value_euro == "") &
      !(is.na(wage_euro) | wage_euro == "")
  ),
]
# Compute the percentage of missing values per column of the cleaned data.
# A value counts as missing when it is NA or blank once surrounding whitespace
# is stripped; trimws(col) == "" already covers the plain "" case.
# vapply() guarantees a named numeric vector whatever the input looks like.
pourcentage_manquants <- vapply(
  df_final,
  function(col) mean(is.na(col) | trimws(col) == "") * 100,
  numeric(1)
)
# Sort the columns by percentage of missing values (largest first)
pourcentage_manquants_tries <- sort(pourcentage_manquants, decreasing = TRUE)
# Display the result
cat("Pourcentage de valeurs manquantes par colonne (trié) :\n")
Pourcentage de valeurs manquantes par colonne (trié) :
# Print the per-column missing-value percentages, sorted in decreasing order
print(pourcentage_manquants_tries)
age height_cm weight_kgs nationality overall_rating potential
0 0 0 0 0 0
value_euro wage_euro preferred_foot weak_foot.1.5. skill_moves.1.5. body_type
0 0 0 0 0 0
crossing finishing heading_accuracy short_passing volleys dribbling
0 0 0 0 0 0
curve freekick_accuracy long_passing ball_control acceleration sprint_speed
0 0 0 0 0 0
agility reactions balance shot_power jumping stamina
0 0 0 0 0 0
strength long_shots aggression interceptions positioning vision
0 0 0 0 0 0
penalties composure marking standing_tackle sliding_tackle is_goalkeeper
0 0 0 0 0 0
is_defense is_midfield is_attack
0 0 0
# Keep only the numeric columns of the cleaned data
nums <- Filter(is.numeric, df_final)
# Standardise them with scale() (default centring and scaling)
df_scaled <- scale(nums)
########################### UNSUPERVISED ANALYSIS (CLUSTERING) ########################################
# K-MEANS METHOD
set.seed(seed = 123)
# Determine the optimal number of clusters from the within-cluster sum of
# squares ("wss") curve
fviz_nbclust(x = df_scaled, FUNcluster = kmeans, method = "wss")
# Install caret only when it is missing. An unconditional install.packages()
# in the middle of the analysis tries to reinstall a package that is already
# loaded, which R refuses with a warning.
if (!requireNamespace("caret", quietly = TRUE)) {
  install.packages("caret")
}
WARNING: Rtools is required to build R packages but is not currently installed. Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Warning in install.packages :
le package ‘caret’ est en cours d'utilisation et ne sera pas installé
# Re-seed right before kmeans(): its random starts (nstart = 25) would
# otherwise depend on how much of the random stream earlier calls consumed,
# making the cluster assignment irreproducible when this step is run alone.
set.seed(123)
# Partition the standardised data into 4 clusters, best of 25 random starts
kmeans_result <- kmeans(df_scaled, centers = 4, nstart = 25)
# Store each player's cluster label alongside the cleaned data
df_final$Cluster_KMeans <- as.factor(kmeans_result$cluster)
# Plot the clusters
fviz_cluster(kmeans_result, data = df_scaled) + ggtitle("Clusters K-Means")
# PRINCIPAL COMPONENT ANALYSIS (PCA)
# df_scaled is already the output of scale(); the centring and scaling flags
# are kept exactly as in the original call.
pca_result <- prcomp(
  x = df_scaled,
  center = TRUE,
  scale. = TRUE
)
# Explained variance per component
summary(object = pca_result)
Importance of components:
PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11
Standard deviation 4.2979 2.4441 1.94081 1.48314 1.33192 1.13563 0.91860 0.89256 0.84671 0.80892 0.7530
Proportion of Variance 0.4398 0.1422 0.08968 0.05237 0.04224 0.03071 0.02009 0.01897 0.01707 0.01558 0.0135
Cumulative Proportion 0.4398 0.5820 0.67173 0.72410 0.76634 0.79705 0.81714 0.83611 0.85318 0.86876 0.8823
PC12 PC13 PC14 PC15 PC16 PC17 PC18 PC19 PC20 PC21 PC22
Standard deviation 0.67309 0.56745 0.55197 0.52505 0.51127 0.5020 0.48358 0.47674 0.46491 0.44768 0.44582
Proportion of Variance 0.01079 0.00767 0.00725 0.00656 0.00622 0.0060 0.00557 0.00541 0.00515 0.00477 0.00473
Cumulative Proportion 0.89304 0.90071 0.90796 0.91453 0.92075 0.9267 0.93232 0.93773 0.94288 0.94765 0.95238
PC23 PC24 PC25 PC26 PC27 PC28 PC29 PC30 PC31 PC32 PC33
Standard deviation 0.43447 0.41679 0.40870 0.40157 0.37441 0.36515 0.35812 0.33980 0.33235 0.3175 0.2970
Proportion of Variance 0.00449 0.00414 0.00398 0.00384 0.00334 0.00317 0.00305 0.00275 0.00263 0.0024 0.0021
Cumulative Proportion 0.95688 0.96101 0.96499 0.96883 0.97217 0.97534 0.97840 0.98114 0.98377 0.9862 0.9883
PC34 PC35 PC36 PC37 PC38 PC39 PC40 PC41 PC42
Standard deviation 0.28132 0.26304 0.25480 0.2507 0.24809 0.24141 0.20122 0.1837 0.14924
Proportion of Variance 0.00188 0.00165 0.00155 0.0015 0.00147 0.00139 0.00096 0.0008 0.00053
Cumulative Proportion 0.99016 0.99181 0.99335 0.9949 0.99631 0.99770 0.99867 0.9995 1.00000
# Scree plot: share of variance explained by each component
fviz_eig(pca_result)
# Map of individuals coloured by cos2 (quality of representation).
# Individuals are drawn as points only: the data set has thousands of players
# (17954 rows before filtering), so repelled text labels (repel = TRUE) would
# be unreadable and extremely slow to lay out.
fviz_pca_ind(
  pca_result,
  geom = "point",
  col.ind = "cos2",
  gradient.cols = c("blue", "red")
) + ggtitle("PCA des joueurs FIFA")
À partir de 4 clusters, l’inertie intra-classe (courbe WSS) ne diminue plus que lentement. Il est donc judicieux de travailler avec 4 clusters.
library(FactoMineR)
library(factoextra)
# Display the biplot (individuals and variables on the first two components).
# Only the variables are labelled: labelling every individual with
# repel = TRUE on thousands of players is unreadable and very slow, while the
# handful of variable names still benefits from repelled labels.
fviz_pca_biplot(
  pca_result,
  label = "var",      # text labels for variables only; individuals stay as points
  repel = TRUE,
  col.var = "blue",   # colour of the variables
  col.ind = "red"     # colour of the individuals
)
Les deux premières composantes principales expliquent 58.2% de la variance. On remarque bien que le fait d’être gardien de but est négativement corrélé avec des caractéristiques comme les passes (short_passing et long_passing), crossing et value_euro, ce qui est logique. La première composante est davantage expliquée par les caractéristiques techniques du joueur, alors que la deuxième l’est par ses caractéristiques physiques.