# Load the cleaned player data set. stringsAsFactors = TRUE keeps text columns
# (e.g. Position) as factors on R >= 4.0 too, which is what the R 3.6 run that
# produced the recorded output in this file relied on.
football.df <- read.csv("data_clean.csv", stringsAsFactors = TRUE)
# NOTE(review): attach() is kept only because statements further down refer to
# columns by bare name; prefer football.df$<column> in new code.
attach(football.df)
# Inspect the structure of the loaded data frame. This used to be str(df),
# which printed the signature of stats::df() because no object called `df`
# existed at this point.
str(football.df)
# Wage: number of players at each distinct wage value
table(Wage = football.df$Wage)
## Wage
## 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000
## 4770 2704 1734 1138 763 614 430 386 287 270
## 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000
## 256 215 196 167 173 120 143 133 115 120
## 21000 22000 23000 24000 25000 26000 27000 28000 29000 30000
## 99 115 81 91 81 82 55 53 53 50
## 31000 32000 33000 34000 35000 36000 37000 38000 39000 40000
## 50 49 30 42 43 28 23 35 36 29
## 41000 42000 43000 44000 45000 46000 47000 48000 49000 50000
## 37 23 33 33 22 37 19 23 14 20
## 51000 52000 53000 54000 55000 56000 57000 58000 59000 60000
## 23 8 23 17 15 13 8 11 11 8
## 61000 62000 63000 64000 65000 66000 67000 68000 69000 70000
## 10 5 5 11 11 8 6 7 3 4
## 71000 72000 73000 74000 75000 76000 77000 78000 79000 80000
## 2 3 8 7 5 10 8 4 4 5
## 81000 82000 83000 84000 85000 86000 87000 88000 89000 90000
## 4 5 2 6 8 3 3 5 3 3
## 91000 92000 93000 94000 95000 96000 97000 98000 99000 100000
## 7 5 1 7 4 4 2 4 4 3
## 105000 110000 115000 120000 125000 130000 135000 140000 145000 150000
## 11 15 18 10 13 10 7 9 3 3
## 155000 160000 165000 170000 175000 180000 185000 190000 195000 200000
## 3 7 7 3 4 4 2 1 5 1
## 205000 210000 215000 225000 230000 240000 250000 255000 260000 265000
## 8 2 3 1 1 4 1 1 2 1
## 285000 290000 300000 315000 340000 355000 380000 405000 420000 455000
## 3 1 1 2 2 3 1 1 1 1
## 565000
## 1
# Wage: percentage of players at each wage value, rounded to two decimals
propWage <- round(100 * prop.table(table(Wage = football.df$Wage)), 2)
propWage
## Wage
## 1000 2000 3000 4000 5000 6000 7000 8000 9000 10000
## 28.66 16.25 10.42 6.84 4.58 3.69 2.58 2.32 1.72 1.62
## 11000 12000 13000 14000 15000 16000 17000 18000 19000 20000
## 1.54 1.29 1.18 1.00 1.04 0.72 0.86 0.80 0.69 0.72
## 21000 22000 23000 24000 25000 26000 27000 28000 29000 30000
## 0.59 0.69 0.49 0.55 0.49 0.49 0.33 0.32 0.32 0.30
## 31000 32000 33000 34000 35000 36000 37000 38000 39000 40000
## 0.30 0.29 0.18 0.25 0.26 0.17 0.14 0.21 0.22 0.17
## 41000 42000 43000 44000 45000 46000 47000 48000 49000 50000
## 0.22 0.14 0.20 0.20 0.13 0.22 0.11 0.14 0.08 0.12
## 51000 52000 53000 54000 55000 56000 57000 58000 59000 60000
## 0.14 0.05 0.14 0.10 0.09 0.08 0.05 0.07 0.07 0.05
## 61000 62000 63000 64000 65000 66000 67000 68000 69000 70000
## 0.06 0.03 0.03 0.07 0.07 0.05 0.04 0.04 0.02 0.02
## 71000 72000 73000 74000 75000 76000 77000 78000 79000 80000
## 0.01 0.02 0.05 0.04 0.03 0.06 0.05 0.02 0.02 0.03
## 81000 82000 83000 84000 85000 86000 87000 88000 89000 90000
## 0.02 0.03 0.01 0.04 0.05 0.02 0.02 0.03 0.02 0.02
## 91000 92000 93000 94000 95000 96000 97000 98000 99000 100000
## 0.04 0.03 0.01 0.04 0.02 0.02 0.01 0.02 0.02 0.02
## 105000 110000 115000 120000 125000 130000 135000 140000 145000 150000
## 0.07 0.09 0.11 0.06 0.08 0.06 0.04 0.05 0.02 0.02
## 155000 160000 165000 170000 175000 180000 185000 190000 195000 200000
## 0.02 0.04 0.04 0.02 0.02 0.02 0.01 0.01 0.03 0.01
## 205000 210000 215000 225000 230000 240000 250000 255000 260000 265000
## 0.05 0.01 0.02 0.01 0.01 0.02 0.01 0.01 0.01 0.01
## 285000 290000 300000 315000 340000 355000 380000 405000 420000 455000
## 0.02 0.01 0.01 0.01 0.01 0.02 0.01 0.01 0.01 0.01
## 565000
## 0.01
# Age: number of players at each age
table(Age = football.df$Age)
## Age
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## 41 284 701 926 1091 1220 1163 1189 1206 1196 1272 1078 1027 897 877
## 31 32 33 34 35 36 37 38 39 40 41 42 45
## 667 553 392 397 190 125 80 36 24 7 2 1 1
# Age: percentage of players at each age, rounded to two decimals
propAge <- round(100 * prop.table(table(Age = football.df$Age)), 2)
propAge
## Age
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## 0.25 1.71 4.21 5.56 6.56 7.33 6.99 7.14 7.25 7.19 7.64 6.48 6.17 5.39 5.27
## 31 32 33 34 35 36 37 38 39 40 41 42 45
## 4.01 3.32 2.36 2.39 1.14 0.75 0.48 0.22 0.14 0.04 0.01 0.01 0.01
# Overall rating: number of players at each rating
table(Overall = football.df$Overall)
## Overall
## 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 1 20 31 36 100 123 158 191 240 254 324 373 425 486 604
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
## 665 765 920 1014 932 1055 1019 926 865 800 707 672 544 474 446
## 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 340 280 153 162 134 90 94 68 45 32 22 13 15 11 5
## 91 92 94
## 6 1 2
# Overall rating: percentage of players at each rating, rounded to two decimals
propOverall <- round(100 * prop.table(table(Overall = football.df$Overall)), 2)
propOverall
## Overall
## 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 0.01 0.12 0.19 0.22 0.60 0.74 0.95 1.15 1.44 1.53 1.95 2.24 2.55 2.92 3.63
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
## 4.00 4.60 5.53 6.09 5.60 6.34 6.12 5.56 5.20 4.81 4.25 4.04 3.27 2.85 2.68
## 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 2.04 1.68 0.92 0.97 0.81 0.54 0.56 0.41 0.27 0.19 0.13 0.08 0.09 0.07 0.03
## 91 92 94
## 0.04 0.01 0.01
# Ball control: number of players at each score
table(BallControl = football.df$BallControl)
## BallControl
## 5 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
## 1 2 5 46 56 89 74 89 107 103 105 143 120 131 110 143 142 107
## 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42
## 83 38 31 41 41 55 41 46 52 37 55 34 32 50 49 64 66 74
## 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 84 87 113 88 100 176 152 179 189 257 263 283 383 332 375 493 463 602
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
## 553 678 660 704 725 658 636 642 522 570 421 441 389 395 322 230 194 180
## 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
## 121 133 79 65 69 47 38 27 14 15 8 9 5 4 2 3 2 1
# Ball control: percentage of players at each score, rounded to two decimals
propBallControl <- round(
  100 * prop.table(table(BallControl = football.df$BallControl)),
  2
)
propBallControl
## BallControl
## 5 8 9 10 11 12 13 14 15 16 17 18 19 20 21
## 0.01 0.01 0.03 0.28 0.34 0.53 0.44 0.53 0.64 0.62 0.63 0.86 0.72 0.79 0.66
## 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36
## 0.86 0.85 0.64 0.50 0.23 0.19 0.25 0.25 0.33 0.25 0.28 0.31 0.22 0.33 0.20
## 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
## 0.19 0.30 0.29 0.38 0.40 0.44 0.50 0.52 0.68 0.53 0.60 1.06 0.91 1.08 1.14
## 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66
## 1.54 1.58 1.70 2.30 1.99 2.25 2.96 2.78 3.62 3.32 4.07 3.97 4.23 4.36 3.95
## 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
## 3.82 3.86 3.14 3.42 2.53 2.65 2.34 2.37 1.93 1.38 1.17 1.08 0.73 0.80 0.47
## 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96
## 0.39 0.41 0.28 0.23 0.16 0.08 0.09 0.05 0.05 0.03 0.02 0.01 0.02 0.01 0.01
# Agility: number of players at each score
table(Agility = football.df$Agility)
## Agility
## 14 15 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
## 1 1 4 5 2 13 38 27 22 40 30 22 47 49 99 125 140 127
## 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51
## 139 119 106 123 139 138 130 106 112 123 128 148 132 134 181 149 196 175
## 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
## 242 218 231 272 304 334 384 363 392 388 425 448 462 489 508 495 557 513
## 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87
## 529 500 510 481 465 449 419 410 386 342 217 230 218 190 158 135 123 119
## 88 89 90 91 92 93 94 95 96
## 93 69 103 83 61 36 18 3 1
# Agility: percentage of players at each score, rounded to two decimals
propAgility <- round(100 * prop.table(table(Agility = football.df$Agility)), 2)
propAgility
## Agility
## 14 15 18 19 20 21 22 23 24 25 26 27 28 29 30
## 0.01 0.01 0.02 0.03 0.01 0.08 0.23 0.16 0.13 0.24 0.18 0.13 0.28 0.29 0.59
## 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
## 0.75 0.84 0.76 0.84 0.72 0.64 0.74 0.84 0.83 0.78 0.64 0.67 0.74 0.77 0.89
## 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 0.79 0.81 1.09 0.90 1.18 1.05 1.45 1.31 1.39 1.63 1.83 2.01 2.31 2.18 2.36
## 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
## 2.33 2.55 2.69 2.78 2.94 3.05 2.97 3.35 3.08 3.18 3.00 3.06 2.89 2.79 2.70
## 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90
## 2.52 2.46 2.32 2.05 1.30 1.38 1.31 1.14 0.95 0.81 0.74 0.72 0.56 0.41 0.62
## 91 92 93 94 95 96
## 0.50 0.37 0.22 0.11 0.02 0.01
# Stamina: number of players at each score
table(Stamina = football.df$Stamina)
## Stamina
## 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29
## 1 2 9 4 21 38 42 35 60 66 72 60 73 85 70 81 104 69
## 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
## 130 92 124 124 139 84 69 66 90 66 92 91 86 82 88 104 59 70
## 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65
## 96 105 137 176 223 229 285 340 215 204 307 322 379 374 419 420 493 534
## 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
## 518 566 612 620 527 506 542 486 523 509 435 392 388 321 274 212 230 177
## 84 85 86 87 88 89 90 91 92 93 94 95 96
## 162 157 131 105 93 85 100 73 56 26 7 2 2
# Stamina: percentage of players at each score, rounded to two decimals
propStamina <- round(100 * prop.table(table(Stamina = football.df$Stamina)), 2)
propStamina
## Stamina
## 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 0.01 0.01 0.05 0.02 0.13 0.23 0.25 0.21 0.36 0.40 0.43 0.36 0.44 0.51 0.42
## 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41
## 0.49 0.62 0.41 0.78 0.55 0.75 0.75 0.84 0.50 0.41 0.40 0.54 0.40 0.55 0.55
## 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56
## 0.52 0.49 0.53 0.62 0.35 0.42 0.58 0.63 0.82 1.06 1.34 1.38 1.71 2.04 1.29
## 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71
## 1.23 1.84 1.93 2.28 2.25 2.52 2.52 2.96 3.21 3.11 3.40 3.68 3.73 3.17 3.04
## 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
## 3.26 2.92 3.14 3.06 2.61 2.36 2.33 1.93 1.65 1.27 1.38 1.06 0.97 0.94 0.79
## 87 88 89 90 91 92 93 94 95 96
## 0.63 0.56 0.51 0.60 0.44 0.34 0.16 0.04 0.01 0.01
# Standing tackle: number of players at each score
table(StandingTackle = football.df$StandingTackle)
## StandingTackle
## 2 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
## 1 1 3 4 22 39 148 263 343 352 362 281 215 226 259 248 232 218
## 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
## 183 196 183 182 152 162 185 148 180 178 168 140 147 162 157 123 143 145
## 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
## 156 135 147 106 128 146 101 89 151 135 116 119 150 169 212 225 219 231
## 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
## 312 318 331 332 452 483 559 538 575 438 481 361 396 272 334 285 249 225
## 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
## 177 138 119 89 70 52 43 32 32 18 15 11 5 5 6 2 1 1
# Standing tackle: percentage of players at each score, rounded to two decimals
propstandingtackle <- round(
  100 * prop.table(table(StandingTackle = football.df$StandingTackle)),
  2
)
propstandingtackle
## StandingTackle
## 2 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 0.01 0.01 0.02 0.02 0.13 0.23 0.89 1.58 2.06 2.12 2.18 1.69 1.29 1.36 1.56
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
## 1.49 1.39 1.31 1.10 1.18 1.10 1.09 0.91 0.97 1.11 0.89 1.08 1.07 1.01 0.84
## 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
## 0.88 0.97 0.94 0.74 0.86 0.87 0.94 0.81 0.88 0.64 0.77 0.88 0.61 0.53 0.91
## 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
## 0.81 0.70 0.72 0.90 1.02 1.27 1.35 1.32 1.39 1.87 1.91 1.99 1.99 2.72 2.90
## 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
## 3.36 3.23 3.45 2.63 2.89 2.17 2.38 1.63 2.01 1.71 1.50 1.35 1.06 0.83 0.72
## 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
## 0.53 0.42 0.31 0.26 0.19 0.19 0.11 0.09 0.07 0.03 0.03 0.04 0.01 0.01 0.01
# Dribbling: number of players at each score
table(Dribbling = football.df$Dribbling)
## Dribbling
## 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21
## 4 22 36 59 70 72 110 175 223 212 209 134 133 94 82 89 78 57
## 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39
## 60 37 46 67 55 48 59 57 87 64 87 57 77 82 65 62 89 87
## 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
## 91 89 99 93 122 163 145 137 187 197 196 214 275 275 292 374 364 432
## 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
## 471 489 488 482 598 598 647 611 601 564 562 434 410 388 382 345 335 305
## 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
## 217 168 173 104 106 70 67 47 46 32 37 27 17 8 12 4 3 1
## 94 95 96 97
## 1 1 1 1
# Dribbling: percentage of players at each score, rounded to two decimals
propDribbling <- round(
  100 * prop.table(table(Dribbling = football.df$Dribbling)),
  2
)
propDribbling
## Dribbling
## 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18
## 0.02 0.13 0.22 0.35 0.42 0.43 0.66 1.05 1.34 1.27 1.26 0.81 0.80 0.56 0.49
## 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
## 0.53 0.47 0.34 0.36 0.22 0.28 0.40 0.33 0.29 0.35 0.34 0.52 0.38 0.52 0.34
## 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48
## 0.46 0.49 0.39 0.37 0.53 0.52 0.55 0.53 0.59 0.56 0.73 0.98 0.87 0.82 1.12
## 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63
## 1.18 1.18 1.29 1.65 1.65 1.75 2.25 2.19 2.60 2.83 2.94 2.93 2.90 3.59 3.59
## 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78
## 3.89 3.67 3.61 3.39 3.38 2.61 2.46 2.33 2.30 2.07 2.01 1.83 1.30 1.01 1.04
## 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
## 0.62 0.64 0.42 0.40 0.28 0.28 0.19 0.22 0.16 0.10 0.05 0.07 0.02 0.02 0.01
## 94 95 96 97
## 0.01 0.01 0.01 0.01
# Short passing: number of players at each score
table(ShortPassing = football.df$ShortPassing)
## ShortPassing
## 7 8 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
## 2 1 7 17 12 14 43 34 47 52 57 64 60 79 121 130 116 102
## 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
## 119 121 133 95 85 96 85 72 81 73 60 81 67 93 89 95 89 108
## 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
## 130 117 123 180 167 154 194 272 280 335 377 389 420 530 502 578 502 699
## 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80
## 664 798 730 658 634 665 490 526 405 434 356 347 278 235 183 177 122 102
## 81 82 83 84 85 86 87 88 89 90 91 92 93
## 62 64 43 46 26 18 5 6 8 7 1 2 2
# Short passing: percentage of players at each score, rounded to two decimals
propShortPassing <- round(
  100 * prop.table(table(ShortPassing = football.df$ShortPassing)),
  2
)
propShortPassing
## ShortPassing
## 7 8 11 12 13 14 15 16 17 18 19 20 21 22 23
## 0.01 0.01 0.04 0.10 0.07 0.08 0.26 0.20 0.28 0.31 0.34 0.38 0.36 0.47 0.73
## 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38
## 0.78 0.70 0.61 0.72 0.73 0.80 0.57 0.51 0.58 0.51 0.43 0.49 0.44 0.36 0.49
## 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53
## 0.40 0.56 0.53 0.57 0.53 0.65 0.78 0.70 0.74 1.08 1.00 0.93 1.17 1.63 1.68
## 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68
## 2.01 2.27 2.34 2.52 3.18 3.02 3.47 3.02 4.20 3.99 4.79 4.39 3.95 3.81 4.00
## 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83
## 2.94 3.16 2.43 2.61 2.14 2.08 1.67 1.41 1.10 1.06 0.73 0.61 0.37 0.38 0.26
## 84 85 86 87 88 89 90 91 92 93
## 0.28 0.16 0.11 0.03 0.04 0.05 0.04 0.01 0.01 0.01
# Position: build a factor copy of the column and list its levels.
# The levels are read from the factor that was just created; querying the raw
# column instead returns NULL whenever that column is character rather than
# factor (the read.csv() default from R 4.0 on).
Position.modified <- factor(football.df$Position)
levels(Position.modified)
## [1] "defence" "Forward" "goal keeper" "Midfield"
# Position: percentage of players in each position, rounded to two decimals
propPosition <- round(
  100 * prop.table(table(Position = football.df$Position)),
  2
)
propPosition
## Position
## defence Forward goal keeper Midfield
## 31.71 23.81 11.42 33.06
# Market value: number of players at each distinct value
table(Value = football.df$Value)
## Value
## 10000 20000 30000 40000 50000 60000 70000
## 15 21 23 64 125 148 139
## 80000 90000 100000 110000 120000 130000 140000
## 113 136 169 172 186 194 194
## 150000 160000 170000 180000 190000 200000 210000
## 168 196 164 193 140 166 135
## 220000 230000 240000 250000 260000 270000 280000
## 156 132 134 145 148 153 117
## 290000 300000 325000 350000 375000 400000 425000
## 125 223 323 305 354 304 330
## 450000 475000 500000 525000 550000 575000 600000
## 311 279 279 309 262 243 280
## 625000 650000 675000 700000 725000 750000 775000
## 219 207 255 191 221 203 165
## 800000 825000 850000 875000 900000 925000 950000
## 147 182 163 157 173 123 149
## 975000 1000000 1100000 1200000 1300000 1400000 1500000
## 118 284 382 289 222 201 114
## 1600000 1700000 1800000 1900000 2000000 2100000 2200000
## 138 107 175 101 89 95 97
## 2300000 2400000 2500000 2600000 2700000 2800000 2900000
## 105 137 107 65 88 84 56
## 3000000 3100000 3200000 3300000 3400000 3500000 3600000
## 84 75 118 53 56 44 55
## 3700000 3800000 3900000 4000000 4100000 4200000 4300000
## 47 60 73 60 41 62 47
## 4400000 4500000 4600000 4700000 4800000 4900000 5000000
## 56 45 25 36 38 41 112
## 5500000 6000000 6500000 7000000 7500000 8000000 8500000
## 171 147 152 152 114 119 110
## 9000000 9500000 10000000 10500000 11000000 11500000 12000000
## 102 65 77 64 36 40 58
## 12500000 13000000 13500000 14000000 14500000 15000000 15500000
## 44 37 20 36 24 21 29
## 16000000 16500000 17000000 17500000 18000000 18500000 19000000
## 20 12 14 21 17 13 9
## 19500000 20000000 20500000 21000000 21500000 22000000 22500000
## 11 16 5 18 10 9 10
## 23000000 23500000 24000000 24500000 25000000 25500000 26000000
## 3 4 10 8 7 6 12
## 26500000 27000000 27500000 28000000 28500000 29000000 29500000
## 8 7 4 1 7 5 7
## 30000000 30500000 31000000 31500000 32000000 32500000 33000000
## 8 8 2 5 4 3 4
## 34000000 34500000 35000000 35500000 36000000 36500000 37000000
## 10 2 1 3 1 3 3
## 37500000 38000000 38500000 39000000 40000000 40500000 41000000
## 3 4 1 3 1 3 1
## 41500000 42000000 42500000 43000000 43500000 44000000 44500000
## 1 1 2 1 2 2 1
## 45000000 45500000 46000000 46500000 50000000 50500000 51000000
## 4 1 1 4 2 2 2
## 51500000 52000000 53000000 53500000 54000000 55000000 56500000
## 1 2 1 1 1 1 1
## 57000000 58000000 59000000 59500000 60000000 61000000 62000000
## 1 1 1 2 3 1 1
## 62500000 63000000 64000000 64500000 67000000 68000000 69500000
## 1 1 1 2 1 1 2
## 72000000 73500000 76500000 77000000 78000000 80000000 81000000
## 1 2 1 2 1 1 1
## 83500000 89000000 93000000 102000000 110500000 118500000
## 1 1 1 1 1 1
# Market value: percentage of players at each value, rounded to two decimals
propValue <- round(100 * prop.table(table(Value = football.df$Value)), 2)
propValue
## Value
## 10000 20000 30000 40000 50000 60000 70000
## 0.09 0.13 0.14 0.38 0.75 0.89 0.84
## 80000 90000 100000 110000 120000 130000 140000
## 0.68 0.82 1.02 1.03 1.12 1.17 1.17
## 150000 160000 170000 180000 190000 200000 210000
## 1.01 1.18 0.99 1.16 0.84 1.00 0.81
## 220000 230000 240000 250000 260000 270000 280000
## 0.94 0.79 0.81 0.87 0.89 0.92 0.70
## 290000 300000 325000 350000 375000 400000 425000
## 0.75 1.34 1.94 1.83 2.13 1.83 1.98
## 450000 475000 500000 525000 550000 575000 600000
## 1.87 1.68 1.68 1.86 1.57 1.46 1.68
## 625000 650000 675000 700000 725000 750000 775000
## 1.32 1.24 1.53 1.15 1.33 1.22 0.99
## 800000 825000 850000 875000 900000 925000 950000
## 0.88 1.09 0.98 0.94 1.04 0.74 0.90
## 975000 1000000 1100000 1200000 1300000 1400000 1500000
## 0.71 1.71 2.30 1.74 1.33 1.21 0.68
## 1600000 1700000 1800000 1900000 2000000 2100000 2200000
## 0.83 0.64 1.05 0.61 0.53 0.57 0.58
## 2300000 2400000 2500000 2600000 2700000 2800000 2900000
## 0.63 0.82 0.64 0.39 0.53 0.50 0.34
## 3000000 3100000 3200000 3300000 3400000 3500000 3600000
## 0.50 0.45 0.71 0.32 0.34 0.26 0.33
## 3700000 3800000 3900000 4000000 4100000 4200000 4300000
## 0.28 0.36 0.44 0.36 0.25 0.37 0.28
## 4400000 4500000 4600000 4700000 4800000 4900000 5000000
## 0.34 0.27 0.15 0.22 0.23 0.25 0.67
## 5500000 6000000 6500000 7000000 7500000 8000000 8500000
## 1.03 0.88 0.91 0.91 0.68 0.72 0.66
## 9000000 9500000 10000000 10500000 11000000 11500000 12000000
## 0.61 0.39 0.46 0.38 0.22 0.24 0.35
## 12500000 13000000 13500000 14000000 14500000 15000000 15500000
## 0.26 0.22 0.12 0.22 0.14 0.13 0.17
## 16000000 16500000 17000000 17500000 18000000 18500000 19000000
## 0.12 0.07 0.08 0.13 0.10 0.08 0.05
## 19500000 20000000 20500000 21000000 21500000 22000000 22500000
## 0.07 0.10 0.03 0.11 0.06 0.05 0.06
## 23000000 23500000 24000000 24500000 25000000 25500000 26000000
## 0.02 0.02 0.06 0.05 0.04 0.04 0.07
## 26500000 27000000 27500000 28000000 28500000 29000000 29500000
## 0.05 0.04 0.02 0.01 0.04 0.03 0.04
## 30000000 30500000 31000000 31500000 32000000 32500000 33000000
## 0.05 0.05 0.01 0.03 0.02 0.02 0.02
## 34000000 34500000 35000000 35500000 36000000 36500000 37000000
## 0.06 0.01 0.01 0.02 0.01 0.02 0.02
## 37500000 38000000 38500000 39000000 40000000 40500000 41000000
## 0.02 0.02 0.01 0.02 0.01 0.02 0.01
## 41500000 42000000 42500000 43000000 43500000 44000000 44500000
## 0.01 0.01 0.01 0.01 0.01 0.01 0.01
## 45000000 45500000 46000000 46500000 50000000 50500000 51000000
## 0.02 0.01 0.01 0.02 0.01 0.01 0.01
## 51500000 52000000 53000000 53500000 54000000 55000000 56500000
## 0.01 0.01 0.01 0.01 0.01 0.01 0.01
## 57000000 58000000 59000000 59500000 60000000 61000000 62000000
## 0.01 0.01 0.01 0.01 0.02 0.01 0.01
## 62500000 63000000 64000000 64500000 67000000 68000000 69500000
## 0.01 0.01 0.01 0.01 0.01 0.01 0.01
## 72000000 73500000 76500000 77000000 78000000 80000000 81000000
## 0.01 0.01 0.01 0.01 0.01 0.01 0.01
## 83500000 89000000 93000000 102000000 110500000 118500000
## 0.01 0.01 0.01 0.01 0.01 0.01
# Bar chart of player counts per age.
# plot() on a numeric vector draws value-against-index points, not bars, so
# the counts are tabulated first and handed to barplot().
barplot(table(football.df$Age),
        xlab = "Age", ylab = "Count", main = "Bar Chart for Age")
# Bar chart of player counts per position.
# Tabulating explicitly works whether Position is stored as a factor or as a
# character column; plot() only draws bars for a factor.
barplot(table(football.df$Position),
        xlab = "Position", ylab = "Count", main = "Bar Chart for Position")
# Histograms of the numeric attributes. Titles and axis labels are always
# passed explicitly, so the way the column is referenced does not affect the
# annotations.

# Wage
hist(x = football.df$Wage,
     main = "Histogram of wage", xlab = "Wage", ylab = "Frequency")
# Overall rating
hist(x = football.df$Overall,
     main = "Histogram of Overall Rating",
     xlab = "Overall Rating", ylab = "Frequency")
# Ball control
hist(x = football.df$BallControl,
     main = "Histogram of Ball Control",
     xlab = "Ball Control", ylab = "Frequency")
# Stamina
hist(x = football.df$Stamina,
     main = "Histogram of Stamina", xlab = "Stamina", ylab = "Frequency")
# Agility
hist(x = football.df$Agility,
     main = "Histogram of Agility", xlab = "Agility", ylab = "Frequency")
# Market value
hist(x = football.df$Value,
     main = "Histogram of value", xlab = "Value", ylab = "Count")
# Short passing
hist(x = football.df$ShortPassing,
     main = "Histogram of short passing", xlab = "Shortpassing", ylab = "Count")
# Dribbling
hist(x = football.df$Dribbling,
     main = "Histogram of Dribbling", xlab = "Dribbling", ylab = "Count")
# Standing tackle
hist(x = football.df$StandingTackle,
     main = "Histogram of StandingTackle",
     xlab = "StandingTackle", ylab = "Count")
# Horizontal box plots, one per numeric attribute, all drawn in light blue.

# Overall rating
boxplot(football.df$Overall,
        main = "Boxplot for Overall Rating", xlab = "Overall rating",
        horizontal = TRUE, width = 0.5, col = "lightblue")
# Wage
boxplot(football.df$Wage,
        main = "Boxplot for Wage", xlab = "Wage",
        horizontal = TRUE, width = 0.5, col = "lightblue")
# Ball control
boxplot(football.df$BallControl,
        main = "Boxplot for Ball Control", xlab = "Ball Control",
        horizontal = TRUE, width = 0.5, col = "lightblue")
# Stamina
boxplot(football.df$Stamina,
        main = "Boxplot for Stamina", xlab = "Stamina",
        horizontal = TRUE, width = 0.5, col = "lightblue")
# Agility
boxplot(football.df$Agility,
        main = "Boxplot for Agility", xlab = "Agility",
        horizontal = TRUE, width = 0.5, col = "lightblue")
# Market value
boxplot(football.df$Value,
        main = "Boxplot for value", xlab = "Value",
        horizontal = TRUE, width = 0.5, col = "lightblue")
# Short passing
boxplot(football.df$ShortPassing,
        main = "Boxplot for short passing", xlab = "short passing",
        horizontal = TRUE, width = 0.5, col = "lightblue")
# Dribbling
boxplot(football.df$Dribbling,
        main = "Boxplot for Dribbling", xlab = "Dribbling",
        horizontal = TRUE, width = 0.5, col = "lightblue")
# Horizontal box plot of the standing-tackle score.
# The axis label previously read "Standing tacle" (typo).
boxplot(football.df$StandingTackle, width = 0.5,
        horizontal = TRUE, main = "Boxplot for Standing Tackle",
        xlab = "Standing tackle", col = "lightblue")
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.1
##
## Attaching package: 'ggplot2'
## The following object is masked from 'football.df':
##
## Position
# Frequency polygons (ggplot2), one per numeric attribute. No bin settings are
# supplied, so each call emits the stat_bin() message recorded beneath it.

# Wage
ggplot(football.df, aes(x = Wage)) + geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Ball control
ggplot(football.df, aes(x = BallControl)) + geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Overall rating
ggplot(football.df, aes(x = Overall)) + geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Agility
ggplot(football.df, aes(x = Agility)) + geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Stamina
ggplot(football.df, aes(x = Stamina)) + geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Market value
ggplot(football.df, aes(x = Value)) + geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Short passing
ggplot(football.df, aes(x = ShortPassing)) + geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Dribbling
ggplot(football.df, aes(x = Dribbling)) + geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Standing tackle
ggplot(football.df, aes(x = StandingTackle)) + geom_freqpoly()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Pie chart of the age distribution; one slice per distinct age
pie(table(football.df$Age), main = "Pie Chart of Age")
# Pie chart of the position distribution.
# Player count per position level.
tab <- table(Position.modified)
# Display label for each raw level. Labels are looked up by level name, so a
# label can never end up on the wrong slice if the level order changes. (The
# earlier version matched labels to slices purely by position and stored them
# in a global called `labels`, shadowing base::labels.)
position_labels <- c(
  "defence" = "Defense",
  "Forward" = "Forward",
  "goal keeper" = "Goal Keeper",
  "Midfield" = "Midfield"
)
slice_labels <- unname(position_labels[names(tab)])
# Fall back to the raw level name for any level without a display label.
slice_labels[is.na(slice_labels)] <- names(tab)[is.na(slice_labels)]
pie(as.vector(tab), labels = slice_labels,
    main = "Pie Chart of Position Modified ")
library(psych)
## Warning: package 'psych' was built under R version 3.6.1
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
# Descriptive statistics (psych::describe) for columns 4 to 15 of football.df
# Bind a factor named Position in the global environment; the bare name is
# otherwise masked by the object of the same name exported by ggplot2.
Position <- factor(Position.modified)
describe(football.df[, 4:15])
## vars n mean sd median trimmed
## Age 1 16643 25.23 4.72 25 25.03
## Overall 2 16643 66.16 7.01 66 66.14
## Value 3 16643 2442666.59 5720628.56 675000 1215337.59
## Wage 4 16643 9618.04 22263.52 3000 5007.66
## Position* 5 16643 2.46 1.24 2 2.45
## ShortPassing 6 16643 58.54 14.81 62 60.38
## Dribbling 7 16643 55.10 19.01 61 57.52
## BallControl 8 16643 58.14 16.79 63 60.57
## Agility 9 16643 63.38 14.81 66 64.40
## Stamina 10 16643 63.16 16.06 66 64.82
## StandingTackle 11 16643 47.78 21.68 55 48.62
## Release.Clause 12 16643 4585060.99 11118717.76 1100000 2177437.93
## mad min max range skew kurtosis
## Age 5.93 16 45 29 0.34 -0.56
## Overall 7.41 46 94 48 0.08 0.08
## Value 689409.00 10000 118500000 118490000 7.00 74.82
## Wage 2965.20 1000 565000 564000 7.97 101.38
## Position* 1.48 1 4 3 0.12 -1.61
## ShortPassing 10.38 7 93 86 -1.08 0.73
## Dribbling 13.34 4 97 93 -1.07 0.28
## BallControl 10.38 5 96 91 -1.25 0.97
## Agility 13.34 14 96 82 -0.59 -0.08
## Stamina 13.34 12 96 84 -0.91 0.43
## StandingTackle 23.72 2 93 91 -0.35 -1.30
## Release.Clause 1149015.00 13000 228100000 228087000 7.11 77.11
## se
## Age 0.04
## Overall 0.05
## Value 44343.29
## Wage 172.58
## Position* 0.01
## ShortPassing 0.11
## Dribbling 0.15
## BallControl 0.13
## Agility 0.11
## Stamina 0.12
## StandingTackle 0.17
## Release.Clause 86186.43
# Mean of each numeric attribute column (positions 4-7 and 9-14)
vapply(football.df[c(4:7, 9:14)], mean, numeric(1))
## Age Overall Value Wage ShortPassing
## 2.522622e+01 6.616277e+01 2.442667e+06 9.618038e+03 5.854395e+01
## Dribbling BallControl Agility Stamina StandingTackle
## 5.510473e+01 5.813627e+01 6.338070e+01 6.316001e+01 4.777630e+01
# Mean of every numeric attribute within each position group.
# Column 8 (Position) is left out of the data being averaged: it is the
# grouping variable itself, and taking the mean of a factor only produced an
# all-NA column plus one warning per group.
aggregate(football.df[, c(4:7, 9:15)], by = list(Position), mean)
## Group.1 Age Overall Value Wage ShortPassing
## 1 defence 25.58621 66.40337 2018687 9214.665 59.16105
## 2 Forward 24.78975 66.41949 3056410 11207.219 61.43362
## 3 goal keeper 26.09632 64.46316 1597268 6726.842 27.22526
## 4 Midfield 24.89478 66.33400 2699321 9858.986 66.68490
## Dribbling BallControl Agility Stamina StandingTackle Release.Clause
## 1 52.10269 57.53278 60.02539 67.98352 66.77283 3789796
## 2 65.62670 65.94043 69.86320 64.23725 30.89197 5701532
## 3 13.81474 19.91263 40.22211 30.36053 14.10947 2997590
## 4 64.66455 66.29366 69.92749 69.08268 53.33673 5092084
# Read the same CSV again with data.table::fread(); the column names it
# produces are listed below (note "Release Clause" contains a space).
df <- data.table::fread("data_clean.csv")
names(df)
## [1] "V1" "ID" "Name" "Age"
## [5] "Overall" "Value" "Wage" "Position"
## [9] "ShortPassing" "Dribbling" "BallControl" "Agility"
## [13] "Stamina" "StandingTackle" "Release Clause"
# Put the columns of df on the search path so the statements that follow can
# use bare column names; the masking this causes is reported right after.
attach(df)
## The following object is masked _by_ .GlobalEnv:
##
## Position
## The following object is masked from package:ggplot2:
##
## Position
## The following objects are masked from football.df:
##
## Age, Agility, BallControl, Dribbling, ID, Name, Overall,
## Position, ShortPassing, Stamina, StandingTackle, Value, Wage
# Scatter plots of each attribute against the release clause.
# Columns are taken from df explicitly instead of through attach(), so the
# plots cannot silently pick up a masked object of the same name.
# Two axis labels were corrected: "Dribling" -> "Dribbling" and the stray
# leading space in " Market value".

# Age
plot(df$Age, df[["Release Clause"]],
     xlab = "Age", ylab = "Release clause")
# Wage
plot(df$Wage, df[["Release Clause"]],
     xlab = "Wage", ylab = "Release clause")
# Overall rating
plot(df$Overall, df[["Release Clause"]],
     xlab = "Overall Rating", ylab = "Release clause")
# Ball control
plot(df$BallControl, df[["Release Clause"]],
     xlab = "ball control", ylab = "Release clause")
# Agility
plot(df$Agility, df[["Release Clause"]],
     xlab = "Agility", ylab = "Release clause")
# Stamina
plot(df$Stamina, df[["Release Clause"]],
     xlab = "stamina", ylab = "Release clause")
# Standing tackle
plot(df$StandingTackle, df[["Release Clause"]],
     xlab = "StandingTackle", ylab = "Release clause")
# Market value
plot(df$Value, df[["Release Clause"]],
     xlab = "Market value", ylab = "Release clause")
# Short passing
plot(df$ShortPassing, df[["Release Clause"]],
     xlab = "Short Passing", ylab = "Release clause")
# Dribbling
plot(df$Dribbling, df[["Release Clause"]],
     xlab = "Dribbling", ylab = "Release clause")
# Box plot of release clause grouped by player position.
# `data = df` makes both variables come from df. Without it the formula is
# resolved through the search path, where attach() reported that a global
# `Position` masks df's column.
boxplot(`Release Clause` ~ Position,
        data = df,
        main = "Boxplot for Variable Release clause grouped by position",
        col = c("white", "red", "gray", "lightblue"))
# Summary statistics (n, mean, sd, median, ...) for every column of df.
# NOTE(review): Name and Position appear to be non-numeric, which is presumably
# why the output below carries coercion warnings and NaN/Inf rows for them.
psych::describe(df)
## Warning in psych::describe(df): NAs introduced by coercion
## Warning in psych::describe(df): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
## vars n mean sd median trimmed
## V1 1 16643 9168.65 5297.15 9207 9182.53
## ID 2 16643 213845.01 30546.29 221493 218014.15
## Name* 3 16643 NaN NA NA NaN
## Age 4 16643 25.23 4.72 25 25.03
## Overall 5 16643 66.16 7.01 66 66.14
## Value 6 16643 2442666.59 5720628.56 675000 1215337.59
## Wage 7 16643 9618.04 22263.52 3000 5007.66
## Position* 8 16643 NaN NA NA NaN
## ShortPassing 9 16643 58.54 14.81 62 60.38
## Dribbling 10 16643 55.10 19.01 61 57.52
## BallControl 11 16643 58.14 16.79 63 60.57
## Agility 12 16643 63.38 14.81 66 64.40
## Stamina 13 16643 63.16 16.06 66 64.82
## StandingTackle 14 16643 47.78 21.68 55 48.62
## Release Clause 15 16643 4585060.99 11118717.76 1100000 2177437.93
## mad min max range skew kurtosis
## V1 6839.23 0 18206 18206 -0.02 -1.21
## ID 26408.07 16 246620 246604 -2.23 9.21
## Name* NA Inf -Inf -Inf NA NA
## Age 5.93 16 45 29 0.34 -0.56
## Overall 7.41 46 94 48 0.08 0.08
## Value 689409.00 10000 118500000 118490000 7.00 74.82
## Wage 2965.20 1000 565000 564000 7.97 101.38
## Position* NA Inf -Inf -Inf NA NA
## ShortPassing 10.38 7 93 86 -1.08 0.73
## Dribbling 13.34 4 97 93 -1.07 0.28
## BallControl 10.38 5 96 91 -1.25 0.97
## Agility 13.34 14 96 82 -0.59 -0.08
## Stamina 13.34 12 96 84 -0.91 0.43
## StandingTackle 23.72 2 93 91 -0.35 -1.30
## Release Clause 1149015.00 13000 228100000 228087000 7.11 77.11
## se
## V1 41.06
## ID 236.78
## Name* NA
## Age 0.04
## Overall 0.05
## Value 44343.29
## Wage 172.58
## Position* NA
## ShortPassing 0.11
## Dribbling 0.15
## BallControl 0.13
## Agility 0.11
## Stamina 0.12
## StandingTackle 0.17
## Release Clause 86186.43
# Keep only the continuous variables used in the correlation analysis.
# The column vector is written inline on purpose: df comes from
# data.table::fread, and data.table treats a literal c("...") in j as a
# column selection.
Subset.df <- df[, c(
  "Wage", "Age", "Overall", "BallControl", "Stamina", "Agility",
  "StandingTackle", "Value", "ShortPassing", "Dribbling"
)]
# Correlation matrix of Subset.df, computed on complete rows only.
corMat <- cor(Subset.df, use = "complete.obs")
# Show the matrix rounded to 3 decimal places.
round(corMat, 3)
## Wage Age Overall BallControl Stamina Agility
## Wage 1.000 0.149 0.574 0.276 0.181 0.155
## Age 0.149 1.000 0.465 0.096 0.104 -0.013
## Overall 0.574 0.465 1.000 0.463 0.371 0.268
## BallControl 0.276 0.096 0.463 1.000 0.734 0.705
## Stamina 0.181 0.104 0.371 0.734 1.000 0.573
## Agility 0.155 -0.013 0.268 0.705 0.573 1.000
## StandingTackle 0.134 0.122 0.262 0.432 0.578 0.139
## Value 0.862 0.076 0.629 0.310 0.214 0.196
## ShortPassing 0.297 0.142 0.505 0.913 0.721 0.614
## Dribbling 0.234 0.019 0.374 0.939 0.692 0.765
## StandingTackle Value ShortPassing Dribbling
## Wage 0.134 0.862 0.297 0.234
## Age 0.122 0.076 0.142 0.019
## Overall 0.262 0.629 0.505 0.374
## BallControl 0.432 0.310 0.913 0.939
## Stamina 0.578 0.214 0.721 0.692
## Agility 0.139 0.196 0.614 0.765
## StandingTackle 1.000 0.113 0.551 0.315
## Value 0.113 1.000 0.329 0.272
## ShortPassing 0.551 0.329 1.000 0.845
## Dribbling 0.315 0.272 0.845 1.000
# Significance tests for the pairwise correlations in Subset.df; prints the
# correlation matrix, the sample size and the p-values.
library(psych)
corr.test(x = Subset.df, use = "complete")
## Call:corr.test(x = Subset.df, use = "complete")
## Correlation matrix
## Wage Age Overall BallControl Stamina Agility
## Wage 1.00 0.15 0.57 0.28 0.18 0.15
## Age 0.15 1.00 0.46 0.10 0.10 -0.01
## Overall 0.57 0.46 1.00 0.46 0.37 0.27
## BallControl 0.28 0.10 0.46 1.00 0.73 0.71
## Stamina 0.18 0.10 0.37 0.73 1.00 0.57
## Agility 0.15 -0.01 0.27 0.71 0.57 1.00
## StandingTackle 0.13 0.12 0.26 0.43 0.58 0.14
## Value 0.86 0.08 0.63 0.31 0.21 0.20
## ShortPassing 0.30 0.14 0.51 0.91 0.72 0.61
## Dribbling 0.23 0.02 0.37 0.94 0.69 0.77
## StandingTackle Value ShortPassing Dribbling
## Wage 0.13 0.86 0.30 0.23
## Age 0.12 0.08 0.14 0.02
## Overall 0.26 0.63 0.51 0.37
## BallControl 0.43 0.31 0.91 0.94
## Stamina 0.58 0.21 0.72 0.69
## Agility 0.14 0.20 0.61 0.77
## StandingTackle 1.00 0.11 0.55 0.31
## Value 0.11 1.00 0.33 0.27
## ShortPassing 0.55 0.33 1.00 0.85
## Dribbling 0.31 0.27 0.85 1.00
## Sample Size
## [1] 16643
## Probability values (Entries above the diagonal are adjusted for multiple tests.)
## Wage Age Overall BallControl Stamina Agility
## Wage 0 0.00 0 0 0 0.0
## Age 0 0.00 0 0 0 0.1
## Overall 0 0.00 0 0 0 0.0
## BallControl 0 0.00 0 0 0 0.0
## Stamina 0 0.00 0 0 0 0.0
## Agility 0 0.10 0 0 0 0.0
## StandingTackle 0 0.00 0 0 0 0.0
## Value 0 0.00 0 0 0 0.0
## ShortPassing 0 0.00 0 0 0 0.0
## Dribbling 0 0.01 0 0 0 0.0
## StandingTackle Value ShortPassing Dribbling
## Wage 0 0 0 0.00
## Age 0 0 0 0.03
## Overall 0 0 0 0.00
## BallControl 0 0 0 0.00
## Stamina 0 0 0 0.00
## Agility 0 0 0 0.00
## StandingTackle 0 0 0 0.00
## Value 0 0 0 0.00
## ShortPassing 0 0 0 0.00
## Dribbling 0 0 0 0.00
##
## To see confidence intervals of the correlations, print with the short=FALSE option
# correlogram
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.6.1
## corrplot 0.84 loaded
# Draw the correlogram of Subset.df twice: first with circles, then with the
# correlation coefficients printed in each cell.
invisible(lapply(
  c("circle", "number"),
  function(display) corrplot(cor(Subset.df), method = display)
))
# Same describe() call as earlier in the script, repeated here before the
# regression; the output recorded below is identical to the earlier run.
# NOTE(review): the coercion warnings and NaN/Inf rows again appear to come
# from the non-numeric Name and Position columns.
psych::describe(df)
## Warning in psych::describe(df): NAs introduced by coercion
## Warning in psych::describe(df): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
## vars n mean sd median trimmed
## V1 1 16643 9168.65 5297.15 9207 9182.53
## ID 2 16643 213845.01 30546.29 221493 218014.15
## Name* 3 16643 NaN NA NA NaN
## Age 4 16643 25.23 4.72 25 25.03
## Overall 5 16643 66.16 7.01 66 66.14
## Value 6 16643 2442666.59 5720628.56 675000 1215337.59
## Wage 7 16643 9618.04 22263.52 3000 5007.66
## Position* 8 16643 NaN NA NA NaN
## ShortPassing 9 16643 58.54 14.81 62 60.38
## Dribbling 10 16643 55.10 19.01 61 57.52
## BallControl 11 16643 58.14 16.79 63 60.57
## Agility 12 16643 63.38 14.81 66 64.40
## Stamina 13 16643 63.16 16.06 66 64.82
## StandingTackle 14 16643 47.78 21.68 55 48.62
## Release Clause 15 16643 4585060.99 11118717.76 1100000 2177437.93
## mad min max range skew kurtosis
## V1 6839.23 0 18206 18206 -0.02 -1.21
## ID 26408.07 16 246620 246604 -2.23 9.21
## Name* NA Inf -Inf -Inf NA NA
## Age 5.93 16 45 29 0.34 -0.56
## Overall 7.41 46 94 48 0.08 0.08
## Value 689409.00 10000 118500000 118490000 7.00 74.82
## Wage 2965.20 1000 565000 564000 7.97 101.38
## Position* NA Inf -Inf -Inf NA NA
## ShortPassing 10.38 7 93 86 -1.08 0.73
## Dribbling 13.34 4 97 93 -1.07 0.28
## BallControl 10.38 5 96 91 -1.25 0.97
## Agility 13.34 14 96 82 -0.59 -0.08
## Stamina 13.34 12 96 84 -0.91 0.43
## StandingTackle 23.72 2 93 91 -0.35 -1.30
## Release Clause 1149015.00 13000 228100000 228087000 7.11 77.11
## se
## V1 41.06
## ID 236.78
## Name* NA
## Age 0.04
## Overall 0.05
## Value 44343.29
## Wage 172.58
## Position* NA
## ShortPassing 0.11
## Dribbling 0.15
## BallControl 0.13
## Agility 0.11
## Stamina 0.12
## StandingTackle 0.17
## Release Clause 86186.43
# Multiple linear regression of release clause on age, overall rating,
# market value, wage, position and the individual skill attributes.
model2 <- lm(
  formula = `Release Clause` ~ Age + Overall + Value + Wage + Position +
    ShortPassing + Dribbling + BallControl + Agility + Stamina +
    StandingTackle,
  data = df
)
# Residual summary, coefficient table and fit statistics for model2.
summary(model2)
##
## Call:
## lm(formula = `Release Clause` ~ Age + Overall + Value + Wage +
## Position + ShortPassing + Dribbling + BallControl + Agility +
## Stamina + StandingTackle, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -21870952 -185410 -13073 219380 16897522
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.440e+06 1.165e+05 12.360 < 2e-16 ***
## Age -3.136e+04 2.344e+03 -13.381 < 2e-16 ***
## Overall -8.248e+03 2.644e+03 -3.120 0.00181 **
## Value 1.941e+00 3.469e-03 559.528 < 2e-16 ***
## Wage 4.806e-01 8.133e-01 0.591 0.55460
## PositionForward -9.013e+04 4.038e+04 -2.232 0.02565 *
## Positiongoal keeper -3.128e+04 7.953e+04 -0.393 0.69409
## PositionMidfield -2.061e+04 2.962e+04 -0.696 0.48660
## ShortPassing 2.719e+02 1.769e+03 0.154 0.87787
## Dribbling 1.174e+03 1.612e+03 0.728 0.46651
## BallControl 1.014e+03 2.343e+03 0.433 0.66512
## Agility -1.095e+03 9.934e+02 -1.103 0.27017
## Stamina -6.389e+03 9.964e+02 -6.412 1.48e-10 ***
## StandingTackle 2.127e+03 8.944e+02 2.378 0.01740 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1164000 on 16629 degrees of freedom
## Multiple R-squared: 0.989, Adjusted R-squared: 0.989
## F-statistic: 1.155e+05 on 13 and 16629 DF, p-value: < 2.2e-16