Reading Data

# Load the cleaned data set.
# stringsAsFactors = TRUE is spelled out because the analysis below treats
# Position as a factor (levels(Position), plot() of the column as a bar chart).
# It was the read.csv() default before R 4.0.0; newer versions return
# character columns unless it is requested explicitly.
football.df <- read.csv('data_clean.csv', stringsAsFactors = TRUE)
# The sections below refer to columns by bare name (Wage, Age, ...), so the
# data frame is attached to the search path.
# NOTE(review): attach() is fragile (packages loaded later can mask columns);
# prefer football.df$col or with() once the rest of the script is migrated.
attach(football.df)

Structure of the data

# Show the structure of the loaded data frame. No object called `df` is
# created by this script, so a bare `df` resolves to stats::df (the density
# function of the F distribution) instead of the data.
str(football.df)
## (stale output removed: it showed the signature of stats::df; re-run to regenerate)

1. Univariate Analysis

1.1 Frequency Distribution Tables

# Frequency table and proportion table for Wage.
# table() counts how many observations fall on each distinct Wage value.
table(Wage)
## Wage
##   1000   2000   3000   4000   5000   6000   7000   8000   9000  10000 
##   4770   2704   1734   1138    763    614    430    386    287    270 
##  11000  12000  13000  14000  15000  16000  17000  18000  19000  20000 
##    256    215    196    167    173    120    143    133    115    120 
##  21000  22000  23000  24000  25000  26000  27000  28000  29000  30000 
##     99    115     81     91     81     82     55     53     53     50 
##  31000  32000  33000  34000  35000  36000  37000  38000  39000  40000 
##     50     49     30     42     43     28     23     35     36     29 
##  41000  42000  43000  44000  45000  46000  47000  48000  49000  50000 
##     37     23     33     33     22     37     19     23     14     20 
##  51000  52000  53000  54000  55000  56000  57000  58000  59000  60000 
##     23      8     23     17     15     13      8     11     11      8 
##  61000  62000  63000  64000  65000  66000  67000  68000  69000  70000 
##     10      5      5     11     11      8      6      7      3      4 
##  71000  72000  73000  74000  75000  76000  77000  78000  79000  80000 
##      2      3      8      7      5     10      8      4      4      5 
##  81000  82000  83000  84000  85000  86000  87000  88000  89000  90000 
##      4      5      2      6      8      3      3      5      3      3 
##  91000  92000  93000  94000  95000  96000  97000  98000  99000 100000 
##      7      5      1      7      4      4      2      4      4      3 
## 105000 110000 115000 120000 125000 130000 135000 140000 145000 150000 
##     11     15     18     10     13     10      7      9      3      3 
## 155000 160000 165000 170000 175000 180000 185000 190000 195000 200000 
##      3      7      7      3      4      4      2      1      5      1 
## 205000 210000 215000 225000 230000 240000 250000 255000 260000 265000 
##      8      2      3      1      1      4      1      1      2      1 
## 285000 290000 300000 315000 340000 355000 380000 405000 420000 455000 
##      3      1      1      2      2      3      1      1      1      1 
## 565000 
##      1
# Share of observations at each Wage value, in percent (2 decimals).
propWage <- round(100 * prop.table(table(Wage)), digits = 2)
propWage
## Wage
##   1000   2000   3000   4000   5000   6000   7000   8000   9000  10000 
##  28.66  16.25  10.42   6.84   4.58   3.69   2.58   2.32   1.72   1.62 
##  11000  12000  13000  14000  15000  16000  17000  18000  19000  20000 
##   1.54   1.29   1.18   1.00   1.04   0.72   0.86   0.80   0.69   0.72 
##  21000  22000  23000  24000  25000  26000  27000  28000  29000  30000 
##   0.59   0.69   0.49   0.55   0.49   0.49   0.33   0.32   0.32   0.30 
##  31000  32000  33000  34000  35000  36000  37000  38000  39000  40000 
##   0.30   0.29   0.18   0.25   0.26   0.17   0.14   0.21   0.22   0.17 
##  41000  42000  43000  44000  45000  46000  47000  48000  49000  50000 
##   0.22   0.14   0.20   0.20   0.13   0.22   0.11   0.14   0.08   0.12 
##  51000  52000  53000  54000  55000  56000  57000  58000  59000  60000 
##   0.14   0.05   0.14   0.10   0.09   0.08   0.05   0.07   0.07   0.05 
##  61000  62000  63000  64000  65000  66000  67000  68000  69000  70000 
##   0.06   0.03   0.03   0.07   0.07   0.05   0.04   0.04   0.02   0.02 
##  71000  72000  73000  74000  75000  76000  77000  78000  79000  80000 
##   0.01   0.02   0.05   0.04   0.03   0.06   0.05   0.02   0.02   0.03 
##  81000  82000  83000  84000  85000  86000  87000  88000  89000  90000 
##   0.02   0.03   0.01   0.04   0.05   0.02   0.02   0.03   0.02   0.02 
##  91000  92000  93000  94000  95000  96000  97000  98000  99000 100000 
##   0.04   0.03   0.01   0.04   0.02   0.02   0.01   0.02   0.02   0.02 
## 105000 110000 115000 120000 125000 130000 135000 140000 145000 150000 
##   0.07   0.09   0.11   0.06   0.08   0.06   0.04   0.05   0.02   0.02 
## 155000 160000 165000 170000 175000 180000 185000 190000 195000 200000 
##   0.02   0.04   0.04   0.02   0.02   0.02   0.01   0.01   0.03   0.01 
## 205000 210000 215000 225000 230000 240000 250000 255000 260000 265000 
##   0.05   0.01   0.02   0.01   0.01   0.02   0.01   0.01   0.01   0.01 
## 285000 290000 300000 315000 340000 355000 380000 405000 420000 455000 
##   0.02   0.01   0.01   0.01   0.01   0.02   0.01   0.01   0.01   0.01 
## 565000 
##   0.01
# Frequency table and proportion table for Age.
# table() counts how many observations fall on each distinct Age value.
table(Age)
## Age
##   16   17   18   19   20   21   22   23   24   25   26   27   28   29   30 
##   41  284  701  926 1091 1220 1163 1189 1206 1196 1272 1078 1027  897  877 
##   31   32   33   34   35   36   37   38   39   40   41   42   45 
##  667  553  392  397  190  125   80   36   24    7    2    1    1
# Share of observations at each Age value, in percent (2 decimals).
propAge <- round(100 * prop.table(table(Age)), digits = 2)
propAge
## Age
##   16   17   18   19   20   21   22   23   24   25   26   27   28   29   30 
## 0.25 1.71 4.21 5.56 6.56 7.33 6.99 7.14 7.25 7.19 7.64 6.48 6.17 5.39 5.27 
##   31   32   33   34   35   36   37   38   39   40   41   42   45 
## 4.01 3.32 2.36 2.39 1.14 0.75 0.48 0.22 0.14 0.04 0.01 0.01 0.01
# Frequency table and proportion table for Overall Rating.
# table() counts how many observations fall on each distinct Overall value.
table(Overall)
## Overall
##   46   47   48   49   50   51   52   53   54   55   56   57   58   59   60 
##    1   20   31   36  100  123  158  191  240  254  324  373  425  486  604 
##   61   62   63   64   65   66   67   68   69   70   71   72   73   74   75 
##  665  765  920 1014  932 1055 1019  926  865  800  707  672  544  474  446 
##   76   77   78   79   80   81   82   83   84   85   86   87   88   89   90 
##  340  280  153  162  134   90   94   68   45   32   22   13   15   11    5 
##   91   92   94 
##    6    1    2
# Share of observations at each Overall value, in percent (2 decimals).
propOverall <- round(100 * prop.table(table(Overall)), digits = 2)
propOverall
## Overall
##   46   47   48   49   50   51   52   53   54   55   56   57   58   59   60 
## 0.01 0.12 0.19 0.22 0.60 0.74 0.95 1.15 1.44 1.53 1.95 2.24 2.55 2.92 3.63 
##   61   62   63   64   65   66   67   68   69   70   71   72   73   74   75 
## 4.00 4.60 5.53 6.09 5.60 6.34 6.12 5.56 5.20 4.81 4.25 4.04 3.27 2.85 2.68 
##   76   77   78   79   80   81   82   83   84   85   86   87   88   89   90 
## 2.04 1.68 0.92 0.97 0.81 0.54 0.56 0.41 0.27 0.19 0.13 0.08 0.09 0.07 0.03 
##   91   92   94 
## 0.04 0.01 0.01
# Frequency table and proportion table for Ball Control.
# table() counts how many observations fall on each distinct BallControl value.
table(BallControl)
## BallControl
##   5   8   9  10  11  12  13  14  15  16  17  18  19  20  21  22  23  24 
##   1   2   5  46  56  89  74  89 107 103 105 143 120 131 110 143 142 107 
##  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42 
##  83  38  31  41  41  55  41  46  52  37  55  34  32  50  49  64  66  74 
##  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60 
##  84  87 113  88 100 176 152 179 189 257 263 283 383 332 375 493 463 602 
##  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78 
## 553 678 660 704 725 658 636 642 522 570 421 441 389 395 322 230 194 180 
##  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93  94  95  96 
## 121 133  79  65  69  47  38  27  14  15   8   9   5   4   2   3   2   1
# Share of observations at each BallControl value, in percent (2 decimals).
propBallControl <- round(100 * prop.table(table(BallControl)), digits = 2)
propBallControl
## BallControl
##    5    8    9   10   11   12   13   14   15   16   17   18   19   20   21 
## 0.01 0.01 0.03 0.28 0.34 0.53 0.44 0.53 0.64 0.62 0.63 0.86 0.72 0.79 0.66 
##   22   23   24   25   26   27   28   29   30   31   32   33   34   35   36 
## 0.86 0.85 0.64 0.50 0.23 0.19 0.25 0.25 0.33 0.25 0.28 0.31 0.22 0.33 0.20 
##   37   38   39   40   41   42   43   44   45   46   47   48   49   50   51 
## 0.19 0.30 0.29 0.38 0.40 0.44 0.50 0.52 0.68 0.53 0.60 1.06 0.91 1.08 1.14 
##   52   53   54   55   56   57   58   59   60   61   62   63   64   65   66 
## 1.54 1.58 1.70 2.30 1.99 2.25 2.96 2.78 3.62 3.32 4.07 3.97 4.23 4.36 3.95 
##   67   68   69   70   71   72   73   74   75   76   77   78   79   80   81 
## 3.82 3.86 3.14 3.42 2.53 2.65 2.34 2.37 1.93 1.38 1.17 1.08 0.73 0.80 0.47 
##   82   83   84   85   86   87   88   89   90   91   92   93   94   95   96 
## 0.39 0.41 0.28 0.23 0.16 0.08 0.09 0.05 0.05 0.03 0.02 0.01 0.02 0.01 0.01
# Frequency table and proportion table for Agility.
# table() counts how many observations fall on each distinct Agility value.
table(Agility)
## Agility
##  14  15  18  19  20  21  22  23  24  25  26  27  28  29  30  31  32  33 
##   1   1   4   5   2  13  38  27  22  40  30  22  47  49  99 125 140 127 
##  34  35  36  37  38  39  40  41  42  43  44  45  46  47  48  49  50  51 
## 139 119 106 123 139 138 130 106 112 123 128 148 132 134 181 149 196 175 
##  52  53  54  55  56  57  58  59  60  61  62  63  64  65  66  67  68  69 
## 242 218 231 272 304 334 384 363 392 388 425 448 462 489 508 495 557 513 
##  70  71  72  73  74  75  76  77  78  79  80  81  82  83  84  85  86  87 
## 529 500 510 481 465 449 419 410 386 342 217 230 218 190 158 135 123 119 
##  88  89  90  91  92  93  94  95  96 
##  93  69 103  83  61  36  18   3   1
# Share of observations at each Agility value, in percent (2 decimals).
propAgility <- round(100 * prop.table(table(Agility)), digits = 2)
propAgility
## Agility
##   14   15   18   19   20   21   22   23   24   25   26   27   28   29   30 
## 0.01 0.01 0.02 0.03 0.01 0.08 0.23 0.16 0.13 0.24 0.18 0.13 0.28 0.29 0.59 
##   31   32   33   34   35   36   37   38   39   40   41   42   43   44   45 
## 0.75 0.84 0.76 0.84 0.72 0.64 0.74 0.84 0.83 0.78 0.64 0.67 0.74 0.77 0.89 
##   46   47   48   49   50   51   52   53   54   55   56   57   58   59   60 
## 0.79 0.81 1.09 0.90 1.18 1.05 1.45 1.31 1.39 1.63 1.83 2.01 2.31 2.18 2.36 
##   61   62   63   64   65   66   67   68   69   70   71   72   73   74   75 
## 2.33 2.55 2.69 2.78 2.94 3.05 2.97 3.35 3.08 3.18 3.00 3.06 2.89 2.79 2.70 
##   76   77   78   79   80   81   82   83   84   85   86   87   88   89   90 
## 2.52 2.46 2.32 2.05 1.30 1.38 1.31 1.14 0.95 0.81 0.74 0.72 0.56 0.41 0.62 
##   91   92   93   94   95   96 
## 0.50 0.37 0.22 0.11 0.02 0.01
# Frequency table and proportion table for Stamina.
# table() counts how many observations fall on each distinct Stamina value.
table(Stamina)
## Stamina
##  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26  27  28  29 
##   1   2   9   4  21  38  42  35  60  66  72  60  73  85  70  81 104  69 
##  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44  45  46  47 
## 130  92 124 124 139  84  69  66  90  66  92  91  86  82  88 104  59  70 
##  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62  63  64  65 
##  96 105 137 176 223 229 285 340 215 204 307 322 379 374 419 420 493 534 
##  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80  81  82  83 
## 518 566 612 620 527 506 542 486 523 509 435 392 388 321 274 212 230 177 
##  84  85  86  87  88  89  90  91  92  93  94  95  96 
## 162 157 131 105  93  85 100  73  56  26   7   2   2
# Share of observations at each Stamina value, in percent (2 decimals).
propStamina <- round(100 * prop.table(table(Stamina)), digits = 2)
propStamina
## Stamina
##   12   13   14   15   16   17   18   19   20   21   22   23   24   25   26 
## 0.01 0.01 0.05 0.02 0.13 0.23 0.25 0.21 0.36 0.40 0.43 0.36 0.44 0.51 0.42 
##   27   28   29   30   31   32   33   34   35   36   37   38   39   40   41 
## 0.49 0.62 0.41 0.78 0.55 0.75 0.75 0.84 0.50 0.41 0.40 0.54 0.40 0.55 0.55 
##   42   43   44   45   46   47   48   49   50   51   52   53   54   55   56 
## 0.52 0.49 0.53 0.62 0.35 0.42 0.58 0.63 0.82 1.06 1.34 1.38 1.71 2.04 1.29 
##   57   58   59   60   61   62   63   64   65   66   67   68   69   70   71 
## 1.23 1.84 1.93 2.28 2.25 2.52 2.52 2.96 3.21 3.11 3.40 3.68 3.73 3.17 3.04 
##   72   73   74   75   76   77   78   79   80   81   82   83   84   85   86 
## 3.26 2.92 3.14 3.06 2.61 2.36 2.33 1.93 1.65 1.27 1.38 1.06 0.97 0.94 0.79 
##   87   88   89   90   91   92   93   94   95   96 
## 0.63 0.56 0.51 0.60 0.44 0.34 0.16 0.04 0.01 0.01
# Frequency table and proportion table for Standing Tackle.
# table() counts how many observations fall on each distinct StandingTackle
# value.
table(StandingTackle)
## StandingTackle
##   2   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21 
##   1   1   3   4  22  39 148 263 343 352 362 281 215 226 259 248 232 218 
##  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39 
## 183 196 183 182 152 162 185 148 180 178 168 140 147 162 157 123 143 145 
##  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57 
## 156 135 147 106 128 146 101  89 151 135 116 119 150 169 212 225 219 231 
##  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75 
## 312 318 331 332 452 483 559 538 575 438 481 361 396 272 334 285 249 225 
##  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93 
## 177 138 119  89  70  52  43  32  32  18  15  11   5   5   6   2   1   1
# Share of observations at each StandingTackle value, in percent (2 decimals).
propstandingtackle <- round(100 * prop.table(table(StandingTackle)), digits = 2)
propstandingtackle
## StandingTackle
##    2    5    6    7    8    9   10   11   12   13   14   15   16   17   18 
## 0.01 0.01 0.02 0.02 0.13 0.23 0.89 1.58 2.06 2.12 2.18 1.69 1.29 1.36 1.56 
##   19   20   21   22   23   24   25   26   27   28   29   30   31   32   33 
## 1.49 1.39 1.31 1.10 1.18 1.10 1.09 0.91 0.97 1.11 0.89 1.08 1.07 1.01 0.84 
##   34   35   36   37   38   39   40   41   42   43   44   45   46   47   48 
## 0.88 0.97 0.94 0.74 0.86 0.87 0.94 0.81 0.88 0.64 0.77 0.88 0.61 0.53 0.91 
##   49   50   51   52   53   54   55   56   57   58   59   60   61   62   63 
## 0.81 0.70 0.72 0.90 1.02 1.27 1.35 1.32 1.39 1.87 1.91 1.99 1.99 2.72 2.90 
##   64   65   66   67   68   69   70   71   72   73   74   75   76   77   78 
## 3.36 3.23 3.45 2.63 2.89 2.17 2.38 1.63 2.01 1.71 1.50 1.35 1.06 0.83 0.72 
##   79   80   81   82   83   84   85   86   87   88   89   90   91   92   93 
## 0.53 0.42 0.31 0.26 0.19 0.19 0.11 0.09 0.07 0.03 0.03 0.04 0.01 0.01 0.01
# Frequency table and proportion table for Dribbling.
# table() counts how many observations fall on each distinct Dribbling value.
table(Dribbling)
## Dribbling
##   4   5   6   7   8   9  10  11  12  13  14  15  16  17  18  19  20  21 
##   4  22  36  59  70  72 110 175 223 212 209 134 133  94  82  89  78  57 
##  22  23  24  25  26  27  28  29  30  31  32  33  34  35  36  37  38  39 
##  60  37  46  67  55  48  59  57  87  64  87  57  77  82  65  62  89  87 
##  40  41  42  43  44  45  46  47  48  49  50  51  52  53  54  55  56  57 
##  91  89  99  93 122 163 145 137 187 197 196 214 275 275 292 374 364 432 
##  58  59  60  61  62  63  64  65  66  67  68  69  70  71  72  73  74  75 
## 471 489 488 482 598 598 647 611 601 564 562 434 410 388 382 345 335 305 
##  76  77  78  79  80  81  82  83  84  85  86  87  88  89  90  91  92  93 
## 217 168 173 104 106  70  67  47  46  32  37  27  17   8  12   4   3   1 
##  94  95  96  97 
##   1   1   1   1
# Share of observations at each Dribbling value, in percent (2 decimals).
propDribbling <- round(100 * prop.table(table(Dribbling)), digits = 2)
propDribbling
## Dribbling
##    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18 
## 0.02 0.13 0.22 0.35 0.42 0.43 0.66 1.05 1.34 1.27 1.26 0.81 0.80 0.56 0.49 
##   19   20   21   22   23   24   25   26   27   28   29   30   31   32   33 
## 0.53 0.47 0.34 0.36 0.22 0.28 0.40 0.33 0.29 0.35 0.34 0.52 0.38 0.52 0.34 
##   34   35   36   37   38   39   40   41   42   43   44   45   46   47   48 
## 0.46 0.49 0.39 0.37 0.53 0.52 0.55 0.53 0.59 0.56 0.73 0.98 0.87 0.82 1.12 
##   49   50   51   52   53   54   55   56   57   58   59   60   61   62   63 
## 1.18 1.18 1.29 1.65 1.65 1.75 2.25 2.19 2.60 2.83 2.94 2.93 2.90 3.59 3.59 
##   64   65   66   67   68   69   70   71   72   73   74   75   76   77   78 
## 3.89 3.67 3.61 3.39 3.38 2.61 2.46 2.33 2.30 2.07 2.01 1.83 1.30 1.01 1.04 
##   79   80   81   82   83   84   85   86   87   88   89   90   91   92   93 
## 0.62 0.64 0.42 0.40 0.28 0.28 0.19 0.22 0.16 0.10 0.05 0.07 0.02 0.02 0.01 
##   94   95   96   97 
## 0.01 0.01 0.01 0.01
# Frequency table and proportion table for Short Passing.
# table() counts how many observations fall on each distinct ShortPassing
# value.
table(ShortPassing)
## ShortPassing
##   7   8  11  12  13  14  15  16  17  18  19  20  21  22  23  24  25  26 
##   2   1   7  17  12  14  43  34  47  52  57  64  60  79 121 130 116 102 
##  27  28  29  30  31  32  33  34  35  36  37  38  39  40  41  42  43  44 
## 119 121 133  95  85  96  85  72  81  73  60  81  67  93  89  95  89 108 
##  45  46  47  48  49  50  51  52  53  54  55  56  57  58  59  60  61  62 
## 130 117 123 180 167 154 194 272 280 335 377 389 420 530 502 578 502 699 
##  63  64  65  66  67  68  69  70  71  72  73  74  75  76  77  78  79  80 
## 664 798 730 658 634 665 490 526 405 434 356 347 278 235 183 177 122 102 
##  81  82  83  84  85  86  87  88  89  90  91  92  93 
##  62  64  43  46  26  18   5   6   8   7   1   2   2
# Share of observations at each ShortPassing value, in percent (2 decimals).
propShortPassing <- round(100 * prop.table(table(ShortPassing)), digits = 2)
propShortPassing
## ShortPassing
##    7    8   11   12   13   14   15   16   17   18   19   20   21   22   23 
## 0.01 0.01 0.04 0.10 0.07 0.08 0.26 0.20 0.28 0.31 0.34 0.38 0.36 0.47 0.73 
##   24   25   26   27   28   29   30   31   32   33   34   35   36   37   38 
## 0.78 0.70 0.61 0.72 0.73 0.80 0.57 0.51 0.58 0.51 0.43 0.49 0.44 0.36 0.49 
##   39   40   41   42   43   44   45   46   47   48   49   50   51   52   53 
## 0.40 0.56 0.53 0.57 0.53 0.65 0.78 0.70 0.74 1.08 1.00 0.93 1.17 1.63 1.68 
##   54   55   56   57   58   59   60   61   62   63   64   65   66   67   68 
## 2.01 2.27 2.34 2.52 3.18 3.02 3.47 3.02 4.20 3.99 4.79 4.39 3.95 3.81 4.00 
##   69   70   71   72   73   74   75   76   77   78   79   80   81   82   83 
## 2.94 3.16 2.43 2.61 2.14 2.08 1.67 1.41 1.10 1.06 0.73 0.61 0.37 0.38 0.26 
##   84   85   86   87   88   89   90   91   92   93 
## 0.28 0.16 0.11 0.03 0.04 0.05 0.04 0.01 0.01 0.01
# Frequency table and proportion table for Positions.
# Position.modified keeps a factor copy of Position in the global
# environment; the pie-chart and descriptive-statistics sections reuse it.
Position.modified <- factor(Position)
# List the distinct position categories.
levels(Position)
## [1] "defence"     "Forward"     "goal keeper" "Midfield"
# Share of observations in each Position category, in percent (2 decimals).
propPosition <- round(100 * prop.table(table(Position)), digits = 2)
propPosition
## Position
##     defence     Forward goal keeper    Midfield 
##       31.71       23.81       11.42       33.06
  # Frequency table and proportion table for Value of Player.
  # table() counts how many observations fall on each distinct Value.
table(Value)
## Value
##     10000     20000     30000     40000     50000     60000     70000 
##        15        21        23        64       125       148       139 
##     80000     90000    100000    110000    120000    130000    140000 
##       113       136       169       172       186       194       194 
##    150000    160000    170000    180000    190000    200000    210000 
##       168       196       164       193       140       166       135 
##    220000    230000    240000    250000    260000    270000    280000 
##       156       132       134       145       148       153       117 
##    290000    300000    325000    350000    375000    400000    425000 
##       125       223       323       305       354       304       330 
##    450000    475000    500000    525000    550000    575000    600000 
##       311       279       279       309       262       243       280 
##    625000    650000    675000    700000    725000    750000    775000 
##       219       207       255       191       221       203       165 
##    800000    825000    850000    875000    900000    925000    950000 
##       147       182       163       157       173       123       149 
##    975000   1000000   1100000   1200000   1300000   1400000   1500000 
##       118       284       382       289       222       201       114 
##   1600000   1700000   1800000   1900000   2000000   2100000   2200000 
##       138       107       175       101        89        95        97 
##   2300000   2400000   2500000   2600000   2700000   2800000   2900000 
##       105       137       107        65        88        84        56 
##   3000000   3100000   3200000   3300000   3400000   3500000   3600000 
##        84        75       118        53        56        44        55 
##   3700000   3800000   3900000   4000000   4100000   4200000   4300000 
##        47        60        73        60        41        62        47 
##   4400000   4500000   4600000   4700000   4800000   4900000   5000000 
##        56        45        25        36        38        41       112 
##   5500000   6000000   6500000   7000000   7500000   8000000   8500000 
##       171       147       152       152       114       119       110 
##   9000000   9500000  10000000  10500000  11000000  11500000  12000000 
##       102        65        77        64        36        40        58 
##  12500000  13000000  13500000  14000000  14500000  15000000  15500000 
##        44        37        20        36        24        21        29 
##  16000000  16500000  17000000  17500000  18000000  18500000  19000000 
##        20        12        14        21        17        13         9 
##  19500000  20000000  20500000  21000000  21500000  22000000  22500000 
##        11        16         5        18        10         9        10 
##  23000000  23500000  24000000  24500000  25000000  25500000  26000000 
##         3         4        10         8         7         6        12 
##  26500000  27000000  27500000  28000000  28500000  29000000  29500000 
##         8         7         4         1         7         5         7 
##  30000000  30500000  31000000  31500000  32000000  32500000  33000000 
##         8         8         2         5         4         3         4 
##  34000000  34500000  35000000  35500000  36000000  36500000  37000000 
##        10         2         1         3         1         3         3 
##  37500000  38000000  38500000  39000000  40000000  40500000  41000000 
##         3         4         1         3         1         3         1 
##  41500000  42000000  42500000  43000000  43500000  44000000  44500000 
##         1         1         2         1         2         2         1 
##  45000000  45500000  46000000  46500000  50000000  50500000  51000000 
##         4         1         1         4         2         2         2 
##  51500000  52000000  53000000  53500000  54000000  55000000  56500000 
##         1         2         1         1         1         1         1 
##  57000000  58000000  59000000  59500000  60000000  61000000  62000000 
##         1         1         1         2         3         1         1 
##  62500000  63000000  64000000  64500000  67000000  68000000  69500000 
##         1         1         1         2         1         1         2 
##  72000000  73500000  76500000  77000000  78000000  80000000  81000000 
##         1         2         1         2         1         1         1 
##  83500000  89000000  93000000 102000000 110500000 118500000 
##         1         1         1         1         1         1
# Share of observations at each Value, in percent (2 decimals).
propValue <- round(100 * prop.table(table(Value)), digits = 2)
propValue
## Value
##     10000     20000     30000     40000     50000     60000     70000 
##      0.09      0.13      0.14      0.38      0.75      0.89      0.84 
##     80000     90000    100000    110000    120000    130000    140000 
##      0.68      0.82      1.02      1.03      1.12      1.17      1.17 
##    150000    160000    170000    180000    190000    200000    210000 
##      1.01      1.18      0.99      1.16      0.84      1.00      0.81 
##    220000    230000    240000    250000    260000    270000    280000 
##      0.94      0.79      0.81      0.87      0.89      0.92      0.70 
##    290000    300000    325000    350000    375000    400000    425000 
##      0.75      1.34      1.94      1.83      2.13      1.83      1.98 
##    450000    475000    500000    525000    550000    575000    600000 
##      1.87      1.68      1.68      1.86      1.57      1.46      1.68 
##    625000    650000    675000    700000    725000    750000    775000 
##      1.32      1.24      1.53      1.15      1.33      1.22      0.99 
##    800000    825000    850000    875000    900000    925000    950000 
##      0.88      1.09      0.98      0.94      1.04      0.74      0.90 
##    975000   1000000   1100000   1200000   1300000   1400000   1500000 
##      0.71      1.71      2.30      1.74      1.33      1.21      0.68 
##   1600000   1700000   1800000   1900000   2000000   2100000   2200000 
##      0.83      0.64      1.05      0.61      0.53      0.57      0.58 
##   2300000   2400000   2500000   2600000   2700000   2800000   2900000 
##      0.63      0.82      0.64      0.39      0.53      0.50      0.34 
##   3000000   3100000   3200000   3300000   3400000   3500000   3600000 
##      0.50      0.45      0.71      0.32      0.34      0.26      0.33 
##   3700000   3800000   3900000   4000000   4100000   4200000   4300000 
##      0.28      0.36      0.44      0.36      0.25      0.37      0.28 
##   4400000   4500000   4600000   4700000   4800000   4900000   5000000 
##      0.34      0.27      0.15      0.22      0.23      0.25      0.67 
##   5500000   6000000   6500000   7000000   7500000   8000000   8500000 
##      1.03      0.88      0.91      0.91      0.68      0.72      0.66 
##   9000000   9500000  10000000  10500000  11000000  11500000  12000000 
##      0.61      0.39      0.46      0.38      0.22      0.24      0.35 
##  12500000  13000000  13500000  14000000  14500000  15000000  15500000 
##      0.26      0.22      0.12      0.22      0.14      0.13      0.17 
##  16000000  16500000  17000000  17500000  18000000  18500000  19000000 
##      0.12      0.07      0.08      0.13      0.10      0.08      0.05 
##  19500000  20000000  20500000  21000000  21500000  22000000  22500000 
##      0.07      0.10      0.03      0.11      0.06      0.05      0.06 
##  23000000  23500000  24000000  24500000  25000000  25500000  26000000 
##      0.02      0.02      0.06      0.05      0.04      0.04      0.07 
##  26500000  27000000  27500000  28000000  28500000  29000000  29500000 
##      0.05      0.04      0.02      0.01      0.04      0.03      0.04 
##  30000000  30500000  31000000  31500000  32000000  32500000  33000000 
##      0.05      0.05      0.01      0.03      0.02      0.02      0.02 
##  34000000  34500000  35000000  35500000  36000000  36500000  37000000 
##      0.06      0.01      0.01      0.02      0.01      0.02      0.02 
##  37500000  38000000  38500000  39000000  40000000  40500000  41000000 
##      0.02      0.02      0.01      0.02      0.01      0.02      0.01 
##  41500000  42000000  42500000  43000000  43500000  44000000  44500000 
##      0.01      0.01      0.01      0.01      0.01      0.01      0.01 
##  45000000  45500000  46000000  46500000  50000000  50500000  51000000 
##      0.02      0.01      0.01      0.02      0.01      0.01      0.01 
##  51500000  52000000  53000000  53500000  54000000  55000000  56500000 
##      0.01      0.01      0.01      0.01      0.01      0.01      0.01 
##  57000000  58000000  59000000  59500000  60000000  61000000  62000000 
##      0.01      0.01      0.01      0.01      0.02      0.01      0.01 
##  62500000  63000000  64000000  64500000  67000000  68000000  69500000 
##      0.01      0.01      0.01      0.01      0.01      0.01      0.01 
##  72000000  73500000  76500000  77000000  78000000  80000000  81000000 
##      0.01      0.01      0.01      0.01      0.01      0.01      0.01 
##  83500000  89000000  93000000 102000000 110500000 118500000 
##      0.01      0.01      0.01      0.01      0.01      0.01

1.2 Bar Charts

#Bar Chart for Age
# Age is numeric, so plot(Age) would draw every value against its row index
# (a scatter plot). Tabulate first so each bar is the count of one age.
barplot(table(Age), xlab = "Age", ylab = "Count", main = "Bar Chart for Age")

#Bar Chart for Position
# plot() only draws bars when the column is a factor. Building the counts
# explicitly gives the same chart and also works for a character column.
barplot(table(football.df$Position), xlab = "Position", ylab = "Count",
        main = "Bar Chart for Position")

1.3 Histograms

# Histograms of the numeric variables, one call per variable. Every call sets
# its own title and axis labels.

# Wage
hist(
  Wage,
  main = "Histogram of wage",
  xlab = "Wage",
  ylab = "Frequency"
)

# Overall Rating
hist(
  Overall,
  main = "Histogram of Overall Rating",
  xlab = "Overall Rating",
  ylab = "Frequency"
)

# Ball Control
hist(
  BallControl,
  main = "Histogram of Ball Control",
  xlab = "Ball Control",
  ylab = "Frequency"
)

# Stamina
hist(
  Stamina,
  main = "Histogram of Stamina",
  xlab = "Stamina",
  ylab = "Frequency"
)

# Agility
hist(
  Agility,
  main = "Histogram of Agility",
  xlab = "Agility",
  ylab = "Frequency"
)

# Value
hist(
  Value,
  main = "Histogram of value",
  xlab = "Value",
  ylab = "Count"
)

# Short Passing
hist(
  football.df$ShortPassing,
  main = "Histogram of short passing",
  xlab = "Shortpassing",
  ylab = "Count"
)

# Dribbling
hist(
  football.df$Dribbling,
  main = "Histogram of Dribbling",
  xlab = "Dribbling",
  ylab = "Count"
)

# Standing Tackle
hist(
  football.df$StandingTackle,
  main = "Histogram of StandingTackle",
  xlab = "StandingTackle",
  ylab = "Count"
)

1.4 Box Plots

# Horizontal box plots, one per numeric variable. All of them share the same
# width setting and the same light-blue fill.

# Overall Rating
boxplot(
  Overall,
  main = "Boxplot for Overall Rating",
  xlab = "Overall rating",
  horizontal = TRUE,
  width = 0.5,
  col = "lightblue"
)

# Wage
boxplot(
  Wage,
  main = "Boxplot for Wage",
  xlab = "Wage",
  horizontal = TRUE,
  width = 0.5,
  col = "lightblue"
)

# Ball Control
boxplot(
  BallControl,
  main = "Boxplot for Ball Control",
  xlab = "Ball Control",
  horizontal = TRUE,
  width = 0.5,
  col = "lightblue"
)

# Stamina
boxplot(
  Stamina,
  main = "Boxplot for Stamina",
  xlab = "Stamina",
  horizontal = TRUE,
  width = 0.5,
  col = "lightblue"
)

# Agility
boxplot(
  Agility,
  main = "Boxplot for Agility",
  xlab = "Agility",
  horizontal = TRUE,
  width = 0.5,
  col = "lightblue"
)

# Value
boxplot(
  football.df$Value,
  main = "Boxplot for value",
  xlab = "Value",
  horizontal = TRUE,
  width = 0.5,
  col = "lightblue"
)

# Short Passing
boxplot(
  football.df$ShortPassing,
  main = "Boxplot for short passing",
  xlab = "short passing",
  horizontal = TRUE,
  width = 0.5,
  col = "lightblue"
)

# Dribbling
boxplot(
  football.df$Dribbling,
  main = "Boxplot for Dribbling",
  xlab = "Dribbling",
  horizontal = TRUE,
  width = 0.5,
  col = "lightblue"
)

#Box Plot for Standing Tackle
# Horizontal box plot of StandingTackle; the axis label previously read
# "Standing tacle" and is corrected to match the title.
boxplot(football.df$StandingTackle, width = 0.5,
        horizontal = TRUE, main = "Boxplot for Standing Tackle",
        xlab = "Standing Tackle", col = "lightblue")

1.5 Frequency Polygons

library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.1
## 
## Attaching package: 'ggplot2'
## The following object is masked from 'football.df':
## 
##     Position
# Frequency polygons: each plot takes its data from football.df and bins one
# column with geom_freqpoly() at the default bin count (hence the stat_bin
# message echoed after every call).

# Wage
ggplot(football.df) +
  geom_freqpoly(aes(x = Wage))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Ball Control
ggplot(football.df) +
  geom_freqpoly(aes(x = BallControl))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Overall Rating
ggplot(football.df) +
  geom_freqpoly(aes(x = Overall))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Agility
ggplot(football.df) +
  geom_freqpoly(aes(x = Agility))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Stamina
ggplot(football.df) +
  geom_freqpoly(aes(x = Stamina))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Value
ggplot(football.df) +
  geom_freqpoly(aes(x = Value))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Short Passing
ggplot(football.df) +
  geom_freqpoly(aes(x = ShortPassing))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Dribbling
ggplot(football.df) +
  geom_freqpoly(aes(x = Dribbling))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Standing Tackle
ggplot(football.df) +
  geom_freqpoly(aes(x = StandingTackle))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

1.6 Pie Charts

# Pie chart of Age: one slice per distinct age, sized by its count.
pie(
  table(Age),
  main = "Pie Chart of Age"
)

# Pie chart of Position, built from the factor copy Position.modified.
# Count the observations per level ...
tab <- table(Position.modified)
# ... keep the same counts as a data frame (the Freq column holds them) ...
tab.df <- as.data.frame(tab)
# ... and pull the counts out as a plain vector.
x <- tab.df[["Freq"]]
# Display labels for the slices; they are matched to the counts by position,
# so they must stay in the same order as levels(Position.modified).
labels <- c("Defense", "Forward", "Goal Keeper", "Midfield")
pie(x, labels = labels, main = "Pie Chart of Position Modified ")

1.7 Descriptive Statistics Table

library(psych)
## Warning: package 'psych' was built under R version 3.6.1
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
# Descriptive statistics (psych::describe) for columns 4 through 15.
# Position is also stored as a factor in the global environment here.
Position <- factor(Position.modified)
describe(football.df[, 4:15])
##                vars     n       mean          sd  median    trimmed
## Age               1 16643      25.23        4.72      25      25.03
## Overall           2 16643      66.16        7.01      66      66.14
## Value             3 16643 2442666.59  5720628.56  675000 1215337.59
## Wage              4 16643    9618.04    22263.52    3000    5007.66
## Position*         5 16643       2.46        1.24       2       2.45
## ShortPassing      6 16643      58.54       14.81      62      60.38
## Dribbling         7 16643      55.10       19.01      61      57.52
## BallControl       8 16643      58.14       16.79      63      60.57
## Agility           9 16643      63.38       14.81      66      64.40
## Stamina          10 16643      63.16       16.06      66      64.82
## StandingTackle   11 16643      47.78       21.68      55      48.62
## Release.Clause   12 16643 4585060.99 11118717.76 1100000 2177437.93
##                       mad   min       max     range  skew kurtosis
## Age                  5.93    16        45        29  0.34    -0.56
## Overall              7.41    46        94        48  0.08     0.08
## Value           689409.00 10000 118500000 118490000  7.00    74.82
## Wage              2965.20  1000    565000    564000  7.97   101.38
## Position*            1.48     1         4         3  0.12    -1.61
## ShortPassing        10.38     7        93        86 -1.08     0.73
## Dribbling           13.34     4        97        93 -1.07     0.28
## BallControl         10.38     5        96        91 -1.25     0.97
## Agility             13.34    14        96        82 -0.59    -0.08
## Stamina             13.34    12        96        84 -0.91     0.43
## StandingTackle      23.72     2        93        91 -0.35    -1.30
## Release.Clause 1149015.00 13000 228100000 228087000  7.11    77.11
##                      se
## Age                0.04
## Overall            0.05
## Value          44343.29
## Wage             172.58
## Position*          0.01
## ShortPassing       0.11
## Dribbling          0.15
## BallControl        0.13
## Agility            0.11
## Stamina            0.12
## StandingTackle     0.17
## Release.Clause 86186.43
# Mean of each selected variable (columns 4-7 and 9-14; column 8, Position,
# is skipped). vapply() enforces exactly one numeric value per column, so the
# result is always a named numeric vector.
# NOTE(review): column 15 (Release.Clause) is also left out - confirm that is
# intended.
vapply(football.df[c(4:7, 9:14)], mean, numeric(1))
##            Age        Overall          Value           Wage   ShortPassing 
##   2.522622e+01   6.616277e+01   2.442667e+06   9.618038e+03   5.854395e+01 
##      Dribbling    BallControl        Agility        Stamina StandingTackle 
##   5.510473e+01   5.813627e+01   6.338070e+01   6.316001e+01   4.777630e+01
# Group means by playing position. Column 8 (Position) is left out of the
# aggregated columns: it is not numeric, so mean() can only return NA for it
# and raises one warning per group.
aggregate(football.df[, c(4:7, 9:15)], by = list(Position), mean)
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
##       Group.1      Age  Overall   Value      Wage Position ShortPassing
## 1     defence 25.58621 66.40337 2018687  9214.665       NA     59.16105
## 2     Forward 24.78975 66.41949 3056410 11207.219       NA     61.43362
## 3 goal keeper 26.09632 64.46316 1597268  6726.842       NA     27.22526
## 4    Midfield 24.89478 66.33400 2699321  9858.986       NA     66.68490
##   Dribbling BallControl  Agility  Stamina StandingTackle Release.Clause
## 1  52.10269    57.53278 60.02539 67.98352       66.77283        3789796
## 2  65.62670    65.94043 69.86320 64.23725       30.89197        5701532
## 3  13.81474    19.91263 40.22211 30.36053       14.10947        2997590
## 4  64.66455    66.29366 69.92749 69.08268       53.33673        5092084
# Read the cleaned data again with data.table::fread() and list the column
# names it produced (the last one is "Release Clause", with a space).
df <- data.table::fread("data_clean.csv")
names(df)
##  [1] "V1"             "ID"             "Name"           "Age"           
##  [5] "Overall"        "Value"          "Wage"           "Position"      
##  [9] "ShortPassing"   "Dribbling"      "BallControl"    "Agility"       
## [13] "Stamina"        "StandingTackle" "Release Clause"
# NOTE(review): attach() puts df's columns on the search path; the plots that
# follow refer to them by bare name.
attach(df)
## The following object is masked _by_ .GlobalEnv:
## 
##     Position
## The following object is masked from package:ggplot2:
## 
##     Position
## The following objects are masked from football.df:
## 
##     Age, Agility, BallControl, Dribbling, ID, Name, Overall,
##     Position, ShortPassing, Stamina, StandingTackle, Value, Wage

2. Bi-Variate Analysis

2.1 Scatter plot

Scatter plot of Age and Release clause

# Age vs release clause; columns are read from df explicitly so the plot does
# not depend on what the bare names resolve to on the search path.
plot(df$Age, df[["Release Clause"]], xlab = "Age", ylab = "Release clause")

Scatter plot of Wage and Release clause

# Wage vs release clause; columns are read from df explicitly so the plot does
# not depend on what the bare names resolve to on the search path.
plot(df$Wage, df[["Release Clause"]], xlab = "Wage", ylab = "Release clause")

Scatter plot of Overall Rating and Release clause

# Overall rating vs release clause; columns are read from df explicitly so the
# plot does not depend on what the bare names resolve to on the search path.
plot(df$Overall, df[["Release Clause"]],
     xlab = "Overall Rating", ylab = "Release clause")

Scatter plot of Ball Control and Release clause

# Ball control vs release clause; columns are read from df explicitly so the
# plot does not depend on what the bare names resolve to on the search path.
plot(df$BallControl, df[["Release Clause"]],
     xlab = "ball control", ylab = "Release clause")

Scatter plot of Agility and Release clause

# Agility vs release clause; columns are read from df explicitly so the plot
# does not depend on what the bare names resolve to on the search path.
plot(df$Agility, df[["Release Clause"]],
     xlab = "Agility", ylab = "Release clause")

Scatter plot of Stamina and Release clause

# Stamina vs release clause; columns are read from df explicitly so the plot
# does not depend on what the bare names resolve to on the search path.
plot(df$Stamina, df[["Release Clause"]],
     xlab = "stamina", ylab = "Release clause")

Scatter plot of Standing Tackle and Release clause

# Standing tackle vs release clause; columns are read from df explicitly so the
# plot does not depend on what the bare names resolve to on the search path.
plot(df$StandingTackle, df[["Release Clause"]],
     xlab = "StandingTackle", ylab = "Release clause")

Scatter plot of Value and Release clause

# Market value vs release clause; columns are read from df explicitly so the
# plot does not depend on what the bare names resolve to on the search path.
plot(df$Value, df[["Release Clause"]],
     xlab = " Market value", ylab = "Release clause")

Scatter plot of Short Passing and Release clause

# Short passing vs release clause; columns are read from df explicitly so the
# plot does not depend on what the bare names resolve to on the search path.
plot(df$ShortPassing, df[["Release Clause"]],
     xlab = "Short Passing", ylab = "Release clause")

Scatter plot of Dribbling and Release clause

# Dribbling vs release clause; columns are read from df explicitly so the plot
# does not depend on what the bare names resolve to on the search path.
# The x-axis label previously read "Dribling".
plot(df$Dribbling, df[["Release Clause"]],
     xlab = "Dribbling", ylab = "Release clause")

2.2 Box Plot

# Box plot of release clause grouped by player position.
# data = df makes the formula use df's own columns; without it, Position and
# `Release Clause` are looked up in the global environment and search path.

boxplot(`Release Clause` ~ Position,
        data = df,
        main = "Boxplot for Variable Release clause grouped by position",
        col = c("white", "red", "gray", "lightblue"))

2.3 Summary statistics of various variables

# Summary statistics for the numeric columns only. The character columns
# (Name, Position) cannot be summarised numerically: passing them through
# yields NA / Inf / -Inf rows and coercion warnings.
psych::describe(Filter(is.numeric, as.data.frame(df)))
## Warning in psych::describe(df): NAs introduced by coercion

## Warning in psych::describe(df): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
##                vars     n       mean          sd  median    trimmed
## V1                1 16643    9168.65     5297.15    9207    9182.53
## ID                2 16643  213845.01    30546.29  221493  218014.15
## Name*             3 16643        NaN          NA      NA        NaN
## Age               4 16643      25.23        4.72      25      25.03
## Overall           5 16643      66.16        7.01      66      66.14
## Value             6 16643 2442666.59  5720628.56  675000 1215337.59
## Wage              7 16643    9618.04    22263.52    3000    5007.66
## Position*         8 16643        NaN          NA      NA        NaN
## ShortPassing      9 16643      58.54       14.81      62      60.38
## Dribbling        10 16643      55.10       19.01      61      57.52
## BallControl      11 16643      58.14       16.79      63      60.57
## Agility          12 16643      63.38       14.81      66      64.40
## Stamina          13 16643      63.16       16.06      66      64.82
## StandingTackle   14 16643      47.78       21.68      55      48.62
## Release Clause   15 16643 4585060.99 11118717.76 1100000 2177437.93
##                       mad   min       max     range  skew kurtosis
## V1                6839.23     0     18206     18206 -0.02    -1.21
## ID               26408.07    16    246620    246604 -2.23     9.21
## Name*                  NA   Inf      -Inf      -Inf    NA       NA
## Age                  5.93    16        45        29  0.34    -0.56
## Overall              7.41    46        94        48  0.08     0.08
## Value           689409.00 10000 118500000 118490000  7.00    74.82
## Wage              2965.20  1000    565000    564000  7.97   101.38
## Position*              NA   Inf      -Inf      -Inf    NA       NA
## ShortPassing        10.38     7        93        86 -1.08     0.73
## Dribbling           13.34     4        97        93 -1.07     0.28
## BallControl         10.38     5        96        91 -1.25     0.97
## Agility             13.34    14        96        82 -0.59    -0.08
## Stamina             13.34    12        96        84 -0.91     0.43
## StandingTackle      23.72     2        93        91 -0.35    -1.30
## Release Clause 1149015.00 13000 228100000 228087000  7.11    77.11
##                      se
## V1                41.06
## ID               236.78
## Name*                NA
## Age                0.04
## Overall            0.05
## Value          44343.29
## Wage             172.58
## Position*            NA
## ShortPassing       0.11
## Dribbling          0.15
## BallControl        0.13
## Agility            0.11
## Stamina            0.12
## StandingTackle     0.17
## Release Clause 86186.43

3. Multi-Variate Analysis

3.1 Correlation matrix

# Subset of the continuous variables used for the correlation analysis
Subset.df <- df[, c("Wage", "Age", "Overall", "BallControl", "Stamina",
                    "Agility", "StandingTackle", "Value", "ShortPassing",
                    "Dribbling")]

# Correlation matrix of Subset.df, computed on complete cases only
# (the option name is spelled out rather than abbreviated)
corMat <- cor(Subset.df, use = "complete.obs")
# Round to 3 decimal places for display
round(corMat, 3)
##                 Wage    Age Overall BallControl Stamina Agility
## Wage           1.000  0.149   0.574       0.276   0.181   0.155
## Age            0.149  1.000   0.465       0.096   0.104  -0.013
## Overall        0.574  0.465   1.000       0.463   0.371   0.268
## BallControl    0.276  0.096   0.463       1.000   0.734   0.705
## Stamina        0.181  0.104   0.371       0.734   1.000   0.573
## Agility        0.155 -0.013   0.268       0.705   0.573   1.000
## StandingTackle 0.134  0.122   0.262       0.432   0.578   0.139
## Value          0.862  0.076   0.629       0.310   0.214   0.196
## ShortPassing   0.297  0.142   0.505       0.913   0.721   0.614
## Dribbling      0.234  0.019   0.374       0.939   0.692   0.765
##                StandingTackle Value ShortPassing Dribbling
## Wage                    0.134 0.862        0.297     0.234
## Age                     0.122 0.076        0.142     0.019
## Overall                 0.262 0.629        0.505     0.374
## BallControl             0.432 0.310        0.913     0.939
## Stamina                 0.578 0.214        0.721     0.692
## Agility                 0.139 0.196        0.614     0.765
## StandingTackle          1.000 0.113        0.551     0.315
## Value                   0.113 1.000        0.329     0.272
## ShortPassing            0.551 0.329        1.000     0.845
## Dribbling               0.315 0.272        0.845     1.000

Correlation matrix with significance value

# Correlations of Subset.df together with their p-values, via
# psych::corr.test(). The call is kept exactly as written because
# corr.test() echoes it in the "Call:" line of its printed result.
library(psych)
corr.test(Subset.df, use = "complete")
## Call:corr.test(x = Subset.df, use = "complete")
## Correlation matrix 
##                Wage   Age Overall BallControl Stamina Agility
## Wage           1.00  0.15    0.57        0.28    0.18    0.15
## Age            0.15  1.00    0.46        0.10    0.10   -0.01
## Overall        0.57  0.46    1.00        0.46    0.37    0.27
## BallControl    0.28  0.10    0.46        1.00    0.73    0.71
## Stamina        0.18  0.10    0.37        0.73    1.00    0.57
## Agility        0.15 -0.01    0.27        0.71    0.57    1.00
## StandingTackle 0.13  0.12    0.26        0.43    0.58    0.14
## Value          0.86  0.08    0.63        0.31    0.21    0.20
## ShortPassing   0.30  0.14    0.51        0.91    0.72    0.61
## Dribbling      0.23  0.02    0.37        0.94    0.69    0.77
##                StandingTackle Value ShortPassing Dribbling
## Wage                     0.13  0.86         0.30      0.23
## Age                      0.12  0.08         0.14      0.02
## Overall                  0.26  0.63         0.51      0.37
## BallControl              0.43  0.31         0.91      0.94
## Stamina                  0.58  0.21         0.72      0.69
## Agility                  0.14  0.20         0.61      0.77
## StandingTackle           1.00  0.11         0.55      0.31
## Value                    0.11  1.00         0.33      0.27
## ShortPassing             0.55  0.33         1.00      0.85
## Dribbling                0.31  0.27         0.85      1.00
## Sample Size 
## [1] 16643
## Probability values (Entries above the diagonal are adjusted for multiple tests.) 
##                Wage  Age Overall BallControl Stamina Agility
## Wage              0 0.00       0           0       0     0.0
## Age               0 0.00       0           0       0     0.1
## Overall           0 0.00       0           0       0     0.0
## BallControl       0 0.00       0           0       0     0.0
## Stamina           0 0.00       0           0       0     0.0
## Agility           0 0.10       0           0       0     0.0
## StandingTackle    0 0.00       0           0       0     0.0
## Value             0 0.00       0           0       0     0.0
## ShortPassing      0 0.00       0           0       0     0.0
## Dribbling         0 0.01       0           0       0     0.0
##                StandingTackle Value ShortPassing Dribbling
## Wage                        0     0            0      0.00
## Age                         0     0            0      0.03
## Overall                     0     0            0      0.00
## BallControl                 0     0            0      0.00
## Stamina                     0     0            0      0.00
## Agility                     0     0            0      0.00
## StandingTackle              0     0            0      0.00
## Value                       0     0            0      0.00
## ShortPassing                0     0            0      0.00
## Dribbling                   0     0            0      0.00
## 
##  To see confidence intervals of the correlations, print with the short=FALSE option

3.2 Visual Correlation

Correlogram with circle

# Correlogram with circle glyphs. It is drawn from corMat, the correlation
# matrix already computed for the printed table, so the plot and the table
# share the same missing-value handling instead of recomputing cor().
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.6.1
## corrplot 0.84 loaded
corrplot(corMat, method = "circle")

Correlogram with correlation coefficient

# Correlogram showing the correlation coefficients as numbers. It is drawn
# from corMat, the matrix already computed for the printed table, instead of
# recomputing cor() with different missing-value handling.
corrplot(corMat, method = "number")

3.3 Summary statistics of various variables

# Summary statistics for the numeric columns only. The character columns
# (Name, Position) cannot be summarised numerically: passing them through
# yields NA / Inf / -Inf rows and coercion warnings.
psych::describe(Filter(is.numeric, as.data.frame(df)))
## Warning in psych::describe(df): NAs introduced by coercion

## Warning in psych::describe(df): NAs introduced by coercion
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning
## Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf

## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning
## -Inf
##                vars     n       mean          sd  median    trimmed
## V1                1 16643    9168.65     5297.15    9207    9182.53
## ID                2 16643  213845.01    30546.29  221493  218014.15
## Name*             3 16643        NaN          NA      NA        NaN
## Age               4 16643      25.23        4.72      25      25.03
## Overall           5 16643      66.16        7.01      66      66.14
## Value             6 16643 2442666.59  5720628.56  675000 1215337.59
## Wage              7 16643    9618.04    22263.52    3000    5007.66
## Position*         8 16643        NaN          NA      NA        NaN
## ShortPassing      9 16643      58.54       14.81      62      60.38
## Dribbling        10 16643      55.10       19.01      61      57.52
## BallControl      11 16643      58.14       16.79      63      60.57
## Agility          12 16643      63.38       14.81      66      64.40
## Stamina          13 16643      63.16       16.06      66      64.82
## StandingTackle   14 16643      47.78       21.68      55      48.62
## Release Clause   15 16643 4585060.99 11118717.76 1100000 2177437.93
##                       mad   min       max     range  skew kurtosis
## V1                6839.23     0     18206     18206 -0.02    -1.21
## ID               26408.07    16    246620    246604 -2.23     9.21
## Name*                  NA   Inf      -Inf      -Inf    NA       NA
## Age                  5.93    16        45        29  0.34    -0.56
## Overall              7.41    46        94        48  0.08     0.08
## Value           689409.00 10000 118500000 118490000  7.00    74.82
## Wage              2965.20  1000    565000    564000  7.97   101.38
## Position*              NA   Inf      -Inf      -Inf    NA       NA
## ShortPassing        10.38     7        93        86 -1.08     0.73
## Dribbling           13.34     4        97        93 -1.07     0.28
## BallControl         10.38     5        96        91 -1.25     0.97
## Agility             13.34    14        96        82 -0.59    -0.08
## Stamina             13.34    12        96        84 -0.91     0.43
## StandingTackle      23.72     2        93        91 -0.35    -1.30
## Release Clause 1149015.00 13000 228100000 228087000  7.11    77.11
##                      se
## V1                41.06
## ID               236.78
## Name*                NA
## Age                0.04
## Overall            0.05
## Value          44343.29
## Wage             172.58
## Position*            NA
## ShortPassing       0.11
## Dribbling          0.15
## BallControl        0.13
## Agility            0.11
## Stamina            0.12
## StandingTackle     0.17
## Release Clause 86186.43

4. Multiple linear regression

# Multiple linear regression of `Release Clause` on age, overall rating,
# market value, wage, position and the six skill ratings, fitted on df.
# Position is the only non-numeric predictor; the summary reports one
# coefficient for each of its categories other than the reference one.
# The call is left exactly as written: summary() echoes it, and the order of
# the terms determines the order of the coefficient table.
model2 <- lm(`Release Clause` ~ Age
             + Overall
             + Value
             + Wage
             + Position
             + ShortPassing
             + Dribbling
             + BallControl
             + Agility
             + Stamina
             + StandingTackle,
            data = df)
# Print residual quantiles, coefficient table and fit statistics
summary(model2)
## 
## Call:
## lm(formula = `Release Clause` ~ Age + Overall + Value + Wage + 
##     Position + ShortPassing + Dribbling + BallControl + Agility + 
##     Stamina + StandingTackle, data = df)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -21870952   -185410    -13073    219380  16897522 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.440e+06  1.165e+05  12.360  < 2e-16 ***
## Age                 -3.136e+04  2.344e+03 -13.381  < 2e-16 ***
## Overall             -8.248e+03  2.644e+03  -3.120  0.00181 ** 
## Value                1.941e+00  3.469e-03 559.528  < 2e-16 ***
## Wage                 4.806e-01  8.133e-01   0.591  0.55460    
## PositionForward     -9.013e+04  4.038e+04  -2.232  0.02565 *  
## Positiongoal keeper -3.128e+04  7.953e+04  -0.393  0.69409    
## PositionMidfield    -2.061e+04  2.962e+04  -0.696  0.48660    
## ShortPassing         2.719e+02  1.769e+03   0.154  0.87787    
## Dribbling            1.174e+03  1.612e+03   0.728  0.46651    
## BallControl          1.014e+03  2.343e+03   0.433  0.66512    
## Agility             -1.095e+03  9.934e+02  -1.103  0.27017    
## Stamina             -6.389e+03  9.964e+02  -6.412 1.48e-10 ***
## StandingTackle       2.127e+03  8.944e+02   2.378  0.01740 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1164000 on 16629 degrees of freedom
## Multiple R-squared:  0.989,  Adjusted R-squared:  0.989 
## F-statistic: 1.155e+05 on 13 and 16629 DF,  p-value: < 2.2e-16