library("ggplot2")
# Load the player database and keep only the variables analysed below.
# Columns are selected by name (not by position) so the selection cannot
# silently pick up the wrong variables if the CSV's column order changes;
# a missing column now fails loudly with "undefined columns selected".
selected_cols <- c(
  "overall", "value_eur", "wage_eur", "age", "height_cm", "weight_kg",
  "club_jersey_number", "shooting", "passing", "dribbling", "defending",
  "physic"
)
# read.csv() already returns a data frame, so no data.frame() wrapper is
# needed. na.omit() then drops every row that has an NA in any of the
# selected columns (listwise deletion).
df <- na.omit(read.csv("Base de datos Jugadores.csv")[, selected_cols])
summary(df)
##     overall        value_eur            wage_eur           age       
##  Min.   :42.00   Min.   :     9000   Min.   :   500   Min.   :16.00  
##  1st Qu.:64.00   1st Qu.:   600000   1st Qu.:  4000   1st Qu.:21.00  
##  Median :69.00   Median :  1400000   Median : 10000   Median :25.00  
##  Mean   :69.09   Mean   :  4817402   Mean   : 21894   Mean   :24.97  
##  3rd Qu.:74.00   3rd Qu.:  4300000   3rd Qu.: 27000   3rd Qu.:28.00  
##  Max.   :94.00   Max.   :185000000   Max.   :575000   Max.   :45.00  
##    height_cm     weight_kg      club_jersey_number    shooting   
##  Min.   :155   Min.   : 51.00   Min.   : 1.00      Min.   :14.0  
##  1st Qu.:177   1st Qu.: 71.00   1st Qu.: 9.00      1st Qu.:41.0  
##  Median :182   Median : 75.00   Median :18.00      Median :54.0  
##  Mean   :182   Mean   : 75.82   Mean   :20.63      Mean   :51.8  
##  3rd Qu.:187   3rd Qu.: 80.00   3rd Qu.:28.00      3rd Qu.:63.0  
##  Max.   :206   Max.   :107.00   Max.   :99.00      Max.   :93.0  
##     passing        dribbling       defending         physic     
##  Min.   :24.00   Min.   :24.00   Min.   :15.00   Min.   :27.00  
##  1st Qu.:50.00   1st Qu.:56.00   1st Qu.:35.00   1st Qu.:58.00  
##  Median :57.00   Median :63.00   Median :55.00   Median :66.00  
##  Mean   :56.44   Mean   :61.74   Mean   :50.73   Mean   :64.61  
##  3rd Qu.:64.00   3rd Qu.:69.00   3rd Qu.:64.00   3rd Qu.:72.00  
##  Max.   :93.00   Max.   :96.00   Max.   :90.00   Max.   :92.00
# Model 1: ordinary least squares fit of value_eur on age, weight_kg and
# height_cm, followed by its coefficient table and fit statistics.
# NOTE(review): the residual quantiles recorded below are strongly
# right-skewed; a transformed response (e.g. log(value_eur)) may be worth
# evaluating -- TODO confirm with diagnostic plots.
mod1 <- lm(
  formula = value_eur ~ age + weight_kg + height_cm,
  data = df
)
summary(mod1)
## 
## Call:
## lm(formula = value_eur ~ age + weight_kg + height_cm, data = df)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
##  -9328723  -3916711  -2919226   -535315 180309486 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5527912    1442197   3.833 0.000127 ***
## age           205494      10552  19.475  < 2e-16 ***
## weight_kg      70790      10384   6.817 9.42e-12 ***
## height_cm     -61603      10502  -5.866 4.49e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9873000 on 46810 degrees of freedom
## Multiple R-squared:  0.01146,    Adjusted R-squared:  0.0114 
## F-statistic: 180.9 on 3 and 46810 DF,  p-value: < 2.2e-16
# Sequential ANOVA table for mod1: terms enter in formula order
# (age, weight_kg, height_cm), so each row tests a term given only the
# terms listed above it and the table depends on that ordering.
anova(mod1)
## Analysis of Variance Table
## 
## Response: value_eur
##              Df     Sum Sq    Mean Sq F value    Pr(>F)    
## age           1 4.8274e+16 4.8274e+16 495.201 < 2.2e-16 ***
## weight_kg     1 1.2726e+15 1.2726e+15  13.055 0.0003028 ***
## height_cm     1 3.3545e+15 3.3545e+15  34.411 4.493e-09 ***
## Residuals 46810 4.5632e+18 9.7484e+13                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 2: ordinary least squares fit of value_eur on the four rating
# columns physic, shooting, passing and dribbling, followed by its
# coefficient table and fit statistics.
mod2 <- lm(
  formula = value_eur ~ physic + shooting + passing + dribbling,
  data = df
)
summary(mod2)
## 
## Call:
## lm(formula = value_eur ~ physic + shooting + passing + dribbling, 
##     data = df)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
##  -7712520  -4175575  -2916495   -364496 179987555 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -3172529.5   428795.8  -7.399 1.40e-13 ***
## physic         78308.5     4921.8  15.910  < 2e-16 ***
## shooting       12681.0     5068.4   2.502   0.0124 *  
## passing        39805.0     7776.7   5.118 3.09e-07 ***
## dribbling        441.7     9242.4   0.048   0.9619    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9883000 on 46809 degrees of freedom
## Multiple R-squared:  0.009631,   Adjusted R-squared:  0.009547 
## F-statistic: 113.8 on 4 and 46809 DF,  p-value: < 2.2e-16
# Sequential ANOVA table for mod2: terms enter in formula order
# (physic, shooting, passing, dribbling). Only the last term's F test
# matches its t test in summary(mod2); earlier rows ignore later terms.
anova(mod2)
## Analysis of Variance Table
## 
## Response: value_eur
##              Df     Sum Sq    Mean Sq  F value    Pr(>F)    
## physic        1 3.0312e+16 3.0312e+16 310.3624 < 2.2e-16 ***
## shooting      1 9.2310e+15 9.2310e+15  94.5164 < 2.2e-16 ***
## passing       1 4.9165e+15 4.9165e+15  50.3404 1.311e-12 ***
## dribbling     1 2.2308e+11 2.2308e+11   0.0023    0.9619    
## Residuals 46809 4.5716e+18 9.7666e+13                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 3: ordinary least squares fit of value_eur on overall and
# club_jersey_number, followed by its coefficient table and fit statistics.
# NOTE(review): club_jersey_number enters as a numeric (linear) predictor;
# if it is only a label, a linear effect may not be meaningful -- TODO
# confirm the intended interpretation.
mod3 <- lm(
  formula = value_eur ~ overall + club_jersey_number,
  data = df
)
summary(mod3)
## 
## Call:
## lm(formula = value_eur ~ overall + club_jersey_number, data = df)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -18268073  -3830389  -1536729   1840690 160624578 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        -60118027     372186 -161.53   <2e-16 ***
## overall               922863       5113  180.48   <2e-16 ***
## club_jersey_number     56993       2208   25.81   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7569000 on 46811 degrees of freedom
## Multiple R-squared:  0.419,  Adjusted R-squared:  0.419 
## F-statistic: 1.688e+04 on 2 and 46811 DF,  p-value: < 2.2e-16
# Sequential ANOVA table for mod3: overall enters first, then
# club_jersey_number is tested given overall is already in the model.
anova(mod3)
## Analysis of Variance Table
## 
## Response: value_eur
##                       Df     Sum Sq    Mean Sq  F value    Pr(>F)    
## overall                1 1.8960e+18 1.8960e+18 33093.72 < 2.2e-16 ***
## club_jersey_number     1 3.8172e+16 3.8172e+16   666.26 < 2.2e-16 ***
## Residuals          46811 2.6819e+18 5.7292e+13                       
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1