library("ggplot2")
# Load the players CSV and keep only the numeric variables used by the
# models below. Columns are selected by name rather than by position
# (previously 2-7, 14, 22-26) so that a change in the CSV column order
# cannot silently select the wrong variables; a missing column now raises
# an "undefined columns selected" error instead.
df <- read.csv("Base de datos Jugadores.csv")
df <- na.omit(
  df[, c(
    "overall", "value_eur", "wage_eur", "age",
    "height_cm", "weight_kg", "club_jersey_number",
    "shooting", "passing", "dribbling", "defending", "physic"
  )]
)
# Rows with an NA in any of the selected columns have been dropped above,
# so every model is fitted on the same set of complete rows.
summary(df)
## overall value_eur wage_eur age
## Min. :42.00 Min. : 9000 Min. : 500 Min. :16.00
## 1st Qu.:64.00 1st Qu.: 600000 1st Qu.: 4000 1st Qu.:21.00
## Median :69.00 Median : 1400000 Median : 10000 Median :25.00
## Mean :69.09 Mean : 4817402 Mean : 21894 Mean :24.97
## 3rd Qu.:74.00 3rd Qu.: 4300000 3rd Qu.: 27000 3rd Qu.:28.00
## Max. :94.00 Max. :185000000 Max. :575000 Max. :45.00
## height_cm weight_kg club_jersey_number shooting
## Min. :155 Min. : 51.00 Min. : 1.00 Min. :14.0
## 1st Qu.:177 1st Qu.: 71.00 1st Qu.: 9.00 1st Qu.:41.0
## Median :182 Median : 75.00 Median :18.00 Median :54.0
## Mean :182 Mean : 75.82 Mean :20.63 Mean :51.8
## 3rd Qu.:187 3rd Qu.: 80.00 3rd Qu.:28.00 3rd Qu.:63.0
## Max. :206 Max. :107.00 Max. :99.00 Max. :93.0
## passing dribbling defending physic
## Min. :24.00 Min. :24.00 Min. :15.00 Min. :27.00
## 1st Qu.:50.00 1st Qu.:56.00 1st Qu.:35.00 1st Qu.:58.00
## Median :57.00 Median :63.00 Median :55.00 Median :66.00
## Mean :56.44 Mean :61.74 Mean :50.73 Mean :64.61
## 3rd Qu.:64.00 3rd Qu.:69.00 3rd Qu.:64.00 3rd Qu.:72.00
## Max. :93.00 Max. :96.00 Max. :90.00 Max. :92.00
# Model 1: linear regression of value_eur on age, weight_kg and height_cm,
# followed by its coefficient table and fit statistics.
mod1 <- lm(
  formula = value_eur ~ age + weight_kg + height_cm,
  data = df
)
summary(mod1)
##
## Call:
## lm(formula = value_eur ~ age + weight_kg + height_cm, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9328723 -3916711 -2919226 -535315 180309486
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5527912 1442197 3.833 0.000127 ***
## age 205494 10552 19.475 < 2e-16 ***
## weight_kg 70790 10384 6.817 9.42e-12 ***
## height_cm -61603 10502 -5.866 4.49e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9873000 on 46810 degrees of freedom
## Multiple R-squared: 0.01146, Adjusted R-squared: 0.0114
## F-statistic: 180.9 on 3 and 46810 DF, p-value: < 2.2e-16
# ANOVA table for mod1. Terms are added in formula order (age, weight_kg,
# height_cm), so each row is tested given the terms listed before it; only
# the last term's F test matches its t test in summary(mod1).
anova(mod1)
## Analysis of Variance Table
##
## Response: value_eur
## Df Sum Sq Mean Sq F value Pr(>F)
## age 1 4.8274e+16 4.8274e+16 495.201 < 2.2e-16 ***
## weight_kg 1 1.2726e+15 1.2726e+15 13.055 0.0003028 ***
## height_cm 1 3.3545e+15 3.3545e+15 34.411 4.493e-09 ***
## Residuals 46810 4.5632e+18 9.7484e+13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 2: linear regression of value_eur on the skill ratings physic,
# shooting, passing and dribbling, followed by its coefficient table and
# fit statistics.
mod2 <- lm(
  formula = value_eur ~ physic + shooting + passing + dribbling,
  data = df
)
summary(mod2)
##
## Call:
## lm(formula = value_eur ~ physic + shooting + passing + dribbling,
## data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7712520 -4175575 -2916495 -364496 179987555
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -3172529.5 428795.8 -7.399 1.40e-13 ***
## physic 78308.5 4921.8 15.910 < 2e-16 ***
## shooting 12681.0 5068.4 2.502 0.0124 *
## passing 39805.0 7776.7 5.118 3.09e-07 ***
## dribbling 441.7 9242.4 0.048 0.9619
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9883000 on 46809 degrees of freedom
## Multiple R-squared: 0.009631, Adjusted R-squared: 0.009547
## F-statistic: 113.8 on 4 and 46809 DF, p-value: < 2.2e-16
# ANOVA table for mod2. Terms are added in formula order (physic, shooting,
# passing, dribbling), so each row is tested given the terms listed before
# it; only the last term's F test matches its t test in summary(mod2).
anova(mod2)
## Analysis of Variance Table
##
## Response: value_eur
## Df Sum Sq Mean Sq F value Pr(>F)
## physic 1 3.0312e+16 3.0312e+16 310.3624 < 2.2e-16 ***
## shooting 1 9.2310e+15 9.2310e+15 94.5164 < 2.2e-16 ***
## passing 1 4.9165e+15 4.9165e+15 50.3404 1.311e-12 ***
## dribbling 1 2.2308e+11 2.2308e+11 0.0023 0.9619
## Residuals 46809 4.5716e+18 9.7666e+13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model 3: linear regression of value_eur on overall and
# club_jersey_number, followed by its coefficient table and fit statistics.
mod3 <- lm(
  formula = value_eur ~ overall + club_jersey_number,
  data = df
)
summary(mod3)
##
## Call:
## lm(formula = value_eur ~ overall + club_jersey_number, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -18268073 -3830389 -1536729 1840690 160624578
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -60118027 372186 -161.53 <2e-16 ***
## overall 922863 5113 180.48 <2e-16 ***
## club_jersey_number 56993 2208 25.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7569000 on 46811 degrees of freedom
## Multiple R-squared: 0.419, Adjusted R-squared: 0.419
## F-statistic: 1.688e+04 on 2 and 46811 DF, p-value: < 2.2e-16
# ANOVA table for mod3. Terms are added in formula order (overall, then
# club_jersey_number), so club_jersey_number is tested given overall and
# its F test matches its t test in summary(mod3).
anova(mod3)
## Analysis of Variance Table
##
## Response: value_eur
## Df Sum Sq Mean Sq F value Pr(>F)
## overall 1 1.8960e+18 1.8960e+18 33093.72 < 2.2e-16 ***
## club_jersey_number 1 3.8172e+16 3.8172e+16 666.26 < 2.2e-16 ***
## Residuals 46811 2.6819e+18 5.7292e+13
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1