El conjunto de datos salary.csv
contiene información de
un estudio para determinar el salario de una persona en una empresa en
base a las siguientes características:
# Load the salary data set, convert Education.level to a factor so it is
# handled as a categorical variable, and preview the first rows.
salario <- read.csv(file = "salary.csv")
salario[["Education.level"]] <- as.factor(salario[["Education.level"]])
head(salario)
## Gender Age Years.of.service Education.level Salary
## 1 0 27 1.7 0 39343
## 2 1 26 1.1 1 43205
## 3 1 26 1.2 0 47731
## 4 0 27 1.6 1 46525
## 5 0 26 1.5 1 40891
## 6 1 28 2.3 0 56642
##
## Call:
## lm(formula = Salary ~ ., data = salario)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9249.2 -2753.4 805.4 3127.8 9389.8
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5422.2 16899.8 0.321 0.7520
## Gender 4556.8 2358.3 1.932 0.0692 .
## Age 1575.3 747.9 2.106 0.0495 *
## Years.of.service -378.2 2466.7 -0.153 0.8798
## Education.level1 1933.7 2553.7 0.757 0.4587
## Education.level2 2783.1 3161.0 0.880 0.3902
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5309 on 18 degrees of freedom
## Multiple R-squared: 0.5753, Adjusted R-squared: 0.4574
## F-statistic: 4.877 on 5 and 18 DF, p-value: 0.005388
## Age Years.of.service Salary
## Age 1.000 0.888 0.670
## Years.of.service 0.888 1.000 0.584
## Salary 0.670 0.584 1.000
## Gender Age Years.of.service Education.level1
## 1.175690 5.179300 5.305805 1.378581
## Education.level2
## 1.403231
# Draw the values returned by vif(m1) as horizontal bars.
barplot(
  height = vif(m1),
  horiz = TRUE,
  col = "steelblue",
  main = "VIF Values"
)
# Thick dashed vertical reference line at x = 4.
abline(v = 4, lty = 2, lwd = 3)
##
## Call:
## lm(formula = Salary ~ Gender + Years.of.service + Education.level,
## data = salario)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9245.5 -2849.5 441.6 3166.0 9063.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 40221 3860 10.421 2.7e-09 ***
## Gender 5214 2540 2.052 0.05416 .
## Years.of.service 4287 1180 3.634 0.00177 **
## Education.level1 2301 2769 0.831 0.41630
## Education.level2 1165 3332 0.350 0.73048
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5769 on 19 degrees of freedom
## Multiple R-squared: 0.4707, Adjusted R-squared: 0.3592
## F-statistic: 4.224 on 4 and 19 DF, p-value: 0.01298
## Gender Years.of.service Education.level1 Education.level2
## 1.155135 1.027509 1.372164 1.320328
##
## Call:
## lm(formula = Salary ~ Gender + Age + Education.level, data = salario)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9181.4 -2692.8 714.6 3241.7 9314.1
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7518.8 9673.1 0.777 0.446557
## Gender 4617.0 2264.9 2.038 0.055663 .
## Age 1472.3 320.6 4.593 0.000199 ***
## Education.level1 1970.6 2476.2 0.796 0.435966
## Education.level2 2658.7 2975.7 0.894 0.382767
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5171 on 19 degrees of freedom
## Multiple R-squared: 0.5748, Adjusted R-squared: 0.4853
## F-statistic: 6.421 on 4 and 19 DF, p-value: 0.001915
## Gender Age Education.level1 Education.level2
## 1.143138 1.003011 1.366338 1.310838
# Add a derived column, Age minus Years.of.service, and fit a linear model
# of Salary on Gender, that derived column and Education.level.
salario$age_at_joining <- with(salario, Age - Years.of.service)
m4 <- lm(
  formula = Salary ~ Gender + age_at_joining + Education.level,
  data = salario
)
# Print the coefficient table and fit statistics.
summary(m4)
##
## Call:
## lm(formula = Salary ~ Gender + age_at_joining + Education.level,
## data = salario)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9641.1 -3047.6 843.4 2924.0 9785.7
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -2258.8 12001.2 -0.188 0.852702
## Gender 4292.8 2288.6 1.876 0.076147 .
## age_at_joining 1969.7 438.2 4.495 0.000248 ***
## Education.level1 1767.2 2502.7 0.706 0.488691
## Education.level2 3295.7 3016.7 1.093 0.288272
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5229 on 19 degrees of freedom
## Multiple R-squared: 0.5652, Adjusted R-squared: 0.4736
## F-statistic: 6.174 on 4 and 19 DF, p-value: 0.002333
## Gender age_at_joining Education.level1 Education.level2
## 1.141427 1.006781 1.364957 1.317478