## ── Attaching packages ──────────────── tidyverse 1.3.0 ──
## ✔ ggplot2 3.2.1.9000 ✔ purrr 0.3.3
## ✔ tibble 2.1.3 ✔ dplyr 0.8.4
## ✔ tidyr 1.0.2 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ─────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## randomForest 4.6-14
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:dplyr':
##
## combine
## The following object is masked from 'package:ggplot2':
##
## margin
## Loading required package: car
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
## The following object is masked from 'package:purrr':
##
## some
## Loading required package: lmtest
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
## Loading required package: survival
## inlf hours kidslt6 kidsge6 age educ wage repwage hushrs husage huseduc
## 1 1 1610 1 0 32 12 3.3540 2.65 2708 34 12
## 2 1 1656 0 2 30 12 1.3889 2.65 2310 30 9
## 3 1 1980 1 3 35 12 4.5455 4.04 3072 40 12
## 4 1 456 0 3 34 12 1.0965 3.25 1920 53 10
## 5 1 1568 1 2 31 14 4.5918 3.60 2000 32 12
## 6 1 2032 0 0 54 12 4.7421 4.70 1040 57 11
## huswage faminc mtr motheduc fatheduc unem city exper nwifeinc
## 1 4.0288 16310 0.7215 12 7 5.0 0 14 10.910060
## 2 8.4416 21800 0.6615 7 7 11.0 1 5 19.499981
## 3 3.5807 21040 0.6915 12 7 5.0 0 15 12.039910
## 4 3.5417 7300 0.7815 7 7 5.0 0 6 6.799996
## 5 10.0000 27300 0.6215 12 14 9.5 1 7 20.100058
## 6 6.7106 19495 0.6915 14 7 7.5 1 33 9.859054
## lwage expersq
## 1 1.21015370 196
## 2 0.32851210 25
## 3 1.51413774 225
## 4 0.09212332 36
## 5 1.52427220 49
## 6 1.55648005 1089
## [1] 753 22
## [1] 428 22
##
## Call:
## lm(formula = lwage ~ exper + expersq + educ, data = working_w)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.08404 -0.30627 0.04952 0.37498 2.37115
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5220406 0.1986321 -2.628 0.00890 **
## exper 0.0415665 0.0131752 3.155 0.00172 **
## expersq -0.0008112 0.0003932 -2.063 0.03974 *
## educ 0.1074896 0.0141465 7.598 1.94e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6664 on 424 degrees of freedom
## Multiple R-squared: 0.1568, Adjusted R-squared: 0.1509
## F-statistic: 26.29 on 3 and 424 DF, p-value: 1.302e-15
##
## Call:
## lm(formula = lwage ~ exper + expersq + kidslt6 + kidsge6 + husage +
## huswage + city + educ, data = working_w)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.07431 -0.30500 0.05477 0.37871 2.31157
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3853695 0.3163043 -1.218 0.22378
## exper 0.0398817 0.0133651 2.984 0.00301 **
## expersq -0.0007400 0.0003985 -1.857 0.06402 .
## kidslt6 -0.0564071 0.0890759 -0.633 0.52692
## kidsge6 -0.0143165 0.0276579 -0.518 0.60499
## husage -0.0028828 0.0049338 -0.584 0.55934
## huswage 0.0177470 0.0102733 1.727 0.08482 .
## city 0.0119960 0.0725595 0.165 0.86877
## educ 0.0986810 0.0151589 6.510 2.16e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6669 on 419 degrees of freedom
## Multiple R-squared: 0.1654, Adjusted R-squared: 0.1495
## F-statistic: 10.38 on 8 and 419 DF, p-value: 2.691e-13
##
## Call:
## lm(formula = educ ~ exper + expersq + kidslt6 + kidsge6 + husage +
## huswage + city + motheduc + fatheduc + huseduc, data = working_w)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.6882 -1.0572 0.0695 1.0888 5.7414
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.4326100 0.8071869 6.730 5.61e-11 ***
## exper 0.0368317 0.0344951 1.068 0.286257
## expersq -0.0006265 0.0010298 -0.608 0.543238
## kidslt6 0.4981594 0.2294117 2.171 0.030459 *
## kidsge6 -0.1015625 0.0714713 -1.421 0.156057
## husage 0.0045715 0.0130349 0.351 0.725978
## huswage 0.0630119 0.0272447 2.313 0.021219 *
## city -0.1385816 0.1904655 -0.728 0.467270
## motheduc 0.1198724 0.0312979 3.830 0.000148 ***
## fatheduc 0.1010756 0.0298633 3.385 0.000780 ***
## huseduc 0.3441645 0.0320967 10.723 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.723 on 417 degrees of freedom
## Multiple R-squared: 0.4447, Adjusted R-squared: 0.4314
## F-statistic: 33.4 on 10 and 417 DF, p-value: < 2.2e-16
##
## Call:
## lm(formula = lwage ~ exper + expersq + kidslt6 + kidsge6 + husage +
## huswage + city + predictions_first_stage, data = working_w)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.13493 -0.30004 0.03046 0.37142 2.27199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1763588 0.4206911 0.419 0.6753
## exper 0.0419047 0.0139885 2.996 0.0029 **
## expersq -0.0007881 0.0004167 -1.891 0.0593 .
## kidslt6 -0.0255934 0.0941128 -0.272 0.7858
## kidsge6 -0.0234422 0.0291914 -0.803 0.4224
## husage -0.0042628 0.0051919 -0.821 0.4121
## huswage 0.0263802 0.0114511 2.304 0.0217 *
## city 0.0215685 0.0759034 0.284 0.7764
## predictions_first_stage 0.0531993 0.0263735 2.017 0.0443 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6965 on 419 degrees of freedom
## Multiple R-squared: 0.08988, Adjusted R-squared: 0.0725
## F-statistic: 5.172 on 8 and 419 DF, p-value: 3.581e-06
##
## Call:
## ivreg(formula = lwage ~ exper + expersq + kidslt6 + kidsge6 +
## husage + huswage + city + educ | . - educ + motheduc + fatheduc +
## huseduc, data = working_w)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.10175 -0.30407 0.03379 0.35255 2.25107
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.1763588 0.4071522 0.433 0.6651
## exper 0.0419047 0.0135384 3.095 0.0021 **
## expersq -0.0007881 0.0004033 -1.954 0.0514 .
## kidslt6 -0.0255934 0.0910840 -0.281 0.7789
## kidsge6 -0.0234422 0.0282519 -0.830 0.4071
## husage -0.0042628 0.0050249 -0.848 0.3967
## huswage 0.0263802 0.0110826 2.380 0.0177 *
## city 0.0215685 0.0734606 0.294 0.7692
## educ 0.0531993 0.0255247 2.084 0.0377 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6741 on 419 degrees of freedom
## Multiple R-Squared: 0.1475, Adjusted R-squared: 0.1312
## Wald test: 5.522 on 8 and 419 DF, p-value: 1.191e-06
##
## Call:
## lm(formula = lwage ~ exper + expersq + kidslt6 + kidsge6 + husage +
## huswage + city + predictions_first_stage, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.09828 -0.28606 0.05248 0.37258 2.29947
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0037711 0.4489252 -0.008 0.99330
## exper 0.0449370 0.0145632 3.086 0.00218 **
## expersq -0.0008394 0.0004344 -1.933 0.05404 .
## kidslt6 -0.0630522 0.0963953 -0.654 0.51345
## kidsge6 -0.0197164 0.0306834 -0.643 0.52089
## husage -0.0034744 0.0054358 -0.639 0.52310
## huswage 0.0219622 0.0118602 1.852 0.06484 .
## city 0.0679668 0.0804317 0.845 0.39863
## predictions_first_stage 0.0618777 0.0283253 2.185 0.02954 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6952 on 376 degrees of freedom
## Multiple R-squared: 0.1035, Adjusted R-squared: 0.08438
## F-statistic: 5.424 on 8 and 376 DF, p-value: 1.764e-06
##
## Call:
## lm(formula = lwage ~ exper + expersq + kidslt6 + kidsge6 + husage +
## huswage + city + predictions_first_stage_rf, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -3.0655 -0.3198 0.0376 0.3710 2.3277
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0416945 0.4824998 -0.086 0.93118
## exper 0.0460311 0.0145543 3.163 0.00169 **
## expersq -0.0008594 0.0004344 -1.978 0.04863 *
## kidslt6 -0.0420827 0.0952030 -0.442 0.65872
## kidsge6 -0.0211208 0.0306490 -0.689 0.49117
## husage -0.0033102 0.0054660 -0.606 0.54514
## huswage 0.0229111 0.0118142 1.939 0.05322 .
## city 0.0688384 0.0805209 0.855 0.39314
## predictions_first_stage_rf 0.0629275 0.0306877 2.051 0.04100 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6957 on 376 degrees of freedom
## Multiple R-squared: 0.1021, Adjusted R-squared: 0.08302
## F-statistic: 5.346 on 8 and 376 DF, p-value: 2.251e-06
# Random-forest second stage: model lwage from the listed regressors plus
# predictions_first_stage_rf, using the rows of `train`.
second_stage_rf_rf <- randomForest(
  lwage ~ exper + expersq + kidslt6 + kidsge6 + husage + huswage + city +
    predictions_first_stage_rf,
  data = train
)

library("iml")

# Wrap the fitted forest in an iml Predictor. Only the regressor columns of
# `test` are handed over as feature data; the observed lwage is supplied
# separately through `y`.
# NOTE(review): iml documents `class` as picking a class column for
# classification output -- confirm "regression" is actually needed here.
predictor <- Predictor$new(
  model = second_stage_rf_rf,
  data = test %>%
    select(
      exper, expersq, kidslt6, kidsge6, husage, huswage, city,
      predictions_first_stage_rf
    ),
  y = test$lwage,
  predict.fun = predict,
  class = "regression"
)

# Feature importance scored with the RMSE loss, then plotted with the
# black-and-white ggplot2 theme.
imp_rf <- FeatureImp$new(predictor, loss = "rmse")
plot(imp_rf) + theme_bw()


## # A tibble: 6 x 3
## vars1 vars2 value
## <chr> <chr> <dbl>
## 1 exper exper 1
## 2 exper expersq 0.959
## 3 exper kidslt6 -0.272
## 4 exper kidsge6 -0.360
## 5 exper husage 0.487
## 6 exper huswage -0.181
## # A tibble: 5 x 3
## vars1 vars2 value
## <chr> <chr> <dbl>
## 1 predictions_first_stage_rf expersq 0.243
## 2 predictions_first_stage_rf kidslt6 0.292
## 3 predictions_first_stage_rf husage 0.217
## 4 predictions_first_stage_rf huswage 0.494
## 5 predictions_first_stage_rf city 0.369

# Two hypothetical observations that are identical in every column except
# kidsge6 (0 vs 2). The length-1 values are recycled by data.frame() to match
# the two-element kidsge6 column.
new_obs <- data.frame(
  exper = 10,
  expersq = 100,
  kidslt6 = 1,
  kidsge6 = c(0, 2),
  husage = 35,
  huswage = 6,
  city = 1,
  predictions_first_stage_rf = 10
)
# Print the freshly built data frame.
new_obs
## exper expersq kidslt6 kidsge6 husage huswage city
## 1 10 100 1 0 35 6 1
## 2 10 100 1 2 35 6 1
## predictions_first_stage_rf
## 1 10
## 2 10
## 1 2
## 1.139720 1.216423

