Your task: build a model to predict rates of racial segregation at
the county level. First, choose your variables and split data into
training and test sets. Using a random forest model, you must choose
three variables as features (independent variables) in your model.
Create a plot(s) to show the importance and node purity of the features
in your model. Finally, check the rmse of the training and validation
models to inspect for overfitting.
Warning message:
In normalizePath(quartoSrcFile, winslash = "/") :
path[1]="": No such file or directory
model_data_rs <- data %>%
select(c("racial_segregation", "unemp_rate", "frac_middleclass", "poverty_rate"))
Error in data %>% select(c("racial_segregation", "unemp_rate", "frac_middleclass", :
could not find function "%>%"
varImpPlot(rf.fit)
model.valid_rs$predict <- predict(rf.fit, newdata = model.valid_rs)
model.valid_rs <- model.valid_rs %>%
mutate(error = racial_segregation-predict) %>%
mutate(square_error = error^2)
sqrt(mean(model.valid_rs$square_error, na.rm=T))
model.train_rs$predict <- predict(rf.fit, newdata = model.train_rs)
model.train_rs <- model.train_rs %>%
mutate(error = racial_segregation-predict) %>%
mutate(square_error = error^2)
sqrt(mean(model.train_rs$square_error, na.rm=T))
#repeat this analysis using linear regression
lm1 <- lm(racial_segregation ~ unemp_rate+frac_middleclass+poverty_rate, data = model.train_rs)
summary(lm1)
model.valid_rs <- model.valid_rs %>%
mutate(yhat = predict(lm1, newdata = model.valid_rs)) %>%
mutate(lm_residual = racial_segregation - yhat) %>%
mutate(lm_residual_sq = lm_residual^2)
ggplot(model.valid_rs) +
geom_point(aes(x=yhat, y=lm_residual)) +
geom_hline(aes(yintercept=0), linetype="dashed", color="red") +
xlab("Predicted Inequality") +
ylab("Residual Inequality")+
theme_minimal()+
labs(title = "Residual Plot for Validation Model")
#calculate rmse
sqrt(mean(model.valid_rs$lm_residual_sq, na.rm=T))
LS0tCnRpdGxlOiAiTUwgQWN0aXZpdHkiCm91dHB1dDogaHRtbF9ub3RlYm9vawotLS0KCllvdXIgdGFzazogYnVpbGQgYSBtb2RlbCB0byBwcmVkaWN0IHJhdGVzIG9mIHJhY2lhbCBzZWdyZWdhdGlvbiBhdCB0aGUgY291bnR5IGxldmVsLiBGaXJzdCwgY2hvb3NlIHlvdXIgdmFyaWFibGVzIGFuZCBzcGxpdCBkYXRhIGludG8gdHJhaW5pbmcgYW5kIHRlc3Qgc2V0cy4gVXNpbmcgYSByYW5kb20gZm9yZXN0IG1vZGVsLCB5b3UgbXVzdCBjaG9vc2UgdGhyZWUgdmFyaWFibGVzIGFzIGZlYXR1cmVzIChpbmRlcGVuZGVudCB2YXJpYWJsZXMpIGluIHlvdXIgbW9kZWwuIENyZWF0ZSBhIHBsb3QocykgdG8gc2hvdyB0aGUgaW1wb3J0YW5jZSBhbmQgbm9kZSBwdXJpdHkgb2YgdGhlIGZlYXR1cmVzIGluIHlvdXIgbW9kZWwuIEZpbmFsbHksIGNoZWNrIHRoZSBybXNlIG9mIHRoZSB0cmFpbmluZyBhbmQgdmFsaWRhdGlvbiBtb2RlbHMgdG8gaW5zcGVjdCBmb3Igb3ZlcmZpdHRpbmcuIAoKYGBge3J9Cgptb2RlbF9kYXRhX3JzIDwtIGRhdGEgJT4lIAogIHNlbGVjdChjKCJyYWNpYWxfc2VncmVnYXRpb24iLCAidW5lbXBfcmF0ZSIsICJmcmFjX21pZGRsZWNsYXNzIiwgInBvdmVydHlfcmF0ZSIpKQoKI3NwbGl0IGludG8gdHJhaW5pbmcgYW5kIHRlc3RpbmcKc2V0LnNlZWQoODY3NTMwOSkKCnNwbGl0IDwtIDAuNzUKcm93cyAgPC0gbnJvdyhtb2RlbF9kYXRhX3JzKQoKdHJhaW4uZW50cmllcyA8LSBzYW1wbGUocm93cywgcm93cypzcGxpdCkKCm1vZGVsLnRyYWluX3JzIDwtIG1vZGVsX2RhdGFfcnNbdHJhaW4uZW50cmllcywgXQptb2RlbC52YWxpZF9ycyAgPC0gbW9kZWxfZGF0YV9yc1stdHJhaW4uZW50cmllcywgIF0KCm1vZGVsLnRyYWluX3JzIDwtIG1vZGVsLnRyYWluX3JzICU+JSBkcm9wX25hKCkKCiNydW4gdGhlIG1vZGVsCnNldC5zZWVkKDQ1NDMpCnJmLmZpdCA8LSByYW5kb21Gb3Jlc3QocmFjaWFsX3NlZ3JlZ2F0aW9uIH4gLiwgZGF0YT1tb2RlbC50cmFpbl9ycywgbnRyZWU9MTAwMCwgbXRyeSA9IDEsIGtlZXAuZm9yZXN0PVQsIGltcG9ydGFuY2U9VCkKCnJmLmZpdAoKYGBgCmBgYHtyfQp2YXJJbXBQbG90KHJmLmZpdCkKYGBgCmBgYHtyfQptb2RlbC52YWxpZF9ycyRwcmVkaWN0IDwtIHByZWRpY3QocmYuZml0LCBuZXdkYXRhID0gbW9kZWwudmFsaWRfcnMpCgptb2RlbC52YWxpZF9ycyA8LSBtb2RlbC52YWxpZF9ycyAlPiUgCiAgbXV0YXRlKGVycm9yID0gcmFjaWFsX3NlZ3JlZ2F0aW9uLXByZWRpY3QpICU+JSAKICBtdXRhdGUoc3F1YXJlX2Vycm9yID0gZXJyb3JeMikgCgpzcXJ0KG1lYW4obW9kZWwudmFsaWRfcnMkc3F1YXJlX2Vycm9yLCBuYS5ybT1UKSkKYGBgCmBgYHtyfQptb2RlbC50cmFpbl9ycyRwcmVkaWN0IDwtIHByZWRpY3QocmYuZml0LCBuZXdkYXRhID0gbW9kZWwudHJhaW5fcnMpCgptb2RlbC50cmFpbl9ycyA8LSBtb2RlbC50cmFpbl9ycyAlPiUgCiAgbXV0YXRlKGVycm9yID0gcmFjaWFsX3NlZ3JlZ2F0aW9uLXByZWRpY3QpICU+JSAKICBtdXRhdGUoc3F1YXJlX2Vycm9yID0gZXJyb3JeMikgCgpzcXJ0KG1lYW4obW9kZWwudHJhaW5fcnMkc3F1YXJlX2Vycm9yLCBuYS5ybT1UKSkKYGBgCmBgYHtyfQojcmVwZWF0IHRoaXMgYW5hbHlzaXMgdXNpbmcgbGluZWFyIHJlZ3Jlc3Npb24KCmxtMSA8LSBsbShyYWNpYWxfc2VncmVnYXRpb24gfiB1bmVtcF9yYXRlK2ZyYWNfbWlkZGxlY2xhc3MrcG92ZXJ0eV9yYXRlLCBkYXRhID0gbW9kZWwudHJhaW5fcnMpCgpzdW1tYXJ5KGxtMSkKYGBgCmBgYHtyfQptb2RlbC52YWxpZF9ycyA8LSBtb2RlbC52YWxpZF9ycyAlPiUgCiAgbXV0YXRlKHloYXQgPSBwcmVkaWN0KGxtMSwgbmV3ZGF0YSA9IG1vZGVsLnZhbGlkX3JzKSkgJT4lIAogIG11dGF0ZShsbV9yZXNpZHVhbCA9IHJhY2lhbF9zZWdyZWdhdGlvbiAtIHloYXQpICU+JSAKICBtdXRhdGUobG1fcmVzaWR1YWxfc3EgPSBsbV9yZXNpZHVhbF4yKQoKZ2dwbG90KG1vZGVsLnZhbGlkX3JzKSArCiAgZ2VvbV9wb2ludChhZXMoeD15aGF0LCB5PWxtX3Jlc2lkdWFsKSkgKwogIGdlb21faGxpbmUoYWVzKHlpbnRlcmNlcHQ9MCksIGxpbmV0eXBlPSJkYXNoZWQiLCBjb2xvcj0icmVkIikgKwogIHhsYWIoIlByZWRpY3RlZCBJbmVxdWFsaXR5IikgKwogIHlsYWIoIlJlc2lkdWFsIEluZXF1YWxpdHkiKSsKICB0aGVtZV9taW5pbWFsKCkrCiAgbGFicyh0aXRsZSA9ICJSZXNpZHVhbCBQbG90IGZvciBWYWxpZGF0aW9uIE1vZGVsIikKYGBgCgpgYGB7cn0KI2NhbGN1bGF0ZSBybXNlCgpzcXJ0KG1lYW4obW9kZWwudmFsaWRfcnMkbG1fcmVzaWR1YWxfc3EsIG5hLnJtPVQpKQoKYGBgCgo=