Peter Goodridge
October 11, 2017
# Fit 100 small random forests on the raw predictors and store, for each fit,
# the value save_mets() returns for its test-set confusion matrix.
# `fs`, `train`, `test`, `veryhappy_test` and save_mets() are defined outside
# this chunk.
# NOTE(review): ntree = 2 is a very small forest -- confirm it is intentional.
for (i in seq_len(100)) {
  fit <- randomForest(
    as.factor(vhappy) ~ workstat + divorce + educ + income + regattend +
      babies + female + prestige + black + gwbush00,
    data = train,
    importance = TRUE,
    ntree = 2,
    nodesize = 3
  )
  predction <- predict(fit, test)
  # Build the confusion matrix once. A bare table() call inside a for loop is
  # never auto-printed, so the former unassigned call only duplicated work.
  confus1 <- table(Predicted = predction, Actual = veryhappy_test)
  fs[i] <- save_mets(confus1)
}
## [1] "Mean F score: 0.2888285"
| | 0 | 1 |
|---|---|---|
| 0 | 705 | 255 |
| 1 | 193 | 104 |
## [1] "Precision: 0.35017 Recall: 0.28969 Accuracy: 0.6436 F Measure: 0.31707"
| | PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 |
|---|---|---|---|---|---|---|---|
| workstat | -0.3923 | 0.1628 | -0.4017 | 0.1523 | -0.1047 | 0.1358 | 0.02633 |
| divorce | -0.00751 | 0.4509 | 0.3533 | 0.1942 | -0.68 | -0.1529 | -0.1762 |
| educ | -0.5294 | -0.338 | 0.07658 | -0.02035 | 0.09876 | -0.1158 | -0.1012 |
| income | -0.4057 | 0.0635 | 0.1592 | -0.04094 | -0.3043 | 0.1751 | 0.03531 |
| regattend | 0.1017 | -0.3746 | 0.07983 | 0.6666 | -0.1802 | 0.1214 | 0.5911 |
| babies | 0.0655 | -0.3099 | 0.06528 | -0.5528 | -0.3904 | 0.5867 | 0.1596 |
| female | 0.2175 | -0.4762 | 0.4904 | 0.01337 | -0.04191 | -0.2233 | -0.3535 |
| prestige | -0.5397 | -0.2764 | 0.04974 | 0.04069 | -0.05798 | -0.1723 | -0.1093 |
| black | 0.1897 | -0.2754 | -0.5171 | 0.2933 | -0.275 | 0.2036 | -0.6023 |
| gwbush00 | -0.1092 | 0.1911 | 0.4021 | 0.3149 | 0.3984 | 0.6589 | -0.2868 |
| | PC8 | PC9 | PC10 |
|---|---|---|---|
| workstat | -0.1583 | -0.7587 | -0.06583 |
| divorce | 0.3182 | -0.04311 | -0.1192 |
| educ | 0.2489 | 0.1274 | -0.6983 |
| income | -0.7421 | 0.3587 | 0.01928 |
| regattend | 0.03279 | 0.02074 | -0.02228 |
| babies | 0.2261 | -0.129 | 0.005008 |
| female | -0.3252 | -0.4505 | 0.003286 |
| prestige | 0.3008 | 0.05147 | 0.7 |
| black | -0.01021 | 0.233 | -0.02223 |
| gwbush00 | 0.1118 | -0.02672 | 0.04878 |
# Repeat the random-forest experiment on principal components of the first
# five columns instead of the raw predictors.
#
# The PCA is fitted on the training data only. Test rows are then projected
# with the *training* centring, scaling and rotation via predict(), so both
# sets live in the same component space. Fitting a second, independent PCA on
# the test set (as was done before) yields axes that need not match the ones
# the model was trained on. `pca.test` is therefore no longer created.
# `train`, `test`, `cols` and save_mets() are defined outside this chunk.
pca.train <- prcomp(train[, 1:5], scale. = TRUE)
test_scores <- predict(pca.train, newdata = test[, 1:5])

train_new <- cbind(pca.train$x, train[, cols])
test_new <- cbind(test_scores, test[, cols])

# Column 6 is the response appended after the five component scores.
vhappy_test <- as.character(test_new[, 6])
colnames(train_new) <- c(paste0('PC', 1:5), 'vhappy')
colnames(test_new) <- c(paste0('PC', 1:5), 'vhappy')

# Preallocate with zeros rather than 1:100 so that stale placeholder values
# can never be mistaken for real scores if the loop stops early.
fs <- numeric(100)
for (i in seq_along(fs)) {
  # Only the first four components are used as predictors.
  fit <- randomForest(
    as.factor(vhappy) ~ PC1 + PC2 + PC3 + PC4,
    data = train_new,
    importance = TRUE,
    ntree = 2,
    nodesize = 3
  )
  prediction <- predict(fit, test_new)
  confus2 <- table(Predicted = prediction, Actual = vhappy_test)
  fs[i] <- save_mets(confus2)
}
## [1] "Mean F score: 0.3156388"
| | 0 | 1 |
|---|---|---|
| 0 | 525 | 189 |
| 1 | 373 | 170 |
## [1] "Precision: 0.31308 Recall: 0.47354 Accuracy: 0.5529 F Measure: 0.37694"
| | 0 | 1 |
|---|---|---|
| 0 | 705 | 255 |
| 1 | 193 | 104 |
## [1] "Precision: 0.35017 Recall: 0.28969 Accuracy: 0.6436 F Measure: 0.31707"
| | 0 | 1 |
|---|---|---|
| 0 | 525 | 189 |
| 1 | 373 | 170 |
## [1] "Precision: 0.31308 Recall: 0.47354 Accuracy: 0.5529 F Measure: 0.37694"