crime <- read.delim('/Users/helenalindsay/Documents/Fall_23/ISYE6501/hw5/uscrime.txt')
Q10.1
Regression tree model
#install.packages("rpart")
library(rpart)
tree_model <- rpart(Crime ~ ., data = crime)
print(tree_model)
## n= 47
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 47 6880928.0 905.0851
## 2) Po1< 7.65 23 779243.5 669.6087
## 4) Pop< 22.5 12 243811.0 550.5000 *
## 5) Pop>=22.5 11 179470.7 799.5455 *
## 3) Po1>=7.65 24 3604162.0 1130.7500
## 6) NW< 7.65 10 557574.9 886.9000 *
## 7) NW>=7.65 14 2027225.0 1304.9290 *
Visualize the model
plot(tree_model,uniform=TRUE, margin=0.1, compress=TRUE, cex=0.8, width=20, height=10)
text(tree_model)

Random forest model
#install.packages("randomForest")
library(randomForest)
set.seed(131)
rf_model <- randomForest(Crime ~ ., data = crime) # ntree=300
print(rf_model)
##
## Call:
## randomForest(formula = Crime ~ ., data = crime)
## Type of random forest: regression
## Number of trees: 500
## No. of variables tried at each split: 5
##
## Mean of squared residuals: 82510.74
## % Var explained: 43.64
Visualize the model
plot(rf_model)

Q10.3
credit <- read.table("/Users/helenalindsay/Documents/Fall_23/ISYE6501/hw5/germancredit.txt")
credit$V21 <- ifelse(credit$V21 == 1, 1, 0) # good = 1, bad = 0
credit$V21 <- as.factor(credit$V21)
credit_model <- glm(V21 ~ ., data=credit, family=binomial(link='logit'))
summary(credit_model)
##
## Call:
## glm(formula = V21 ~ ., family = binomial(link = "logit"), data = credit)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.6116 -0.7095 0.3752 0.6994 2.3410
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -4.005e-01 1.084e+00 -0.369 0.711869
## V1A12 3.749e-01 2.179e-01 1.720 0.085400 .
## V1A13 9.657e-01 3.692e-01 2.616 0.008905 **
## V1A14 1.712e+00 2.322e-01 7.373 1.66e-13 ***
## V2 -2.786e-02 9.296e-03 -2.997 0.002724 **
## V3A31 -1.434e-01 5.489e-01 -0.261 0.793921
## V3A32 5.861e-01 4.305e-01 1.362 0.173348
## V3A33 8.532e-01 4.717e-01 1.809 0.070470 .
## V3A34 1.436e+00 4.399e-01 3.264 0.001099 **
## V4A41 1.666e+00 3.743e-01 4.452 8.51e-06 ***
## V4A410 1.489e+00 7.764e-01 1.918 0.055163 .
## V4A42 7.916e-01 2.610e-01 3.033 0.002421 **
## V4A43 8.916e-01 2.471e-01 3.609 0.000308 ***
## V4A44 5.228e-01 7.623e-01 0.686 0.492831
## V4A45 2.164e-01 5.500e-01 0.393 0.694000
## V4A46 -3.628e-02 3.965e-01 -0.092 0.927082
## V4A48 2.059e+00 1.212e+00 1.699 0.089297 .
## V4A49 7.401e-01 3.339e-01 2.216 0.026668 *
## V5 -1.283e-04 4.444e-05 -2.887 0.003894 **
## V6A62 3.577e-01 2.861e-01 1.250 0.211130
## V6A63 3.761e-01 4.011e-01 0.938 0.348476
## V6A64 1.339e+00 5.249e-01 2.551 0.010729 *
## V6A65 9.467e-01 2.625e-01 3.607 0.000310 ***
## V7A72 6.691e-02 4.270e-01 0.157 0.875475
## V7A73 1.828e-01 4.105e-01 0.445 0.656049
## V7A74 8.310e-01 4.455e-01 1.866 0.062110 .
## V7A75 2.766e-01 4.134e-01 0.669 0.503410
## V8 -3.301e-01 8.828e-02 -3.739 0.000185 ***
## V9A92 2.755e-01 3.865e-01 0.713 0.476040
## V9A93 8.161e-01 3.799e-01 2.148 0.031718 *
## V9A94 3.671e-01 4.537e-01 0.809 0.418448
## V10A102 -4.360e-01 4.101e-01 -1.063 0.287700
## V10A103 9.786e-01 4.243e-01 2.307 0.021072 *
## V11 -4.776e-03 8.641e-02 -0.055 0.955920
## V12A122 -2.814e-01 2.534e-01 -1.111 0.266630
## V12A123 -1.945e-01 2.360e-01 -0.824 0.409743
## V12A124 -7.304e-01 4.245e-01 -1.721 0.085308 .
## V13 1.454e-02 9.222e-03 1.576 0.114982
## V14A142 1.232e-01 4.119e-01 0.299 0.764878
## V14A143 6.463e-01 2.391e-01 2.703 0.006871 **
## V15A152 4.436e-01 2.347e-01 1.890 0.058715 .
## V15A153 6.839e-01 4.770e-01 1.434 0.151657
## V16 -2.721e-01 1.895e-01 -1.436 0.151109
## V17A172 -5.361e-01 6.796e-01 -0.789 0.430160
## V17A173 -5.547e-01 6.549e-01 -0.847 0.397015
## V17A174 -4.795e-01 6.623e-01 -0.724 0.469086
## V18 -2.647e-01 2.492e-01 -1.062 0.288249
## V19A192 3.000e-01 2.013e-01 1.491 0.136060
## V20A202 1.392e+00 6.258e-01 2.225 0.026095 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1221.73 on 999 degrees of freedom
## Residual deviance: 895.82 on 951 degrees of freedom
## AIC: 993.82
##
## Number of Fisher Scoring iterations: 5
ROC curve
#install.packages('ROCR')
library(ROCR)
pr <- prediction(fitted.results, credit$V21)
prf <- performance(pr, measure = "tpr", x.measure = "fpr")
plot(prf)

auc <- performance(pr, measure = "auc")
auc <- auc@y.values[[1]]
auc
## [1] 0.7609524