DATA 621 Homework #3
df <- read.csv('data/crime-training-data_modified.csv')
evaluation <- read.csv("data/crime-evaluation-data_modified.csv")
Data Exploration
Are There Missing Values?
First we look at the data to see if any variables have missing data:
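A quick way to check this (a minimal sketch; the report's own summary chunk is not shown) is to count the NA values in each column:
colSums(is.na(df))  # zero for every column means there are no missing values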
It looks like we have a complete data set. No need to impute values.
Splitting the Data
Next, we split our data into a training set (train) and a test set (test), using a 70-30 split between train and test, respectively.
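A minimal sketch of one way to perform this split, assuming caret's createDataPartition and an arbitrary seed (the report's actual chunk is not shown):
set.seed(123)  # assumed seed for reproducibility
train_idx <- caret::createDataPartition(df$target, p = 0.7, list = FALSE)
train <- df[train_idx, ]
test  <- df[-train_idx, ]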
Exploratory Data Analysis
Now, let’s look at our training data. By looking at a correlation matrix, we can see which variables may be too correlated to be included together in a model as predictor variables. This will help us later during the model selection process.
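For example, the pairwise correlations can be inspected with a sketch like the following (the package used to plot the matrix in the report may differ):
round(cor(train), 2)             # numeric correlation matrix of the training data
# corrplot::corrplot(cor(train)) # optional visualization, if corrplot is installed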
Next, we will look at each potential predictor and how it is distributed across the target variable.
The following plots show how each predictor is distributed for a positive target (areas with crime rates above the median, in blue) and a negative target (areas with crime rates below the median, in red). What we are looking for are variables that suggest a way to split the data into the two groups.
for (var in names(train)){
  if (var != "target"){
    plot_df <- train
    plot_df$x <- plot_df[, var]
    p <- ggplot(plot_df, aes(x, color = factor(target))) +
      geom_density() +
      theme_light() +
      ggtitle(var) +
      scale_color_brewer(palette = "Set1") +
      theme(legend.position = "none",
            axis.title.y = element_blank(),
            axis.title.x = element_blank())
    print(p)
  }
}
Looking at the plots above, the nox variable seems to be the best variable to divide the data into the two groups.
Data Preparation
Feature Engineering
library(fuzzyjoin)

# density_diff(): for a given predictor `var`, estimate its density separately for
# each target group (after standardizing the variable to [0, 1]), take the difference
# between the two density curves, and attach that difference to each observation as a
# new feature (`new_column`). The same lookup is applied to the evaluation data.
# Returns the density plots plus the augmented training and evaluation data sets.
density_diff <- function(data, eval_data, var, target, new_column){
  # keep a copy of the original (unstandardized) variable
  data[[paste0(var, "temp")]] <- data[[var]]
  eval_data[[paste0(var, "temp")]] <- eval_data[[var]]

  # standardize the variable between 0 and 1
  data[[var]] <- (data[[var]] - min(data[[var]])) / (max(data[[var]]) - min(data[[var]]))
  eval_data[[var]] <- (eval_data[[var]] - min(eval_data[[var]])) / (max(eval_data[[var]]) - min(eval_data[[var]]))

  # calculate density estimates for each target group
  g1 <- ggplot(data, aes(x = data[[var]], group = target, colour = target)) +
    geom_density(data = data) +
    xlim(min(data[[var]]), max(data[[var]]))
  gg1 <- ggplot_build(g1)

  # extract the two density curves and construct a data set of their difference
  x <- gg1$data[[1]]$x[gg1$data[[1]]$group == 1]
  y1 <- gg1$data[[1]]$y[gg1$data[[1]]$group == 1]
  y2 <- gg1$data[[1]]$y[gg1$data[[1]]$group == 2]
  df2 <- data.frame(x = x, ymin = pmin(y1, y2), ymax = pmax(y1, y2),
                    side = (y1 < y2), ydiff = y2 - y1)

  # plot of the density difference
  g3 <- ggplot(df2) +
    geom_line(aes(x = x, y = ydiff, colour = side)) +
    geom_area(aes(x = x, y = ydiff, fill = side, alpha = 0.4)) +
    guides(alpha = FALSE, fill = FALSE)

  # fuzzy-join each training observation to the nearest point on the difference curve
  # and average ydiff over the matches
  data$join <- data[[var]]
  df2$join <- df2$x
  temp <- difference_left_join(data, df2, by = "join", max_dist = .001)
  means <- aggregate(ydiff ~ temp$join.x, temp, mean)
  colnames(means) <- c("std_var", "density_diff")
  new_data <- merge(means, data, by.x = "std_var", by.y = var)
  new_data$std_var <- NULL
  new_data$join <- NULL
  new_data[new_column] <- new_data$density_diff
  new_data$density_diff <- NULL

  # repeat the same lookup for the evaluation data
  eval_data$join <- eval_data[[var]]
  temp <- difference_left_join(eval_data, df2, by = "join", max_dist = .001)
  means <- aggregate(ydiff ~ temp$join.x, temp, mean)
  colnames(means) <- c("std_var", "density_diff")
  eval_data2 <- merge(means, eval_data, by.x = "std_var", by.y = var)
  eval_data2$std_var <- NULL
  eval_data2$join <- NULL
  eval_data2[new_column] <- eval_data2$density_diff
  eval_data2$density_diff <- NULL

  # restore the original (unstandardized) variable in both data sets
  eval_data2[[var]] <- eval_data2[[paste0(var, "temp")]]
  eval_data2[[paste0(var, "temp")]] <- NULL
  new_data[[var]] <- new_data[[paste0(var, "temp")]]
  new_data[[paste0(var, "temp")]] <- NULL

  mylist <- list(g1, g3, new_data, eval_data2)
  names(mylist) <- c("dist", "dist_diff", "new_data_training", "new_data_eval")
  return(mylist)
}
Creating Some New Variables
For each continuous predictor, we use density_diff() to create a companion variable (suffixed _density, e.g. nox_density) that records the difference between the estimated density of the high-crime group and that of the low-crime group at the observed value.
'data.frame': 327 obs. of 13 variables:
$ zn : num 0 0 0 30 0 0 80 22 0 22 ...
$ indus : num 19.58 19.58 18.1 4.93 2.46 ...
$ chas : int 0 1 0 0 0 0 0 0 0 0 ...
$ nox : num 0.605 0.871 0.74 0.428 0.488 0.52 0.392 0.431 0.437 0.431 ...
$ rm : num 7.93 5.4 6.49 6.39 7.16 ...
$ age : num 96.2 100 100 7.8 92.2 71.3 19.1 8.9 45 8.4 ...
$ dis : num 2.05 1.32 1.98 7.04 2.7 ...
$ rad : int 5 5 24 6 3 5 1 7 5 7 ...
$ tax : int 403 403 666 300 193 384 315 330 398 330 ...
$ ptratio: num 14.7 14.7 20.2 16.6 17.8 20.9 16.4 19.1 18.7 19.1 ...
$ lstat : num 3.7 26.82 18.85 5.19 4.82 ...
$ medv : num 50 13.4 15.4 23.7 37.9 26.5 20.9 24.8 21.4 42.8 ...
$ target : int 1 1 1 0 0 0 0 0 0 1 ...
'data.frame': 139 obs. of 13 variables:
$ zn : num 0 0 0 0 0 100 0 0 0 0 ...
$ indus : num 18.1 18.1 5.19 18.1 2.46 1.32 18.1 3.24 6.2 2.89 ...
$ chas : int 0 0 0 0 0 0 0 0 1 0 ...
$ nox : num 0.693 0.693 0.515 0.532 0.488 0.411 0.679 0.46 0.507 0.445 ...
$ rm : num 5.45 4.52 6.32 7.06 6.15 ...
$ age : num 100 100 38.1 77 68.8 40.5 95.4 32.2 66.5 62.5 ...
$ dis : num 1.49 1.66 6.46 3.41 3.28 ...
$ rad : int 24 24 5 24 3 5 24 4 8 2 ...
$ tax : int 666 666 224 666 193 256 666 430 307 276 ...
$ ptratio: num 20.2 20.2 20.2 20.2 17.8 15.1 20.2 16.9 17.4 18 ...
$ lstat : num 30.59 36.98 5.68 7.01 13.15 ...
$ medv : num 5 7 22.2 25 29.6 31.6 8.3 19.8 29 33.2 ...
$ target : int 1 1 0 1 0 0 1 0 1 0 ...
## try it out: add the density-difference features to the training data
train <- density_diff(train, test, "nox", train$target, "nox_density")$new_data_training
train <- density_diff(train, test, "age", train$target, "age_density")$new_data_training
train <- density_diff(train, test, "indus", train$target, "indus_density")$new_data_training
train <- density_diff(train, test, "dis", train$target, "dis_density")$new_data_training
train <- density_diff(train, test, "rad", train$target, "rad_density")$new_data_training
train <- density_diff(train, test, "tax", train$target, "tax_density")$new_data_training
train <- density_diff(train, test, "ptratio", train$target, "ptratio_density")$new_data_training
train <- density_diff(train, test, "lstat", train$target, "lstat_density")$new_data_training
train <- density_diff(train, test, "medv", train$target, "medv_density")$new_data_training
## apply the same transformations to the test data, keeping the evaluation output
test <- density_diff(train, test, "nox", train$target, "nox_density")$new_data_eval
test <- density_diff(train, test, "age", train$target, "age_density")$new_data_eval
test <- density_diff(train, test, "indus", train$target, "indus_density")$new_data_eval
test <- density_diff(train, test, "dis", train$target, "dis_density")$new_data_eval
test <- density_diff(train, test, "rad", train$target, "rad_density")$new_data_eval
test <- density_diff(train, test, "tax", train$target, "tax_density")$new_data_eval
test <- density_diff(train, test, "ptratio", train$target, "ptratio_density")$new_data_eval
test <- density_diff(train, test, "lstat", train$target, "lstat_density")$new_data_eval
test <- density_diff(train, test, "medv", train$target, "medv_density")$new_data_eval
str(train)
'data.frame': 327 obs. of 22 variables:
$ zn : num 0 0 0 0 0 0 0 0 0 0 ...
$ chas : int 0 0 0 0 0 0 0 0 0 0 ...
$ rm : num 5.68 5.99 5.28 5 6.78 ...
$ target : int 1 1 1 1 1 0 1 1 1 1 ...
$ nox_density : num 1.73 1.73 1.7 1.7 1.71 ...
$ nox : num 0.693 0.693 0.7 0.7 0.679 0.609 0.718 0.693 0.74 0.679 ...
$ age_density : num 3.15 3.15 3.65 2.04 2.41 ...
$ age : num 100 100 98.1 89.5 90.8 98 76.5 85.4 100 95.6 ...
$ indus_density : num 3.58 3.58 3.58 3.58 3.58 ...
$ indus : num 18.1 18.1 18.1 18.1 18.1 ...
$ dis_density : num 3.02 3.47 3.02 3.31 3.64 ...
$ dis : num 1.43 1.59 1.43 1.52 1.82 ...
$ rad_density : num 1.54 1.54 1.54 1.54 1.54 ...
$ rad : int 24 24 24 24 24 4 24 24 24 24 ...
$ tax_density : num 1.95 1.95 1.95 1.95 1.95 ...
$ tax : int 666 666 666 666 666 711 666 666 666 666 ...
$ ptratio_density: num 3.87 3.87 3.87 3.87 3.87 ...
$ ptratio : num 20.2 20.2 20.2 20.2 20.2 20.1 20.2 20.2 20.2 20.2 ...
$ lstat_density : num 1.013 0.723 0.248 0.222 0.822 ...
$ lstat : num 23 26.8 30.8 32 25.8 ...
$ medv_density : num 0.258 0.325 0.546 0.576 0.591 ...
$ medv : num 5 5.6 7.2 7.4 7.5 8.1 8.4 8.5 8.7 9.5 ...
Model Building
We start by applying Occam's razor and creating a baseline model that has only one predictor. Any model we build beyond that will have to outperform this simplest model.
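The baseline model is fit as follows (reconstructed from the Call in the output below):
baseline <- glm(target ~ nox, family = binomial(link = "logit"), data = train)
summary(baseline)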
Call:
glm(formula = target ~ nox, family = binomial(link = "logit"),
data = train)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.1163 -0.4050 -0.1814 0.2717 2.5754
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -16.370 1.823 -8.981 <2e-16 ***
nox 30.372 3.444 8.818 <2e-16 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 451.70 on 326 degrees of freedom
Residual deviance: 205.67 on 325 degrees of freedom
AIC: 209.67
Number of Fisher Scoring iterations: 6
test$baseline <- ifelse(predict.glm(baseline, test,"response") >= 0.5,1,0)
cm <- confusionMatrix(factor(test$baseline), factor(test$target),"1")
results <- tibble(model = "baseline",predictors = 1,F1 = cm$byClass[7],
deviance=baseline$deviance,
r2 = 1 - baseline$deviance/baseline$null.deviance,
aic=baseline$aic)
cm
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 152 38
1 23 114
Accuracy : 0.8135
95% CI : (0.7669, 0.8542)
No Information Rate : 0.5352
P-Value [Acc > NIR] : < 2e-16
Kappa : 0.6226
Mcnemar's Test P-Value : 0.07305
Sensitivity : 0.7500
Specificity : 0.8686
Pos Pred Value : 0.8321
Neg Pred Value : 0.8000
Prevalence : 0.4648
Detection Rate : 0.3486
Detection Prevalence : 0.4190
Balanced Accuracy : 0.8093
'Positive' Class : 1
Our baseline model performs reasonably well, with an F1 score of 0.7889273.
Next, we add all of the remaining variables to build a full model. From there we can work backwards and eliminate non-significant predictors:
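The full model is fit as follows (reconstructed from the Call in the output below):
fullmodel <- glm(target ~ ., family = binomial(link = "logit"), data = train)
summary(fullmodel)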
Call:
glm(formula = target ~ ., family = binomial(link = "logit"),
data = train)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.5526 -0.0665 0.0000 0.0000 3.2421
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -62.180562 24.584518 -2.529 0.01143 *
zn -0.050415 0.040815 -1.235 0.21676
chas -0.195603 1.093905 -0.179 0.85809
rm 0.011261 1.333754 0.008 0.99326
nox_density -0.179313 1.346273 -0.133 0.89404
nox 76.295730 37.680730 2.025 0.04289 *
age_density 0.301197 0.421311 0.715 0.47467
age -0.006903 0.026806 -0.258 0.79677
indus_density 4.521690 1.592202 2.840 0.00451 **
indus -0.597686 0.193076 -3.096 0.00196 **
dis_density 0.879526 0.541537 1.624 0.10435
dis 0.907365 0.457941 1.981 0.04755 *
rad_density -0.108287 0.247780 -0.437 0.66209
rad 1.652715 0.441300 3.745 0.00018 ***
tax_density -1.857615 1.189922 -1.561 0.11849
tax -0.033081 0.018735 -1.766 0.07743 .
ptratio_density -0.683744 0.340432 -2.008 0.04459 *
ptratio 0.987145 0.389351 2.535 0.01123 *
lstat_density -0.508557 0.537084 -0.947 0.34370
lstat 0.216231 0.127437 1.697 0.08974 .
medv_density 0.538486 0.293699 1.833 0.06673 .
medv 0.219624 0.132442 1.658 0.09727 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 451.699 on 326 degrees of freedom
Residual deviance: 89.297 on 305 degrees of freedom
AIC: 133.3
Number of Fisher Scoring iterations: 11
test$fullmodel <- ifelse(predict.glm(fullmodel, test,"response") < 0.5, 0, 1)
cm <- confusionMatrix(factor(test$fullmodel), factor(test$target),"1")
results <- rbind(results,tibble(model = "fullmodel",
predictors = 12,F1 = cm$byClass[7],
deviance=fullmodel$deviance,
r2 = 1-fullmodel$deviance/fullmodel$null.deviance,
aic=fullmodel$aic))
cm
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 167 9
1 8 143
Accuracy : 0.948
95% CI : (0.9181, 0.9694)
No Information Rate : 0.5352
P-Value [Acc > NIR] : <2e-16
Kappa : 0.8955
Mcnemar's Test P-Value : 1
Sensitivity : 0.9408
Specificity : 0.9543
Pos Pred Value : 0.9470
Neg Pred Value : 0.9489
Prevalence : 0.4648
Detection Rate : 0.4373
Detection Prevalence : 0.4618
Balanced Accuracy : 0.9475
'Positive' Class : 1
The full model has an F1 score of 0.9438944, which is a bit higher than the baseline. However, many of its variables do not appear to be significant.
After some backward elimination of non-significant predictor variables, we arrive at the following model:
model1 <- glm(target ~ . - tax - rm - chas - age - zn - indus,
              family = binomial(link = "logit"),
              data = train)
summary(model1)
Call:
glm(formula = target ~ . - tax - rm - chas - age - zn - indus,
family = binomial(link = "logit"), data = train)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.6423 -0.0708 -0.0019 0.0000 3.5667
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -48.50337 18.01499 -2.692 0.007094 **
nox_density 1.11605 0.99779 1.119 0.263346
nox 29.26009 25.53773 1.146 0.251895
age_density 0.10263 0.24099 0.426 0.670197
indus_density 1.40396 0.49866 2.815 0.004871 **
dis_density 0.20473 0.43095 0.475 0.634740
dis 0.58861 0.34861 1.688 0.091322 .
rad_density -0.10699 0.19274 -0.555 0.578847
rad 1.30622 0.29319 4.455 8.38e-06 ***
tax_density -2.80661 0.75411 -3.722 0.000198 ***
ptratio_density -0.29021 0.27678 -1.049 0.294394
ptratio 0.73022 0.27325 2.672 0.007532 **
lstat_density -0.49402 0.46750 -1.057 0.290635
lstat 0.21515 0.11687 1.841 0.065619 .
medv_density 0.71614 0.27618 2.593 0.009513 **
medv 0.22990 0.07219 3.185 0.001450 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 451.70 on 326 degrees of freedom
Residual deviance: 102.82 on 311 degrees of freedom
AIC: 134.82
Number of Fisher Scoring iterations: 10
test$model1 <- ifelse(predict.glm(model1, test,"response") < 0.5, 0, 1)
cm <- confusionMatrix(factor(test$model1), factor(test$target),"1")
results <- rbind(results,tibble(model = "model1",
predictors = 6,F1 = cm$byClass[7],
deviance=model1$deviance,
r2 = 1-model1$deviance/model1$null.deviance,
aic=model1$aic))
cm
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 167 12
1 8 140
Accuracy : 0.9388
95% CI : (0.9071, 0.9622)
No Information Rate : 0.5352
P-Value [Acc > NIR] : <2e-16
Kappa : 0.8769
Mcnemar's Test P-Value : 0.5023
Sensitivity : 0.9211
Specificity : 0.9543
Pos Pred Value : 0.9459
Neg Pred Value : 0.9330
Prevalence : 0.4648
Detection Rate : 0.4281
Detection Prevalence : 0.4526
Balanced Accuracy : 0.9377
'Positive' Class : 1
Finally, we fit a model that uses only the engineered density-difference variables:
density_models <- glm(target ~ nox_density + indus_density + age_density + dis_density +
                        rad_density + tax_density + ptratio_density + lstat_density +
                        medv_density,
                      family = binomial(link = "logit"), data = train)
summary(density_models)
Call:
glm(formula = target ~ nox_density + indus_density + age_density +
dis_density + rad_density + tax_density + ptratio_density +
lstat_density + medv_density, family = binomial(link = "logit"),
data = train)
Deviance Residuals:
Min 1Q Median 3Q Max
-2.96836 -0.31245 -0.06225 0.20194 3.01402
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) 0.8093 0.5320 1.521 0.128210
nox_density 1.1492 0.2600 4.420 9.86e-06 ***
indus_density 1.0706 0.2488 4.302 1.69e-05 ***
age_density 0.1219 0.1698 0.718 0.472916
dis_density -0.1849 0.2138 -0.865 0.387284
rad_density 0.4141 0.1226 3.379 0.000727 ***
tax_density -1.2582 0.4408 -2.854 0.004314 **
ptratio_density 0.1550 0.1524 1.017 0.309139
lstat_density -0.2456 0.2055 -1.195 0.231963
medv_density 0.5417 0.2062 2.628 0.008596 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 451.70 on 326 degrees of freedom
Residual deviance: 164.12 on 317 degrees of freedom
AIC: 184.12
Number of Fisher Scoring iterations: 7
test$density_models_yhat <- predict(density_models, test, type = "response")
test$density_models <- ifelse(test$density_models_yhat < 0.5, 0, 1)
cm <- confusionMatrix(factor(test$density_models), factor(test$target), "1")
results <- rbind(results,tibble(model = "density models",
predictors = 9,
F1 = cm$byClass[7],
deviance = density_models$deviance,
r2 = 1 - density_models$deviance / density_models$null.deviance,
aic = density_models$aic))
cm
Confusion Matrix and Statistics
Reference
Prediction 0 1
0 167 34
1 8 118
Accuracy : 0.8716
95% CI : (0.8304, 0.9058)
No Information Rate : 0.5352
P-Value [Acc > NIR] : < 2.2e-16
Kappa : 0.7389
Mcnemar's Test P-Value : 0.0001145
Sensitivity : 0.9543
Specificity : 0.7763
Pos Pred Value : 0.8308
Neg Pred Value : 0.9365
Prevalence : 0.5352
Detection Rate : 0.5107
Detection Prevalence : 0.6147
Balanced Accuracy : 0.8653
'Positive' Class : 0
Since the assignment states that the purpose is prediction, we will use the F1 score as our primary measure of model success.
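For reference, F1 is the harmonic mean of precision and recall; a small sketch of the calculation using the full model's confusion matrix above:
precision <- 143 / (143 + 8)                    # Pos Pred Value, TP / (TP + FP)
recall    <- 143 / (143 + 9)                    # Sensitivity,    TP / (TP + FN)
2 * precision * recall / (precision + recall)   # ~0.9439, matching cm$byClass[7]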
| model | predictors | F1 | deviance | r2 | aic |
|---|---|---|---|---|---|
| baseline | 1 | 0.7889273 | 205.67297 | 0.5446683 | 209.6730 |
| fullmodel | 12 | 0.9438944 | 89.29743 | 0.8023078 | 133.2974 |
| model1 | 6 | 0.9333333 | 102.81625 | 0.7723789 | 134.8163 |
| density models | 9 | 0.8882979 | 164.11933 | 0.6366623 | 184.1193 |
Comparing these models, model1 looks like the best one. Although the full model scored a slightly higher F1 on our test data set, model1 has half the predictors and its scores are almost exactly the same as fullmodel's.
Model Selection
pROC Output
Baseline
par(pty = "s")
roc(test[["target"]], test[["baseline"]], plot = TRUE, legacy.axes = TRUE, print.auc = TRUE)
Call:
roc.default(response = test[["target"]], predictor = test[["baseline"]], plot = TRUE, legacy.axes = TRUE, print.auc = TRUE)
Data: test[["baseline"]] in 175 controls (test[["target"]] 0) < 152 cases (test[["target"]] 1).
Area under the curve: 0.8093
Full Model
par(pty = "s")
roc(test[["target"]], test[["fullmodel"]], plot = TRUE, legacy.axes = TRUE, print.auc = TRUE)
Call:
roc.default(response = test[["target"]], predictor = test[["fullmodel"]], plot = TRUE, legacy.axes = TRUE, print.auc = TRUE)
Data: test[["fullmodel"]] in 175 controls (test[["target"]] 0) < 152 cases (test[["target"]] 1).
Area under the curve: 0.9475
Model1
par(pty = "s")
roc(test[["target"]], test[["model1"]], plot = TRUE, legacy.axes = TRUE, print.auc = TRUE)
Call:
roc.default(response = test[["target"]], predictor = test[["model1"]], plot = TRUE, legacy.axes = TRUE, print.auc = TRUE)
Data: test[["model1"]] in 175 controls (test[["target"]] 0) < 152 cases (test[["target"]] 1).
Area under the curve: 0.9377
Density Model
par(pty = "s")
roc(test[["target"]], test[["density_models_yhat"]], plot = TRUE, legacy.axes = TRUE, print.auc = TRUE)
Call:
roc.default(response = test[["target"]], predictor = test[["density_models_yhat"]], plot = TRUE, legacy.axes = TRUE, print.auc = TRUE)
Data: test[["density_models_yhat"]] in 175 controls (test[["target"]] 0) < 152 cases (test[["target"]] 1).
Area under the curve: 0.9625