Compare the models and prepare a report: (i) a 20% versus a 30% test split; (ii) 5-fold versus 10-fold cross-validation.
#### Import libraries
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.3.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.3 ✔ tibble 3.2.1
## ✔ purrr 1.0.2 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(caTools)
## Warning: package 'caTools' was built under R version 4.3.3
library(caret)
## Warning: package 'caret' was built under R version 4.3.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
# Load the raw donor records from disk and preview the column types.
donors_csv <- "C:/Users/PMLS/Downloads/donors.csv"
donors <- read.csv(file = donors_csv)
glimpse(donors)
## Rows: 95,412
## Columns: 22
## $ age <int> 60, 46, NA, 70, 78, NA, 38, NA, NA, 65, NA, 75…
## $ numberChildren <int> NA, 1, NA, NA, 1, NA, 1, NA, NA, NA, NA, NA, 2…
## $ incomeRating <int> NA, 6, 3, 1, 3, NA, 4, 2, 3, NA, 2, 1, 4, NA, …
## $ wealthRating <int> NA, 9, 1, 4, 2, NA, 6, 9, 2, NA, 0, 5, 2, NA, …
## $ mailOrderPurchases <int> 0, 16, 2, 2, 60, 0, 0, 1, 0, 0, 0, 3, 16, 0, 1…
## $ totalGivingAmount <dbl> 240, 47, 202, 109, 254, 51, 107, 31, 199, 28, …
## $ numberGifts <int> 31, 3, 27, 16, 37, 4, 14, 5, 11, 3, 1, 2, 9, 1…
## $ smallestGiftAmount <dbl> 5, 10, 2, 2, 3, 10, 3, 5, 10, 3, 20, 10, 4, 5,…
## $ largestGiftAmount <dbl> 12, 25, 16, 11, 15, 16, 12, 11, 22, 15, 20, 15…
## $ averageGiftAmount <dbl> 7.741935, 15.666667, 7.481481, 6.812500, 6.864…
## $ yearsSinceFirstDonation <int> 8, 3, 7, 10, 11, 3, 10, 3, 9, 3, 1, 1, 8, 5, 4…
## $ monthsSinceLastDonation <int> 14, 14, 14, 14, 13, 20, 22, 18, 19, 22, 12, 14…
## $ inHouseDonor <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE…
## $ plannedGivingDonor <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ sweepstakesDonor <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ P3Donor <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE…
## $ state <chr> "IL", "CA", "NC", "CA", "FL", "AL", "IN", "LA"…
## $ urbanicity <chr> "town", "suburb", "rural", "rural", "suburb", …
## $ socioEconomicStatus <chr> "average", "highest", "average", "average", "a…
## $ isHomeowner <lgl> NA, TRUE, NA, NA, TRUE, NA, TRUE, NA, NA, NA, …
## $ gender <chr> "female", "male", "male", "female", "female", …
## $ respondedMailing <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
# Recode the categorical predictors as factors. Missing values become an
# explicit "UNK" level instead of NA, so those rows stay usable in the model.
unk_columns <- c(
  "incomeRating", "wealthRating", "urbanicity",
  "socioEconomicStatus", "isHomeowner", "gender"
)
donors <- donors %>%
  mutate(across(all_of(unk_columns), function(column) {
    labels <- as.character(column)
    as.factor(ifelse(is.na(labels), "UNK", labels))
  }))
glimpse(donors)
## Rows: 95,412
## Columns: 22
## $ age <int> 60, 46, NA, 70, 78, NA, 38, NA, NA, 65, NA, 75…
## $ numberChildren <int> NA, 1, NA, NA, 1, NA, 1, NA, NA, NA, NA, NA, 2…
## $ incomeRating <fct> UNK, 6, 3, 1, 3, UNK, 4, 2, 3, UNK, 2, 1, 4, U…
## $ wealthRating <fct> UNK, 9, 1, 4, 2, UNK, 6, 9, 2, UNK, 0, 5, 2, U…
## $ mailOrderPurchases <int> 0, 16, 2, 2, 60, 0, 0, 1, 0, 0, 0, 3, 16, 0, 1…
## $ totalGivingAmount <dbl> 240, 47, 202, 109, 254, 51, 107, 31, 199, 28, …
## $ numberGifts <int> 31, 3, 27, 16, 37, 4, 14, 5, 11, 3, 1, 2, 9, 1…
## $ smallestGiftAmount <dbl> 5, 10, 2, 2, 3, 10, 3, 5, 10, 3, 20, 10, 4, 5,…
## $ largestGiftAmount <dbl> 12, 25, 16, 11, 15, 16, 12, 11, 22, 15, 20, 15…
## $ averageGiftAmount <dbl> 7.741935, 15.666667, 7.481481, 6.812500, 6.864…
## $ yearsSinceFirstDonation <int> 8, 3, 7, 10, 11, 3, 10, 3, 9, 3, 1, 1, 8, 5, 4…
## $ monthsSinceLastDonation <int> 14, 14, 14, 14, 13, 20, 22, 18, 19, 22, 12, 14…
## $ inHouseDonor <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE…
## $ plannedGivingDonor <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ sweepstakesDonor <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ P3Donor <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE…
## $ state <chr> "IL", "CA", "NC", "CA", "FL", "AL", "IN", "LA"…
## $ urbanicity <fct> town, suburb, rural, rural, suburb, town, town…
## $ socioEconomicStatus <fct> average, highest, average, average, average, a…
## $ isHomeowner <fct> UNK, TRUE, UNK, UNK, TRUE, UNK, TRUE, UNK, UNK…
## $ gender <fct> female, male, male, female, female, UNK, femal…
## $ respondedMailing <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
# Fill missing values of the continuous variable(s) with the column mean.
continous_numeric_vars <- c("age")
donors[continous_numeric_vars] <- lapply(
  donors[continous_numeric_vars],
  function(values) replace(values, is.na(values), mean(values, na.rm = TRUE))
)
# Fill missing values of the count variable(s) with the column median.
numeric_vars <- c("numberChildren")
donors[numeric_vars] <- lapply(
  donors[numeric_vars],
  function(values) replace(values, is.na(values), median(values, na.rm = TRUE))
)
glimpse(donors)
## Rows: 95,412
## Columns: 22
## $ age <dbl> 60.00000, 46.00000, 61.61165, 70.00000, 78.000…
## $ numberChildren <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1…
## $ incomeRating <fct> UNK, 6, 3, 1, 3, UNK, 4, 2, 3, UNK, 2, 1, 4, U…
## $ wealthRating <fct> UNK, 9, 1, 4, 2, UNK, 6, 9, 2, UNK, 0, 5, 2, U…
## $ mailOrderPurchases <int> 0, 16, 2, 2, 60, 0, 0, 1, 0, 0, 0, 3, 16, 0, 1…
## $ totalGivingAmount <dbl> 240, 47, 202, 109, 254, 51, 107, 31, 199, 28, …
## $ numberGifts <int> 31, 3, 27, 16, 37, 4, 14, 5, 11, 3, 1, 2, 9, 1…
## $ smallestGiftAmount <dbl> 5, 10, 2, 2, 3, 10, 3, 5, 10, 3, 20, 10, 4, 5,…
## $ largestGiftAmount <dbl> 12, 25, 16, 11, 15, 16, 12, 11, 22, 15, 20, 15…
## $ averageGiftAmount <dbl> 7.741935, 15.666667, 7.481481, 6.812500, 6.864…
## $ yearsSinceFirstDonation <int> 8, 3, 7, 10, 11, 3, 10, 3, 9, 3, 1, 1, 8, 5, 4…
## $ monthsSinceLastDonation <int> 14, 14, 14, 14, 13, 20, 22, 18, 19, 22, 12, 14…
## $ inHouseDonor <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE…
## $ plannedGivingDonor <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ sweepstakesDonor <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
## $ P3Donor <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, FALSE, FALSE…
## $ state <chr> "IL", "CA", "NC", "CA", "FL", "AL", "IN", "LA"…
## $ urbanicity <fct> town, suburb, rural, rural, suburb, town, town…
## $ socioEconomicStatus <fct> average, highest, average, average, average, a…
## $ isHomeowner <fct> UNK, TRUE, UNK, UNK, TRUE, UNK, TRUE, UNK, UNK…
## $ gender <fct> female, male, male, female, female, UNK, femal…
## $ respondedMailing <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALS…
# Univariate boxplots of the numeric columns, drawn in three groups. Each
# group sets its own panel layout and fill colour before plotting.
# NOTE(review): the last group uses a 1x1 layout, so its two plots are drawn
# one after the other rather than side by side - confirm that is intended.
boxplot_groups <- list(
  list(
    vars = c("age", "numberChildren", "mailOrderPurchases", "totalGivingAmount"),
    layout = c(2, 2),
    fill = "pink"
  ),
  list(
    vars = c("numberGifts", "smallestGiftAmount", "largestGiftAmount",
             "averageGiftAmount"),
    layout = c(2, 2),
    fill = "yellow"
  ),
  list(
    vars = c("yearsSinceFirstDonation", "monthsSinceLastDonation"),
    layout = c(1, 1),
    fill = "brown"
  )
)
for (group in boxplot_groups) {
  par(mfrow = group$layout)
  for (variable in group$vars) {
    boxplot(donors[[variable]], main = variable, ylab = variable,
            col = group$fill)
  }
}
#### Making whiskers plot for outlier detection
# Boxplots of every numeric predictor split by the response
# (respondedMailing), laid out on a 2 x 5 grid.
variables_of_interest <- c(
  "age", "numberChildren", "mailOrderPurchases", "totalGivingAmount",
  "numberGifts", "smallestGiftAmount", "largestGiftAmount",
  "averageGiftAmount", "yearsSinceFirstDonation", "monthsSinceLastDonation"
)
par(mfrow = c(2, 5), mar = c(5, 4, 2, 2))
invisible(lapply(variables_of_interest, function(variable) {
  plot_title <- paste("Whisker Plot of", variable, "by Target")
  boxplot(
    donors[[variable]] ~ donors$respondedMailing,
    data = donors,
    main = plot_title,
    xlab = "Target",
    ylab = variable,
    col = "brown"
  )
}))
#### Now we will remove outliers
# Drop rows lying above the upper Tukey fence (Q3 + 1.5 * IQR) in any of the
# giving-related columns. All fences are computed on the unfiltered data
# inside a single filter() call, and a row is kept only if it passes every
# column's check.
fenced_columns <- c(
  "mailOrderPurchases", "totalGivingAmount", "numberGifts",
  "smallestGiftAmount", "largestGiftAmount", "averageGiftAmount"
)
donors <- donors %>%
  filter(if_all(all_of(fenced_columns), function(values) {
    upper_fence <- quantile(values, .75) + (1.5 * IQR(values))
    values <= upper_fence
  }))
# Fix the RNG state so the random train/test splits are reproducible.
set.seed(1234)
# Store the response as a factor before modelling.
donors[["respondedMailing"]] <- factor(donors[["respondedMailing"]])
# Fit a logistic regression of respondedMailing on every other column of
# train_data and score it on the held-out test_data.
#
# Args:
#   train_data: data frame used to fit the model; must contain a
#     respondedMailing column.
#   test_data: data frame with the same columns, used only to compute
#     held-out metrics (it was previously accepted but ignored).
#
# Returns:
#   The summary.glm object of the fitted model, exactly as before, with two
#   extra list elements:
#     test_confusion - table of predicted vs. actual class at a 0.5 cutoff
#                      (absent when no test row could be scored);
#     test_accuracy  - share of scored test rows classified correctly
#                      (NA when no test row could be scored).
#   print() of the result is unchanged; read the extras with `$`.
evaluate_logistic_regression <- function(train_data, test_data) {
  model <- glm(respondedMailing ~ ., data = train_data, family = "binomial")
  model_summary <- summary(model)
  model_summary$test_accuracy <- NA_real_

  if (is.null(test_data) || nrow(test_data) == 0L) {
    return(model_summary)
  }

  # predict() stops on categorical values the model never saw while fitting,
  # so only score the test rows whose factor/character levels are known.
  scorable <- rep(TRUE, nrow(test_data))
  for (column in names(model$xlevels)) {
    scorable <- scorable &
      as.character(test_data[[column]]) %in% model$xlevels[[column]]
  }
  if (!all(scorable)) {
    warning(sum(!scorable),
            " test row(s) skipped: level not present in training data",
            call. = FALSE)
  }
  scored <- test_data[scorable, , drop = FALSE]
  if (nrow(scored) == 0L) {
    return(model_summary)
  }

  predicted_prob <- predict(model, newdata = scored, type = "response")
  predicted_positive <- predicted_prob > 0.5

  # For a factor response glm() treats the first level as "failure" and the
  # remaining level(s) as "success"; mirror that coding for the actual labels.
  actual <- scored$respondedMailing
  actual_positive <- if (is.factor(actual)) {
    actual != levels(actual)[1L]
  } else {
    as.logical(actual)
  }

  model_summary$test_confusion <- table(
    predicted = predicted_positive,
    actual = actual_positive
  )
  model_summary$test_accuracy <- mean(
    predicted_positive == actual_positive,
    na.rm = TRUE
  )
  model_summary
}
# 80% training / 20% test split drawn on the response, then fit and report.
split_20_80 <- sample.split(Y = donors$respondedMailing, SplitRatio = 0.8)
train_data_20_80 <- donors[which(split_20_80), ]
test_data_20_80 <- donors[which(!split_20_80), ]
summary_20_80 <- evaluate_logistic_regression(
  train_data = train_data_20_80,
  test_data = test_data_20_80
)
print(summary_20_80)
##
## Call:
## glm(formula = respondedMailing ~ ., family = "binomial", data = train_data)
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 4.052e-01 7.369e-01 0.550 0.582412
## age -1.055e-03 1.493e-03 -0.707 0.479837
## numberChildren -1.182e-01 6.385e-02 -1.850 0.064250 .
## incomeRating2 9.399e-02 9.105e-02 1.032 0.301932
## incomeRating3 1.857e-01 9.959e-02 1.865 0.062219 .
## incomeRating4 2.344e-01 9.424e-02 2.487 0.012885 *
## incomeRating5 2.459e-01 9.163e-02 2.684 0.007269 **
## incomeRating6 2.258e-01 1.078e-01 2.095 0.036205 *
## incomeRating7 3.510e-01 1.080e-01 3.252 0.001148 **
## incomeRatingUNK 3.176e-01 9.011e-02 3.524 0.000425 ***
## wealthRating1 -1.520e-01 1.797e-01 -0.846 0.397717
## wealthRating2 2.113e-02 1.697e-01 0.124 0.900927
## wealthRating3 1.334e-02 1.688e-01 0.079 0.936975
## wealthRating4 -1.619e-01 1.710e-01 -0.946 0.343964
## wealthRating5 -1.667e-01 1.689e-01 -0.987 0.323572
## wealthRating6 -1.102e-01 1.664e-01 -0.662 0.507954
## wealthRating7 -1.501e-01 1.668e-01 -0.900 0.368314
## wealthRating8 -1.411e-01 1.663e-01 -0.848 0.396346
## wealthRating9 -6.277e-02 1.675e-01 -0.375 0.707777
## wealthRatingUNK -1.382e-01 1.487e-01 -0.929 0.352713
## mailOrderPurchases -4.370e-03 1.387e-02 -0.315 0.752779
## totalGivingAmount 1.471e-03 9.497e-04 1.549 0.121399
## numberGifts 4.566e-03 8.979e-03 0.508 0.611111
## smallestGiftAmount -7.559e-03 1.006e-02 -0.751 0.452429
## largestGiftAmount -4.611e-02 7.326e-03 -6.295 3.08e-10 ***
## averageGiftAmount 4.848e-03 1.639e-02 0.296 0.767444
## yearsSinceFirstDonation 5.743e-03 9.811e-03 0.585 0.558353
## monthsSinceLastDonation -3.973e-02 5.755e-03 -6.904 5.07e-12 ***
## inHouseDonorTRUE 5.934e-02 8.762e-02 0.677 0.498244
## plannedGivingDonorTRUE 3.563e-01 7.463e-01 0.477 0.633018
## sweepstakesDonorTRUE -5.519e-01 1.730e-01 -3.191 0.001418 **
## P3DonorTRUE 1.062e-01 1.316e-01 0.807 0.419640
## stateAE -1.362e+01 2.928e+02 -0.047 0.962889
## stateAK -3.426e+00 9.936e-01 -3.448 0.000564 ***
## stateAL -2.069e+00 7.065e-01 -2.929 0.003404 **
## stateAP -1.851e+00 9.093e-01 -2.035 0.041817 *
## stateAR -2.100e+00 7.179e-01 -2.925 0.003443 **
## stateAZ -1.843e+00 7.008e-01 -2.630 0.008533 **
## stateCA -1.875e+00 6.925e-01 -2.707 0.006795 **
## stateCO -2.114e+00 7.062e-01 -2.993 0.002760 **
## stateCT -1.987e+00 1.244e+00 -1.597 0.110320
## stateDC -1.321e+01 8.827e+02 -0.015 0.988063
## stateDE -1.359e+01 8.827e+02 -0.015 0.987719
## stateFL -2.043e+00 6.948e-01 -2.941 0.003270 **
## stateGA -2.465e+00 7.029e-01 -3.508 0.000452 ***
## stateGU -1.341e-01 1.416e+00 -0.095 0.924541
## stateHI -1.559e+00 7.307e-01 -2.134 0.032839 *
## stateIA -2.243e+00 7.160e-01 -3.133 0.001728 **
## stateID -2.186e+00 7.452e-01 -2.934 0.003351 **
## stateIL -2.140e+00 6.960e-01 -3.075 0.002108 **
## stateIN -2.252e+00 7.023e-01 -3.206 0.001347 **
## stateKS -2.127e+00 7.127e-01 -2.984 0.002842 **
## stateKY -2.033e+00 7.072e-01 -2.875 0.004046 **
## stateLA -2.164e+00 7.088e-01 -3.053 0.002266 **
## stateMA -1.358e+01 2.066e+02 -0.066 0.947581
## stateMD -8.037e-01 9.412e-01 -0.854 0.393159
## stateME -1.367e+01 3.592e+02 -0.038 0.969636
## stateMI -1.998e+00 6.962e-01 -2.870 0.004107 **
## stateMN -2.424e+00 7.075e-01 -3.426 0.000613 ***
## stateMO -2.185e+00 7.024e-01 -3.110 0.001869 **
## stateMS -2.226e+00 7.222e-01 -3.083 0.002050 **
## stateMT -2.034e+00 7.367e-01 -2.761 0.005770 **
## stateNC -2.085e+00 6.988e-01 -2.984 0.002847 **
## stateND -2.010e+00 7.827e-01 -2.568 0.010217 *
## stateNE -2.203e+00 7.292e-01 -3.021 0.002519 **
## stateNH -1.384e+01 5.089e+02 -0.027 0.978300
## stateNJ -1.396e+00 1.028e+00 -1.357 0.174625
## stateNM -1.972e+00 7.190e-01 -2.743 0.006079 **
## stateNV -1.984e+00 7.147e-01 -2.777 0.005493 **
## stateNY -3.075e+00 1.225e+00 -2.509 0.012103 *
## stateOH -1.361e+01 1.438e+02 -0.095 0.924568
## stateOK -2.471e+00 7.153e-01 -3.454 0.000552 ***
## stateOR -1.715e+00 7.012e-01 -2.446 0.014443 *
## statePA -2.340e+00 1.234e+00 -1.896 0.057969 .
## stateRI -1.340e+01 3.572e+02 -0.038 0.970079
## stateSC -2.044e+00 7.063e-01 -2.894 0.003805 **
## stateSD -1.545e+00 7.408e-01 -2.086 0.036958 *
## stateTN -2.387e+00 7.056e-01 -3.383 0.000717 ***
## stateTX -2.149e+00 6.955e-01 -3.090 0.002001 **
## stateUT -2.342e+00 7.476e-01 -3.133 0.001729 **
## stateVA -1.359e+01 1.479e+02 -0.092 0.926743
## stateVI -1.362e+01 3.925e+02 -0.035 0.972309
## stateVT -1.364e+01 4.378e+02 -0.031 0.975153
## stateWA -2.048e+00 6.992e-01 -2.929 0.003400 **
## stateWI -2.033e+00 7.012e-01 -2.899 0.003743 **
## stateWV 1.013e-02 1.416e+00 0.007 0.994292
## stateWY -2.224e+00 7.938e-01 -2.802 0.005086 **
## urbanicityrural -8.005e-02 6.530e-02 -1.226 0.220266
## urbanicitysuburb 2.927e-02 6.086e-02 0.481 0.630506
## urbanicitytown -2.033e-02 6.293e-02 -0.323 0.746625
## urbanicityUNK -3.328e-02 1.357e-01 -0.245 0.806240
## urbanicityurban -2.180e-01 7.444e-02 -2.928 0.003409 **
## socioEconomicStatushighest 9.219e-02 5.326e-02 1.731 0.083458 .
## socioEconomicStatuslowest -1.392e-01 6.148e-02 -2.265 0.023524 *
## socioEconomicStatusUNK NA NA NA NA
## isHomeownerUNK -6.260e-02 5.458e-02 -1.147 0.251414
## genderjoint 4.262e-01 2.648e-01 1.609 0.107554
## gendermale -1.610e-02 4.094e-02 -0.393 0.694238
## genderUNK -2.787e-02 8.972e-02 -0.311 0.756063
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 22317 on 56007 degrees of freedom
## Residual deviance: 21830 on 55910 degrees of freedom
## AIC: 22026
##
## Number of Fisher Scoring iterations: 13
# 70% training / 30% test split drawn on the response, then fit and report.
split_30_70 <- sample.split(Y = donors$respondedMailing, SplitRatio = 0.7)
train_data_30_70 <- donors[which(split_30_70), ]
test_data_30_70 <- donors[which(!split_30_70), ]
summary_30_70 <- evaluate_logistic_regression(
  train_data = train_data_30_70,
  test_data = test_data_30_70
)
print(summary_30_70)
##
## Call:
## glm(formula = respondedMailing ~ ., family = "binomial", data = train_data)
##
## Coefficients: (1 not defined because of singularities)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 3.017e-01 8.973e-01 0.336 0.73667
## age -2.618e-03 1.592e-03 -1.645 0.10003
## numberChildren -1.661e-01 6.965e-02 -2.385 0.01706 *
## incomeRating2 1.150e-01 9.813e-02 1.172 0.24114
## incomeRating3 2.107e-01 1.069e-01 1.970 0.04881 *
## incomeRating4 2.075e-01 1.020e-01 2.034 0.04194 *
## incomeRating5 2.765e-01 9.840e-02 2.810 0.00496 **
## incomeRating6 2.495e-01 1.150e-01 2.171 0.02996 *
## incomeRating7 4.924e-01 1.135e-01 4.338 1.44e-05 ***
## incomeRatingUNK 2.294e-01 9.669e-02 2.372 0.01769 *
## wealthRating1 1.058e-02 2.047e-01 0.052 0.95878
## wealthRating2 4.545e-02 1.964e-01 0.231 0.81695
## wealthRating3 1.303e-01 1.928e-01 0.676 0.49933
## wealthRating4 -1.845e-02 1.942e-01 -0.095 0.92430
## wealthRating5 1.068e-01 1.886e-01 0.566 0.57119
## wealthRating6 2.103e-01 1.863e-01 1.129 0.25907
## wealthRating7 7.495e-03 1.890e-01 0.040 0.96837
## wealthRating8 1.030e-01 1.865e-01 0.552 0.58063
## wealthRating9 5.192e-02 1.894e-01 0.274 0.78402
## wealthRatingUNK 9.309e-02 1.704e-01 0.546 0.58483
## mailOrderPurchases -1.602e-03 1.491e-02 -0.107 0.91444
## totalGivingAmount 2.059e-03 1.023e-03 2.013 0.04411 *
## numberGifts 1.734e-03 9.683e-03 0.179 0.85791
## smallestGiftAmount -1.441e-02 1.082e-02 -1.331 0.18304
## largestGiftAmount -4.674e-02 7.791e-03 -5.999 1.98e-09 ***
## averageGiftAmount -6.862e-04 1.758e-02 -0.039 0.96886
## yearsSinceFirstDonation 5.705e-04 1.045e-02 0.055 0.95647
## monthsSinceLastDonation -3.308e-02 6.112e-03 -5.412 6.22e-08 ***
## inHouseDonorTRUE 9.393e-02 9.274e-02 1.013 0.31117
## plannedGivingDonorTRUE 5.117e-01 7.490e-01 0.683 0.49453
## sweepstakesDonorTRUE -4.377e-01 1.806e-01 -2.423 0.01539 *
## P3DonorTRUE 6.523e-02 1.425e-01 0.458 0.64714
## stateAE -1.382e+01 3.591e+02 -0.038 0.96930
## stateAK -2.666e+00 9.948e-01 -2.680 0.00736 **
## stateAL -2.062e+00 8.680e-01 -2.375 0.01755 *
## stateAP -2.889e+00 1.322e+00 -2.184 0.02895 *
## stateAR -2.120e+00 8.801e-01 -2.409 0.01599 *
## stateAZ -1.962e+00 8.637e-01 -2.271 0.02313 *
## stateCA -1.871e+00 8.552e-01 -2.187 0.02873 *
## stateCO -2.140e+00 8.679e-01 -2.466 0.01365 *
## stateCT -1.861e+00 1.346e+00 -1.382 0.16686
## stateDC -1.309e+01 8.827e+02 -0.015 0.98817
## stateDE -1.370e+01 6.237e+02 -0.022 0.98248
## stateFL -2.169e+00 8.576e-01 -2.529 0.01144 *
## stateGA -2.381e+00 8.642e-01 -2.755 0.00587 **
## stateGU 2.755e-01 1.656e+00 0.166 0.86789
## stateHI -1.793e+00 8.958e-01 -2.002 0.04533 *
## stateIA -2.304e+00 8.786e-01 -2.622 0.00875 **
## stateID -2.046e+00 8.987e-01 -2.277 0.02281 *
## stateIL -2.271e+00 8.589e-01 -2.644 0.00819 **
## stateIN -2.211e+00 8.640e-01 -2.559 0.01049 *
## stateKS -2.025e+00 8.726e-01 -2.321 0.02031 *
## stateKY -2.203e+00 8.709e-01 -2.530 0.01141 *
## stateLA -2.389e+00 8.734e-01 -2.735 0.00623 **
## stateMA -1.363e+01 2.010e+02 -0.068 0.94592
## stateMD -1.737e+00 1.348e+00 -1.289 0.19743
## stateME -1.360e+01 3.596e+02 -0.038 0.96984
## stateMI -2.085e+00 8.589e-01 -2.427 0.01521 *
## stateMN -2.319e+00 8.679e-01 -2.671 0.00755 **
## stateMO -2.053e+00 8.633e-01 -2.378 0.01740 *
## stateMS -2.111e+00 8.805e-01 -2.397 0.01652 *
## stateMT -1.912e+00 8.964e-01 -2.133 0.03292 *
## stateNC -2.078e+00 8.610e-01 -2.413 0.01582 *
## stateND -1.882e+00 9.307e-01 -2.022 0.04316 *
## stateNE -2.220e+00 8.904e-01 -2.493 0.01268 *
## stateNH -1.395e+01 4.409e+02 -0.032 0.97476
## stateNJ -1.529e+00 1.354e+00 -1.129 0.25892
## stateNM -2.052e+00 8.825e-01 -2.325 0.02006 *
## stateNV -2.138e+00 8.798e-01 -2.429 0.01512 *
## stateNY -2.935e+00 1.325e+00 -2.215 0.02679 *
## stateOH -1.364e+01 1.709e+02 -0.080 0.93638
## stateOK -2.329e+00 8.739e-01 -2.665 0.00770 **
## stateOR -1.795e+00 8.638e-01 -2.078 0.03767 *
## statePA -2.037e+00 1.340e+00 -1.520 0.12843
## stateRI -1.342e+01 3.896e+02 -0.034 0.97252
## stateSC -1.966e+00 8.669e-01 -2.267 0.02337 *
## stateSD -1.508e+00 9.005e-01 -1.675 0.09398 .
## stateTN -2.310e+00 8.662e-01 -2.667 0.00766 **
## stateTX -2.048e+00 8.576e-01 -2.388 0.01696 *
## stateUT -2.136e+00 8.975e-01 -2.380 0.01730 *
## stateVA -2.574e+00 1.329e+00 -1.937 0.05274 .
## stateVI -1.348e+01 5.085e+02 -0.027 0.97885
## stateVT -1.390e+01 6.232e+02 -0.022 0.98221
## stateWA -1.977e+00 8.606e-01 -2.297 0.02164 *
## stateWI -2.033e+00 8.634e-01 -2.355 0.01851 *
## stateWV 9.837e-01 1.662e+00 0.592 0.55406
## stateWY -1.892e+00 9.303e-01 -2.033 0.04202 *
## urbanicityrural -8.421e-02 7.018e-02 -1.200 0.23020
## urbanicitysuburb 4.636e-02 6.527e-02 0.710 0.47752
## urbanicitytown -1.768e-02 6.754e-02 -0.262 0.79354
## urbanicityUNK 1.028e-01 1.398e-01 0.735 0.46207
## urbanicityurban -1.357e-01 7.936e-02 -1.709 0.08737 .
## socioEconomicStatushighest 1.037e-01 5.658e-02 1.834 0.06671 .
## socioEconomicStatuslowest -7.073e-02 6.590e-02 -1.073 0.28318
## socioEconomicStatusUNK NA NA NA NA
## isHomeownerUNK -1.621e-02 5.792e-02 -0.280 0.77954
## genderjoint 1.895e-01 3.154e-01 0.601 0.54798
## gendermale 2.737e-02 4.369e-02 0.627 0.53097
## genderUNK 5.843e-02 9.398e-02 0.622 0.53408
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 19532 on 49006 degrees of freedom
## Residual deviance: 19093 on 48909 degrees of freedom
## AIC: 19289
##
## Number of Fisher Scoring iterations: 13
#### 5 folds (R chunk; not executed in this report)
# set.seed(1234)
# cv_5_fold <- trainControl(method = "cv", number = 5)
# summary_5_fold <- train(respondedMailing ~ ., data = donors, method = "glm",
#                         trControl = cv_5_fold, family = "binomial")$finalModel
# print(summary_5_fold)
#### 10 folds
### R chunk; not executed in this report
# set.seed(1234)
# cv_10_fold <- trainControl(method = "cv", number = 10)
# summary_10_fold <- train(respondedMailing ~ ., data = donors, method = "glm",
#                          trControl = cv_10_fold, family = "binomial")$finalModel
# print(summary_10_fold)
The null deviance measures how well an intercept-only model (no predictors) fits the data; a lower value means the response is already well described without any predictors.
It is the baseline against which the fitted model is compared.
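The residual deviance measures the fit of the model with all predictors included; on the same data, a lower residual deviance indicates a better fit.
The residual deviance should be substantially lower than the null deviance, which shows that the predictors improve on the null model (here 21830 vs 22317 for the 80% training fit and 19093 vs 19532 for the 70% training fit).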
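A lower AIC value indicates a better model, balancing goodness of fit against model complexity.
Among models fitted to the same data, those with smaller AIC values are preferred over those with larger AIC values.
AIC can therefore be used to choose between alternative models, but only when they are fitted to the same observations: the 80% and 70% training fits above use different numbers of rows (56008 vs 49007), so their deviances and AIC values (22026 vs 19289) are not directly comparable.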