### IMPORTANT:
### Upload the final Excel sheet Dr. Xu provided (the version that includes
### the two new columns) before running this script.
# The workbook is addressed by a relative path, so it is resolved against the
# current working directory.
# NOTE(review): read_excel() is presumably readxl::read_excel, attached in a
# setup step that is not visible here -- confirm.
data <- read_excel(
  path = "University Honors Students (Fall 2017 - Fall 2021)(Final) with two new columns.xlsx"
)
# Cleaning and Processing Data ----
# Replace missing values in selected columns with an explicit default label:
#   * First Generation, Low Income    -> "N"
#   * Capstone Project Completion     -> "No"
#   * Gender, Admit Type              -> "Others"
data <- data %>%
  mutate(
    across(c(`First Generation`, `Low Income`), ~ replace_na(.x, "N")),
    across(c(`Capstone Project Completion`), ~ replace_na(.x, "No")),
    across(c(`Gender`, `Admit Type`), ~ replace_na(.x, "Others"))
  )
# Encode capstone completion as a numeric 0/1 indicator: 1 where the recorded
# value is "Yes", 0 for any other non-missing value.
data$`Capstone Project Completion` <- as.numeric(
  data$`Capstone Project Completion` == "Yes"
)
# Simplify the Admitted Honors Cohort labels:
#   * "New Second Year (Waitlist)", "New Second Year" -> "Second Year"
#   * "New Third Year"                                -> "Third Year"
#   * "Fourth Year", "Fifth Year", "Sixth Year"       -> "Others"
data$`Admitted Honors Cohort`[
  data$`Admitted Honors Cohort` %in% c("New Second Year (Waitlist)", "New Second Year")
] <- "Second Year"
data$`Admitted Honors Cohort`[
  data$`Admitted Honors Cohort` %in% "New Third Year"
] <- "Third Year"
data$`Admitted Honors Cohort`[
  data$`Admitted Honors Cohort` %in% c("Fourth Year", "Fifth Year", "Sixth Year")
] <- "Others"
# Colleges PP and ED are merged into "Others" because of their small sample
# sizes.
data$`College`[data$`College` %in% c("PP", "ED")] <- "Others"
# Majors are disregarded; only the college is kept, so the column is dropped.
data[["Major"]] <- NULL
# The IPEDS Ethnicity values "NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER" and
# "UNKNOWN" are merged into "OTHERS" because of their small sample sizes.
data$`IPEDS Ethnicity`[
  data$`IPEDS Ethnicity` %in% c("NATIVE HAWAIIAN OR OTHER PACIFIC ISLANDER", "UNKNOWN")
] <- "OTHERS"
# Consolidate Admit Type by its first letter: values beginning with "T" become
# "T" (transfer), values beginning with "F" become "F" (freshman), and every
# other non-missing value becomes "Others".
# startsWith() anchors the match at the first character, so a "T" or "F" that
# appears later in a code no longer triggers a rewrite, and codes starting
# with any other letter are reliably folded into "Others". Missing values stay
# missing (ifelse() returns NA where the test is NA).
data$`Admit Type` <- ifelse(
  startsWith(data$`Admit Type`, "T"),
  "T",
  ifelse(startsWith(data$`Admit Type`, "F"), "F", "Others")
)
# Print the cleaned data set for inspection.
data
## # A tibble: 1,015 x 19
## Student Gender `IPEDS Ethnicity` `Low Income` `First Generati~` `Admit Type`
## <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 1 F WHITE N Y F
## 2 2 F WHITE N N F
## 3 3 M WHITE N N F
## 4 4 F ASIAN N N F
## 5 5 M MULTI-RACIAL N N F
## 6 6 F WHITE Y N F
## 7 7 M HISPANIC OR LATINO N Y F
## 8 8 M WHITE N N T
## 9 9 M WHITE Y N F
## 10 10 M WHITE N Y T
## # ... with 1,005 more rows, and 13 more variables:
## # `Evaluated High School GPA` <dbl>, `Highlander Scholarship` <chr>,
## # `Chancellor's Scholarship` <chr>, `Regents' Scholarship` <chr>,
## # `Previous School` <chr>, College <chr>, `UCR Entry Action` <chr>,
## # `Admitted Honors Cohort` <chr>, `Permanent City` <chr>,
## # `Permanent State` <chr>, `Capstone Project Completion` <dbl>,
## # Permanent.Region <chr>, Permanent.County <chr>
# Descriptive Analysis: Tables ----
# Ethnicity (rows) by capstone completion (columns 0/1), with row and column
# totals appended by addmargins().
print("Number of Students Completed Capstone by Ethnicity:")
## [1] "Number of Students Completed Capstone by Ethnicity:"
addmargins(
  table(data[["IPEDS Ethnicity"]], data[["Capstone Project Completion"]])
)
##
## 0 1 Sum
## ASIAN 164 142 306
## BLACK OR AFRICAN AMERICAN 13 13 26
## HISPANIC OR LATINO 210 150 360
## MULTI-RACIAL 35 38 73
## NON-RESIDENT ALIEN 16 7 23
## OTHERS 7 6 13
## WHITE 99 115 214
## Sum 544 471 1015
cat("\n")
# Gender (rows) by capstone completion (columns 0/1), with row and column
# totals appended by addmargins().
print("Number of Students Completed Capstone by Gender:")
## [1] "Number of Students Completed Capstone by Gender:"
addmargins(
  table(data[["Gender"]], data[["Capstone Project Completion"]])
)
##
## 0 1 Sum
## F 340 281 621
## M 198 183 381
## N 4 4 8
## Others 2 3 5
## Sum 544 471 1015
cat("\n")
# Low Income flag (rows) by capstone completion (columns 0/1), with row and
# column totals appended by addmargins().
print("Number of Students Completed Capstone by Low Income:")
## [1] "Number of Students Completed Capstone by Low Income:"
addmargins(
  table(data[["Low Income"]], data[["Capstone Project Completion"]])
)
##
## 0 1 Sum
## N 381 345 726
## Y 163 126 289
## Sum 544 471 1015
cat("\n")
# First Generation flag (rows) by capstone completion (columns 0/1), with row
# and column totals appended by addmargins().
print("Number of Students Completed Capstone by First Generation:")
## [1] "Number of Students Completed Capstone by First Generation:"
addmargins(
  table(data[["First Generation"]], data[["Capstone Project Completion"]])
)
##
## 0 1 Sum
## N 260 281 541
## Y 284 190 474
## Sum 544 471 1015
cat("\n")
# Consolidated Admit Type (rows) by capstone completion (columns 0/1), with
# row and column totals appended by addmargins().
print("Number of Students Completed Capstone by Admit Type:")
## [1] "Number of Students Completed Capstone by Admit Type:"
addmargins(
  table(data[["Admit Type"]], data[["Capstone Project Completion"]])
)
##
## 0 1 Sum
## F 419 374 793
## Others 8 8 16
## T 117 89 206
## Sum 544 471 1015
cat("\n")
# College (rows) by capstone completion (columns 0/1), with row and column
# totals appended by addmargins().
print("Number of Students Completed Capstone by College:")
## [1] "Number of Students Completed Capstone by College:"
addmargins(
  table(data[["College"]], data[["Capstone Project Completion"]])
)
##
## 0 1 Sum
## BU 30 23 53
## EN 72 61 133
## HS 211 194 405
## NA 227 185 412
## Others 4 8 12
## Sum 544 471 1015
cat("\n")
# UCR Entry Action (rows) by capstone completion (columns 0/1), with row and
# column totals appended by addmargins().
print("Number of Students Completed Capstone by UCR Entry Action:")
## [1] "Number of Students Completed Capstone by UCR Entry Action:"
addmargins(
  table(data[["UCR Entry Action"]], data[["Capstone Project Completion"]])
)
##
## 0 1 Sum
## First Time Freshman 424 379 803
## First Time Transfer 120 92 212
## Sum 544 471 1015
cat("\n")
# Recoded Admitted Honors Cohort (rows) by capstone completion (columns 0/1),
# with row and column totals appended by addmargins().
print("Number of Students Completed Capstone by Admitted Honors Cohort:")
## [1] "Number of Students Completed Capstone by Admitted Honors Cohort:"
addmargins(
  table(data[["Admitted Honors Cohort"]], data[["Capstone Project Completion"]])
)
##
## 0 1 Sum
## First Year 185 74 259
## Incoming Transfer 103 70 173
## Others 43 108 151
## Second Year 136 112 248
## Third Year 77 107 184
## Sum 544 471 1015
cat("\n")
# Permanent region (rows) by capstone completion (columns 0/1), with row and
# column totals appended by addmargins().
# NOTE(review): the recorded total for this table is 1009 while the other
# tables total 1015, so rows with a missing region appear to be left out by
# table(); the labels "other", "Other" and "others" are also tabulated as
# three separate rows -- confirm whether both are intended.
print("Number of Students Completed Capstone by Permanent Region:")
## [1] "Number of Students Completed Capstone by Permanent Region:"
addmargins(
  table(data[["Permanent.Region"]], data[["Capstone Project Completion"]])
)
##
## 0 1 Sum
## 1 1 7 8
## 10 31 21 52
## 2 1 0 1
## 3 45 38 83
## 4 3 4 7
## 5 10 10 20
## 6 12 9 21
## 7 248 224 472
## 8 129 100 229
## 9 44 48 92
## other 5 1 6
## Other 6 4 10
## others 5 3 8
## Sum 540 469 1009
cat("\n")
# Permanent county (rows) by capstone completion (columns 0/1), with row and
# column totals appended by addmargins().
# NOTE(review): the recorded output lists spelling variants as separate rows
# (e.g. "Riverside" / "RIverside" / "Riverside County", "San Bernadino" /
# "San Bernardino", "Los Angekes") and totals 1009 rather than 1015 --
# consider normalising the county names and reviewing the missing values.
print("Number of Students Completed Capstone by Permanent County:")
## [1] "Number of Students Completed Capstone by Permanent County:"
addmargins(
  table(data[["Permanent.County"]], data[["Capstone Project Completion"]])
)
##
## 0 1 Sum
## Alameda 12 9 21
## Alameda County 2 1 3
## Butte 0 1 1
## Contra Costa 4 5 9
## Fresno 1 1 2
## Imperial 1 1 2
## Imperial County 0 1 1
## Kern 7 3 10
## Kern County 0 1 1
## Kings 1 0 1
## Los Angekes 1 0 1
## Los Angeles 106 87 193
## Los Angeles County 22 13 35
## Madera 0 1 1
## Marin 0 2 2
## Merced 1 1 2
## Monterey County 1 0 1
## Napa 1 0 1
## Orange 41 40 81
## Orange County 2 8 10
## other 5 1 6
## Other 5 4 9
## others 5 4 9
## Others 1 0 1
## Placer 1 1 2
## Riverside 128 97 225
## RIverside 1 2 3
## Riverside County 33 15 48
## Sacramento 0 3 3
## Sacramento County 0 1 1
## San Bernadino 9 13 22
## San Bernadino County 14 31 45
## San Bernardino 63 66 129
## San Diego 30 11 41
## San Diego County 1 8 9
## San Francisco 8 1 9
## San Francisco County 2 0 2
## San Joaquin 1 1 2
## San Joaquin County 1 0 1
## San Luis Obispo 1 0 1
## San Luis Obispo County 0 1 1
## San Mateo 0 2 2
## San Mateo County 1 0 1
## Santa Barbara 1 1 2
## Santa Barbra County 0 1 1
## Santa Clara 11 17 28
## Santa Clara County 4 1 5
## Santa Cruz 1 1 2
## Shasta 0 1 1
## Solano 1 0 1
## Stanislaus 0 1 1
## Tulare 2 3 5
## Tulare County 1 0 1
## Ventura 6 6 12
## Sum 540 469 1009
cat("\n")
# Descriptive Analysis: Visualization ----
### By Admitted Honors Cohort:
# Stacked bar chart of student counts per cohort, split by capstone
# completion. The x axis is ordered with reorder() on -n, every segment is
# labelled with its count, and the x tick labels are rotated 90 degrees.
dplyr::count(data, `Admitted Honors Cohort`, `Capstone Project Completion`) %>%
  group_by(`Admitted Honors Cohort`) %>%
  ggplot(
    mapping = aes(
      x = reorder(`Admitted Honors Cohort`, -n),
      y = n,
      fill = factor(`Capstone Project Completion`),
      label = n
    )
  ) +
  geom_col() +
  geom_text(colour = "white", size = 3.5, position = position_stack(vjust = 0.5)) +
  theme(axis.text.x = element_text(angle = 90, hjust = 1)) +
  scale_fill_manual(
    name = "Capstone Completed",
    values = c("0" = "#E41A1C", "1" = "#377EB8"),
    labels = c("No", "Yes")
  ) +
  labs(
    title = "Barplot of Capstone Completion by Admitted Honors Cohort",
    x = "Admitted Honors Cohort",
    y = "Frequency of Students"
  )

### By UCR Entry Action:
# Stacked bar chart of student counts per entry action, split by capstone
# completion. The x axis is ordered with reorder() on -n and every segment is
# labelled with its count.
dplyr::count(data, `UCR Entry Action`, `Capstone Project Completion`) %>%
  group_by(`UCR Entry Action`) %>%
  ggplot(
    mapping = aes(
      x = reorder(`UCR Entry Action`, -n),
      y = n,
      fill = factor(`Capstone Project Completion`),
      label = n
    )
  ) +
  geom_col() +
  geom_text(colour = "white", size = 3.5, position = position_stack(vjust = 0.5)) +
  scale_fill_manual(
    name = "Capstone Completed",
    values = c("0" = "#E41A1C", "1" = "#377EB8"),
    labels = c("No", "Yes")
  ) +
  labs(
    title = "Barplot of Capstone Completion by UCR Entry Action",
    x = "UCR Entry Action",
    y = "Frequency of Students"
  )

### By College:
# Stacked bar chart of student counts per college, split by capstone
# completion. The x axis is ordered with reorder() on -n and every segment is
# labelled with its count.
dplyr::count(data, `College`, `Capstone Project Completion`) %>%
  group_by(`College`) %>%
  ggplot(
    mapping = aes(
      x = reorder(`College`, -n),
      y = n,
      fill = factor(`Capstone Project Completion`),
      label = n
    )
  ) +
  geom_col() +
  geom_text(colour = "white", size = 3.5, position = position_stack(vjust = 0.5)) +
  scale_fill_manual(
    name = "Capstone Completed",
    values = c("0" = "#E41A1C", "1" = "#377EB8"),
    labels = c("No", "Yes")
  ) +
  labs(
    title = "Barplot of Capstone Completion by College",
    x = "College",
    y = "Frequency of Students"
  )

### By Income
# Stacked bar chart of student counts per Low Income flag, split by capstone
# completion; every segment is labelled with its count.
dplyr::count(data, `Low Income`, `Capstone Project Completion`) %>%
  group_by(`Low Income`) %>%
  ggplot(
    mapping = aes(
      x = `Low Income`,
      y = n,
      fill = factor(`Capstone Project Completion`),
      label = n
    )
  ) +
  geom_col() +
  geom_text(colour = "white", size = 3.5, position = position_stack(vjust = 0.5)) +
  scale_fill_manual(
    name = "Capstone Completed",
    values = c("0" = "#E41A1C", "1" = "#377EB8"),
    labels = c("No", "Yes")
  ) +
  labs(
    title = "Barplot of Capstone Completion by Income",
    y = "Frequency of Students"
  )

### By First Generation status
# Stacked bar chart of student counts per First Generation flag, split by
# capstone completion; every segment is labelled with its count.
dplyr::count(data, `First Generation`, `Capstone Project Completion`) %>%
  group_by(`First Generation`) %>%
  ggplot(
    mapping = aes(
      x = `First Generation`,
      y = n,
      fill = factor(`Capstone Project Completion`),
      label = n
    )
  ) +
  geom_col() +
  geom_text(colour = "white", size = 3.5, position = position_stack(vjust = 0.5)) +
  scale_fill_manual(
    name = "Capstone Completed",
    values = c("0" = "#E41A1C", "1" = "#377EB8"),
    labels = c("No", "Yes")
  ) +
  labs(
    title = "Barplot of Capstone Completion by First Generation status",
    y = "Frequency of Students"
  )

### By Ethnicity
# Horizontal stacked bar chart: counts on the x axis, ethnicity on the y axis
# (ordered with reorder() on n), split by capstone completion. Unlike the
# other bar charts, no count labels are drawn here (there is no geom_text
# layer), although `label` is still mapped.
dplyr::count(data, `IPEDS Ethnicity`, `Capstone Project Completion`) %>%
  group_by(`IPEDS Ethnicity`) %>%
  ggplot(
    mapping = aes(
      x = n,
      y = reorder(`IPEDS Ethnicity`, n),
      fill = factor(`Capstone Project Completion`),
      label = n
    )
  ) +
  geom_col() +
  scale_fill_manual(
    name = "Capstone Completed",
    values = c("0" = "#E41A1C", "1" = "#377EB8"),
    labels = c("No", "Yes")
  ) +
  labs(
    title = "Barplot of Capstone Completion by Ethnicity",
    x = "Frequency of Students",
    y = "Ethnicity"
  )

### By Gender
# Stacked bar chart of student counts per gender, split by capstone
# completion; every segment is labelled with its count.
dplyr::count(data, `Gender`, `Capstone Project Completion`) %>%
  group_by(`Gender`) %>%
  ggplot(
    mapping = aes(
      x = `Gender`,
      y = n,
      fill = factor(`Capstone Project Completion`),
      label = n
    )
  ) +
  geom_col() +
  geom_text(colour = "white", size = 3.5, position = position_stack(vjust = 0.5)) +
  scale_fill_manual(
    name = "Capstone Completed",
    values = c("0" = "#E41A1C", "1" = "#377EB8"),
    labels = c("No", "Yes")
  ) +
  labs(
    title = "Barplot of Capstone Completion by Gender",
    y = "Frequency of Students"
  )

### By Admit Type
# Stacked bar chart of student counts per consolidated admit type, split by
# capstone completion; every segment is labelled with its count.
# The x axis is ordered with reorder() on n (ascending, unlike the charts that
# use -n); that ordering is kept as written.
# Two fixes relative to the earlier version: aes() no longer receives a stray
# third positional `n` that mapped to no named aesthetic, and the x axis gets
# an explicit label instead of showing the raw reorder(...) expression.
data %>%
  dplyr::count(`Admit Type`, `Capstone Project Completion`) %>%
  group_by(`Admit Type`) %>%
  ggplot(
    aes(
      x = reorder(`Admit Type`, n),
      y = n,
      fill = factor(`Capstone Project Completion`),
      label = n
    )
  ) +
  geom_bar(stat = "identity") +
  geom_text(size = 3.5, position = position_stack(vjust = 0.5), colour = "white") +
  scale_fill_manual(
    labels = c("No", "Yes"),
    values = c("0" = "#E41A1C", "1" = "#377EB8"),
    name = "Capstone Completed"
  ) +
  labs(
    title = "Barplot of Capstone Completion by Admit Type",
    x = "Admit Type",
    y = "Frequency of Students"
  )

### By Evaluated HS GPA
## sum(is.na(data$`Evaluated High School GPA`))
## number of NA = 267
# Histogram of Evaluated High School GPA. The binning is left at the
# geom_histogram() default, and rows with a missing GPA are dropped by the
# stat (the recorded run reports 30 bins and 267 removed rows).
gpa_viz <- ggplot(data = data, mapping = aes(x = `Evaluated High School GPA`)) +
  geom_histogram(fill = "#377EB8", color = "white")
gpa_viz +
  labs(
    title = "Histogram of Honor Students GPA",
    x = "GPA",
    y = "Number of Students"
  )
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 267 rows containing non-finite values (stat_bin).

# Data Analysis ----
# Series of chi-square tests of association between capstone completion (0/1)
# and each categorical variable. Each result is stored as t1..t13 and the
# p-values are tabulated further below.
# NOTE(review): thirteen tests are run with no multiple-comparison adjustment
# visible here -- confirm that is intended.
# A large scipen value makes R print very small p-values in fixed rather than
# scientific notation; the option stays set for the rest of the session.
options(scipen=999)
t1 <- chisq.test(data$`Capstone Project Completion`, data$`Gender`)
## Warning in chisq.test(data$`Capstone Project Completion`, data$Gender): Chi-
## squared approximation may be incorrect
# NOTE(review): the warning above presumably stems from the sparse Gender
# levels (the "N" and "Others" rows hold only 8 and 5 students); an exact or
# simulated p-value may be more appropriate -- confirm.
t2 <- chisq.test(data$`Capstone Project Completion`, data$`IPEDS Ethnicity`)
t3 <- chisq.test(data$`Capstone Project Completion`, data$`Low Income`)
t4 <- chisq.test(data$`Capstone Project Completion`, data$`First Generation`)
t5 <- chisq.test(data$`Capstone Project Completion`, data$`Admit Type`)
t6 <- chisq.test(data$`Capstone Project Completion`, data$`College`)
t7 <- chisq.test(data$`Capstone Project Completion`, data$`UCR Entry Action`)
t8 <- chisq.test(data$`Capstone Project Completion`, data$`Admitted Honors Cohort`)
t9 <- chisq.test(data$`Capstone Project Completion`, data$`Highlander Scholarship`)
t10 <- chisq.test(data$`Capstone Project Completion`, data$`Chancellor's Scholarship`)
t11 <- chisq.test(data$`Capstone Project Completion`, data$`Regents' Scholarship`)
# Region and county have many sparsely populated (and inconsistently spelled)
# levels in the recorded tables, so the same approximation warning is raised
# for both tests below; treat their p-values with caution.
t12 <- chisq.test(data$`Capstone Project Completion`, data$`Permanent.Region`)
## Warning in chisq.test(data$`Capstone Project Completion`,
## data$Permanent.Region): Chi-squared approximation may be incorrect
t13 <- chisq.test(data$`Capstone Project Completion`, data$`Permanent.County`)
## Warning in chisq.test(data$`Capstone Project Completion`,
## data$Permanent.County): Chi-squared approximation may be incorrect
# Collect the thirteen chi-square results into one summary table.
# tidy() turns each test result into a one-row data frame and map_df() stacks
# them in the order given, so `variables` must list the tested columns in the
# same order as t1..t13.
tab <- map_df(list(t1, t2, t3, t4, t5, t6, t7, t8, t9, t10, t11, t12, t13), tidy)
p1 <- tab[c("p.value")]
variables <- c(
  "Gender", "IPEDS Ethnicity", "Low Income", "First Generation", "Admit Type",
  "College", "UCR Entry Action", "Admitted Honors Cohort",
  "Highlander Scholarship", "Chancellor's Scholarship", "Regents' Scholarship",
  "Permanent Region", "Permanent County"
)
# Guard against the labels and the test list drifting out of step.
stopifnot(length(variables) == nrow(tab))
# Derive the verdict from the computed p-values at the 5% level instead of
# hard-coding it, so the table cannot disagree with the tests if the data or
# the cleaning steps change. For the recorded p-values this reproduces the
# previously hard-coded labels.
conclusion <- ifelse(tab$p.value < 0.05, "significant", "not significant")
cbind(variables, p1, conclusion)
## variables p.value conclusion
## 1 Gender 0.76460137472442890604896 not significant
## 2 IPEDS Ethnicity 0.07930674656709743586269 not significant
## 3 Low Income 0.28869858577139351218932 not significant
## 4 First Generation 0.00020253892438389735331 significant
## 5 Admit Type 0.57265602771752177613251 not significant
## 6 College 0.57000217789489582287388 not significant
## 7 UCR Entry Action 0.36289978937173972273200 not significant
## 8 Admitted Honors Cohort 0.00000000000000002192658 significant
## 9 Highlander Scholarship 0.10506833356879220764402 not significant
## 10 Chancellor's Scholarship 0.00003976320327142305526 significant
## 11 Regents' Scholarship 0.01081965404658243729008 significant
## 12 Permanent Region 0.42322303678668604653978 not significant
## 13 Permanent County 0.01403408682722000026810 significant
# Split the students by outcome: those who completed the capstone project
# (indicator 1) and those who did not (indicator 0).
capstone <- filter(data, `Capstone Project Completion` == 1)
no.capstone <- filter(data, `Capstone Project Completion` == 0)
# capstone <- data %>%
# filter(`Capstone Project Completion` == 1) %>%
# pull(`Evaluated High School GPA`)
# no.capstone <- data %>%
# filter(`Capstone Project Completion` == 0) %>%
# pull(`Evaluated High School GPA`)
## F test (compares the variances of the two groups)
## H0: σ1² = σ2²
## Ha: σ1² ≠ σ2²
## Two-sample t test (compares the means of the two groups)
## H0: µ1 = µ2
## Ha: µ1 ≠ µ2
# The F test is run first to decide whether the two-sample independent t-test
# on Evaluated High School GPA (completers vs. non-completers) should assume
# equal variances.
# The recorded degrees of freedom (350 and 396) correspond to 748 students,
# consistent with the 267 students whose GPA is missing being left out.
# NOTE(review): the F test is commonly described as sensitive to
# non-normality -- confirm it is an appropriate gate for this data.
var.test(capstone$`Evaluated High School GPA`, no.capstone$`Evaluated High School GPA`, alternative = "two.sided") # recorded p-value 0.0008 -> reject H0, so the variances differ
##
## F test to compare two variances
##
## data: capstone$`Evaluated High School GPA` and no.capstone$`Evaluated High School GPA`
## F = 1.4152, num df = 350, denom df = 396, p-value = 0.0008058
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
## 1.155210 1.736054
## sample estimates:
## ratio of variances
## 1.415242
# Two-sided two-sample t-test on Evaluated High School GPA, completers vs.
# non-completers. var.equal = FALSE requests the Welch form, in line with the
# F test above having rejected equal variances.
t.test(capstone$`Evaluated High School GPA`, no.capstone$`Evaluated High School GPA`, alternative = "two.sided", var.equal = FALSE) # recorded p-value 0.9262 -> fail to reject H0: mean GPA does not differ significantly between the groups
##
## Welch Two Sample t-test
##
## data: capstone$`Evaluated High School GPA` and no.capstone$`Evaluated High School GPA`
## t = 0.092699, df = 686.95, p-value = 0.9262
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.03700090 0.04066786
## sample estimates:
## mean of x mean of y
## 3.904302 3.902469
# NOTE(review): the message below is a fixed string, not derived from the test
# result; it must be updated by hand if the data or the outcome changes.
print("fail to reject H0 -> average gpa of students who completed capstone project vs those who didn't are not significantly different")
## [1] "fail to reject H0 -> average gpa of students who completed capstone project vs those who didn't are not significantly different"
# Train/test split and logistic regression of capstone completion on the
# variables that the chi-square screening flagged as significant.
# Fix the RNG seed so the random split below is reproducible.
set.seed(1)
# Convert every character column to a factor. vapply() always returns a
# logical vector (sapply() can return a list for zero-column input), so the
# column mask is type-stable.
char_cols <- vapply(data, is.character, logical(1))
data[char_cols] <- lapply(data[char_cols], as.factor)
# Each row is assigned to the training set independently with probability
# 0.7, so the split is approximately, not exactly, 70/30.
# NOTE(review): this logical vector is named `sample`, the same name as the
# base function it is created with; the name is kept because later code may
# refer to it.
sample <- sample(c(TRUE, FALSE), nrow(data), replace = TRUE, prob = c(0.7, 0.3))
train <- data[sample, ]
test <- data[!sample, ]
# NOTE(review): Permanent.County still contains spelling variants of the same
# county (e.g. "Riverside", "RIverside", "Riverside County"), which enter the
# model as distinct levels. The recorded fit shows several county coefficients
# near +/-16 with standard errors in the thousands and 15 Fisher scoring
# iterations, which suggests separation on sparsely populated levels --
# consider normalising or lumping counties before modelling.
model <- glm(`Capstone Project Completion` ~ `Admitted Honors Cohort` + `First Generation`+ `Chancellor's Scholarship` + `Regents' Scholarship` + Permanent.County, family = "binomial", data = train)
summary(model)
##
## Call:
## glm(formula = `Capstone Project Completion` ~ `Admitted Honors Cohort` +
## `First Generation` + `Chancellor's Scholarship` + `Regents' Scholarship` +
## Permanent.County, family = "binomial", data = train)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.1369 -1.0079 -0.5184 1.0706 2.0339
##
## Coefficients:
## Estimate Std. Error z value
## (Intercept) -0.7603 0.5807 -1.309
## `Admitted Honors Cohort`Incoming Transfer 0.7661 0.2791 2.745
## `Admitted Honors Cohort`Others 1.6086 0.3178 5.061
## `Admitted Honors Cohort`Second Year 0.9682 0.2460 3.936
## `Admitted Honors Cohort`Third Year 1.2981 0.2682 4.840
## `First Generation`Y -0.1890 0.1735 -1.089
## `Chancellor's Scholarship`Y 0.5940 0.3292 1.804
## `Regents' Scholarship`Y 1.6060 0.6910 2.324
## Permanent.CountyAlameda County -16.8527 1675.7452 -0.010
## Permanent.CountyContra Costa 0.3028 1.0937 0.277
## Permanent.CountyImperial -16.5849 2399.5448 -0.007
## Permanent.CountyKern -1.9286 1.2661 -1.523
## Permanent.CountyKings -17.1038 2399.5448 -0.007
## Permanent.CountyLos Angeles -0.2293 0.5828 -0.393
## Permanent.CountyLos Angeles County -0.2542 0.6954 -0.366
## Permanent.CountyMarin 16.1960 1687.5686 0.010
## Permanent.CountyMerced 0.2058 1.5725 0.131
## Permanent.CountyMonterey County -16.5849 2399.5448 -0.007
## Permanent.CountyOrange -0.2710 0.6233 -0.435
## Permanent.CountyOrange County 1.3219 1.0033 1.318
## Permanent.Countyother -0.8279 1.3648 -0.607
## Permanent.CountyOther 0.3208 1.0179 0.315
## Permanent.Countyothers -0.6167 0.9919 -0.622
## Permanent.CountyOthers -17.4143 2399.5448 -0.007
## Permanent.CountyPlacer -16.7739 2399.5448 -0.007
## Permanent.CountyRiverside -0.4331 0.5846 -0.741
## Permanent.CountyRIverside 0.9341 1.3993 0.668
## Permanent.CountyRiverside County -0.9838 0.6844 -1.437
## Permanent.CountySacramento 16.5028 1350.6475 0.012
## Permanent.CountySacramento County 15.7178 2399.5448 0.007
## Permanent.CountySan Bernadino 0.6903 0.8364 0.825
## Permanent.CountySan Bernadino County 1.3300 0.7083 1.878
## Permanent.CountySan Bernardino -0.1583 0.6041 -0.262
## Permanent.CountySan Diego -0.9900 0.6946 -1.425
## Permanent.CountySan Diego County 16.2303 961.4852 0.017
## Permanent.CountySan Francisco -1.5247 1.2963 -1.176
## Permanent.CountySan Francisco County -17.1038 2399.5448 -0.007
## Permanent.CountySan Joaquin -15.6167 2399.5448 -0.007
## Permanent.CountySan Joaquin County -15.8057 2399.5448 -0.007
## Permanent.CountySan Luis Obispo County 16.5604 2399.5448 0.007
## Permanent.CountySanta Barbara 0.2058 1.6138 0.128
## Permanent.CountySanta Barbra County 16.0283 2399.5448 0.007
## Permanent.CountySanta Clara 0.5781 0.7275 0.795
## Permanent.CountySanta Clara County -16.4128 1348.2026 -0.012
## Permanent.CountySanta Cruz 16.3582 2399.5448 0.007
## Permanent.CountyStanislaus 15.3129 2399.5448 0.006
## Permanent.CountyTulare 0.3657 1.1026 0.332
## Permanent.CountyVentura -0.7961 1.0330 -0.771
## Pr(>|z|)
## (Intercept) 0.19039
## `Admitted Honors Cohort`Incoming Transfer 0.00605 **
## `Admitted Honors Cohort`Others 0.000000416 ***
## `Admitted Honors Cohort`Second Year 0.000082968 ***
## `Admitted Honors Cohort`Third Year 0.000001299 ***
## `First Generation`Y 0.27608
## `Chancellor's Scholarship`Y 0.07117 .
## `Regents' Scholarship`Y 0.02011 *
## Permanent.CountyAlameda County 0.99198
## Permanent.CountyContra Costa 0.78191
## Permanent.CountyImperial 0.99449
## Permanent.CountyKern 0.12767
## Permanent.CountyKings 0.99431
## Permanent.CountyLos Angeles 0.69398
## Permanent.CountyLos Angeles County 0.71472
## Permanent.CountyMarin 0.99234
## Permanent.CountyMerced 0.89587
## Permanent.CountyMonterey County 0.99449
## Permanent.CountyOrange 0.66372
## Permanent.CountyOrange County 0.18765
## Permanent.Countyother 0.54412
## Permanent.CountyOther 0.75261
## Permanent.Countyothers 0.53412
## Permanent.CountyOthers 0.99421
## Permanent.CountyPlacer 0.99442
## Permanent.CountyRiverside 0.45878
## Permanent.CountyRIverside 0.50441
## Permanent.CountyRiverside County 0.15060
## Permanent.CountySacramento 0.99025
## Permanent.CountySacramento County 0.99477
## Permanent.CountySan Bernadino 0.40923
## Permanent.CountySan Bernadino County 0.06043 .
## Permanent.CountySan Bernardino 0.79332
## Permanent.CountySan Diego 0.15405
## Permanent.CountySan Diego County 0.98653
## Permanent.CountySan Francisco 0.23951
## Permanent.CountySan Francisco County 0.99431
## Permanent.CountySan Joaquin 0.99481
## Permanent.CountySan Joaquin County 0.99474
## Permanent.CountySan Luis Obispo County 0.99449
## Permanent.CountySanta Barbara 0.89852
## Permanent.CountySanta Barbra County 0.99467
## Permanent.CountySanta Clara 0.42679
## Permanent.CountySanta Clara County 0.99029
## Permanent.CountySanta Cruz 0.99456
## Permanent.CountyStanislaus 0.99491
## Permanent.CountyTulare 0.74018
## Permanent.CountyVentura 0.44090
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 962.56 on 695 degrees of freedom
## Residual deviance: 819.15 on 648 degrees of freedom
## (11 observations deleted due to missingness)
## AIC: 915.15
##
## Number of Fisher Scoring iterations: 15
# Append the county levels present on the test split to the levels recorded
# on the fitted model, so that predict() accepts test rows whose county was
# not among the model's recorded levels.
# NOTE(review): no coefficient was estimated for such added levels, so how
# those rows are scored should be verified (the recorded predict() call warns
# about a rank-deficient fit).
model[["xlevels"]][["Permanent.County"]] <- union(
  model[["xlevels"]][["Permanent.County"]],
  levels(test[["Permanent.County"]])
)
# McFadden pseudo R-squared of the fitted model.
pR2(model)["McFadden"]
## fitting null model for pseudo-r2
## McFadden
## 0.1489914
# Score the held-out rows; type = "response" returns values on the
# probability scale. The recorded output contains a few NA predictions.
predicted <- predict(object = model, newdata = test, type = "response")
## Warning in predict.lm(object, newdata, se.fit, scale = 1, type = if (type == :
## prediction from a rank-deficient fit may be misleading
# Print every prediction for inspection.
predicted
## 1 2 3 4
## 0.63128364050167 0.48248863495859 0.69425253494619 0.26282203605030
## 5 6 7 8
## 0.64043091410467 0.49463573668957 0.56628569017398 0.43558712342004
## 9 10 11 12
## 0.48421377589113 0.39474643459579 0.71056140244713 0.27098236333172
## 13 14 15 16
## 0.27464712992695 0.27901468667326 0.66729500026103 0.23529298885274
## 17 18 19 20
## 0.27901468667326 0.59523595996519 0.57649971848523 0.44392002703446
## 21 22 23 24
## 0.43558712342004 0.23529298885274 0.62495359893500 0.52612888999849
## 25 26 27 28
## 0.73865192341826 0.60231806352256 0.31385582260057 0.56628569017398
## 29 30 31 32
## 0.77346671298049 0.56628569017398 0.52612888999849 0.38073388865796
## 33 34 35 36
## 0.20061242562509 0.35059588758209 0.35059588758209 0.43406079332750
## 37 38 39 40
## 0.23264175938907 0.69425253494619 0.23264175938907 0.52612888999849
## 41 42 43 44
## 0.44757465276438 0.20061242562509 0.20061242562509 0.64997809771371
## 45 46 47 48
## 0.51303521446204 0.38833434233277 0.49463573668957 0.73284268435016
## 49 50 51 52
## 0.99999976902488 0.12640114156411 0.84273213962884 0.23737764521967
## 53 54 55 56
## 0.99999994779239 0.41956151723541 0.23084662033193 0.23084662033193
## 57 58 59 60
## 0.12640114156411 0.55177599692566 0.27901468667326 0.75742394643531
## 61 62 63 64
## 0.57041512458506 0.77785210122331 0.99999994779239 0.39233998735734
## 65 66 67 68
## 0.52361440970679 0.12640114156411 0.48841869656955 0.64429783069062
## 69 70 71 72
## 0.00000005904950 0.48841869656955 0.39233998735734 0.14878737194285
## 73 74 75 76
## 0.48971753285919 0.84273213962884 0.00000017394363 0.43820020044767
## 77 78 79 80
## 0.75890470958106 0.59402596686064 0.00000008715774 0.31857304737321
## 81 82 83 84
## 0.87465728194068 0.23737764521967 0.97777932712586 0.12640114156411
## 85 86 87 88
## 0.89827818683266 0.23737764521967 0.53690312583771 0.82315010171942
## 89 90 91 92
## 0.82315010171942 0.84273213962884 0.00000002882166 0.75890470958106
## 93 94 95 96
## 0.41893718274061 0.75890470958106 0.23737764521967 0.27588332819478
## 97 98 99 100
## 0.27901468667326 0.90246915609572 0.99999979675735 0.23529298885274
## 101 102 103 104
## 0.39828618605924 0.69425253494619 0.55628938050162 0.64043091410467
## 105 106 107 108
## 0.51239232149888 0.35785160989078 0.51239232149888 0.46519695983301
## 109 110 111 112
## 0.68696234535168 0.44392002703446 0.44757465276438 0.20061242562509
## 113 114 115 116
## 0.39828618605924 0.60585596244420 0.69425253494619 0.39788515833969
## 117 118 119 120
## 0.65555317529583 0.39474643459579 0.52981548174707 0.41542722888657
## 121 122 123 124
## 0.64997809771371 0.44392002703446 0.22787147003986 0.60585596244420
## 125 126 127 128
## 0.56628569017398 0.63128364050167 0.78311979192397 0.38881987935297
## 129 130 131 132
## 0.37433506334654 0.48421377589113 0.55628938050162 0.49463573668957
## 133 134 135 136
## 0.23264175938907 0.20061242562509 0.20061242562509 0.55628938050162
## 137 138 139 140
## 0.55565635918266 0.65908802105977 0.12753500546129 0.23577661316400
## 141 142 143 144
## 0.44392002703446 0.39474643459579 0.44433321327261 0.44433321327261
## 145 146 147 148
## 0.46519695983301 0.56628569017398 0.57649971848523 0.22787147003986
## 149 150 151 152
## 0.34832522570697 0.55628938050162 0.65555317529583 0.49463573668957
## 153 154 155 156
## 0.65613398742937 0.24831412234201 0.57649971848523 0.52981548174707
## 157 158 159 160
## 0.66596253371309 0.52612888999849 0.47891096206494 0.77081223272953
## 161 162 163 164
## 0.55565635918266 0.55628938050162 0.66596253371309 0.31385582260057
## 165 166 167 168
## 0.27901468667326 0.60231806352256 0.35059588758209 0.22787147003986
## 169 170 171 172
## 0.44757465276438 0.15176872911268 0.23625769167886 0.48421377589113
## 173 174 175 176
## 0.51239232149888 0.35059588758209 0.23264175938907 0.55628938050162
## 177 178 179 180
## 0.46519695983301 0.84680966727698 0.34376160026625 0.39474643459579
## 181 182 183 184
## 0.64997809771371 NA 0.56628569017398 0.23529298885274
## 185 186 187 188
## 0.46193461459342 0.44392002703446 0.57649971848523 0.56628569017398
## 189 190 191 192
## 0.45430183258386 0.24831412234201 0.46193461459342 0.52981548174707
## 193 194 195 196
## 0.46193461459342 0.12571846397965 0.75321524991616 0.77081223272953
## 197 198 199 200
## 0.39788515833969 0.46519695983301 0.60585596244420 0.00000006389220
## 201 202 203 204
## NA 0.59374236973381 0.46519695983301 0.77081223272953
## 205 206 207 208
## 0.51239232149888 0.28524183919705 0.39828618605924 0.35059588758209
## 209 210 211 212
## 0.59374236973381 0.22787147003986 0.23264175938907 0.39474643459579
## 213 214 215 216
## 0.23264175938907 0.27098236333172 0.52612888999849 0.18155406030782
## 217 218 219 220
## 0.09237095963633 0.57649971848523 0.39185822005073 0.73284268435016
## 221 222 223 224
## 0.59374236973381 0.00000005219889 0.27464712992695 0.35059588758209
## 225 226 227 228
## 0.39788515833969 0.49463573668957 0.41542722888657 0.66596253371309
## 229 230 231 232
## 0.27098236333172 0.31857304737321 0.63128364050167 0.23529298885274
## 233 234 235 236
## 0.35445952570614 0.18155406030782 0.39788515833969 0.55177599692566
## 237 238 239 240
## 0.55628938050162 0.64043091410467 NA 0.35059588758209
## 241 242 243 244
## 0.49463573668957 0.46519695983301 0.39474643459579 0.46519695983301
## 245 246 247 248
## 0.80879666758164 0.20061242562509 0.64997809771371 0.41805320146836
## 249 250 251 252
## 0.31248851293247 0.20061242562509 0.52612888999849 0.55177599692566
## 253 254 255 256
## 0.57649971848523 0.73284268435016 0.27901468667326 0.64997809771371
## 257 258 259 260
## 0.38881987935297 0.27901468667326 0.51303521446204 0.46193461459342
## 261 262 263 264
## 0.39474643459579 0.69857181683979 0.31385582260057 0.24831412234201
## 265 266 267 268
## 0.35059588758209 0.43576454174099 0.23529298885274 0.64043091410467
## 269 270 271 272
## 0.20061242562509 0.62495359893500 0.39474643459579 0.65555317529583
## 273 274 275 276
## 0.20061242562509 NA 0.46519695983301 0.68696234535168
## 277 278 279 280
## 0.73572887517637 0.69425253494619 0.24831412234201 0.39474643459579
## 281 282 283 284
## 0.24831412234201 0.66596253371309 0.72580030241428 0.47891096206494
## 285 286 287 288
## 0.26674260171740 NA 0.88291379043469 0.44757465276438
## 289 290 291 292
## 0.57649971848523 0.00000031918926 0.35059588758209 0.24831412234201
## 293 294 295 296
## 0.23529298885274 0.23529298885274 0.76336098232387 0.14862181185923
## 297 298 299 300
## 0.39474643459579 0.60231806352256 0.35059588758209 0.23529298885274
## 301 302 303 304
## 0.27901468667326 0.55544920948967 0.50508518121070 0.64997809771371
## 305 306 307 308
## 0.35059588758209 0.23264175938907 0.28524183919705 0.52612888999849
# Evaluate the classifier on the held-out rows that actually received a
# prediction. `predicted` contains NA for a few test rows; passing those to
# the metric helpers leaves them out of the numerators but not out of the
# denominators, which biases the reported rates.
# NOTE(review): confusionMatrix(), specificity(), misClassError() and
# plotROC() are presumably from the InformationValue package -- confirm.
scored <- !is.na(predicted)
actual_scored <- test$`Capstone Project Completion`[scored]
predicted_scored <- predicted[scored]
# NOTE: the recorded outputs kept below were produced before the NA filter
# was added. 0.6923077 equals 117/169 and 0.3539 equals 109/308, i.e. both
# denominators include test rows without a prediction (5 of the 308). On the
# 303 scored rows the same confusion matrix gives 117/167 (about 0.7006) and
# 109/303 (about 0.3597); re-run to refresh the recorded figures.
confusionMatrix(actual_scored, predicted_scored)
## 0 1
## 0 117 59
## 1 50 77
specificity(actual_scored, predicted_scored)
## [1] 0.6923077
misClassError(actual_scored, predicted_scored)
## [1] 0.3539
plotROC(actual_scored, predicted_scored)
