This code was updated in April of 2023
library(plotly)
library(ipeds)
library(tidycensus)
library(tidyverse)
library(RODBC)
library(highcharter)
library(dplyr)
library(knitr)
library(ggpubr)
library(shiny)
library(shinyWidgets)
library(ggpmisc)
library(ggplot2)
library(ggcorrplot)
library(DT)
library(leaflet)
library(leaflet.extras)
library(formattable)
library(data.table)
#Be sure to program your census api key: census_api_key(“key goes here”)
#Get IPEDS DATA
IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:/Users/mperki17/OneDrive/Dashboard/2023 Work/IPEDS202021.accdb")
#Get Institution Information from HD Table and Reduce to Desired Variables
institutioninformation <- sqlFetch(IPEDSDatabase, "HD2020")
institutioninformation <- subset(institutioninformation, CONTROL == 1)
institutioninformation <- institutioninformation %>%
select(UNITID, COUNTYCD, STABBR, INSTNM, IALIAS, F1SYSNAM, LONGITUD,
LATITUDE, LOCALE, C18SZSET)
institutioninformation <- mutate(institutioninformation, locale = case_when(LOCALE == 11 ~ "City",
LOCALE == 12 ~ "City",
LOCALE == 13 ~ "City",
LOCALE == 21 ~ "Suburb",
LOCALE == 22 ~ "Suburb",
LOCALE == 23 ~ "Suburb",
LOCALE == 31 ~ "Town",
LOCALE == 32 ~ "Town",
LOCALE == 33 ~ "Town",
LOCALE == 41 ~ "Rural",
LOCALE == 42 ~ "Rural",
LOCALE == 43 ~ "Rural",
LOCALE == -3 ~ "Unknown"))
institutioninformation <- mutate(institutioninformation,
Type = case_when(C18SZSET == 1 ~ "Two_Year",
C18SZSET == 2 ~ "Two_Year",
C18SZSET == 3 ~ "Two_Year",
C18SZSET == 4 ~ "Two_Year",
C18SZSET == 5 ~ "Two_Year",
C18SZSET == 6 ~ "Four_Year",
C18SZSET == 7 ~ "Four_Year",
C18SZSET == 8 ~ "Four_Year",
C18SZSET == 9 ~ "Four_Year",
C18SZSET == 10 ~ "Four_Year",
C18SZSET == 11 ~ "Four_Year",
C18SZSET == 12 ~ "Four_Year",
C18SZSET == 13 ~ "Four_Year",
C18SZSET == 14 ~ "Four_Year",
C18SZSET == 15 ~ "Four_Year",
C18SZSET == 16 ~ "Four_Year",
C18SZSET == 17 ~ "Four_Year",
C18SZSET == 18 ~ "Exclusively_Grad",
C18SZSET == -2 ~ "Not_Applicable"))
institutioninformation <- subset(institutioninformation,
Type!= "Not_Applicable")
institutioninformation <- subset(institutioninformation,
Type!= "Exclusively_Grad")
#Get Institution Enrollment from EF Table and Keep all Variables
enrollmentinformationrace <- sqlFetch(IPEDSDatabase, "EF2020A")
enrollmentinformationrace <- subset(enrollmentinformationrace, EFALEVEL == 1)
enrollmentinformationrace <- enrollmentinformationrace %>%
select(UNITID, EFAIANT, EFASIAT, EFBKAAT, EFHISPT,
EFNHPIT, EFNRALT, EFUNKNT, EF2MORT, EFWHITT)
#Get Institution Enrollment from EF Table and Keep all Variables
enrollmentinformationgender <- sqlFetch(IPEDSDatabase, "EF2020")
enrollmentinformationgender <- subset(enrollmentinformationgender, EFLEVEL == 10)
#Get CC Graduation Data fro GR Table and Reduce to Desired Variables
graduationinformation <- sqlFetch(IPEDSDatabase, "GR200_20")
graduationinformation <- graduationinformation %>%
select(UNITID, L4GR100, L4GR150, L4GR200)
#Get Four Graduation Data fro GR Table and Reduce to Desired Variables
universitygrad <- sqlFetch(IPEDSDatabase, "GR200_20")
universitygrad <- universitygrad %>%
select(UNITID, BAGR100, BAGR150, BAGR200)
#Get Institution Retention Data fro GR Table and Reduce to Desired Variables
retentioninformation <- sqlFetch(IPEDSDatabase, "EF2020D")
retentioninformation <- retentioninformation %>%
select(UNITID, RET_PCF, RET_PCP)
#Get Campus Cost Data fro GR Table and Reduce to Desired Variables
costinformation <- sqlFetch(IPEDSDatabase, "DRVIC2020")
costinformation <- costinformation %>%
select(UNITID, CINSOFF, CINSON)
###Get transfer information
transferinformation <- sqlFetch(IPEDSDatabase, "DRVGR2020")
transferinformation <- transferinformation %>%
select(UNITID, TRRTTOT)
#Join to one master file for IPEDS
ipedsdashdata <- left_join(institutioninformation, enrollmentinformationgender, by = "UNITID")%>%
left_join(enrollmentinformationrace, by = "UNITID")%>%
left_join(graduationinformation, by = "UNITID")%>%
left_join(universitygrad, by = "UNITID")%>%
left_join(retentioninformation, by = "UNITID")%>%
left_join(costinformation, by = "UNITID")%>%
left_join(transferinformation, by = "UNITID")
#Rename variable
ipedsdashdata <- ipedsdashdata %>%
rename("Institution" = INSTNM)
ipedsdashdata <- ipedsdashdata %>%
rename("Alias" = IALIAS)
ipedsdashdata <- ipedsdashdata %>%
rename("Inst_System" = F1SYSNAM)
ipedsdashdata <- ipedsdashdata %>%
rename("Longitude" = LONGITUD)
ipedsdashdata <- ipedsdashdata %>%
rename("Lattitude" = LATITUDE)
ipedsdashdata <- ipedsdashdata %>%
rename("Locale" = LOCALE)
ipedsdashdata <- ipedsdashdata %>%
rename("Community_Type" = locale)
ipedsdashdata <- ipedsdashdata %>%
rename("Inst_Level" = EFLEVEL)
ipedsdashdata <- ipedsdashdata %>%
rename("Tot_Enrolled" = EFTOTAL)
ipedsdashdata <- ipedsdashdata %>%
rename("Tot_Men" = EFMEN)
ipedsdashdata <- ipedsdashdata %>%
rename("Tot_Women" = EFWOM)
ipedsdashdata <- ipedsdashdata %>%
rename("Tot_Full_Time" = EFFT)
ipedsdashdata <- ipedsdashdata %>%
rename("Tot_Full_Time_Men" = EFFTMEN)
ipedsdashdata <- ipedsdashdata %>%
rename("Tot_Full_Time_Women" = EFFTWOM)
ipedsdashdata <- ipedsdashdata %>%
rename("Tot_Part_Time" = EFPT)
ipedsdashdata <- ipedsdashdata %>%
rename("Tot_Part_Time_Men" = EFPTMEN)
ipedsdashdata <- ipedsdashdata %>%
rename("Tot_Part_Time_Women" = EFPTWOM)
ipedsdashdata <- ipedsdashdata %>%
rename("TwoYGradRate100" = L4GR100)
ipedsdashdata <- ipedsdashdata %>%
rename("TwoYGradRate150" = L4GR150)
ipedsdashdata <- ipedsdashdata %>%
rename("TwoYGradRate200" = L4GR200)
ipedsdashdata <- ipedsdashdata %>%
rename("FourYGradRate100" = BAGR100)
ipedsdashdata <- ipedsdashdata %>%
rename("FourYGradRate150" = BAGR150)
ipedsdashdata <- ipedsdashdata %>%
rename("FourYGradRate200" = BAGR200)
ipedsdashdata <- ipedsdashdata %>%
rename("FT_Retention" = RET_PCF)
ipedsdashdata <- ipedsdashdata %>%
rename("PT_Retention" = RET_PCP)
ipedsdashdata <- ipedsdashdata %>%
rename("Cost_Off_Campus" = CINSOFF)
ipedsdashdata <- ipedsdashdata %>%
rename("Cost_on_Campus" = CINSON)
ipedsdashdata <- ipedsdashdata %>%
rename("State" = STABBR)
ipedsdashdata <- ipedsdashdata %>%
rename("Am_Indian" = EFAIANT)
ipedsdashdata <- ipedsdashdata %>%
rename("Asian" = EFASIAT)
ipedsdashdata <- ipedsdashdata %>%
rename("Black" = EFBKAAT)
ipedsdashdata <- ipedsdashdata %>%
rename("Hispanic" = EFHISPT)
ipedsdashdata <- ipedsdashdata %>%
rename("Hawaii_PI" = EFNHPIT)
ipedsdashdata <- ipedsdashdata %>%
rename("Non_Resident" = EFNRALT)
ipedsdashdata <- ipedsdashdata %>%
rename("Unknown" = EFUNKNT)
ipedsdashdata <- ipedsdashdata %>%
rename("Two_or_More" = EF2MORT)
ipedsdashdata <- ipedsdashdata %>%
rename("White" = EFWHITT)
ipedsdashdata <- ipedsdashdata %>%
rename("transfer" = TRRTTOT)
# Calculate gender enrollment rates
ipedsdashdata <- ipedsdashdata %>%
mutate("Percent_Women" = (Tot_Women / Tot_Enrolled)*100)
ipedsdashdata <- ipedsdashdata %>%
mutate("Percent_FT" = (Tot_Full_Time / Tot_Enrolled)*100)
ipedsdashdata <- ipedsdashdata %>%
mutate("Percent_Am_Indian" = (Am_Indian / Tot_Enrolled)*100)
ipedsdashdata <- ipedsdashdata %>%
mutate("Percent_Asian" = (Asian / Tot_Enrolled)*100)
ipedsdashdata <- ipedsdashdata %>%
mutate("Percent_Black" = (Black / Tot_Enrolled)*100)
ipedsdashdata <- ipedsdashdata %>%
mutate("Percent_Hispanic" = (Hispanic / Tot_Enrolled)*100)
ipedsdashdata <- ipedsdashdata %>%
mutate("Percent_Hawaii_PI" = (Hawaii_PI / Tot_Enrolled)*100)
ipedsdashdata <- ipedsdashdata %>%
mutate("Percent_Non_Resident" = (Non_Resident / Tot_Enrolled)*100)
ipedsdashdata <- ipedsdashdata %>%
mutate("Percent_Unknown" = (Unknown / Tot_Enrolled)*100)
ipedsdashdata <- ipedsdashdata %>%
mutate("Percent_Two_or_More" = (Two_or_More / Tot_Enrolled)*100)
ipedsdashdata <- ipedsdashdata %>%
mutate("Percent_White" = (White / Tot_Enrolled)*100)
ipedsdashdata$COUNTYCODE <- sprintf("%05d", ipedsdashdata$COUNTYCD)
write.csv(ipedsdashdata, "ipedsdashdata.csv")
IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:/Users/mperki17/OneDrive/Dashboard/2023 Work/IPEDS202021.accdb")
institutioninformation <- sqlFetch(IPEDSDatabase, "HD2020")
institutioninformation <- subset(institutioninformation, CONTROL == 1)
institutioninformation <- institutioninformation %>%
select(UNITID, COUNTYCD, STABBR, INSTNM, IALIAS, F1SYSNAM, LONGITUD, LATITUDE, LOCALE, C18SZSET)
institutioninformation <- mutate(institutioninformation, locale = case_when(LOCALE == 11 ~ "City",
LOCALE == 12 ~ "City",
LOCALE == 13 ~ "City",
LOCALE == 21 ~ "Suburb",
LOCALE == 22 ~ "Suburb",
LOCALE == 23 ~ "Suburb",
LOCALE == 31 ~ "Town",
LOCALE == 32 ~ "Town",
LOCALE == 33 ~ "Town",
LOCALE == 41 ~ "Rural",
LOCALE == 42 ~ "Rural",
LOCALE == 43 ~ "Rural",
LOCALE == -3 ~ "Unknown"))
institutioninformation <- mutate(institutioninformation, Type = case_when(C18SZSET == 1 ~ "Two_Year",
C18SZSET == 2 ~ "Two_Year",
C18SZSET == 3 ~ "Two_Year",
C18SZSET == 4 ~ "Two_Year",
C18SZSET == 5 ~ "Two_Year",
C18SZSET == 6 ~ "Four_Year",
C18SZSET == 7 ~ "Four_Year",
C18SZSET == 8 ~ "Four_Year",
C18SZSET == 9 ~ "Four_Year",
C18SZSET == 10 ~ "Four_Year",
C18SZSET == 11 ~ "Four_Year",
C18SZSET == 12 ~ "Four_Year",
C18SZSET == 13 ~ "Four_Year",
C18SZSET == 14 ~ "Four_Year",
C18SZSET == 15 ~ "Four_Year",
C18SZSET == 16 ~ "Four_Year",
C18SZSET == 17 ~ "Four_Year",
C18SZSET == 18 ~ "Exclusively_Grad",
C18SZSET == -2 ~ "Not_Applicable"))
institutioninformation <- institutioninformation %>%
mutate("State" = STABBR)
institutioninformation <- institutioninformation %>%
mutate("Institution" = INSTNM)
# Get 2020 Enrollment Data
enrollment20 <- sqlFetch(IPEDSDatabase, "EF2020")
enrollment20 <- subset(enrollment20, EFLEVEL == 10)
enrollment20 <- enrollment20 %>%
select(UNITID, EFTOTAL)
# Get 2019 Enrollment Data
IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ= C:/Users/mperki17/Documents/IPEDS201920.accdb")
enrollment19 <- sqlFetch(IPEDSDatabase, "EF2019")
enrollment19 <- subset(enrollment19, EFLEVEL == 10)
enrollment19 <- enrollment19 %>%
select(UNITID, EFTOTAL)
## Get 2018 Enrollment Data
IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ= C:/Users/mperki17/Documents/IPEDS201819.accdb")
enrollment18 <- sqlFetch(IPEDSDatabase, "EF2018")
enrollment18 <- subset(enrollment18, EFLEVEL == 10)
enrollment18 <- enrollment18 %>%
select(UNITID, EFTOTAL)
##Get 2017 Enrollment Data
IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:/Users/mperki17/Documents/IPEDS201718.accdb")
enrollment17 <- sqlFetch(IPEDSDatabase, "EF2017")
enrollment17 <- subset(enrollment17, EFLEVEL == 10)
enrollment17 <- enrollment17 %>%
select(UNITID, EFTOTAL)
#Rename all columns
enrollment20 <- enrollment19 %>%
rename("2020" = EFTOTAL)
enrollment19 <- enrollment19 %>%
rename("2019" = EFTOTAL)
enrollment18 <- enrollment18 %>%
rename("2018" = EFTOTAL)
enrollment17 <- enrollment17 %>%
rename("2017" = EFTOTAL)
##Join all the tables
enrollmentmaster1 <- left_join(enrollment20, enrollment19, by = "UNITID")
enrollmentmaster2 <- left_join(enrollmentmaster1, enrollment18, by = "UNITID")
enrollmentmaster3 <- left_join(enrollmentmaster2, enrollment17, by = "UNITID")
#Join enrollment with institution information
EnrollmentDB <- left_join(institutioninformation, enrollmentmaster3, by = "UNITID")
EnrollmentDB <- select(EnrollmentDB, UNITID, Institution, COUNTYCD, State, locale, Type, "2017", "2018", "2019", "2020")
EnrollmentDB <- gather(EnrollmentDB, Year, Enrollment, "2017":"2020", factor_key = TRUE)
EnrollmentDB$COUNTYCODE <- sprintf("%05d", EnrollmentDB$COUNTYCD)
write.csv(EnrollmentDB, "../2023 Work/ShinyFiles/EnrollmentDB.csv")
GradRate<- select(ipedsdashdata, Institution, State, Community_Type, Type, TwoYGradRate100, TwoYGradRate150, TwoYGradRate200, FourYGradRate100, FourYGradRate150, FourYGradRate200, PT_Retention, FT_Retention)
GradRate<- GradRate %>%
rename("100% Time Two Year" = TwoYGradRate100)
GradRate<- GradRate %>%
rename("150% Time Two Year" = TwoYGradRate150)
GradRate<- GradRate %>%
rename("200% Time Two Year" = TwoYGradRate200)
GradRate<- GradRate %>%
rename("100% Time Four Year" = FourYGradRate100)
GradRate<- GradRate %>%
rename("150% Time Four Year" = FourYGradRate150)
GradRate<- GradRate %>%
rename("200% Time Four Year" = FourYGradRate200)
GradRate<- GradRate %>%
rename("Full-Time Retention" = FT_Retention)
GradRate<- GradRate %>%
rename("Part-Time Retention" = PT_Retention)
GradRate <- gather(GradRate, RateLevel, Rate, '100% Time Two Year':'Full-Time Retention', factor_key=TRUE)
write.csv(GradRate, "GradRate.csv")
write.csv(GradRate, "../2023 Work/ShinyFiles/GradRate.csv")
library(questionr)
#Capture the aligned Census data and export to an excel file
key <- load_variables(2020, "acs5", cache = TRUE)
write.csv(key, "censuskey.csv")
#Estimated median household income in past 12 months, inflation adjusted 2015
income <- get_acs(geography = "county", variables = c(MedIncome = "B19013_001"))
income <- rename.variable(income, "estimate", "Median_Household_Income")
income <- select(income, GEOID, Median_Household_Income)
#Total in Population
HealthPro <- get_acs(geography = "county", variables = c(White = "C27001A_005", Black = "C27001B_005", Native = "C27001C_005", Asian = "C27001D_005", Pacific = "C27001E_005", Other = "C27001F_005", TwoMore = "C27001G_005", WhiteNot = "C27001H_005", Hispanic = "C27001I_005"))
HealthPro <- HealthPro %>%
select(GEOID, NAME, variable, estimate)
HealthPro <- HealthPro %>%
spread(variable, estimate)
HealthPro <- HealthPro %>%
replace_na(list(White = 0, Black = 0, Native = 0, Asian = 0, Pacific = 0, Other = 0, TwoMore = 0, WhiteNot = 0, Hispanic = 0))
HealthPro <- HealthPro %>%
mutate("HPro" = White + Black + Native + Asian + Pacific + Other + TwoMore + WhiteNot + Hispanic)
#Total with health
HealthTot <- get_acs(geography = "county", variables = c(Whitet = "C27001A_007", Blackt = "C27001B_007", Nativet = "C27001C_007", Asiant = "C27001D_007", Pacifict = "C27001E_007", Othert = "C27001F_007", TwoMoret = "C27001G_007", WhiteNott = "C27001H_007", Hispanict = "C27001I_007"))
HealthTot <- HealthTot %>%
select(GEOID, NAME, variable, estimate)
HealthTot <- HealthTot %>%
spread(variable, estimate)
HealthTot <- HealthTot %>%
replace_na(list(Whitet = 0, Blackt = 0, Nativet = 0, Asiant = 0, Pacifict = 0, Othert = 0, TwoMoret = 0, WhiteNott = 0, Hispanict = 0))
HealthTot <- HealthTot %>%
mutate("HTot" = Whitet + Blackt + Nativet + Asiant + Pacifict + Othert + TwoMoret + WhiteNott + Hispanict)
#Join the health variables
Health <- left_join(HealthTot, HealthPro, by = "GEOID")
Health <- rename.variable(Health, "NAME.x", "County")
Health <- select(Health, GEOID, County, HPro, HTot)
Health <- Health%>%
mutate("NoHealth" = HTot/HPro)
Health <- Health %>%
mutate("WithHealth" = 1-NoHealth)
#Total in Population
MalePop <- get_acs(geography = "county", variables = c(WhMalePop = "C23002A_004", BMalePop = "C23002B_004", NMalePop = "C23002C_004", AMalePop = "C23002D_004", PIPop = "C23002E_004", OMalePop = "C23002F_004", TwoMalePop = "C23002G_004", WAMalePop = "C23002H_004", HMalePopl = "C23002I_004"))
MalePop <- MalePop %>%
select(GEOID, NAME, variable, estimate)
MalePop <- MalePop %>%
spread(variable, estimate)
MalePop <- MalePop %>%
replace_na(list(WhMalePop = 0, BMalePop = 0, NMalePop = 0, AMalePop = 0, PIPop = 0, OMalePop = 0, TwoMalePop = 0, WAMalePop = 0, HMalePopl = 0))
MalePop <- MalePop %>%
mutate("MPop" = WhMalePop + BMalePop + NMalePop + AMalePop + PIPop + OMalePop + TwoMalePop + WAMalePop + HMalePopl)
MalePop <- MalePop %>%
select(GEOID, NAME, MPop)
#Total Unemployed
MaleEmp <- get_acs(geography = "county", variables = c(WhMaleEmp = "C23002A_008", BMaleEmp = "C23002B_008", NMaleEmp = "C23002C_008", AMaleEmp = "C23002D_008", PIMaleEmpl = "C23002E_008", OMaleEmp = "C23002F_008", TwoMaleEmp = "C23002G_008", WAMaleEmp = "C23002H_008", HMaleEmpl = "C23002I_008"))
MaleEmp <- MaleEmp %>%
select(GEOID, NAME, variable, estimate)
MaleEmp <- MaleEmp %>%
spread(variable, estimate)
MaleEmp <- MaleEmp %>%
replace_na(list(WhMaleEmp = 0, BMaleEmp = 0, NMaleEmp = 0, AMaleEmp = 0, PMaleEmpl = 0, OMaleEmp = 0, TwoMaleEmp = 0, WAMaleEmp = 0, HMaleEmpl = 0))
MaleEmp <- MaleEmp %>%
mutate("MUnEmp" = WhMaleEmp + BMaleEmp + NMaleEmp + AMaleEmp + PIMaleEmpl + OMaleEmp + TwoMaleEmp + WAMaleEmp + HMaleEmpl)
MaleEmp <- MaleEmp %>%
select(GEOID, MUnEmp)
MaleEmp <- left_join(MalePop, MaleEmp, by = "GEOID")
MaleEmp <- MaleEmp%>%
mutate("MUnEmploy" = MUnEmp/MPop)
#Total in Population
FemalePop <- get_acs(geography = "county", variables = c(WhFemalePop = "C23002A_016", BFemalePop = "C23002B_016", NFemalePop = "C23002C_016", AFemalePop = "C23002D_016", PIPop = "C23002E_016", OFemalePop = "C23002F_016", TwoFemalePop = "C23002G_016", WAFemalePop = "C23002H_016", HFemalePopl = "C23002I_016"))
FemalePop <- FemalePop %>%
select(GEOID, NAME, variable, estimate)
FemalePop <- FemalePop %>%
spread(variable, estimate)
FemalePop <- FemalePop %>%
replace_na(list(WhFemalePop = 0, BFemalePop = 0, NFemalePop = 0, AFemalePop = 0, PIPop = 0, OFemalePop = 0, TwoFemalePop = 0, WAFemalePop = 0, HFemalePopl = 0))
FemalePop <- FemalePop %>%
mutate("FPop" = WhFemalePop + BFemalePop + NFemalePop + AFemalePop + PIPop + OFemalePop + TwoFemalePop + WAFemalePop + HFemalePopl)
FemalePop <- FemalePop %>%
select(GEOID, NAME, FPop)
#Total Unemployed
FemaleEmp <- get_acs(geography = "county", variables = c(WhFemaleEmp = "C23002A_021", BFemaleEmp = "C23002B_021", NFemaleEmp = "C23002C_021", AFemaleEmp = "C23002D_021", PIFemaleEmpl = "C23002E_021", OFemaleEmp = "C23002F_021", TwoFemaleEmp = "C23002G_021", WAFemaleEmp = "C23002H_021", HFemaleEmpl = "C23002I_021"))
FemaleEmp <- FemaleEmp %>%
select(GEOID, NAME, variable, estimate)
FemaleEmp <- FemaleEmp %>%
spread(variable, estimate)
FemaleEmp <- FemaleEmp %>%
replace_na(list(WhFemaleEmp = 0, BFemaleEmp = 0, NFemaleEmp = 0, AFemaleEmp = 0, PFemaleEmpl = 0, OFemaleEmp = 0, TwoFemaleEmp = 0, WAFemaleEmp = 0, HFemaleEmpl = 0))
FemaleEmp <- FemaleEmp %>%
mutate("FUnEmp" = WhFemaleEmp + BFemaleEmp + NFemaleEmp + AFemaleEmp + PIFemaleEmpl + OFemaleEmp + TwoFemaleEmp + WAFemaleEmp + HFemaleEmpl)
FemaleEmp <- FemaleEmp %>%
select(GEOID, FUnEmp)
FemaleEmp <- left_join(FemalePop, FemaleEmp, by = "GEOID")
FemaleEmp <- FemaleEmp%>%
mutate("FUnEmploy" = FUnEmp / FPop)
employ <- left_join(MaleEmp, FemaleEmp, by = "GEOID")
employ <- select(employ, GEOID, NAME.x, MPop, MUnEmp, FPop, FUnEmp)
employ <- rename.variable(employ, "NAME.x", "County")
employ <- employ %>%
mutate("total" = MPop+FPop)
employ <- employ %>%
mutate("popemployed" = MUnEmp+FUnEmp)
employ <- employ %>%
mutate ("County_Percent_Unemployed" = (popemployed / total)*100)
#Total in Population
RacePop <- get_acs(geography = "county", variables = c(RacePop = "B01001_001"))
RacePop <- rename.variable(RacePop, "estimate", "RacePop")
#Total White
WhiteTot <- get_acs(geography = "county", variables = c(WhiteTot = "B01001A_001"))
WhiteTot <- rename.variable(WhiteTot, "estimate", "WhiteTot")
#Join the Race/Ethnicity
TotalWhite <- left_join(RacePop, WhiteTot, by = "GEOID")
TotalWhite <- rename.variable(TotalWhite, "NAME.x", "County")
TotalWhite <- select(TotalWhite, GEOID, County, RacePop, WhiteTot)
TotalWhite <- TotalWhite%>%
mutate("County_Percent_White" = (WhiteTot / RacePop)*100)
#Total Veterans
VetPop <- get_acs(geography = "county", variables = c(VetPop = "B21001_002"))
VetPop <- rename.variable(VetPop, "estimate", "VetPop")
#Total NonVeterans
VetNon <- get_acs(geography = "county", variables = c(VetNon = "B21001_003"))
VetNon <- rename.variable(VetNon, "estimate", "VetNon")
#Join the Veterans
Veteran <- left_join(VetPop, VetNon, by = "GEOID")
Veteran <- rename.variable(Veteran, "NAME.x", "County")
Veteran <- select(Veteran, GEOID, County, VetNon, VetPop)
Veteran <- Veteran%>%
mutate("TotalPopV" = VetNon + VetPop)
Veteran <- Veteran%>%
mutate("County_Percent_Veteran" = (VetPop / TotalPopV)*100)
#Total Pop
House <- get_acs(geography = "county", variables = c(House = "B07401_001"))
House <- rename.variable(House, "estimate", "House")
House <- select(House, GEOID, House)
#Total Same House
HouseSame <- get_acs(geography = "county", variables = c(HouseSame = "B07401_017"))
HouseSame <- rename.variable(HouseSame, "estimate", "HouseSame")
HouseSame <- select(HouseSame, GEOID, HouseSame)
HousePercent <- left_join(HouseSame, House, by = "GEOID")
HousePercent <-
HousePercent%>%
mutate("County_Percent_in_Same_House" = (HouseSame / House)*100)
HousePercent <- select(HousePercent, GEOID, County_Percent_in_Same_House)
#Get Married Statuses
Married <- get_acs(geography = "county", variables = c(MarriedPop = "B07408_001", NeverMarried = "B07408_002", Married = "B07408_003", Divorced = "B07408_004", Separated = "B07408_005", Widowed = "B07408_006"))
Married <- select(Married, GEOID, variable, estimate)
#Tranverse the data with spread
Married <- Married %>%
spread(variable, estimate)
#Calculate Proportions and clean dataset
Married <- Married %>%
mutate("County_Percent_Never_Married" = (NeverMarried / MarriedPop)*100)
Married <- Married %>%
mutate("County_Percent_Married" = (Married / MarriedPop)*100)
Married <- Married %>%
mutate("County_Percent_Divorced" = (Divorced / MarriedPop)*100)
Married <- Married %>%
mutate("County_Percent_Separated" = (Separated / MarriedPop)*100)
Married <- Married %>%
mutate("County_Percent_Widowed" = (Widowed / MarriedPop)*100)
Married <- Married %>%
mutate("County_Percent_Single" = ((100 - County_Percent_Married) + (County_Percent_Widowed)))
#Clean the dataset
Married <- select(Married, GEOID, County_Percent_Never_Married, County_Percent_Married, County_Percent_Divorced, County_Percent_Separated, County_Percent_Widowed, County_Percent_Single)
Education <- get_acs(geography = "county", variables = c(EduPop = "B07409_001", LessHS = "B07409_002", HSorEquiv = "B07409_003", SomeCollegeAss = "B07409_004", Bach = "B07409_005", GradorProf = "B07409_006"))
Education <- select(Education, GEOID, variable, estimate)
#Tranverse the data with spread
Education <- Education %>%
spread(variable, estimate)
#Calculate Proportions and clean dataset
Education <- Education %>%
mutate("County_Percent_Less_than_HS" = (LessHS / EduPop)*100)
Education <- Education %>%
mutate("County_Percent_HS" = ((HSorEquiv+SomeCollegeAss+Bach+GradorProf) / EduPop)*100)
Education <- Education %>%
mutate("County_Percent_Some_or_AS" = ((SomeCollegeAss+Bach+GradorProf) / EduPop)*100)
Education <- Education %>%
mutate("County_Percent_Bach" = ((Bach+GradorProf) / EduPop)*100)
Education <- Education %>%
mutate("County_Percent_Grad_or_Pro" = (GradorProf / EduPop)*100)
Education <- select(Education, GEOID, County_Percent_Less_than_HS, County_Percent_HS, County_Percent_Some_or_AS, County_Percent_Bach, County_Percent_Grad_or_Pro)
Tribes <- get_acs(geography = "county", variables = c(pop = "B01003_001", tribes = "B02014_002"))
Tribes <- select(Tribes, GEOID, variable, estimate)
# Transpose with spread
Tribes <- Tribes %>%
spread(variable, estimate)
# Calculate the proportion
Tribes <- Tribes %>%
mutate("County_Percent_Tribe" = (tribes / pop)*100)
Tribes <- select(Tribes, GEOID, County_Percent_Tribe)
Parenting <- get_acs(geography = "county", variables = c(totkids = "B05009_001", undersixoneparent = "B05009_013", sixtoseventeeenoneparent = "B05009_031"))
Parenting <- select(Parenting, GEOID, variable, estimate)
#Tranverse the data with spread
Parenting <- Parenting %>%
spread(variable, estimate)
#Calculate proportion
Parenting <- Parenting %>%
mutate("County_Percent_Single_Parent" = ((sixtoseventeeenoneparent + undersixoneparent) / totkids)*100)
Parenting <- select(Parenting, GEOID, County_Percent_Single_Parent)
Citizen <- get_acs(geography = "county", variables = c(totalpop = "B01003_001", totalmig = "B05011_001", notcitizen = "B05011_002", naturalized = "B05011_003"))
Citizen <- select(Citizen, GEOID, variable, estimate)
#Tranverse the data with spread
Citizen <- Citizen %>%
spread(variable, estimate)
#Calculate proportion
Citizen <- Citizen %>%
mutate("County_Percent_Not_Citizen" = (notcitizen / totalpop)*100)
Citizen <- Citizen %>%
mutate("County_Percent_Imigrant" = (totalmig / totalpop)*100)
Citizen <- select(Citizen, GEOID, County_Percent_Not_Citizen, County_Percent_Imigrant)
Renters <- get_acs(geography = "county", variables = c(totalh = "B07013_001", renters = "B07013_003"))
Renters <- select(Renters, GEOID, variable, estimate)
#Transverse the data with spread
Renters <- Renters %>%
spread(variable, estimate)
#Calculate proportions
Renters <- Renters %>%
mutate("County_Percent_Rent" = (renters / totalh)*100)
Renters <- select(Renters, GEOID, County_Percent_Rent)
census <- left_join(employ, Health, by = "GEOID")%>%
rename.variable("GEOID.x", "GEOID")%>%
left_join(income, by = "GEOID")%>%
left_join(TotalWhite, by = "GEOID")%>%
left_join(Veteran, by = "GEOID")%>%
left_join(HousePercent, by = "GEOID")%>%
left_join(Married, by = "GEOID")%>%
left_join(Education, by = "GEOID")%>%
left_join(Tribes, by = "GEOID")%>%
left_join(Parenting, by = "GEOID")%>%
left_join(Citizen, by = "GEOID")%>%
left_join(Renters, by = "GEOID")%>%
rename.variable("County.x", "County")
write.csv(census, "census.csv")
###Join with IPEDS Database
ipedscensusdata <- left_join(ipedsdashdata, census, by = c("COUNTYCODE" = "GEOID"))
write.csv(ipedscensusdata, "ipedscensusdata.csv")
write.csv(ipedscensusdata, "../2023 Work/ShinyFiles/ipedscensusdata.csv")
Demograph<- ipedscensusdata%>%
select(Institution, State, Type, Percent_FT, Percent_Women,Percent_Am_Indian, Percent_Asian, Percent_Black, Percent_Hispanic, Percent_Hawaii_PI, Percent_Non_Resident, Percent_Unknown, Percent_Two_or_More, Percent_White)
Demograph <- gather(Demograph, Demographic, Percent, Percent_FT:Percent_White, factor_key=TRUE)
Demograph<- na.exclude(Demograph)
Demograph<- Demograph %>%
mutate(across(where(is.numeric), ~ round(., digits = 2)))
write.csv(Demograph, "../2023 Work/ShinyFiles/Demograph.csv")
mapdata <- select (ipedscensusdata, Institution, State, County, Type, Community_Type, Longitude, Lattitude, Tot_Enrolled, TwoYGradRate150, FourYGradRate150, Cost_Off_Campus)
write.csv(mapdata, "../2023 Work/ShinyFiles/mapdata.csv")
CIPSOC<- read.csv("CIP_SOC.csv")
CIPSOC<- CIPSOC %>%
rename("CIPCODE" = CIP2020Code)
CIPSOC<- CIPSOC %>%
rename("Degree_Title" = CIP2020Title)
CIPSOC<- CIPSOC %>%
rename("Career_Title" = SOC2018Title)
CIPSOC<- CIPSOC %>%
rename("SOCCODE" = SOC2018Code)
Wage<- read.delim("Wage.txt")
Wage<- Wage%>%
rename("SOCCODE" = OCC_CODE)
Wage<- Wage%>%
rename("Total_Employed" = TOT_EMP)
Wage<- Wage%>%
rename("Standard_Error" = EMP_PRSE)
Wage<- Wage%>%
rename("Mean_Hourly" = H_MEAN)
Wage<- Wage%>%
rename("Mean_Annual" = A_MEAN)
Wage<- Wage%>%
rename("Meand_Standard_Error" = MEAN_PRSE)
Wage<- Wage%>%
rename("Tenth_%ile_Hourly" = H_PCT10)
Wage<- Wage%>%
rename("Twenty_fifth_%ile_Hourly" = H_PCT25)
Wage<- Wage%>%
rename("Hourly_Median" = H_MEDIAN)
Wage<- Wage%>%
rename("Seventy_Fifth_%ile_Hourly" = H_PCT75)
Wage<- Wage%>%
rename("Ninetieth_%ile_Hourly" = H_PCT90)
Wage<- Wage%>%
rename("Tenth_%ile_Annual" = A_PCT10)
Wage<- Wage%>%
rename("Twenty_Fifth_%ile_Annual" = A_PCT25)
Wage<- Wage%>%
rename("Median_Annual" = A_MEDIAN)
Wage<- Wage%>%
rename("Seventy_Fifth_%ile_Annual" = A_PCT75)
Wage<- Wage%>%
rename("Ninetieth_%ile_Annual" = A_PCT90)
Wage<- left_join(CIPSOC, Wage, by ="SOCCODE")
IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:/Users/mperki17/OneDrive/Dashboard/2023 Work/IPEDS202021.accdb")
instdegree<-sqlFetch(IPEDSDatabase, "C2020DEP")
instdegree<- instdegree %>%
select(UNITID, CIPCODE, PTOTAL)
instdegree<- instdegree %>%
rename("Total_Programs" = PTOTAL)
instname<- institutioninformation%>%
select(UNITID, INSTNM, STABBR, COUNTYCD, locale, Type)
instname<- instname %>%
rename("Institution_Name" = INSTNM)
instname<- instname %>%
rename("Select_a_State" = STABBR)
instname<- instname %>%
rename("Community_Type" = locale)
degree<- left_join(instname, instdegree, by = "UNITID")
degree<- left_join(degree, Wage, by = "CIPCODE")
Degrees_and_Jobs<- degree%>%
select(Institution_Name, Select_a_State, Degree_Title, Career_Title, CIPCODE, SOCCODE, Total_Programs, Total_Employed, Mean_Hourly, Mean_Annual, Hourly_Median, Median_Annual)
write.csv(Degrees_and_Jobs, "Degrees_and_Jobs.csv", fileEncoding = "UTF-8")
write.csv(Degrees_and_Jobs, "../2023 Work/ShinyFiles/Degrees_and_Jobs.csv", fileEncoding = "UTF-8")
CIPSOC<- read.csv("CIP_SOC.csv")
CIPSOC<- CIPSOC %>%
rename("CIPCODE" = CIP2020Code)
CIPSOC<- CIPSOC %>%
rename("Degree_Title" = CIP2020Title)
CIPSOC<- CIPSOC %>%
rename("Career_Title" = SOC2018Title)
CIPSOC<- CIPSOC %>%
rename("SOCCODE" = SOC2018Code)
projections<- read.csv("Projections.csv")
projections<-
projections%>%
rename("SOCCODE" = 'X2021.National.Employment.Matrix.code')
projections<-
projections%>%
rename("Career_Title" = 'X2021.National.Employment.Matrix.title')
projections2<- left_join(projections, CIPSOC, by = "SOCCODE")
degree2<- left_join(instname, instdegree, by = "UNITID")
Career_Projections<- left_join(degree2, projections2, by = "CIPCODE")%>%
rename("Employment_Change_Number" = Employment.change..2021.31)%>%
rename("Employment_Change_Percent" = Percent.employment.change..2021.31)%>%
rename("Average_Annual_Openings" = Occupational.openings..2021.31.annual.average)%>%
rename("Median_Annual_Wage" = Median.annual.wage..2021.1.)%>%
rename("Required_Education" = Typical.education.needed.for.entry)%>%
rename("Required_Job_Experience" = Work.experience.in.a.related.occupation)
write.csv(Career_Projections, "Career_Projections.csv", fileEncoding = "UTF-8")
write.csv(Career_Projections, "../2023 Work/ShinyFiles/Career_Projections.csv", fileEncoding = "UTF-8")
library(shinyWidgets)
Community<-
ipedscensusdata%>%
select(Institution, State, Community_Type, Type, County, FT_Retention, PT_Retention, TwoYGradRate100, TwoYGradRate150, TwoYGradRate200, FourYGradRate100, FourYGradRate150, FourYGradRate200, Cost_Off_Campus, Cost_on_Campus, Percent_Women, Percent_FT, Percent_White, Median_Household_Income, County_Percent_Veteran, County_Percent_in_Same_House, County_Percent_Never_Married, County_Percent_Married, County_Percent_Divorced, County_Percent_Separated, County_Percent_Widowed, County_Percent_Single, County_Percent_Less_than_HS, County_Percent_HS, County_Percent_Some_or_AS, County_Percent_Bach, County_Percent_Grad_or_Pro, County_Percent_Single_Parent, County_Percent_Not_Citizen, County_Percent_Imigrant, County_Percent_Rent, County_Percent_Unemployed, County_Percent_White)
Community$County<- iconv(Community$County, from = 'UTF-8', to = 'ASCII//TRANSLIT')
write.csv(Community, "../2023 Work/ShinyFiles/Community.csv", row.names = FALSE)
corrdata<-
Community %>%
select(Institution, State, Community_Type, Type, County, FT_Retention, PT_Retention, TwoYGradRate100, TwoYGradRate150, TwoYGradRate200, FourYGradRate100, FourYGradRate150, FourYGradRate200, Cost_Off_Campus, Cost_on_Campus, Percent_Women, Percent_FT, Percent_White, Median_Household_Income, County_Percent_Veteran, County_Percent_in_Same_House, County_Percent_Never_Married, County_Percent_Married, County_Percent_Divorced, County_Percent_Separated, County_Percent_Widowed, County_Percent_Single, County_Percent_Less_than_HS, County_Percent_HS, County_Percent_Some_or_AS, County_Percent_Bach, County_Percent_Grad_or_Pro, County_Percent_Single_Parent, County_Percent_Not_Citizen, County_Percent_Imigrant, County_Percent_Rent, County_Percent_Unemployed, County_Percent_White)
write.csv(corrdata, "../2023 Work/ShinyFiles/corrdata.csv")
library(tibble)
corrdata<-read.csv("../2023 Work/ShinyFiles/corrdata.csv")
corrdata<- select(corrdata, Institution, State, Community_Type, Type, County, FT_Retention, PT_Retention, TwoYGradRate100, TwoYGradRate150, TwoYGradRate200, FourYGradRate100, FourYGradRate150, FourYGradRate200, Cost_Off_Campus, Cost_on_Campus, Percent_Women, Percent_FT, Percent_White, Median_Household_Income, County_Percent_Veteran, County_Percent_in_Same_House, County_Percent_Never_Married, County_Percent_Married, County_Percent_Divorced, County_Percent_Separated, County_Percent_Widowed, County_Percent_Single, County_Percent_Less_than_HS, County_Percent_HS, County_Percent_Some_or_AS, County_Percent_Bach, County_Percent_Grad_or_Pro, County_Percent_Single_Parent, County_Percent_Not_Citizen, County_Percent_Imigrant, County_Percent_Rent, County_Percent_Unemployed, County_Percent_White)
corrmatrix <-
as.data.frame(round(cor(corrdata[sapply(corrdata, is.numeric)], use='pairwise'),2))
corrmatrix <- tibble::rownames_to_column(corrmatrix, "variable")
write.csv(corrmatrix, "../2023 Work/ShinyFiles/corrmatrix2.csv")