This code was updated in April of 2023

#Libraries

library(plotly)
library(ipeds)
library(tidycensus)
library(tidyverse)
library(RODBC)
library(highcharter)
library(dplyr)
library(knitr)
library(ggpubr)
library(shiny)
library(shinyWidgets)
library(ggpmisc)
library(ggplot2)
library(ggcorrplot)
library(DT)
library(leaflet)
library(leaflet.extras)
library(formattable)
library(data.table)

#Be sure to program your census api key: census_api_key(“key goes here”)

IPEDS Data Munge

#Get IPEDS DATA
IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:/Users/mperki17/OneDrive/Dashboard/2023 Work/IPEDS202021.accdb")

#Get Institution Information from HD Table and Reduce to Desired Variables
institutioninformation <-  sqlFetch(IPEDSDatabase, "HD2020")

institutioninformation <-  subset(institutioninformation, CONTROL == 1)
 
institutioninformation <-  institutioninformation %>%
  select(UNITID, COUNTYCD, STABBR, INSTNM, IALIAS, F1SYSNAM, LONGITUD, 
         LATITUDE, LOCALE, C18SZSET)



institutioninformation <-  mutate(institutioninformation, locale = case_when(LOCALE == 11 ~ "City",
             LOCALE == 12 ~ "City",
             LOCALE == 13 ~ "City",
             LOCALE == 21 ~ "Suburb",
             LOCALE == 22 ~ "Suburb",
             LOCALE == 23 ~ "Suburb",
             LOCALE == 31 ~ "Town",
             LOCALE == 32 ~ "Town",
             LOCALE == 33 ~ "Town",
             LOCALE == 41 ~ "Rural",
             LOCALE == 42 ~ "Rural",
             LOCALE == 43 ~ "Rural",
             LOCALE == -3 ~ "Unknown"))

institutioninformation <-  mutate(institutioninformation, 
                                Type = case_when(C18SZSET == 1 ~ "Two_Year",
                 C18SZSET == 2 ~ "Two_Year",
                 C18SZSET == 3 ~ "Two_Year",
                 C18SZSET == 4 ~ "Two_Year",
                 C18SZSET == 5 ~ "Two_Year",
                 C18SZSET == 6 ~ "Four_Year",
                 C18SZSET == 7 ~ "Four_Year",
                 C18SZSET == 8 ~ "Four_Year",
                 C18SZSET == 9 ~ "Four_Year",
                 C18SZSET == 10 ~ "Four_Year",
                 C18SZSET == 11 ~ "Four_Year",
                 C18SZSET == 12 ~ "Four_Year",
                 C18SZSET == 13 ~ "Four_Year",
                 C18SZSET == 14 ~ "Four_Year",
                 C18SZSET == 15 ~ "Four_Year",
                 C18SZSET == 16 ~ "Four_Year",
                 C18SZSET == 17 ~ "Four_Year",
                 C18SZSET == 18 ~ "Exclusively_Grad",
                 C18SZSET == -2 ~ "Not_Applicable"))


institutioninformation <-  subset(institutioninformation, 
                                Type!= "Not_Applicable")
institutioninformation <-  subset(institutioninformation, 
                                Type!= "Exclusively_Grad")

#Get Institution Enrollment from EF Table and Keep all Variables
enrollmentinformationrace <-  sqlFetch(IPEDSDatabase, "EF2020A")
enrollmentinformationrace <-  subset(enrollmentinformationrace, EFALEVEL == 1)
enrollmentinformationrace <-  enrollmentinformationrace %>%
 select(UNITID, EFAIANT, EFASIAT, EFBKAAT, EFHISPT,
        EFNHPIT, EFNRALT, EFUNKNT, EF2MORT, EFWHITT)

#Get Institution Enrollment from EF Table and Keep all Variables
enrollmentinformationgender <-  sqlFetch(IPEDSDatabase, "EF2020")
enrollmentinformationgender <-  subset(enrollmentinformationgender, EFLEVEL == 10)

#Get CC Graduation Data fro GR Table and Reduce to Desired Variables

graduationinformation <-  sqlFetch(IPEDSDatabase, "GR200_20")
graduationinformation <-  graduationinformation %>%
 select(UNITID, L4GR100, L4GR150, L4GR200)

#Get Four Graduation Data fro GR Table and Reduce to Desired Variables
universitygrad <-  sqlFetch(IPEDSDatabase, "GR200_20")
universitygrad <-  universitygrad %>%
 select(UNITID, BAGR100, BAGR150, BAGR200)

#Get Institution Retention Data fro GR Table and Reduce to Desired Variables

retentioninformation <-  sqlFetch(IPEDSDatabase, "EF2020D")
retentioninformation <-  retentioninformation %>%
 select(UNITID, RET_PCF, RET_PCP)

#Get Campus Cost Data fro GR Table and Reduce to Desired Variables

costinformation <-  sqlFetch(IPEDSDatabase, "DRVIC2020")
costinformation <-  costinformation %>%
 select(UNITID, CINSOFF, CINSON)

###Get transfer information

transferinformation <-  sqlFetch(IPEDSDatabase, "DRVGR2020")
transferinformation <-  transferinformation %>%
 select(UNITID, TRRTTOT)


#Join to one master file for IPEDS
ipedsdashdata <-  left_join(institutioninformation, enrollmentinformationgender, by = "UNITID")%>%
left_join(enrollmentinformationrace, by = "UNITID")%>%
left_join(graduationinformation, by = "UNITID")%>%
left_join(universitygrad, by = "UNITID")%>%
left_join(retentioninformation, by = "UNITID")%>%
left_join(costinformation, by = "UNITID")%>%
left_join(transferinformation, by = "UNITID")

#Rename variable
ipedsdashdata <-  ipedsdashdata %>%
  rename("Institution" = INSTNM)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Alias" = IALIAS)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Inst_System" = F1SYSNAM)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Longitude" = LONGITUD)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Lattitude" = LATITUDE)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Locale" = LOCALE)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Community_Type" = locale)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Inst_Level" = EFLEVEL)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Tot_Enrolled" = EFTOTAL)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Tot_Men" = EFMEN)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Tot_Women" = EFWOM)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Tot_Full_Time" = EFFT)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Tot_Full_Time_Men" = EFFTMEN)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Tot_Full_Time_Women" = EFFTWOM)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Tot_Part_Time" = EFPT)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Tot_Part_Time_Men" = EFPTMEN)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Tot_Part_Time_Women" = EFPTWOM)
ipedsdashdata <-  ipedsdashdata %>%
 rename("TwoYGradRate100" = L4GR100)
ipedsdashdata <-  ipedsdashdata %>%
 rename("TwoYGradRate150" = L4GR150)
ipedsdashdata <-  ipedsdashdata %>%
 rename("TwoYGradRate200" = L4GR200)

ipedsdashdata <-  ipedsdashdata %>%
 rename("FourYGradRate100" = BAGR100)
ipedsdashdata <-  ipedsdashdata %>%
 rename("FourYGradRate150" = BAGR150)
ipedsdashdata <-  ipedsdashdata %>%
 rename("FourYGradRate200" = BAGR200)

ipedsdashdata <-  ipedsdashdata %>%
 rename("FT_Retention" = RET_PCF)
ipedsdashdata <-  ipedsdashdata %>%
 rename("PT_Retention" = RET_PCP)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Cost_Off_Campus" = CINSOFF)
ipedsdashdata <-  ipedsdashdata %>%
 rename("Cost_on_Campus" = CINSON)
ipedsdashdata <-  ipedsdashdata %>%
 rename("State" = STABBR)

ipedsdashdata <-  ipedsdashdata %>%
  rename("Am_Indian" = EFAIANT)
ipedsdashdata <-  ipedsdashdata %>%
  rename("Asian" = EFASIAT)
ipedsdashdata <-  ipedsdashdata %>%
  rename("Black" = EFBKAAT)
ipedsdashdata <-  ipedsdashdata %>%
  rename("Hispanic" = EFHISPT)
ipedsdashdata <-  ipedsdashdata %>%
  rename("Hawaii_PI" = EFNHPIT)
ipedsdashdata <-  ipedsdashdata %>%
  rename("Non_Resident" = EFNRALT)
ipedsdashdata <-  ipedsdashdata %>%
  rename("Unknown" = EFUNKNT)
ipedsdashdata <-  ipedsdashdata %>%
  rename("Two_or_More" = EF2MORT)
ipedsdashdata <-  ipedsdashdata %>%
  rename("White" = EFWHITT)
ipedsdashdata <-  ipedsdashdata %>%
  rename("transfer" = TRRTTOT)


# Calculate gender enrollment rates
ipedsdashdata <-  ipedsdashdata %>%
 mutate("Percent_Women" = (Tot_Women / Tot_Enrolled)*100)
ipedsdashdata <-  ipedsdashdata %>%
 mutate("Percent_FT" = (Tot_Full_Time / Tot_Enrolled)*100)
ipedsdashdata <-  ipedsdashdata %>%
 mutate("Percent_Am_Indian" = (Am_Indian / Tot_Enrolled)*100)
ipedsdashdata <-  ipedsdashdata %>%
 mutate("Percent_Asian" = (Asian / Tot_Enrolled)*100)
ipedsdashdata <-  ipedsdashdata %>%
 mutate("Percent_Black" = (Black / Tot_Enrolled)*100)
ipedsdashdata <-  ipedsdashdata %>%
 mutate("Percent_Hispanic" = (Hispanic / Tot_Enrolled)*100)
ipedsdashdata <-  ipedsdashdata %>%
 mutate("Percent_Hawaii_PI" = (Hawaii_PI / Tot_Enrolled)*100)
ipedsdashdata <-  ipedsdashdata %>%
 mutate("Percent_Non_Resident" = (Non_Resident / Tot_Enrolled)*100)
ipedsdashdata <-  ipedsdashdata %>%
 mutate("Percent_Unknown" = (Unknown / Tot_Enrolled)*100)
ipedsdashdata <-  ipedsdashdata %>%
 mutate("Percent_Two_or_More" = (Two_or_More / Tot_Enrolled)*100)
ipedsdashdata <-  ipedsdashdata %>%
 mutate("Percent_White" = (White / Tot_Enrolled)*100)


ipedsdashdata$COUNTYCODE  <-  sprintf("%05d", ipedsdashdata$COUNTYCD)
write.csv(ipedsdashdata, "ipedsdashdata.csv")

IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:/Users/mperki17/OneDrive/Dashboard/2023 Work/IPEDS202021.accdb")

institutioninformation <- sqlFetch(IPEDSDatabase, "HD2020")
institutioninformation <-  subset(institutioninformation, CONTROL == 1)

institutioninformation <-  institutioninformation %>%
 select(UNITID, COUNTYCD, STABBR, INSTNM, IALIAS, F1SYSNAM, LONGITUD, LATITUDE, LOCALE, C18SZSET)


institutioninformation <-  mutate(institutioninformation, locale = case_when(LOCALE == 11 ~ "City", 
             LOCALE == 12 ~ "City", 
             LOCALE == 13 ~ "City", 
             LOCALE == 21 ~ "Suburb", 
             LOCALE == 22 ~ "Suburb", 
             LOCALE == 23 ~ "Suburb", 
             LOCALE == 31 ~ "Town", 
             LOCALE == 32 ~ "Town", 
             LOCALE == 33 ~ "Town", 
             LOCALE == 41 ~ "Rural", 
             LOCALE == 42 ~ "Rural", 
             LOCALE == 43 ~ "Rural", 
             LOCALE == -3 ~ "Unknown"))

institutioninformation <-  mutate(institutioninformation, Type = case_when(C18SZSET == 1 ~ "Two_Year", 
                 C18SZSET == 2 ~ "Two_Year", 
                 C18SZSET == 3 ~ "Two_Year", 
                 C18SZSET == 4 ~ "Two_Year", 
                 C18SZSET == 5 ~ "Two_Year", 
                 C18SZSET == 6 ~ "Four_Year", 
                 C18SZSET == 7 ~ "Four_Year", 
                 C18SZSET == 8 ~ "Four_Year", 
                 C18SZSET == 9 ~ "Four_Year", 
                 C18SZSET == 10 ~ "Four_Year", 
                 C18SZSET == 11 ~ "Four_Year", 
                 C18SZSET == 12 ~ "Four_Year", 
                 C18SZSET == 13 ~ "Four_Year", 
                 C18SZSET == 14 ~ "Four_Year", 
                 C18SZSET == 15 ~ "Four_Year", 
                 C18SZSET == 16 ~ "Four_Year", 
                 C18SZSET == 17 ~ "Four_Year", 
                 C18SZSET == 18 ~ "Exclusively_Grad", 
                 C18SZSET == -2 ~ "Not_Applicable"))

institutioninformation <-  institutioninformation %>%
 mutate("State" = STABBR)
institutioninformation <-  institutioninformation %>%
 mutate("Institution" = INSTNM)

# Get 2020 Enrollment Data
enrollment20 <-  sqlFetch(IPEDSDatabase, "EF2020")
enrollment20 <-  subset(enrollment20, EFLEVEL == 10)
enrollment20 <-  enrollment20 %>%
 select(UNITID, EFTOTAL)

# Get 2019 Enrollment Data
IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ= C:/Users/mperki17/Documents/IPEDS201920.accdb")
enrollment19 <-  sqlFetch(IPEDSDatabase, "EF2019")
enrollment19 <-  subset(enrollment19, EFLEVEL == 10)
enrollment19 <-  enrollment19 %>%
 select(UNITID, EFTOTAL)

## Get 2018 Enrollment Data
IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ= C:/Users/mperki17/Documents/IPEDS201819.accdb")

enrollment18 <-  sqlFetch(IPEDSDatabase, "EF2018")
enrollment18 <-  subset(enrollment18, EFLEVEL == 10)
enrollment18 <-  enrollment18 %>%
 select(UNITID, EFTOTAL)

##Get 2017 Enrollment Data
IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:/Users/mperki17/Documents/IPEDS201718.accdb")

enrollment17 <-  sqlFetch(IPEDSDatabase, "EF2017")
enrollment17 <-  subset(enrollment17, EFLEVEL == 10)
enrollment17 <-  enrollment17 %>%
 select(UNITID, EFTOTAL)



#Rename all columns
enrollment20 <-  enrollment19 %>%
 rename("2020" = EFTOTAL)
enrollment19 <-  enrollment19 %>%
 rename("2019" = EFTOTAL)
enrollment18 <-  enrollment18 %>%
 rename("2018" = EFTOTAL)
enrollment17 <-  enrollment17 %>%
 rename("2017" = EFTOTAL)


##Join all the tables
enrollmentmaster1 <-  left_join(enrollment20, enrollment19, by = "UNITID")
enrollmentmaster2 <-  left_join(enrollmentmaster1, enrollment18, by = "UNITID")
enrollmentmaster3 <-  left_join(enrollmentmaster2, enrollment17, by = "UNITID")


#Join enrollment with institution information
EnrollmentDB <- left_join(institutioninformation, enrollmentmaster3, by = "UNITID")
EnrollmentDB <-  select(EnrollmentDB, UNITID, Institution, COUNTYCD, State, locale, Type, "2017", "2018", "2019", "2020")

EnrollmentDB  <-  gather(EnrollmentDB, Year, Enrollment, "2017":"2020", factor_key = TRUE)

EnrollmentDB$COUNTYCODE  <-  sprintf("%05d", EnrollmentDB$COUNTYCD)
write.csv(EnrollmentDB, "../2023 Work/ShinyFiles/EnrollmentDB.csv")

Graduation rate file

GradRate<- select(ipedsdashdata, Institution, State, Community_Type, Type, TwoYGradRate100, TwoYGradRate150, TwoYGradRate200, FourYGradRate100, FourYGradRate150, FourYGradRate200, PT_Retention, FT_Retention)
GradRate<- GradRate %>%
  rename("100% Time Two Year" = TwoYGradRate100)
GradRate<- GradRate %>%
  rename("150% Time Two Year" = TwoYGradRate150)
GradRate<- GradRate %>%
  rename("200% Time Two Year" = TwoYGradRate200)
GradRate<- GradRate %>%
  rename("100% Time Four Year" = FourYGradRate100)
GradRate<- GradRate %>%
  rename("150% Time Four Year" = FourYGradRate150)
GradRate<- GradRate %>%
  rename("200% Time Four Year" = FourYGradRate200)
GradRate<- GradRate %>%
  rename("Full-Time Retention" = FT_Retention)
GradRate<- GradRate %>%
  rename("Part-Time Retention" = PT_Retention)
GradRate <- gather(GradRate, RateLevel, Rate, '100% Time Two Year':'Full-Time Retention', factor_key=TRUE)

write.csv(GradRate, "GradRate.csv")
write.csv(GradRate, "../2023 Work/ShinyFiles/GradRate.csv")

Census Data Munge

library(questionr)

#Capture the aligned Census data and export to an excel file
key  <-  load_variables(2020, "acs5", cache = TRUE)

write.csv(key, "censuskey.csv")

#Estimated median household income in past 12 months, inflation adjusted 2015
income  <-  get_acs(geography = "county", variables = c(MedIncome = "B19013_001"))
income <- rename.variable(income, "estimate", "Median_Household_Income")
income <-  select(income, GEOID, Median_Household_Income)


#Total in Population

HealthPro <- get_acs(geography = "county", variables = c(White = "C27001A_005", Black = "C27001B_005", Native = "C27001C_005", Asian = "C27001D_005", Pacific = "C27001E_005", Other = "C27001F_005", TwoMore = "C27001G_005", WhiteNot = "C27001H_005", Hispanic = "C27001I_005"))
HealthPro <-  HealthPro %>%
 select(GEOID, NAME, variable, estimate)
HealthPro <-  HealthPro %>%
 spread(variable, estimate)
HealthPro <-  HealthPro %>%
 replace_na(list(White = 0, Black = 0, Native = 0, Asian = 0, Pacific = 0, Other = 0, TwoMore = 0, WhiteNot = 0, Hispanic = 0))
HealthPro <-  HealthPro %>%
 mutate("HPro" = White + Black + Native + Asian + Pacific + Other + TwoMore + WhiteNot + Hispanic)

#Total with health

HealthTot <- get_acs(geography = "county", variables = c(Whitet = "C27001A_007", Blackt = "C27001B_007", Nativet = "C27001C_007", Asiant = "C27001D_007", Pacifict = "C27001E_007", Othert = "C27001F_007", TwoMoret = "C27001G_007", WhiteNott = "C27001H_007", Hispanict = "C27001I_007"))
HealthTot <-  HealthTot %>%
 select(GEOID, NAME, variable, estimate)
HealthTot <-  HealthTot %>%
 spread(variable, estimate)
HealthTot <-  HealthTot %>%
 replace_na(list(Whitet = 0, Blackt = 0, Nativet = 0, Asiant = 0, Pacifict = 0, Othert = 0, TwoMoret = 0, WhiteNott = 0, Hispanict = 0))
HealthTot <-  HealthTot %>%
 mutate("HTot" = Whitet + Blackt + Nativet + Asiant + Pacifict + Othert + TwoMoret + WhiteNott + Hispanict)


#Join the health variables
Health <- left_join(HealthTot, HealthPro, by = "GEOID")
Health <- rename.variable(Health, "NAME.x", "County")
Health <-  select(Health, GEOID, County, HPro, HTot)
Health <-  Health%>%
 mutate("NoHealth" = HTot/HPro)
Health <-  Health %>%
 mutate("WithHealth" = 1-NoHealth)

#Total in Population
MalePop <- get_acs(geography = "county", variables = c(WhMalePop = "C23002A_004", BMalePop = "C23002B_004", NMalePop = "C23002C_004", AMalePop = "C23002D_004", PIPop = "C23002E_004", OMalePop = "C23002F_004", TwoMalePop = "C23002G_004", WAMalePop = "C23002H_004", HMalePopl = "C23002I_004"))
MalePop <-  MalePop %>%
 select(GEOID, NAME, variable, estimate)
MalePop <-  MalePop %>%
 spread(variable, estimate)
MalePop <-  MalePop %>%
 replace_na(list(WhMalePop = 0, BMalePop = 0, NMalePop = 0, AMalePop = 0, PIPop = 0, OMalePop = 0, TwoMalePop = 0, WAMalePop = 0, HMalePopl = 0))
MalePop <-  MalePop %>%
 mutate("MPop" = WhMalePop + BMalePop + NMalePop + AMalePop + PIPop + OMalePop + TwoMalePop + WAMalePop + HMalePopl)
MalePop <-  MalePop %>%
select(GEOID, NAME, MPop)


#Total Unemployed
MaleEmp <- get_acs(geography = "county", variables = c(WhMaleEmp = "C23002A_008", BMaleEmp = "C23002B_008", NMaleEmp = "C23002C_008", AMaleEmp = "C23002D_008", PIMaleEmpl = "C23002E_008", OMaleEmp = "C23002F_008", TwoMaleEmp = "C23002G_008", WAMaleEmp = "C23002H_008", HMaleEmpl = "C23002I_008"))
MaleEmp <-  MaleEmp %>%
 select(GEOID, NAME, variable, estimate)
MaleEmp <-  MaleEmp %>%
 spread(variable, estimate)
MaleEmp <-  MaleEmp %>%
 replace_na(list(WhMaleEmp = 0, BMaleEmp = 0, NMaleEmp = 0, AMaleEmp = 0, PMaleEmpl = 0, OMaleEmp = 0, TwoMaleEmp = 0, WAMaleEmp = 0, HMaleEmpl = 0))
MaleEmp <-  MaleEmp %>%
 mutate("MUnEmp" = WhMaleEmp + BMaleEmp + NMaleEmp + AMaleEmp + PIMaleEmpl + OMaleEmp + TwoMaleEmp + WAMaleEmp + HMaleEmpl)

MaleEmp <-  MaleEmp %>%
select(GEOID, MUnEmp)

MaleEmp <-  left_join(MalePop, MaleEmp, by = "GEOID")

MaleEmp <-  MaleEmp%>%
mutate("MUnEmploy" = MUnEmp/MPop)

#Total in Population
FemalePop <- get_acs(geography = "county", variables = c(WhFemalePop = "C23002A_016", BFemalePop = "C23002B_016", NFemalePop = "C23002C_016", AFemalePop = "C23002D_016", PIPop = "C23002E_016", OFemalePop = "C23002F_016", TwoFemalePop = "C23002G_016", WAFemalePop = "C23002H_016", HFemalePopl = "C23002I_016"))
FemalePop <-  FemalePop %>%
 select(GEOID, NAME, variable, estimate)
FemalePop <-  FemalePop %>%
 spread(variable, estimate)
FemalePop <-  FemalePop %>%
 replace_na(list(WhFemalePop = 0, BFemalePop = 0, NFemalePop = 0, AFemalePop = 0, PIPop = 0, OFemalePop = 0, TwoFemalePop = 0, WAFemalePop = 0, HFemalePopl = 0))
FemalePop <-  FemalePop %>%
 mutate("FPop" = WhFemalePop + BFemalePop + NFemalePop + AFemalePop + PIPop + OFemalePop + TwoFemalePop + WAFemalePop + HFemalePopl)
FemalePop <-  FemalePop %>%
select(GEOID, NAME, FPop)

#Total Unemployed
FemaleEmp <- get_acs(geography = "county", variables = c(WhFemaleEmp = "C23002A_021", BFemaleEmp = "C23002B_021", NFemaleEmp = "C23002C_021", AFemaleEmp = "C23002D_021", PIFemaleEmpl = "C23002E_021", OFemaleEmp = "C23002F_021", TwoFemaleEmp = "C23002G_021", WAFemaleEmp = "C23002H_021", HFemaleEmpl = "C23002I_021"))
FemaleEmp <-  FemaleEmp %>%
 select(GEOID, NAME, variable, estimate)
FemaleEmp <-  FemaleEmp %>%
 spread(variable, estimate)
FemaleEmp <-  FemaleEmp %>%
 replace_na(list(WhFemaleEmp = 0, BFemaleEmp = 0, NFemaleEmp = 0, AFemaleEmp = 0, PFemaleEmpl = 0, OFemaleEmp = 0, TwoFemaleEmp = 0, WAFemaleEmp = 0, HFemaleEmpl = 0))
FemaleEmp <-  FemaleEmp %>%
 mutate("FUnEmp" = WhFemaleEmp + BFemaleEmp + NFemaleEmp + AFemaleEmp + PIFemaleEmpl + OFemaleEmp + TwoFemaleEmp + WAFemaleEmp + HFemaleEmpl)

FemaleEmp <-  FemaleEmp %>%
select(GEOID, FUnEmp)

FemaleEmp <-  left_join(FemalePop, FemaleEmp, by = "GEOID")

FemaleEmp <-  FemaleEmp%>%
mutate("FUnEmploy" = FUnEmp / FPop)

employ <- left_join(MaleEmp, FemaleEmp, by = "GEOID")
employ <-  select(employ, GEOID, NAME.x, MPop, MUnEmp, FPop, FUnEmp)
employ <- rename.variable(employ, "NAME.x", "County")
employ <-  employ %>%
 mutate("total" = MPop+FPop)
employ <-  employ %>%
 mutate("popemployed" = MUnEmp+FUnEmp)
employ <-  employ %>%
 mutate ("County_Percent_Unemployed" = (popemployed / total)*100)

#Total in Population
RacePop <- get_acs(geography = "county", variables = c(RacePop = "B01001_001"))
RacePop <-  rename.variable(RacePop, "estimate", "RacePop")

#Total White
WhiteTot <- get_acs(geography = "county", variables = c(WhiteTot = "B01001A_001"))
WhiteTot <- rename.variable(WhiteTot, "estimate", "WhiteTot")

#Join the Race/Ethnicity
TotalWhite <- left_join(RacePop, WhiteTot, by = "GEOID")
TotalWhite <- rename.variable(TotalWhite, "NAME.x", "County")
TotalWhite <-  select(TotalWhite, GEOID, County, RacePop, WhiteTot)
TotalWhite <-  TotalWhite%>%
mutate("County_Percent_White" = (WhiteTot / RacePop)*100)

#Total Veterans
VetPop <- get_acs(geography = "county", variables = c(VetPop = "B21001_002"))
VetPop <-  rename.variable(VetPop, "estimate", "VetPop")

#Total NonVeterans
VetNon <- get_acs(geography = "county", variables = c(VetNon = "B21001_003"))
VetNon <- rename.variable(VetNon, "estimate", "VetNon")

#Join the Veterans
Veteran <- left_join(VetPop, VetNon, by = "GEOID")
Veteran <- rename.variable(Veteran, "NAME.x", "County")
Veteran <-  select(Veteran, GEOID, County, VetNon, VetPop)
Veteran <-  Veteran%>%
mutate("TotalPopV" = VetNon + VetPop)
Veteran <-  Veteran%>%
mutate("County_Percent_Veteran" = (VetPop / TotalPopV)*100)

#Total Pop
House <- get_acs(geography = "county", variables = c(House = "B07401_001"))
House <-  rename.variable(House, "estimate", "House")
House <-  select(House, GEOID, House)

#Total Same House
HouseSame <- get_acs(geography = "county", variables = c(HouseSame = "B07401_017"))
HouseSame <- rename.variable(HouseSame, "estimate", "HouseSame")
HouseSame <-  select(HouseSame, GEOID, HouseSame)

HousePercent <-  left_join(HouseSame, House, by = "GEOID")

HousePercent <- 
 HousePercent%>%
 mutate("County_Percent_in_Same_House" = (HouseSame / House)*100)
HousePercent <-  select(HousePercent, GEOID, County_Percent_in_Same_House)

#Get Married Statuses
Married <- get_acs(geography = "county", variables = c(MarriedPop = "B07408_001", NeverMarried = "B07408_002", Married = "B07408_003", Divorced = "B07408_004", Separated = "B07408_005", Widowed = "B07408_006"))
Married <-  select(Married, GEOID, variable, estimate)

#Tranverse the data with spread
Married <-  Married %>%
 spread(variable, estimate)

#Calculate Proportions and clean dataset
Married <-  Married %>%
 mutate("County_Percent_Never_Married" = (NeverMarried / MarriedPop)*100)

Married <-  Married %>%
 mutate("County_Percent_Married" = (Married / MarriedPop)*100)

Married <-  Married %>%
 mutate("County_Percent_Divorced" = (Divorced / MarriedPop)*100)

Married <-  Married %>%
 mutate("County_Percent_Separated" = (Separated / MarriedPop)*100)

Married <-  Married %>%
 mutate("County_Percent_Widowed" = (Widowed / MarriedPop)*100)

Married <-  Married %>%
 mutate("County_Percent_Single" = ((100 - County_Percent_Married) + (County_Percent_Widowed)))

#Clean the dataset
Married <-  select(Married, GEOID, County_Percent_Never_Married, County_Percent_Married, County_Percent_Divorced, County_Percent_Separated, County_Percent_Widowed, County_Percent_Single)

Education <- get_acs(geography = "county", variables = c(EduPop = "B07409_001", LessHS = "B07409_002", HSorEquiv = "B07409_003", SomeCollegeAss = "B07409_004", Bach = "B07409_005", GradorProf = "B07409_006"))
Education <-  select(Education, GEOID, variable, estimate)

#Tranverse the data with spread
Education <-  Education %>%
 spread(variable, estimate)

#Calculate Proportions and clean dataset
Education <-  Education %>%
 mutate("County_Percent_Less_than_HS" = (LessHS / EduPop)*100)

Education <-  Education %>%
 mutate("County_Percent_HS" = ((HSorEquiv+SomeCollegeAss+Bach+GradorProf) / EduPop)*100)

Education <-  Education %>%
 mutate("County_Percent_Some_or_AS" = ((SomeCollegeAss+Bach+GradorProf) / EduPop)*100)

Education <-  Education %>%
 mutate("County_Percent_Bach" = ((Bach+GradorProf) / EduPop)*100)

Education <-  Education %>%
 mutate("County_Percent_Grad_or_Pro" = (GradorProf / EduPop)*100)

Education <-  select(Education, GEOID, County_Percent_Less_than_HS, County_Percent_HS, County_Percent_Some_or_AS, County_Percent_Bach, County_Percent_Grad_or_Pro)


Tribes <- get_acs(geography = "county", variables = c(pop = "B01003_001", tribes = "B02014_002"))
Tribes <-  select(Tribes, GEOID, variable, estimate)

# Transpose with spread
Tribes <-  Tribes %>%
 spread(variable, estimate)

# Calculate the proportion
Tribes <-  Tribes %>%
 mutate("County_Percent_Tribe" = (tribes / pop)*100)

Tribes <-  select(Tribes, GEOID, County_Percent_Tribe)

Parenting <- get_acs(geography = "county", variables = c(totkids = "B05009_001", undersixoneparent = "B05009_013", sixtoseventeeenoneparent = "B05009_031"))
Parenting <-  select(Parenting, GEOID, variable, estimate)

#Tranverse the data with spread
Parenting <-  Parenting %>%
 spread(variable, estimate)

#Calculate proportion
Parenting <-  Parenting %>%
 mutate("County_Percent_Single_Parent" = ((sixtoseventeeenoneparent + undersixoneparent) / totkids)*100)
Parenting <-  select(Parenting, GEOID, County_Percent_Single_Parent)

Citizen <- get_acs(geography = "county", variables = c(totalpop = "B01003_001", totalmig = "B05011_001", notcitizen = "B05011_002", naturalized = "B05011_003"))
Citizen <-  select(Citizen, GEOID, variable, estimate)

#Tranverse the data with spread
Citizen <-  Citizen %>%
 spread(variable, estimate)

#Calculate proportion
Citizen <-  Citizen %>%
 mutate("County_Percent_Not_Citizen" = (notcitizen / totalpop)*100)

Citizen <-  Citizen %>%
 mutate("County_Percent_Imigrant" = (totalmig / totalpop)*100)

Citizen <-  select(Citizen, GEOID, County_Percent_Not_Citizen, County_Percent_Imigrant)

Renters <-  get_acs(geography = "county", variables = c(totalh = "B07013_001", renters = "B07013_003"))
Renters <-  select(Renters, GEOID, variable, estimate)

#Transverse the data with spread
Renters <-  Renters %>%
 spread(variable, estimate)

#Calculate proportions
Renters <-  Renters %>%
 mutate("County_Percent_Rent" = (renters / totalh)*100)

Renters <-  select(Renters, GEOID, County_Percent_Rent)

census <- left_join(employ, Health, by = "GEOID")%>%
rename.variable("GEOID.x", "GEOID")%>%
left_join(income, by = "GEOID")%>%
left_join(TotalWhite, by = "GEOID")%>%
left_join(Veteran, by = "GEOID")%>%
left_join(HousePercent, by = "GEOID")%>%
left_join(Married, by = "GEOID")%>%
left_join(Education, by = "GEOID")%>%
left_join(Tribes, by = "GEOID")%>%
left_join(Parenting, by = "GEOID")%>%
left_join(Citizen, by = "GEOID")%>%
left_join(Renters, by = "GEOID")%>%
rename.variable("County.x", "County")


write.csv(census, "census.csv")

###Join with IPEDS Database
ipedscensusdata <-  left_join(ipedsdashdata, census, by = c("COUNTYCODE" = "GEOID"))
write.csv(ipedscensusdata, "ipedscensusdata.csv")
write.csv(ipedscensusdata, "../2023 Work/ShinyFiles/ipedscensusdata.csv")

Demographics

Demograph<- ipedscensusdata%>%
  select(Institution, State, Type, Percent_FT, Percent_Women,Percent_Am_Indian, Percent_Asian, Percent_Black, Percent_Hispanic, Percent_Hawaii_PI, Percent_Non_Resident, Percent_Unknown, Percent_Two_or_More, Percent_White)

Demograph <- gather(Demograph, Demographic, Percent, Percent_FT:Percent_White, factor_key=TRUE)
Demograph<- na.exclude(Demograph)
Demograph<- Demograph %>%
   mutate(across(where(is.numeric), ~ round(., digits = 2)))

write.csv(Demograph, "../2023 Work/ShinyFiles/Demograph.csv")

mapdata <-  select (ipedscensusdata, Institution, State, County, Type, Community_Type, Longitude, Lattitude, Tot_Enrolled, TwoYGradRate150, FourYGradRate150, Cost_Off_Campus)
write.csv(mapdata, "../2023 Work/ShinyFiles/mapdata.csv")

Degrees and Jobs Data Munge

CIPSOC<- read.csv("CIP_SOC.csv")

CIPSOC<- CIPSOC %>%
  rename("CIPCODE" = CIP2020Code)
CIPSOC<- CIPSOC %>%
  rename("Degree_Title" = CIP2020Title)
CIPSOC<- CIPSOC %>%
  rename("Career_Title" = SOC2018Title)
CIPSOC<- CIPSOC %>%
  rename("SOCCODE" = SOC2018Code)


Wage<- read.delim("Wage.txt")

Wage<- Wage%>%
  rename("SOCCODE" = OCC_CODE)
Wage<- Wage%>%
  rename("Total_Employed" = TOT_EMP)
Wage<- Wage%>%
  rename("Standard_Error" = EMP_PRSE)
Wage<- Wage%>%
  rename("Mean_Hourly" = H_MEAN)
Wage<- Wage%>%
  rename("Mean_Annual" = A_MEAN)
Wage<- Wage%>%
  rename("Meand_Standard_Error" = MEAN_PRSE)
Wage<- Wage%>%
  rename("Tenth_%ile_Hourly" = H_PCT10)
Wage<- Wage%>%
  rename("Twenty_fifth_%ile_Hourly" = H_PCT25)
Wage<- Wage%>%
  rename("Hourly_Median" = H_MEDIAN)
Wage<- Wage%>%
  rename("Seventy_Fifth_%ile_Hourly" = H_PCT75)
Wage<- Wage%>%
  rename("Ninetieth_%ile_Hourly" = H_PCT90)
Wage<- Wage%>%
  rename("Tenth_%ile_Annual" = A_PCT10)
Wage<- Wage%>%
  rename("Twenty_Fifth_%ile_Annual" = A_PCT25)
Wage<- Wage%>%
  rename("Median_Annual" = A_MEDIAN)
Wage<- Wage%>%
  rename("Seventy_Fifth_%ile_Annual" = A_PCT75)
Wage<- Wage%>%
  rename("Ninetieth_%ile_Annual" = A_PCT90)

Wage<- left_join(CIPSOC, Wage, by ="SOCCODE")

IPEDSDatabase <- odbcDriverConnect("Driver={Microsoft Access Driver (*.mdb, *.accdb)};DBQ=C:/Users/mperki17/OneDrive/Dashboard/2023 Work/IPEDS202021.accdb")

instdegree<-sqlFetch(IPEDSDatabase, "C2020DEP")

instdegree<- instdegree %>%
    select(UNITID, CIPCODE, PTOTAL)

instdegree<- instdegree %>%
    rename("Total_Programs" = PTOTAL)

instname<- institutioninformation%>%
  select(UNITID, INSTNM, STABBR, COUNTYCD, locale, Type)

instname<- instname %>%
  rename("Institution_Name" = INSTNM)

instname<- instname %>%
  rename("Select_a_State" = STABBR)

instname<- instname %>%
  rename("Community_Type" = locale)

degree<- left_join(instname, instdegree, by = "UNITID")
degree<- left_join(degree, Wage, by = "CIPCODE")


Degrees_and_Jobs<- degree%>%
select(Institution_Name, Select_a_State, Degree_Title, Career_Title, CIPCODE, SOCCODE, Total_Programs, Total_Employed, Mean_Hourly, Mean_Annual, Hourly_Median,  Median_Annual)

write.csv(Degrees_and_Jobs, "Degrees_and_Jobs.csv", fileEncoding = "UTF-8")
write.csv(Degrees_and_Jobs, "../2023 Work/ShinyFiles/Degrees_and_Jobs.csv", fileEncoding = "UTF-8")

Projections Data Munge

CIPSOC<- read.csv("CIP_SOC.csv")

CIPSOC<- CIPSOC %>%
  rename("CIPCODE" = CIP2020Code)
CIPSOC<- CIPSOC %>%
  rename("Degree_Title" = CIP2020Title)
CIPSOC<- CIPSOC %>%
  rename("Career_Title" = SOC2018Title)
CIPSOC<- CIPSOC %>%
  rename("SOCCODE" = SOC2018Code)

projections<- read.csv("Projections.csv")
projections<-
            projections%>%
            rename("SOCCODE" = 'X2021.National.Employment.Matrix.code')
projections<-
            projections%>%
            rename("Career_Title" = 'X2021.National.Employment.Matrix.title')
projections2<- left_join(projections, CIPSOC, by = "SOCCODE")
degree2<- left_join(instname, instdegree, by = "UNITID")
Career_Projections<- left_join(degree2, projections2, by = "CIPCODE")%>%
  rename("Employment_Change_Number" = Employment.change..2021.31)%>%
  rename("Employment_Change_Percent" = Percent.employment.change..2021.31)%>%
  rename("Average_Annual_Openings" = Occupational.openings..2021.31.annual.average)%>%
  rename("Median_Annual_Wage" = Median.annual.wage..2021.1.)%>%
  rename("Required_Education" = Typical.education.needed.for.entry)%>%
  rename("Required_Job_Experience" = Work.experience.in.a.related.occupation)

write.csv(Career_Projections, "Career_Projections.csv", fileEncoding = "UTF-8")
write.csv(Career_Projections, "../2023 Work/ShinyFiles/Career_Projections.csv", fileEncoding = "UTF-8")

Community Data for Correlations

library(shinyWidgets)
Community<- 
ipedscensusdata%>%
select(Institution, State, Community_Type, Type, County, FT_Retention, PT_Retention, TwoYGradRate100, TwoYGradRate150, TwoYGradRate200, FourYGradRate100, FourYGradRate150, FourYGradRate200, Cost_Off_Campus, Cost_on_Campus, Percent_Women, Percent_FT, Percent_White, Median_Household_Income, County_Percent_Veteran, County_Percent_in_Same_House, County_Percent_Never_Married, County_Percent_Married, County_Percent_Divorced, County_Percent_Separated, County_Percent_Widowed, County_Percent_Single, County_Percent_Less_than_HS, County_Percent_HS, County_Percent_Some_or_AS, County_Percent_Bach, County_Percent_Grad_or_Pro, County_Percent_Single_Parent, County_Percent_Not_Citizen, County_Percent_Imigrant, County_Percent_Rent, County_Percent_Unemployed, County_Percent_White)

Community$County<- iconv(Community$County, from = 'UTF-8', to = 'ASCII//TRANSLIT')


write.csv(Community, "../2023 Work/ShinyFiles/Community.csv", row.names = FALSE)

corrdata<- 
  Community %>%
  select(Institution, State, Community_Type, Type, County, FT_Retention, PT_Retention, TwoYGradRate100, TwoYGradRate150, TwoYGradRate200, FourYGradRate100, FourYGradRate150, FourYGradRate200, Cost_Off_Campus, Cost_on_Campus, Percent_Women, Percent_FT, Percent_White, Median_Household_Income, County_Percent_Veteran, County_Percent_in_Same_House, County_Percent_Never_Married, County_Percent_Married, County_Percent_Divorced, County_Percent_Separated, County_Percent_Widowed, County_Percent_Single, County_Percent_Less_than_HS, County_Percent_HS, County_Percent_Some_or_AS, County_Percent_Bach, County_Percent_Grad_or_Pro, County_Percent_Single_Parent, County_Percent_Not_Citizen, County_Percent_Imigrant, County_Percent_Rent, County_Percent_Unemployed, County_Percent_White)

write.csv(corrdata, "../2023 Work/ShinyFiles/corrdata.csv")

Second Correlation Matrix

library(tibble)

corrdata<-read.csv("../2023 Work/ShinyFiles/corrdata.csv")
corrdata<- select(corrdata, Institution, State, Community_Type, Type, County, FT_Retention, PT_Retention, TwoYGradRate100, TwoYGradRate150, TwoYGradRate200, FourYGradRate100, FourYGradRate150, FourYGradRate200, Cost_Off_Campus, Cost_on_Campus, Percent_Women, Percent_FT, Percent_White, Median_Household_Income, County_Percent_Veteran, County_Percent_in_Same_House, County_Percent_Never_Married, County_Percent_Married, County_Percent_Divorced, County_Percent_Separated, County_Percent_Widowed, County_Percent_Single, County_Percent_Less_than_HS, County_Percent_HS, County_Percent_Some_or_AS, County_Percent_Bach, County_Percent_Grad_or_Pro, County_Percent_Single_Parent, County_Percent_Not_Citizen, County_Percent_Imigrant, County_Percent_Rent, County_Percent_Unemployed, County_Percent_White)

corrmatrix <- 
as.data.frame(round(cor(corrdata[sapply(corrdata, is.numeric)], use='pairwise'),2))
corrmatrix <- tibble::rownames_to_column(corrmatrix, "variable")

write.csv(corrmatrix, "../2023 Work/ShinyFiles/corrmatrix2.csv")

College and Community Information data Munge

Mark Perkins, Ph.D.

2023

#Libraries

IPEDS Data Munge

Graduation rate file

Census Data Munge

Demographics

Degrees and Jobs Data Munge

Projections Data Munge

Community Data for Correlations

Second Correlation Matrix