Do people who live in blue voting regions have higher education levels than people who live in red voting regions? In other words, which regions have the most highly educated Americans (according to the map on https://blogs.cfainstitute.org/investor/2018/03/13/red-states-blue-states-two-economies-one-nation/)?
library(readr)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(magrittr)
Education <- read_csv("C:/Users/abbys/Downloads/R12019574_SL020.csv", col_names = TRUE)
print(Education)
## # A tibble: 4 x 63
## Geo_FIPS Geo_GEOID Geo_NAME Geo_QName Geo_STUSAB Geo_SUMLEV Geo_GEOCOMP
## <dbl> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 1 02000US1 Northea~ Northeas~ us 020 00
## 2 2 02000US2 Midwest~ Midwest ~ us 020 00
## 3 3 02000US3 South R~ South Re~ us 020 00
## 4 4 02000US4 West Re~ West Reg~ us 020 00
## # ... with 56 more variables: Geo_FILEID <chr>, Geo_LOGRECNO <chr>,
## # Geo_US <lgl>, Geo_REGION <dbl>, Geo_DIVISION <lgl>, Geo_STATECE <lgl>,
## # Geo_STATE <lgl>, Geo_COUNTY <lgl>, Geo_COUSUB <lgl>, Geo_PLACE <lgl>,
## # Geo_PLACESE <lgl>, Geo_TRACT <lgl>, Geo_BLKGRP <lgl>,
## # Geo_CONCIT <lgl>, Geo_AIANHH <lgl>, Geo_AIANHHFP <lgl>,
## # Geo_AIHHTLI <lgl>, Geo_AITSCE <lgl>, Geo_AITS <lgl>, Geo_ANRC <lgl>,
## # Geo_CBSA <lgl>, Geo_CSA <lgl>, Geo_METDIV <lgl>, Geo_MACC <lgl>,
## # Geo_MEMI <lgl>, Geo_NECTA <lgl>, Geo_CNECTA <lgl>, Geo_NECTADIV <lgl>,
## # Geo_UA <lgl>, Geo_UACP <lgl>, Geo_CDCURR <lgl>, Geo_SLDU <lgl>,
## # Geo_SLDL <lgl>, Geo_VTD <lgl>, Geo_ZCTA3 <lgl>, Geo_ZCTA5 <lgl>,
## # Geo_SUBMCD <lgl>, Geo_SDELM <lgl>, Geo_SDSEC <lgl>, Geo_SDUNI <lgl>,
## # Geo_UR <lgl>, Geo_PCI <lgl>, Geo_TAZ <lgl>, Geo_UGA <lgl>,
## # Geo_BTTR <lgl>, Geo_BTBG <lgl>, Geo_PUMA5 <lgl>, Geo_PUMA1 <lgl>,
## # SE_T150_001 <dbl>, SE_T150_002 <dbl>, SE_T150_003 <dbl>,
## # SE_T150_004 <dbl>, SE_T150_005 <dbl>, SE_T150_006 <dbl>,
## # SE_T150_007 <dbl>, SE_T150_008 <dbl>
Education1<-rename(Education, "US_Region"=Geo_NAME,
"Total_Pop_25_Years_and_Over"=SE_T150_001,
"Less_than_High_School"=SE_T150_002,
"Atleast_High_School_Graduate"=SE_T150_003,
"Atleast_Some_College"=SE_T150_004,
"BA_Or_Above"=SE_T150_005,
"MA_Or_Above"=SE_T150_006,
"Professional_Degree_or_More"=SE_T150_007,
"Doctorate"=SE_T150_008)
names(Education1)
## [1] "Geo_FIPS" "Geo_GEOID"
## [3] "US_Region" "Geo_QName"
## [5] "Geo_STUSAB" "Geo_SUMLEV"
## [7] "Geo_GEOCOMP" "Geo_FILEID"
## [9] "Geo_LOGRECNO" "Geo_US"
## [11] "Geo_REGION" "Geo_DIVISION"
## [13] "Geo_STATECE" "Geo_STATE"
## [15] "Geo_COUNTY" "Geo_COUSUB"
## [17] "Geo_PLACE" "Geo_PLACESE"
## [19] "Geo_TRACT" "Geo_BLKGRP"
## [21] "Geo_CONCIT" "Geo_AIANHH"
## [23] "Geo_AIANHHFP" "Geo_AIHHTLI"
## [25] "Geo_AITSCE" "Geo_AITS"
## [27] "Geo_ANRC" "Geo_CBSA"
## [29] "Geo_CSA" "Geo_METDIV"
## [31] "Geo_MACC" "Geo_MEMI"
## [33] "Geo_NECTA" "Geo_CNECTA"
## [35] "Geo_NECTADIV" "Geo_UA"
## [37] "Geo_UACP" "Geo_CDCURR"
## [39] "Geo_SLDU" "Geo_SLDL"
## [41] "Geo_VTD" "Geo_ZCTA3"
## [43] "Geo_ZCTA5" "Geo_SUBMCD"
## [45] "Geo_SDELM" "Geo_SDSEC"
## [47] "Geo_SDUNI" "Geo_UR"
## [49] "Geo_PCI" "Geo_TAZ"
## [51] "Geo_UGA" "Geo_BTTR"
## [53] "Geo_BTBG" "Geo_PUMA5"
## [55] "Geo_PUMA1" "Total_Pop_25_Years_and_Over"
## [57] "Less_than_High_School" "Atleast_High_School_Graduate"
## [59] "Atleast_Some_College" "BA_Or_Above"
## [61] "MA_Or_Above" "Professional_Degree_or_More"
## [63] "Doctorate"
Education2=Education1%>%
mutate(DegreesAboveBA=(MA_Or_Above+Professional_Degree_or_More+Doctorate))
head(Education2)
Education3<-select(Education2,US_Region,Total_Pop_25_Years_and_Over,Less_than_High_School,Atleast_High_School_Graduate,Atleast_Some_College,BA_Or_Above,DegreesAboveBA)
print(Education3)
## # A tibble: 4 x 7
## US_Region Total_Pop_25_Ye~ Less_than_High_~ Atleast_High_Sc~
## <chr> <dbl> <dbl> <dbl>
## 1 Northeas~ 38965012 4430933 34534079
## 2 Midwest ~ 45584874 4543533 41041341
## 3 South Re~ 81181926 11332322 69849604
## 4 West Reg~ 50539832 7130326 43409506
## # ... with 3 more variables: Atleast_Some_College <dbl>,
## # BA_Or_Above <dbl>, DegreesAboveBA <dbl>
dim(Education3)
## [1] 4 7
HS<-summarize(Education3,Atleast_High_School_Graduate
=sum(as.numeric(Atleast_High_School_Graduate),na.rm = TRUE))
print(HS)
## # A tibble: 1 x 1
## Atleast_High_School_Graduate
## <dbl>
## 1 188834530
BA<-summarize(Education3,BA_Or_Above=sum(as.numeric(BA_Or_Above),na.rm = TRUE))
print(BA)
## # A tibble: 1 x 1
## BA_Or_Above
## <dbl>
## 1 66887603
MA_PHD_PROFESSIONAL<-summarize(Education3,DegreesAboveBA=sum(as.numeric(DegreesAboveBA),na.rm = TRUE))
print(MA_PHD_PROFESSIONAL)
## # A tibble: 1 x 1
## DegreesAboveBA
## <dbl>
## 1 35828661
HS>BA
## Atleast_High_School_Graduate
## [1,] TRUE
BA>MA_PHD_PROFESSIONAL
## BA_Or_Above
## [1,] TRUE
HS>BA+MA_PHD_PROFESSIONAL
## Atleast_High_School_Graduate
## [1,] TRUE
Edu <-Education %>%
select(Geo_NAME,SE_T150_001,SE_T150_002,SE_T150_003,SE_T150_004,SE_T150_005,SE_T150_006,SE_T150_007,SE_T150_008) %>%
rename(US_Region=Geo_NAME,
Total_Pop_25_Years_and_Over=SE_T150_001,
Less_than_High_School=SE_T150_002,
Atleast_High_School_Graduate=SE_T150_003,
Atleast_Some_College=SE_T150_004,
BA_Or_Above=SE_T150_005,
MA_Or_Above=SE_T150_006,
Professional_Degree_or_More=SE_T150_007,
Doctorate=SE_T150_008
) %>%
mutate(DegreesBeyondBA = MA_Or_Above + Professional_Degree_or_More+Doctorate)%>%
select(US_Region,Total_Pop_25_Years_and_Over,Less_than_High_School,Atleast_High_School_Graduate,Atleast_Some_College,BA_Or_Above,DegreesBeyondBA)%>%
print(Edu)
## # A tibble: 4 x 7
## US_Region Total_Pop_25_Ye~ Less_than_High_~ Atleast_High_Sc~
## <chr> <dbl> <dbl> <dbl>
## 1 Northeas~ 38965012 4430933 34534079
## 2 Midwest ~ 45584874 4543533 41041341
## 3 South Re~ 81181926 11332322 69849604
## 4 West Reg~ 50539832 7130326 43409506
## # ... with 3 more variables: Atleast_Some_College <dbl>,
## # BA_Or_Above <dbl>, DegreesBeyondBA <dbl>
One can see that the highest levels of education attained can be found in the Northeast, which is known to be a blue voting region. The lowest levels of education attained (Less_than_High_School) are the South (red voting region) and West regions (blue voting region). However, the West region is also shown to have the second highest attainment of education beyond a Bachelor’s degree.
ggplot(data = Edu) + geom_col(aes(x=US_Region, y= Less_than_High_School/Total_Pop_25_Years_and_Over), fill = "pink") + labs(x="Regions",y="Less_than_High_School", title="Highest Education Attainment by Region")
ggplot(data = Edu) + geom_col(aes(x=US_Region,y=Atleast_High_School_Graduate/Total_Pop_25_Years_and_Over), fill = "pink") + labs(x="Regions",y="Atleast_High_School_Graduate", title="Highest Education Attainment by Region")
ggplot(data = Edu) + geom_col(aes(x=US_Region, y= Atleast_Some_College/Total_Pop_25_Years_and_Over), fill = "pink") + labs(x="Regions",y="Atleast_Some_College", title="Highest Education Attainment by Region")
ggplot(data = Edu) + geom_col(aes(x=US_Region, y= Atleast_High_School_Graduate/Total_Pop_25_Years_and_Over), fill = "pink") + labs(x="Regions",y="Atleast_High_School_Graduate", title="Highest Education Attainment by Region")
ggplot(data = Edu) + geom_col(aes(x=US_Region, y=BA_Or_Above/Total_Pop_25_Years_and_Over), fill = "pink") + labs(x="Regions",y="BA_Or_Above", title="Highest Education Attainment by Region")
ggplot(data = Edu) + geom_col(aes(x=US_Region, y= DegreesBeyondBA/Total_Pop_25_Years_and_Over), fill = "pink") + labs(x="Regions",y="DegreesBeyondBA (MA,PHD,Professional_Degree", title="Highest Education Attainment by Region")