Data Preparation

library(dplyr)
library(tidyr)
library(stringr)
library(pastecs)
library(ggplot2)
# Remove scientific notation for the plots
options(scipen = 1000000)

# Data files are stored on my GitHub at the following URLs.
# The college-going rates come as one tab-delimited file per survey year, so
# every available year is read separately through one shared helper.
college_rates_base_url <- "https://raw.githubusercontent.com/smithchad17/Class606/master/Project/HS%20to%20College%20rates/"

# Read the college-going-rate file for a single survey year.
# year: the four-digit year used as the file name (e.g. 1992 -> "1992.txt").
# `row.names = NULL` forces automatic row numbering rather than letting
# read.csv() take row names from the first column.
read_college_rates <- function(year) {
  read.csv(
    paste0(college_rates_base_url, year, ".txt"),
    header = TRUE,
    stringsAsFactors = FALSE,
    sep = "\t",
    row.names = NULL
  )
}

college_rates_1992 <- read_college_rates(1992)
college_rates_1994 <- read_college_rates(1994)
college_rates_1996 <- read_college_rates(1996)
college_rates_1998 <- read_college_rates(1998)
college_rates_2000 <- read_college_rates(2000)
college_rates_2002 <- read_college_rates(2002)
college_rates_2004 <- read_college_rates(2004)
college_rates_2006 <- read_college_rates(2006)
college_rates_2008 <- read_college_rates(2008)
college_rates_2010 <- read_college_rates(2010)
college_rates_2012 <- read_college_rates(2012)
college_rates_2014 <- read_college_rates(2014)

median_income_by_state <- read.csv(
  "https://raw.githubusercontent.com/smithchad17/Class606/master/Project/h08.xls%20-%20h08.csv",
  stringsAsFactors = FALSE
)

# Put data frames in a list (ordered by year) so we can loop through them and
# tidy the data sets.
college_rates <- list(
  college_rates_1992, college_rates_1994, college_rates_1996, college_rates_1998,
  college_rates_2000, college_rates_2002, college_rates_2004, college_rates_2006,
  college_rates_2008, college_rates_2010, college_rates_2012, college_rates_2014
)

# Tidy each yearly college-going-rate data frame.
#
# The files were tab delimited, which left behind stray columns whose names
# start with "X"; some data sets also carry a change-from-previous-year column
# whose name starts with "C". Both kinds are dropped. The prefix match ignores
# case, which is also the default behaviour of dplyr's starts_with().
# A `year` column is added so the result can be merged with other data sets
# later, and the columns are given standard names.
#
# Arguments:
#   x          List of data frames, ordered by survey year.
#   start_year Year assigned to the first data frame (default 1992).
#   year_step  Gap in years between consecutive data frames (default 2).
#
# Returns an unnamed list of cleaned data frames, one per element of `x`.
# Stops with an error if a data frame does not have exactly four columns left
# after the unwanted ones are removed; assigning the standard names to a wider
# frame would otherwise leave trailing columns with NA names.
clean_columns <- function(x, start_year = 1992, year_step = 2) {
  tidy_names <- c("State", "grads_to_college_percentage", "hs_grads",
                  "grads_enrolled_in_college", "year")
  # Arithmetic on the position keeps the years as doubles.
  years <- start_year + year_step * (seq_along(x) - 1)

  tidy_one <- function(df, year) {
    unwanted <- grepl("^[XC]", names(df), ignore.case = TRUE)
    df <- df[, !unwanted, drop = FALSE]
    if (ncol(df) != length(tidy_names) - 1L) {
      stop(
        "expected ", length(tidy_names) - 1L,
        " data columns after cleaning the ", year, " data set, found ",
        ncol(df),
        call. = FALSE
      )
    }
    df["year"] <- year
    colnames(df) <- tidy_names
    df
  }

  unname(Map(tidy_one, x, years))
}

college_rates <- clean_columns(college_rates)

# Join the yearly data frames vertically for a long format.
# do.call(rbind, ...) stacks every element of the list in one call, so it also
# works when the list holds a single data frame and does not re-copy a growing
# result on every iteration.
cr_bind <- do.call(rbind, college_rates)

# Each data set had a 'Nation' row with the averages. It is removed here
# because the national figures are analyzed separately in this report.
cr_bind <- filter(cr_bind, State != "Nation")

# Tidy the median_income_by_state data frame.
# Keep only the rows needed for the analysis: start from rows 4-57 of the raw
# table and leave out raw rows 5, 6 and 15. Per the original notes, this keeps
# the rows that use Current Dollars and removes the District of Columbia,
# which is not in the college_rate data frames.
median_income_by_state <- median_income_by_state[setdiff(4:57, c(5, 6, 15)), ]

# Find the columns whose header cell (row 1 of the trimmed table) is empty and
# remove them. Those columns held the standard deviation and mean that we
# didn't need.
# The check runs over the whole header row at once, and a missing (NA) header
# counts as empty. A logical mask keeps every column when no header is empty;
# negating an empty index vector would instead select no columns at all.
has_empty_header <- vapply(
  median_income_by_state[1, , drop = FALSE],
  function(cell) is.na(cell) || cell == "",
  logical(1)
)
median_income_by_state <- median_income_by_state[, !has_empty_header, drop = FALSE]

# Keep only the columns whose years match the college-rate data sets, so the
# two sources can be compared. Per the original notes, the dropped columns are
# the odd-numbered years and the years before 1992.

# Step 1: drop the trailing block of columns 28-35.
median_income_by_state <- median_income_by_state[, -(28:35)]

# Step 2: among the columns that are left, drop the positions listed in `v`.
# The first row (presumably the header values, no longer needed once the
# columns are chosen) is removed in the same subsetting call.
v <- c(2, 3, 5, 6, seq(8, 26, by = 2))
median_income_by_state <- median_income_by_state[-1, -v]

# Cleaned up column names: the state followed by the even years 2014 to 1992.
names(median_income_by_state) <- c("State", as.character(seq(2014, 1992, by = -2)))

# Convert from wide to long format: one row per state and year.
# The year labels become a numeric `year` column, and the income strings have
# their commas removed before being converted to numeric.
median_income_by_state <- median_income_by_state %>%
  gather(year, median_income, "2014":"1992") %>%
  mutate(
    year = as.numeric(year),
    median_income = as.numeric(gsub(",", "", median_income))
  )

Research question

Does the median family income determine college enrollment after high school graduation?

Cases

Each case represents one state in one year. The data sets cover all 50 states over 12 different years, for a total of 600 observations.

Data collection

Data on median family income were collected by the United States Census Bureau. Data on the college-going rates of high school graduates were collected by the National Center for Higher Education Management Systems (NCHEMS), which drew on the following sources: the National Center for Education Statistics, the Integrated Postsecondary Education Data System Enrollment Survey, and the Western Interstate Commission for Higher Education.

Type of study

This is an observational study

Data Source

The median income data are collected by the US Census Bureau and are available online as Table H-8 here: https://www.census.gov/data/tables/time-series/demo/income-poverty/historical-income-households.html.

The college-going rates of high school graduates are collected by NCHEMS and are available online here: http://www.higheredinfo.org/dbrowser/?year=2014&level=nation&mode=data&state=0&submeasure=63.

Response

The response variable is the college-going rate of high school graduates and is numerical.

Explanatory

The explanatory variable is the median family income and is numerical.

Relevant summary statistics

Summary statistics of college-going high school grads grouped by state across all years

# Descriptive statistics (pastecs::stat.desc, rounded to one decimal) of the
# college-going percentage, computed separately for each state over all years.
by(
  data = cr_bind$grads_to_college_percentage,
  INDICES = cr_bind$State,
  FUN = function(rates) round(stat.desc(rates), digits = 1)
)
## cr_bind$State: Alabama
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         55.2         66.7 
##        range          sum       median         mean      SE.mean 
##         11.5        726.0         60.2         60.5          1.0 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.1         11.4          3.4          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Alaska
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         37.2         47.4 
##        range          sum       median         mean      SE.mean 
##         10.2        519.0         44.6         43.2          1.0 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.3         13.0          3.6          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Arizona
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         44.8         57.9 
##        range          sum       median         mean      SE.mean 
##         13.1        590.3         47.8         49.2          1.1 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.5         15.6          4.0          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Arkansas
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         45.7         67.8 
##        range          sum       median         mean      SE.mean 
##         22.1        680.9         56.2         56.7          2.0 
## CI.mean.0.95          var      std.dev     coef.var 
##          4.4         47.6          6.9          0.1 
## -------------------------------------------------------- 
## cr_bind$State: California
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         43.7         66.4 
##        range          sum       median         mean      SE.mean 
##         22.7        674.6         57.1         56.2          2.1 
## CI.mean.0.95          var      std.dev     coef.var 
##          4.6         51.7          7.2          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Colorado
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         51.2         62.9 
##        range          sum       median         mean      SE.mean 
##         11.7        682.0         57.5         56.8          1.2 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.7         17.8          4.2          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Connecticut
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         57.1         78.7 
##        range          sum       median         mean      SE.mean 
##         21.6        788.0         63.6         65.7          1.9 
## CI.mean.0.95          var      std.dev     coef.var 
##          4.3         45.1          6.7          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Delaware
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         40.8         66.5 
##        range          sum       median         mean      SE.mean 
##         25.7        712.5         62.7         59.4          2.3 
## CI.mean.0.95          var      std.dev     coef.var 
##          5.2         66.0          8.1          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Florida
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         45.4         64.1 
##        range          sum       median         mean      SE.mean 
##         18.7        670.4         56.5         55.9          1.8 
## CI.mean.0.95          var      std.dev     coef.var 
##          4.0         39.8          6.3          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Georgia
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         55.1         69.6 
##        range          sum       median         mean      SE.mean 
##         14.5        751.0         62.0         62.6          1.4 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.1         24.5          5.0          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Hawaii
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         49.8         64.2 
##        range          sum       median         mean      SE.mean 
##         14.4        711.3         60.3         59.3          1.3 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.9         20.7          4.5          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Idaho
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         43.7         49.3 
##        range          sum       median         mean      SE.mean 
##          5.6        560.5         46.8         46.7          0.6 
## CI.mean.0.95          var      std.dev     coef.var 
##          1.2          3.6          1.9          0.0 
## -------------------------------------------------------- 
## cr_bind$State: Illinois
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         55.2         63.9 
##        range          sum       median         mean      SE.mean 
##          8.7        723.1         60.4         60.3          0.8 
## CI.mean.0.95          var      std.dev     coef.var 
##          1.7          7.3          2.7          0.0 
## -------------------------------------------------------- 
## cr_bind$State: Indiana
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         50.5         65.8 
##        range          sum       median         mean      SE.mean 
##         15.3        724.2         60.9         60.4          1.3 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.8         19.2          4.4          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Iowa
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         60.2         67.2 
##        range          sum       median         mean      SE.mean 
##          7.0        766.1         64.2         63.8          0.7 
## CI.mean.0.95          var      std.dev     coef.var 
##          1.5          5.6          2.4          0.0 
## -------------------------------------------------------- 
## cr_bind$State: Kansas
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         54.6         67.5 
##        range          sum       median         mean      SE.mean 
##         12.9        746.7         63.2         62.2          1.2 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.6         16.9          4.1          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Kentucky
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         48.9         63.6 
##        range          sum       median         mean      SE.mean 
##         14.7        696.2         59.8         58.0          1.5 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.4         28.6          5.3          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Louisiana
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         53.4         69.5 
##        range          sum       median         mean      SE.mean 
##         16.1        727.0         61.1         60.6          1.6 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.6         32.5          5.7          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Maine
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         48.6         64.6 
##        range          sum       median         mean      SE.mean 
##         16.0        654.1         54.1         54.5          1.3 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.0         21.7          4.7          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Maryland
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         54.7         65.6 
##        range          sum       median         mean      SE.mean 
##         10.9        713.7         58.5         59.5          1.1 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.4         14.5          3.8          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Massachusetts
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         59.0         74.6 
##        range          sum       median         mean      SE.mean 
##         15.6        829.8         71.2         69.2          1.4 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.1         23.8          4.9          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Michigan
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         51.1         65.2 
##        range          sum       median         mean      SE.mean 
##         14.1        716.6         59.3         59.7          1.1 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.3         13.6          3.7          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Minnesota
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         49.1         70.9 
##        range          sum       median         mean      SE.mean 
##         21.8        749.5         64.6         62.5          2.3 
## CI.mean.0.95          var      std.dev     coef.var 
##          5.1         63.5          8.0          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Mississippi
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         59.7         79.5 
##        range          sum       median         mean      SE.mean 
##         19.8        826.1         66.8         68.8          2.2 
## CI.mean.0.95          var      std.dev     coef.var 
##          4.8         57.6          7.6          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Missouri
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         48.7         62.4 
##        range          sum       median         mean      SE.mean 
##         13.7        666.9         54.0         55.6          1.4 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.1         23.8          4.9          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Montana
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         50.8         60.9 
##        range          sum       median         mean      SE.mean 
##         10.1        676.0         56.6         56.3          0.9 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.1         10.8          3.3          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Nebraska
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         58.8         69.5 
##        range          sum       median         mean      SE.mean 
##         10.7        751.6         62.1         62.6          0.9 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.0         10.0          3.2          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Nevada
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         32.8         55.6 
##        range          sum       median         mean      SE.mean 
##         22.8        548.9         46.1         45.7          2.5 
## CI.mean.0.95          var      std.dev     coef.var 
##          5.4         73.3          8.6          0.2 
## -------------------------------------------------------- 
## cr_bind$State: New Hampshire
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         55.4         64.9 
##        range          sum       median         mean      SE.mean 
##          9.5        716.0         59.2         59.7          1.0 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.3         12.6          3.6          0.1 
## -------------------------------------------------------- 
## cr_bind$State: New Jersey
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         60.9         71.1 
##        range          sum       median         mean      SE.mean 
##         10.2        795.8         66.8         66.3          1.0 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.2         11.5          3.4          0.1 
## -------------------------------------------------------- 
## cr_bind$State: New Mexico
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         50.1         72.4 
##        range          sum       median         mean      SE.mean 
##         22.3        745.3         62.3         62.1          2.0 
## CI.mean.0.95          var      std.dev     coef.var 
##          4.5         49.5          7.0          0.1 
## -------------------------------------------------------- 
## cr_bind$State: New York
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         63.9         74.4 
##        range          sum       median         mean      SE.mean 
##         10.5        835.0         69.2         69.6          0.8 
## CI.mean.0.95          var      std.dev     coef.var 
##          1.9          8.5          2.9          0.0 
## -------------------------------------------------------- 
## cr_bind$State: North Carolina
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         50.0         66.0 
##        range          sum       median         mean      SE.mean 
##         16.0        733.2         63.8         61.1          1.7 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.7         34.8          5.9          0.1 
## -------------------------------------------------------- 
## cr_bind$State: North Dakota
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         65.4         73.7 
##        range          sum       median         mean      SE.mean 
##          8.3        823.8         67.9         68.7          0.8 
## CI.mean.0.95          var      std.dev     coef.var 
##          1.7          6.9          2.6          0.0 
## -------------------------------------------------------- 
## cr_bind$State: Ohio
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         50.3         62.7 
##        range          sum       median         mean      SE.mean 
##         12.4        684.7         56.8         57.1          1.3 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.8         18.9          4.3          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Oklahoma
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         47.6         63.0 
##        range          sum       median         mean      SE.mean 
##         15.4        649.5         52.0         54.1          1.5 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.3         26.3          5.1          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Oregon
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         45.5         57.4 
##        range          sum       median         mean      SE.mean 
##         11.9        592.2         47.6         49.4          1.1 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.3         13.4          3.7          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Pennsylvania
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         53.8         63.9 
##        range          sum       median         mean      SE.mean 
##         10.1        717.4         60.3         59.8          0.8 
## CI.mean.0.95          var      std.dev     coef.var 
##          1.8          8.3          2.9          0.0 
## -------------------------------------------------------- 
## cr_bind$State: Rhode Island
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         53.4         67.8 
##        range          sum       median         mean      SE.mean 
##         14.4        750.3         65.2         62.5          1.5 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.3         26.4          5.1          0.1 
## -------------------------------------------------------- 
## cr_bind$State: South Carolina
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         43.4         70.1 
##        range          sum       median         mean      SE.mean 
##         26.7        751.0         64.8         62.6          2.1 
## CI.mean.0.95          var      std.dev     coef.var 
##          4.6         52.0          7.2          0.1 
## -------------------------------------------------------- 
## cr_bind$State: South Dakota
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         49.9         72.1 
##        range          sum       median         mean      SE.mean 
##         22.2        760.4         65.3         63.4          2.4 
## CI.mean.0.95          var      std.dev     coef.var 
##          5.2         67.1          8.2          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Tennessee
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         46.7         64.5 
##        range          sum       median         mean      SE.mean 
##         17.8        708.6         61.7         59.0          1.6 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.4         29.0          5.4          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Texas
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         50.4         58.7 
##        range          sum       median         mean      SE.mean 
##          8.3        651.0         53.8         54.2          0.8 
## CI.mean.0.95          var      std.dev     coef.var 
##          1.7          7.5          2.7          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Utah
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         38.1         58.5 
##        range          sum       median         mean      SE.mean 
##         20.4        585.1         49.0         48.8          1.8 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.9         37.0          6.1          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Vermont
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         43.4         55.8 
##        range          sum       median         mean      SE.mean 
##         12.4        600.9         50.2         50.1          1.2 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.7         17.5          4.2          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Virginia
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         51.7         68.7 
##        range          sum       median         mean      SE.mean 
##         17.0        712.7         56.9         59.4          1.9 
## CI.mean.0.95          var      std.dev     coef.var 
##          4.1         42.0          6.5          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Washington
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         43.4         58.4 
##        range          sum       median         mean      SE.mean 
##         15.0        604.6         49.8         50.4          1.5 
## CI.mean.0.95          var      std.dev     coef.var 
##          3.4         28.3          5.3          0.1 
## -------------------------------------------------------- 
## cr_bind$State: West Virginia
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         49.1         59.2 
##        range          sum       median         mean      SE.mean 
##         10.1        649.0         53.7         54.1          1.0 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.2         12.4          3.5          0.1 
## -------------------------------------------------------- 
## cr_bind$State: Wisconsin
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         55.4         61.6 
##        range          sum       median         mean      SE.mean 
##          6.2        710.3         59.5         59.2          0.5 
## CI.mean.0.95          var      std.dev     coef.var 
##          1.2          3.6          1.9          0.0 
## -------------------------------------------------------- 
## cr_bind$State: Wyoming
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0         46.2         60.4 
##        range          sum       median         mean      SE.mean 
##         14.2        660.3         54.6         55.0          1.1 
## CI.mean.0.95          var      std.dev     coef.var 
##          2.5         15.9          4.0          0.1

Plot of college-going high school grads grouped by year across all states

# Plot college-going high school graduates by year: average the state-level
# percentages within each year, then draw one bar per year on a 0-100 scale.
plot <- aggregate(grads_to_college_percentage ~ year, data = cr_bind, FUN = mean)

ggplot(plot, aes(x = year, y = grads_to_college_percentage)) +
  geom_col() +
  scale_x_continuous(breaks = seq(1992, 2014, 2)) +
  ylim(0, 100) +
  ggtitle("AVG Percentage of college-going HS grads in the US by year")

Summary statistics of median family income grouped by state across all years

# Descriptive statistics of median income, computed separately for each
# state across all years and rounded to one decimal place.
# The INDICES expression is written out in full because by() uses its
# deparsed text as the label printed above each group.
by(
  data = median_income_by_state$median_income,
  INDICES = median_income_by_state$State,
  FUN = function(income) {
    round(stat.desc(income), digits = 1)
  }
)
## median_income_by_state$State: Alabama
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      25808.0      44476.0 
##        range          sum       median         mean      SE.mean 
##      18668.0     438331.0      37116.0      36527.6       1760.2 
## CI.mean.0.95          var      std.dev     coef.var 
##       3874.3   37181280.4       6097.6          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Alaska
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      41802.0      67629.0 
##        range          sum       median         mean      SE.mean 
##      25827.0     660856.0      53955.0      55071.3       2176.3 
## CI.mean.0.95          var      std.dev     coef.var 
##       4790.1   56837316.8       7539.1          0.1 
## -------------------------------------------------------- 
## median_income_by_state$State: Arizona
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      29358.0      49254.0 
##        range          sum       median         mean      SE.mean 
##      19896.0     489506.0      41814.5      40792.2       2040.8 
## CI.mean.0.95          var      std.dev     coef.var 
##       4491.8   49979947.2       7069.6          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Arkansas
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      23882.0      44922.0 
##        range          sum       median         mean      SE.mean 
##      21040.0     400473.0      33685.5      33372.8       1915.6 
## CI.mean.0.95          var      std.dev     coef.var 
##       4216.2   44033877.1       6635.8          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: California
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      34903.0      60487.0 
##        range          sum       median         mean      SE.mean 
##      25584.0     577578.0      48329.5      48131.5       2583.4 
## CI.mean.0.95          var      std.dev     coef.var 
##       5686.0   80088080.6       8949.2          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Colorado
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      32484.0      60943.0 
##        range          sum       median         mean      SE.mean 
##      28459.0     600354.0      49590.0      50029.5       2730.8 
## CI.mean.0.95          var      std.dev     coef.var 
##       6010.4   89485091.5       9459.7          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Connecticut
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      40841.0      70161.0 
##        range          sum       median         mean      SE.mean 
##      29320.0     656716.0      54243.5      54726.3       3067.1 
## CI.mean.0.95          var      std.dev     coef.var 
##       6750.6  112882438.2      10624.6          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Delaware
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      35678.0      57522.0 
##        range          sum       median         mean      SE.mean 
##      21844.0     565230.0      49311.0      47102.5       2107.9 
## CI.mean.0.95          var      std.dev     coef.var 
##       4639.5   53320983.7       7302.1          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Florida
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      27349.0      46140.0 
##        range          sum       median         mean      SE.mean 
##      18791.0     466418.0      39695.5      38868.2       1994.7 
## CI.mean.0.95          var      std.dev     coef.var 
##       4390.3   47745345.2       6909.8          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Georgia
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      28797.0      49555.0 
##        range          sum       median         mean      SE.mean 
##      20758.0     494613.0      42420.0      41217.8       2045.2 
## CI.mean.0.95          var      std.dev     coef.var 
##       4501.4   50192777.8       7084.7          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Hawaii
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      40827.0      71223.0 
##        range          sum       median         mean      SE.mean 
##      30396.0     631074.0      53894.0      52589.5       2840.7 
## CI.mean.0.95          var      std.dev     coef.var 
##       6252.3   96834712.1       9840.5          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Idaho
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      27704.0      53438.0 
##        range          sum       median         mean      SE.mean 
##      25734.0     492356.0      41036.5      41029.7       2245.8 
## CI.mean.0.95          var      std.dev     coef.var 
##       4942.9   60522212.6       7779.6          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Illinois
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      31551.0      54916.0 
##        range          sum       median         mean      SE.mean 
##      23365.0     543522.0      46070.5      45293.5       2090.5 
## CI.mean.0.95          var      std.dev     coef.var 
##       4601.2   52444147.4       7241.8          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Indiana
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      27858.0      48060.0 
##        range          sum       median         mean      SE.mean 
##      20202.0     487791.0      41688.0      40649.2       1979.3 
## CI.mean.0.95          var      std.dev     coef.var 
##       4356.5   47012848.8       6856.6          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Iowa
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      28743.0      57810.0 
##        range          sum       median         mean      SE.mean 
##      29067.0     516017.0      42220.0      43001.4       2586.6 
## CI.mean.0.95          var      std.dev     coef.var 
##       5693.1   80287948.3       8960.4          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Kansas
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      28322.0      53444.0 
##        range          sum       median         mean      SE.mean 
##      25122.0     495638.0      41842.5      41303.2       2293.7 
## CI.mean.0.95          var      std.dev     coef.var 
##       5048.3   63130647.1       7945.5          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Kentucky
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      23485.0      42786.0 
##        range          sum       median         mean      SE.mean 
##      19301.0     432991.0      36513.5      36082.6       1730.6 
## CI.mean.0.95          var      std.dev     coef.var 
##       3809.1   35940508.1       5995.0          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Louisiana
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      25439.0      42406.0 
##        range          sum       median         mean      SE.mean 
##      16967.0     411109.0      35218.5      34259.1       1603.2 
## CI.mean.0.95          var      std.dev     coef.var 
##       3528.7   30844898.1       5553.8          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Maine
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      29617.0      51710.0 
##        range          sum       median         mean      SE.mean 
##      22093.0     487386.0      39297.5      40615.5       2186.9 
## CI.mean.0.95          var      std.dev     coef.var 
##       4813.3   57390641.5       7575.7          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Maryland
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      37203.0      76165.0 
##        range          sum       median         mean      SE.mean 
##      38962.0     678036.0      56755.0      56503.0       3545.2 
## CI.mean.0.95          var      std.dev     coef.var 
##       7802.9  150820305.5      12280.9          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Massachusetts
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      36359.0      63656.0 
##        range          sum       median         mean      SE.mean 
##      27297.0     610716.0      50937.0      50893.0       2839.5 
## CI.mean.0.95          var      std.dev     coef.var 
##       6249.7   96751976.5       9836.3          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Michigan
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      32267.0      52005.0 
##        range          sum       median         mean      SE.mean 
##      19738.0     525811.0      44113.5      43817.6       1758.7 
## CI.mean.0.95          var      std.dev     coef.var 
##       3871.0   37117835.0       6092.4          0.1 
## -------------------------------------------------------- 
## median_income_by_state$State: Minnesota
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      30981.0      67244.0 
##        range          sum       median         mean      SE.mean 
##      36263.0     611015.0      54436.5      50917.9       3121.3 
## CI.mean.0.95          var      std.dev     coef.var 
##       6869.9  116907568.3      10812.4          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Mississippi
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      20570.0      38160.0 
##        range          sum       median         mean      SE.mean 
##      17590.0     383204.0      34516.0      31933.7       1566.8 
## CI.mean.0.95          var      std.dev     coef.var 
##       3448.4   29456755.0       5427.4          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Missouri
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      27361.0      56630.0 
##        range          sum       median         mean      SE.mean 
##      29269.0     504855.0      43677.5      42071.2       2365.9 
## CI.mean.0.95          var      std.dev     coef.var 
##       5207.2   67167022.9       8195.5          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Montana
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      26525.0      51102.0 
##        range          sum       median         mean      SE.mean 
##      24577.0     437460.0      34395.5      36455.0       2233.6 
## CI.mean.0.95          var      std.dev     coef.var 
##       4916.2   59868430.4       7737.5          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Nebraska
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      30048.0      56870.0 
##        range          sum       median         mean      SE.mean 
##      26822.0     521044.0      43291.0      43420.3       2568.8 
## CI.mean.0.95          var      std.dev     coef.var 
##       5653.9   79184508.8       8898.6          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Nevada
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      31908.0      54744.0 
##        range          sum       median         mean      SE.mean 
##      22836.0     539429.0      46481.0      44952.4       2031.9 
## CI.mean.0.95          var      std.dev     coef.var 
##       4472.3   49545326.3       7038.8          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: New Hampshire
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      35245.0      73397.0 
##        range          sum       median         mean      SE.mean 
##      38152.0     658103.0      56068.0      54841.9       3700.7 
## CI.mean.0.95          var      std.dev     coef.var 
##       8145.2  164342120.6      12819.6          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: New Jersey
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      39000.0      68059.0 
##        range          sum       median         mean      SE.mean 
##      29059.0     667090.0      54921.5      55590.8       2887.2 
## CI.mean.0.95          var      std.dev     coef.var 
##       6354.7  100031729.1      10001.6          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: New Mexico
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      25086.0      46686.0 
##        range          sum       median         mean      SE.mean 
##      21600.0     436880.0      37509.5      36406.7       2206.3 
## CI.mean.0.95          var      std.dev     coef.var 
##       4856.0   58412290.4       7642.8          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: New York
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      31051.0      54310.0 
##        range          sum       median         mean      SE.mean 
##      23259.0     513567.0      43307.5      42797.2       2205.7 
## CI.mean.0.95          var      std.dev     coef.var 
##       4854.6   58378818.8       7640.6          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: North Carolina
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      27771.0      46784.0 
##        range          sum       median         mean      SE.mean 
##      19013.0     459288.0      39057.0      38274.0       1593.0 
## CI.mean.0.95          var      std.dev     coef.var 
##       3506.2   30451800.2       5518.3          0.1 
## -------------------------------------------------------- 
## median_income_by_state$State: North Dakota
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      26959.0      60730.0 
##        range          sum       median         mean      SE.mean 
##      33771.0     486607.0      37710.0      40550.6       3250.8 
## CI.mean.0.95          var      std.dev     coef.var 
##       7155.1  126815732.3      11261.2          0.3 
## -------------------------------------------------------- 
## median_income_by_state$State: Ohio
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      31404.0      49644.0 
##        range          sum       median         mean      SE.mean 
##      18240.0     497694.0      43008.5      41474.5       1752.6 
## CI.mean.0.95          var      std.dev     coef.var 
##       3857.5   36859636.5       6071.2          0.1 
## -------------------------------------------------------- 
## median_income_by_state$State: Oklahoma
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      25284.0      48407.0 
##        range          sum       median         mean      SE.mean 
##      23123.0     445601.0      37648.0      37133.4       2343.9 
## CI.mean.0.95          var      std.dev     coef.var 
##       5158.8   65923858.4       8119.4          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Oregon
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      31456.0      58875.0 
##        range          sum       median         mean      SE.mean 
##      27419.0     523307.0      42150.5      43608.9       2475.1 
## CI.mean.0.95          var      std.dev     coef.var 
##       5447.8   73516197.2       8574.2          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Pennsylvania
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      29882.0      55173.0 
##        range          sum       median         mean      SE.mean 
##      25291.0     519912.0      43302.0      43326.0       2352.2 
## CI.mean.0.95          var      std.dev     coef.var 
##       5177.1   66392044.0       8148.1          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Rhode Island
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      30432.0      58633.0 
##        range          sum       median         mean      SE.mean 
##      28201.0     545879.0      45176.0      45489.9       2728.3 
## CI.mean.0.95          var      std.dev     coef.var 
##       6005.0   89326374.8       9451.3          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: South Carolina
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      27578.0      44929.0 
##        range          sum       median         mean      SE.mean 
##      17351.0     452229.0      38251.5      37685.8       1582.8 
## CI.mean.0.95          var      std.dev     coef.var 
##       3483.6   30061796.6       5482.9          0.1 
## -------------------------------------------------------- 
## median_income_by_state$State: South Dakota
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      26259.0      53053.0 
##        range          sum       median         mean      SE.mean 
##      26794.0     478606.0      39490.0      39883.8       2648.1 
## CI.mean.0.95          var      std.dev     coef.var 
##       5828.5   84151397.8       9173.4          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Tennessee
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      24318.0      43716.0 
##        range          sum       median         mean      SE.mean 
##      19398.0     432733.0      37551.0      36061.1       1699.6 
## CI.mean.0.95          var      std.dev     coef.var 
##       3740.7   34662306.1       5887.5          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Texas
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      27953.0      53875.0 
##        range          sum       median         mean      SE.mean 
##      25922.0     490582.0      40773.0      40881.8       2354.2 
## CI.mean.0.95          var      std.dev     coef.var 
##       5181.5   66504894.9       8155.1          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Utah
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      34251.0      63383.0 
##        range          sum       median         mean      SE.mean 
##      29132.0     593176.0      49366.0      49431.3       2928.7 
## CI.mean.0.95          var      std.dev     coef.var 
##       6446.0  102928137.0      10145.4          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Vermont
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      32358.0      60708.0 
##        range          sum       median         mean      SE.mean 
##      28350.0     545114.0      45164.0      45426.2       2782.4 
## CI.mean.0.95          var      std.dev     coef.var 
##       6124.1   92904128.0       9638.7          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Virginia
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      37647.0      66155.0 
##        range          sum       median         mean      SE.mean 
##      28508.0     616603.0      50386.0      51383.6       3035.0 
## CI.mean.0.95          var      std.dev     coef.var 
##       6679.9  110531377.4      10513.4          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Washington
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      33533.0      62187.0 
##        range          sum       median         mean      SE.mean 
##      28654.0     577932.0      48671.5      48161.0       2871.0 
## CI.mean.0.95          var      std.dev     coef.var 
##       6319.1   98915045.8       9945.6          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: West Virginia
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      20271.0      43553.0 
##        range          sum       median         mean      SE.mean 
##      23282.0     390224.0      31392.0      32518.7       2267.2 
## CI.mean.0.95          var      std.dev     coef.var 
##       4990.1   61681886.4       7853.8          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Wisconsin
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      33308.0      58080.0 
##        range          sum       median         mean      SE.mean 
##      24772.0     551149.0      45817.5      45929.1       2144.0 
## CI.mean.0.95          var      std.dev     coef.var 
##       4719.0   55162515.5       7427.1          0.2 
## -------------------------------------------------------- 
## median_income_by_state$State: Wyoming
##      nbr.val     nbr.null       nbr.na          min          max 
##         12.0          0.0          0.0      30209.0      57512.0 
##        range          sum       median         mean      SE.mean 
##      27303.0     520122.0      42580.0      43343.5       2849.3 
## CI.mean.0.95          var      std.dev     coef.var 
##       6271.3   97424490.6       9870.4          0.2

Plot of average median family income by year across all states

# Average the median family income across all rows of
# median_income_by_state that share a year, then draw one bar per year.
plot <- aggregate(
  median_income ~ year,
  data = median_income_by_state,
  FUN = mean
)

# geom_col() draws bars whose heights are the y values themselves.
ggplot(plot, aes(x = year, y = median_income)) +
  geom_col() +
  scale_x_continuous(breaks = seq(1992, 2014, by = 2)) +
  ggtitle("AVG median family income in the US by year")