Research Questions
- How does tuition differ by institution type?
- Which states have the highest average in-state tuition?
- How closely does out-of-state tuition track in-state tuition?
- How does tuition vary by degree length?
- How does total in-state cost compare with total out-of-state cost across institution types?
library(readr)
## Warning: package 'readr' was built under R version 4.5.2
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.5.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the TidyTuesday (2020-03-10) tuition cost table directly from GitHub.
df <- read_csv(
  file = "https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2020/2020-03-10/tuition_cost.csv"
)
## Rows: 2973 Columns: 10
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): name, state, state_code, type, degree_length
## dbl (5): room_and_board, in_state_tuition, in_state_total, out_of_state_tuit...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows to check column names and types.
head(x = df, n = 6L)
## # A tibble: 6 × 10
## name state state_code type degree_length room_and_board in_state_tuition
## <chr> <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 Aaniiih … Mont… MT Publ… 2 Year NA 2380
## 2 Abilene … Texas TX Priv… 4 Year 10350 34850
## 3 Abraham … Geor… GA Publ… 2 Year 8474 4128
## 4 Academy … Minn… MN For … 2 Year NA 17661
## 5 Academy … Cali… CA For … 4 Year 16648 27810
## 6 Adams St… Colo… CO Publ… 4 Year 8782 9440
## # ℹ 3 more variables: in_state_total <dbl>, out_of_state_tuition <dbl>,
## # out_of_state_total <dbl>
# Per-column summary: lengths for character columns, quartiles and NA counts
# for numeric columns.
summary(object = df)
## name state state_code type
## Length:2973 Length:2973 Length:2973 Length:2973
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## degree_length room_and_board in_state_tuition in_state_total
## Length:2973 Min. : 30 Min. : 480 Min. : 962
## Class :character 1st Qu.: 7935 1st Qu.: 4890 1st Qu.: 5802
## Mode :character Median :10000 Median :10099 Median :17669
## Mean :10095 Mean :16491 Mean :22872
## 3rd Qu.:12424 3rd Qu.:27124 3rd Qu.:35960
## Max. :21300 Max. :59985 Max. :75003
## NA's :1094
## out_of_state_tuition out_of_state_total
## Min. : 480 Min. : 1376
## 1st Qu.: 9552 1st Qu.:11196
## Median :17486 Median :23214
## Mean :20533 Mean :26913
## 3rd Qu.:29208 3rd Qu.:39054
## Max. :59985 Max. :75003
##
# Q1: Tuition by institution type
# Box plot of in-state tuition for each institution type. The fill colour
# repeats the x-axis grouping, so the legend is suppressed to avoid listing
# the same categories twice.
ggplot(df, aes(x = type, y = in_state_tuition, fill = type)) +
  geom_boxplot(show.legend = FALSE) +
  theme_minimal() +
  labs(title = "Q1: Tuition by Institution Type", x = "", y = "Tuition ($)")

# Q2: Top 10 states by tuition
# Average in-state tuition per state (missing values ignored), keeping the ten
# highest averages and drawing them as horizontal bars with the largest on top.
df |>
  group_by(state) |>
  summarise(avg = mean(in_state_tuition, na.rm = TRUE)) |>
  arrange(desc(avg)) |>
  slice_head(n = 10) |>
  ggplot(mapping = aes(x = reorder(state, avg), y = avg)) +
  geom_col(fill = "steelblue") +
  coord_flip() +
  theme_minimal() +
  labs(
    title = "Q2: Top 10 States by Tuition",
    x = "State",
    y = "Avg Tuition ($)"
  )

# Q3: In-state vs out-of-state tuition
ggplot(df, aes(x=in_state_tuition, y=out_of_state_tuition)) +
geom_point(alpha=0.5) + geom_smooth(method="lm") + theme_minimal() +
labs(title="Q3: In-State vs Out-of-State Tuition", x="In-State ($)", y="Out-of-State ($)")
## `geom_smooth()` using formula = 'y ~ x'

# Q4: Tuition by degree length
# Box plot of in-state tuition for each degree length. The fill colour repeats
# the x-axis grouping, so the legend is suppressed to avoid listing the same
# categories twice.
ggplot(df, aes(x = degree_length, y = in_state_tuition, fill = degree_length)) +
  geom_boxplot(show.legend = FALSE) +
  theme_minimal() +
  labs(title = "Q4: Tuition by Degree Length", x = "", y = "Tuition ($)")

# Q5: Total cost in-state vs out-of-state
# Semi-transparent scatter of total out-of-state cost against total in-state
# cost, one point per row of df, coloured by institution type.
df %>%
  ggplot(
    mapping = aes(x = in_state_total, y = out_of_state_total, color = type)
  ) +
  geom_point(alpha = 0.5) +
  labs(
    title = "Q5: Total Cost In-State vs Out-of-State",
    x = "In-State Total ($)",
    y = "Out-of-State Total ($)"
  ) +
  theme_minimal()
