library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## -- Attaching packages ---------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1 v purrr 0.3.4
## v tibble 3.0.1 v dplyr 1.0.0
## v tidyr 1.1.0 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## Warning: package 'readr' was built under R version 4.0.2
## Warning: package 'forcats' was built under R version 4.0.2
## -- Conflicts ------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the college data set from the course server.
# Column types are declared explicitly (identical to the specification readr
# guessed for this file) so the import does not depend on type guessing and
# no "Parsed with column specification" message is printed.
# `loan_default_rate` is read as character because the column holds entries
# that are not valid numbers; it is converted to numeric later in the script.
college_data <- read_csv(
  "http://672258.youcanlearnit.net/college.csv",
  col_types = cols(
    id = col_double(),
    name = col_character(),
    city = col_character(),
    state = col_character(),
    region = col_character(),
    highest_degree = col_character(),
    control = col_character(),
    gender = col_character(),
    admission_rate = col_double(),
    sat_avg = col_double(),
    undergrads = col_double(),
    tuition = col_double(),
    faculty_salary_avg = col_double(),
    loan_default_rate = col_character(),
    median_debt = col_double(),
    lon = col_double(),
    lat = col_double()
  )
)
# Print per-column summary statistics for the freshly loaded data.
college_data %>% summary()
## id name city state
## Min. :100654 Length:1269 Length:1269 Length:1269
## 1st Qu.:153250 Class :character Class :character Class :character
## Median :186283 Mode :character Mode :character Mode :character
## Mean :186988
## 3rd Qu.:215284
## Max. :484905
## region highest_degree control gender
## Length:1269 Length:1269 Length:1269 Length:1269
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## admission_rate sat_avg undergrads tuition
## Min. :0.0509 Min. : 720 Min. : 47 Min. : 2732
## 1st Qu.:0.5339 1st Qu.: 973 1st Qu.: 1296 1st Qu.: 8970
## Median :0.6687 Median :1040 Median : 2556 Median :20000
## Mean :0.6501 Mean :1060 Mean : 5629 Mean :21025
## 3rd Qu.:0.7859 3rd Qu.:1120 3rd Qu.: 6715 3rd Qu.:30364
## Max. :1.0000 Max. :1545 Max. :52280 Max. :51008
## faculty_salary_avg loan_default_rate median_debt lon
## Min. : 1451 Length:1269 Min. : 6056 Min. :-157.92
## 1st Qu.: 6191 Class :character 1st Qu.:21250 1st Qu.: -94.17
## Median : 7272 Mode :character Median :24589 Median : -84.89
## Mean : 7656 Mean :23483 Mean : -88.29
## 3rd Qu.: 8671 3rd Qu.:27000 3rd Qu.: -78.63
## Max. :20650 Max. :41000 Max. : -68.59
## lat
## Min. :19.71
## 1st Qu.:35.22
## Median :39.74
## Mean :38.61
## 3rd Qu.:41.81
## Max. :61.22
# Convert the categorical location columns to factors and the loan default
# rate to numeric in a single step. Entries of `loan_default_rate` that are
# not valid numbers become NA, and R reports that with a coercion warning.
college_data <- college_data %>%
  mutate(
    across(c(state, region), as.factor),
    loan_default_rate = as.numeric(loan_default_rate)
  )
## Warning in mask$eval_all_mutate(dots[[i]]): NAs introduced by coercion
# Histogram of undergraduate enrollment using the default binning.
ggplot(college_data, aes(x = undergrads)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Histogram of undergraduate enrollment split into 4 bins, with a bin edge
# anchored at 0. `boundary` is used instead of the deprecated `origin`
# argument, so the deprecation warning is no longer raised.
ggplot(data = college_data) +
  geom_histogram(mapping = aes(x = undergrads), bins = 4, boundary = 0)
# Histogram of undergraduate enrollment with bins 10000 wide, with a bin edge
# anchored at 0. `boundary` is used instead of the deprecated `origin`
# argument, so the deprecation warning is no longer raised.
ggplot(data = college_data) +
  geom_histogram(mapping = aes(x = undergrads), binwidth = 10000, boundary = 0)
# Plot tuition against institutional control as raw points; points within
# each control category share the same x position.
ggplot(college_data, aes(x = control, y = tuition)) +
  geom_point()
# Same comparison with random jitter applied to the points so that
# overlapping observations become visible.
ggplot(college_data, aes(x = control, y = tuition)) +
  geom_jitter()
# Summarise the tuition distribution for each control category as a boxplot.
ggplot(college_data, aes(x = control, y = tuition)) +
  geom_boxplot()