Load the necessary package

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.0.2
## -- Attaching packages ---------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.1     v purrr   0.3.4
## v tibble  3.0.1     v dplyr   1.0.0
## v tidyr   1.1.0     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## Warning: package 'readr' was built under R version 4.0.2
## Warning: package 'forcats' was built under R version 4.0.2
## -- Conflicts ------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()



Load the dataset

# Read the college data set straight from its URL into a tibble.
college_data <- read_csv(file = "http://672258.youcanlearnit.net/college.csv")
## Parsed with column specification:
## cols(
##   id = col_double(),
##   name = col_character(),
##   city = col_character(),
##   state = col_character(),
##   region = col_character(),
##   highest_degree = col_character(),
##   control = col_character(),
##   gender = col_character(),
##   admission_rate = col_double(),
##   sat_avg = col_double(),
##   undergrads = col_double(),
##   tuition = col_double(),
##   faculty_salary_avg = col_double(),
##   loan_default_rate = col_character(),
##   median_debt = col_double(),
##   lon = col_double(),
##   lat = col_double()
## )



Summarize the dataset

# Print per-column summary statistics for the freshly loaded data.
college_data %>%
  summary()
##        id             name               city              state          
##  Min.   :100654   Length:1269        Length:1269        Length:1269       
##  1st Qu.:153250   Class :character   Class :character   Class :character  
##  Median :186283   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :186988                                                           
##  3rd Qu.:215284                                                           
##  Max.   :484905                                                           
##     region          highest_degree       control             gender         
##  Length:1269        Length:1269        Length:1269        Length:1269       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  admission_rate      sat_avg       undergrads       tuition     
##  Min.   :0.0509   Min.   : 720   Min.   :   47   Min.   : 2732  
##  1st Qu.:0.5339   1st Qu.: 973   1st Qu.: 1296   1st Qu.: 8970  
##  Median :0.6687   Median :1040   Median : 2556   Median :20000  
##  Mean   :0.6501   Mean   :1060   Mean   : 5629   Mean   :21025  
##  3rd Qu.:0.7859   3rd Qu.:1120   3rd Qu.: 6715   3rd Qu.:30364  
##  Max.   :1.0000   Max.   :1545   Max.   :52280   Max.   :51008  
##  faculty_salary_avg loan_default_rate   median_debt         lon         
##  Min.   : 1451      Length:1269        Min.   : 6056   Min.   :-157.92  
##  1st Qu.: 6191      Class :character   1st Qu.:21250   1st Qu.: -94.17  
##  Median : 7272      Mode  :character   Median :24589   Median : -84.89  
##  Mean   : 7656                         Mean   :23483   Mean   : -88.29  
##  3rd Qu.: 8671                         3rd Qu.:27000   3rd Qu.: -78.63  
##  Max.   :20650                         Max.   :41000   Max.   : -68.59  
##       lat       
##  Min.   :19.71  
##  1st Qu.:35.22  
##  Median :39.74  
##  Mean   :38.61  
##  3rd Qu.:41.81  
##  Max.   :61.22



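Mutate some columns: convert state and region to factors, and loan_default_rate from text to numeric (entries that are not numbers become NA)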

# Recode the two categorical location columns as factors and turn the loan
# default rate (parsed as character) into a number. Entries that cannot be
# read as numbers become NA, which is what raises the coercion warning.
college_data <- college_data %>%
  mutate(
    across(c(state, region), as.factor),
    loan_default_rate = as.numeric(loan_default_rate)
  )
## Warning in mask$eval_all_mutate(dots[[i]]): NAs introduced by coercion





Creating a histogram

# Histogram of undergraduate enrollment using the default binning.
ggplot(college_data, aes(x = undergrads)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.





Histogram with Binning

(Binning means grouping the values into intervals, called bins, and counting how many observations fall into each one.)

(Setting the bin boundary to 0 - `boundary = 0`, formerly `origin = 0` - means the histogram's bins will start from 0.)

# Histogram of undergraduate enrollment split into four bins, with a bin edge
# anchored at 0. `boundary` is the current name for the deprecated `origin`
# argument, so this draws the same plot without the deprecation warning.
ggplot(data = college_data) +
  geom_histogram(mapping = aes(x = undergrads), bins = 4, boundary = 0)





Grouping into bins that are 10,000 wide: 0-10,000, 10,000-20,000, and so on

# Histogram of undergraduate enrollment with bins 10,000 students wide and a
# bin edge anchored at 0. `boundary` is the current name for the deprecated
# `origin` argument, so this draws the same plot without the deprecation
# warning.
ggplot(data = college_data) +
  geom_histogram(mapping = aes(x = undergrads), binwidth = 10000, boundary = 0)





Creating a Scatterplot

# Scatterplot of tuition against the type of institutional control.
ggplot(college_data, aes(x = control, y = tuition)) +
  geom_point()





Creating a Jitter plot

(A jitter plot adds a small amount of random noise to each point's position, which gives a clearer picture when a large number of points pile up in a small area.)

# Same tuition-by-control plot, but with the points jittered so that
# overlapping observations are spread apart.
ggplot(college_data, aes(x = control, y = tuition)) +
  geom_jitter()





Creating a Box plot

# Box plot of the tuition distribution for each type of institutional control.
ggplot(college_data, aes(x = control, y = tuition)) +
  geom_boxplot()