B02, but I do the actual assignment with the dataset everyone else is using instead of my own thing (oops)

Loading libraries and the CSV

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.4 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.2      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Read the robustbase `education` CSV from the Rdatasets mirror. The file's
# first column has no header, so readr auto-names it `...1`.
edu_raw <- read_csv(
  file = "https://vincentarelbundock.github.io/Rdatasets/csv/robustbase/education.csv"
)
## New names:
## Rows: 50 Columns: 7
## ── Column specification
## ──────────────────────────────────────────────────────── Delimiter: "," chr
## (1): State dbl (6): ...1, Region, X1, X2, X3, Y
## ℹ Use `spec()` to retrieve the full column specification for this data. ℹ
## Specify the column types or set `show_col_types = FALSE` to quiet this message.
## • `` -> `...1`
# Quick look at the column names and types of the raw import.
edu_raw %>% glimpse()
## Rows: 50
## Columns: 7
## $ ...1   <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, …
## $ State  <chr> "ME", "NH", "VT", "MA", "RI", "CT", "NY", "NJ", "PA", "OH", "IN…
## $ Region <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ X1     <dbl> 508, 564, 322, 846, 871, 774, 856, 889, 715, 753, 649, 830, 738…
## $ X2     <dbl> 3944, 4578, 4011, 5233, 4780, 5889, 5663, 5759, 4894, 5012, 490…
## $ X3     <dbl> 325, 323, 328, 305, 303, 307, 301, 310, 300, 324, 329, 320, 337…
## $ Y      <dbl> 235, 231, 270, 261, 300, 317, 387, 285, 300, 221, 264, 308, 379…

Renaming variables and converting the discrete Region variable to a factor

# Give the coded columns descriptive names and store Region as a factor so
# that it is treated as a category rather than a number.
edu <- edu_raw %>%
  rename(
    res_density = X1,
    income = X2,
    minors = X3,
    spend_public = Y
  ) %>%
  mutate(Region = factor(Region))

# Confirm the new names and that Region is now <fct>.
edu %>% glimpse()
## Rows: 50
## Columns: 7
## $ ...1         <dbl> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17…
## $ State        <chr> "ME", "NH", "VT", "MA", "RI", "CT", "NY", "NJ", "PA", "OH…
## $ Region       <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ res_density  <dbl> 508, 564, 322, 846, 871, 774, 856, 889, 715, 753, 649, 83…
## $ income       <dbl> 3944, 4578, 4011, 5233, 4780, 5889, 5663, 5759, 4894, 501…
## $ minors       <dbl> 325, 323, 328, 305, 303, 307, 301, 310, 300, 324, 329, 32…
## $ spend_public <dbl> 235, 231, 270, 261, 300, 317, 387, 285, 300, 221, 264, 30…

#1 Box plot of income

# Single box plot of income; income is mapped to x, so the box is drawn
# horizontally.
edu %>%
  ggplot(aes(x = income)) +
  geom_boxplot()

#2 Bar chart showing how many states in each region.

# Count the states in each region, then draw one bar per region.
edu %>%
  count(Region) %>%
  ggplot(aes(x = Region, y = n)) +
  # The counts are already computed by count(), so use geom_col(), which plots
  # the y values as given (the dedicated form of geom_bar(stat = "identity")).
  geom_col() +
  ylab('Number of States')

#3 Scatter plot of minors vs spend_public (remember spend is per capita so this isn’t exactly a meaningful graph)

# Scatter plot with minors on the x-axis and spend_public on the y-axis.
edu %>%
  ggplot(aes(minors, spend_public)) +
  geom_point()

#4 Box plot of income separated by region… use factor(Region) to treat numbers as categories

# One income box per region. Region was already converted to a factor when
# `edu` was built, so it can be mapped directly without wrapping it in
# factor() again; the x-axis title then defaults to "Region", which makes a
# separate xlab() call unnecessary.
ggplot(edu, aes(x = Region, y = income)) +
  geom_boxplot()

#5 Dotplot of density, colored by region

# Dot plot of res_density, with dots filled by region.
ggplot(edu, aes(x = res_density, fill = Region)) +
  geom_dotplot(
    method = "histodot",
    binwidth = 25,
    # Stack dots across regions. Without this, every region is stacked from
    # the baseline independently, so dots from different regions that fall in
    # the same bin are drawn on top of each other and some states are hidden.
    stackgroups = TRUE
  ) +
  # The y-axis numbers of an x-binned dot plot are not meaningful, so hide them.
  scale_y_continuous(NULL, breaks = NULL)