1 Libraries and Data

Load needed libraries and data

library(tidyverse)
library(tidytuesdayR)
tuesdata <- tidytuesdayR::tt_load('2020-03-10')

We are working with the tuition_cost and diversity_school data from the TidyTuesday set from March 10, 2020.

tuition_cost <- tuesdata$tuition_cost
diversity_school <- tuesdata$diversity_school

2 Creating My Custom Theme

mytheme <- theme_minimal() + 
  theme(plot.title = element_text(face = "bold", size = 16),
        plot.subtitle = element_text(face = "italic", size = 14),
        axis.title = element_text(size = 14, face = "bold"), 
        axis.text = element_text(size = 12), 
        axis.ticks = element_blank(),
        legend.background = element_rect(fill = "#C0C0C0"))

3 Heatmaps

3.1 Step 1: Tidy Data

First step is to tidy the data. A Heat map has three elements we need to define: * X-Axis/Columns = State (Alambama:Wyoming + Territories/DC, ~50 rows) * Y-Axis/Rows = School Type (Public, Private, For Profit, 3 columns) * Tile/Values = Average In-State Tuition be (~150 total values/observations)

tuition_cost

## # A tibble: 2,973 x 10
##    name     state state_code type  degree_length room_and_board in_state_tuition
##    <chr>    <chr> <chr>      <chr> <chr>                  <dbl>            <dbl>
##  1 Aaniiih~ Mont~ MT         Publ~ 2 Year                    NA             2380
##  2 Abilene~ Texas TX         Priv~ 4 Year                 10350            34850
##  3 Abraham~ Geor~ GA         Publ~ 2 Year                  8474             4128
##  4 Academy~ Minn~ MN         For ~ 2 Year                    NA            17661
##  5 Academy~ Cali~ CA         For ~ 4 Year                 16648            27810
##  6 Adams S~ Colo~ CO         Publ~ 4 Year                  8782             9440
##  7 Adelphi~ New ~ NY         Priv~ 4 Year                 16030            38660
##  8 Adirond~ New ~ NY         Publ~ 2 Year                 11660             5375
##  9 Adrian ~ Mich~ MI         Priv~ 4 Year                 11318            37087
## 10 Advance~ Virg~ VA         For ~ 2 Year                    NA            13680
## # ... with 2,963 more rows, and 3 more variables: in_state_total <dbl>,
## #   out_of_state_tuition <dbl>, out_of_state_total <dbl>

3.1.1 Y-Axis Ordering

Plot will have schools on Y axis in alpha order by state

## Using pipe with tidyverse

states <- tuition_cost %>%
  select(state) %>%
  distinct(state) %>%
  arrange(state)

states

## # A tibble: 51 x 1
##    state      
##    <chr>      
##  1 Alabama    
##  2 Alaska     
##  3 Arizona    
##  4 Arkansas   
##  5 California 
##  6 Colorado   
##  7 Connecticut
##  8 Delaware   
##  9 Florida    
## 10 Georgia    
## # ... with 41 more rows

Hm seems there’s some observations with state marked “NA”, probably DC and other territories.

tuition_cost %>%
  filter(is.na(state)) %>%
  distinct(state_code)

## # A tibble: 5 x 1
##   state_code
##   <chr>     
## 1 AS        
## 2 DC        
## 3 PR        
## 4 GU        
## 5 VI

So there are “states” coded NA that are either American Samoa, DC, Puerto Rico, Guam, and the Virgin Islands. While DC isn’t a state, it is much more similar to the other states (within continental US, more comparable demographically) than the other territories.

Let’s distinguish DC from the other territories using an if_else on the state_code

tuition_cost <- tuition_cost %>%
  mutate(state = if_else(is.na(state), if_else(state_code == "DC", "District of Columbia", "Territories"), state))

tuition_cost %>%
  filter(state == "Territories" | state == "District of Columbia")

## # A tibble: 52 x 10
##    name    state  state_code type  degree_length room_and_board in_state_tuition
##    <chr>   <chr>  <chr>      <chr> <chr>                  <dbl>            <dbl>
##  1 Americ~ Terri~ AS         Publ~ 2 Year                    NA             3700
##  2 Americ~ Distr~ DC         Priv~ 4 Year                 14880            48459
##  3 Americ~ Terri~ PR         Priv~ 4 Year                    NA             6946
##  4 Atlant~ Terri~ PR         Priv~ 4 Year                    NA             4525
##  5 Bayamo~ Terri~ PR         Priv~ 4 Year                    NA             5775
##  6 Caribb~ Terri~ PR         Priv~ 4 Year                    NA             6570
##  7 Carlos~ Terri~ PR         Priv~ 4 Year                    NA             5912
##  8 Cathol~ Distr~ DC         Priv~ 4 Year                 14650            45804
##  9 Columb~ Terri~ PR         For ~ 2 Year                    NA             6420
## 10 Conser~ Terri~ PR         Publ~ 4 Year                    NA             4000
## # ... with 42 more rows, and 3 more variables: in_state_total <dbl>,
## #   out_of_state_tuition <dbl>, out_of_state_total <dbl>

Perfect! This is now resolved.

3.1.2 X-Axis Ordering

X-Axis is school type from least to greatest in state tuition cost. To figure that out we need to group_by type and then do a summary of the means for in_state_tuition.

types <- tuition_cost %>%
  group_by(type) %>%
  summarise(mean(in_state_total)) %>%
  arrange(`mean(in_state_total)`)

types

## # A tibble: 4 x 2
##   type       `mean(in_state_total)`
##   <chr>                       <dbl>
## 1 Other                       8448 
## 2 Public                     10394.
## 3 For Profit                 18466.
## 4 Private                    38680.

What is this “Other”? type? Let’s see where we observe it…

tuition_cost %>%
  filter(type == "Other")

## # A tibble: 1 x 10
##   name      state state_code type  degree_length room_and_board in_state_tuition
##   <chr>     <chr> <chr>      <chr> <chr>                  <dbl>            <dbl>
## 1 Universi~ Texas TX         Other Other                     NA             8448
## # ... with 3 more variables: in_state_total <dbl>, out_of_state_tuition <dbl>,
## #   out_of_state_total <dbl>

So this “Other” is exactly one school in Texas… If we include this in the heat map as its own column it will look ugly, so we should probably exclude this one value.

Let’s reassign tuition_cost so it excludes this observation.

tuition_cost <- tuition_cost %>%
  filter(type != "Other")

tuition_cost

## # A tibble: 2,972 x 10
##    name     state state_code type  degree_length room_and_board in_state_tuition
##    <chr>    <chr> <chr>      <chr> <chr>                  <dbl>            <dbl>
##  1 Aaniiih~ Mont~ MT         Publ~ 2 Year                    NA             2380
##  2 Abilene~ Texas TX         Priv~ 4 Year                 10350            34850
##  3 Abraham~ Geor~ GA         Publ~ 2 Year                  8474             4128
##  4 Academy~ Minn~ MN         For ~ 2 Year                    NA            17661
##  5 Academy~ Cali~ CA         For ~ 4 Year                 16648            27810
##  6 Adams S~ Colo~ CO         Publ~ 4 Year                  8782             9440
##  7 Adelphi~ New ~ NY         Priv~ 4 Year                 16030            38660
##  8 Adirond~ New ~ NY         Publ~ 2 Year                 11660             5375
##  9 Adrian ~ Mich~ MI         Priv~ 4 Year                 11318            37087
## 10 Advance~ Virg~ VA         For ~ 2 Year                    NA            13680
## # ... with 2,962 more rows, and 3 more variables: in_state_total <dbl>,
## #   out_of_state_tuition <dbl>, out_of_state_total <dbl>

Now let’s reassess:

types <- tuition_cost %>%
  group_by(type) %>%
  summarise(mean(in_state_total)) %>%
  arrange(`mean(in_state_total)`)

types

## # A tibble: 3 x 2
##   type       `mean(in_state_total)`
##   <chr>                       <dbl>
## 1 Public                     10394.
## 2 For Profit                 18466.
## 3 Private                    38680.

So “Public” is cheapest on average, while private schools ironically cost more than For-Profit schools. So the order (L to R) will be Public, For Profit, Private

types

3.1.3 Tile value, fill aes

Last thing we need is the fill aesthetic, which is taken from the average in state tuition by state and type (150 values total).

tiles <- tuition_cost %>%
  group_by(state, type) %>%
  summarise(tuition_mean = mean(in_state_tuition)) %>%
  mutate(type_F = factor(type)) %>%
  mutate(type_F = fct_relevel(type_F, c("Public", "For Profit", "Private"))) %>%
  mutate(state_F = factor(state)) %>% 
  select(state_F, type_F, tuition_mean)

3.1.4 Gradient

Use either

scale_fill_gradient(low = "white", high = "blue")

library(rcartocolor)
scale_fill_carto_c(palette = "BluGrm", direction = 1)

## <ScaleContinuous>
##  Range:  
##  Limits:    0 --    1

3.2 Step 2: Exploratory Plot

Now we have our x, y and fill values, we can start the plot.

ggplot(tiles, aes(type_F, state_F, fill = tuition_mean)) + 
  geom_tile()

So we see what we suspected, public schools tend to be cheapest (although there seems to be some variability here), then for profit (where there are for profit schools) and then private schools (though not always).

3.3 Step 3: Pick a Palette

So now let’s pick a color palette that looks nice.

ggplot(tiles, aes(type_F, state_F, fill = tuition_mean)) + 
  geom_tile() +
  scale_y_discrete(limits=rev) + 
  scale_fill_distiller(palette = "RdPu")

3.4 Step 4: Refine the Plot

Now let’s clean this up!

ggplot(tiles, aes(type_F, state_F, fill = tuition_mean)) + 
  geom_tile() +
  scale_y_discrete(limits=rev) + 
  scale_fill_distiller(palette = "RdPu") + 
  labs(title = "The Cost of An Education in the United States",
       subtitle = "Comparing Average Tuition by School Type and State", 
       x = "School Type", 
       y = "State", 
       fill = "Average In-State Tuition",
       caption = "Data from the Chronicle of Higher Education") +
  mytheme

4 Lollipop Charts

4.1 Step 1: Tidy Data

diversity_school %>% print(n = 20)

50655 X 5

tuition_cost

2973 X 10

Will want to do a left join of tuition cost and diversity school, but first need to widen diversity_school with pivot_wider

diversity_school_tidy <- diversity_school %>%
  drop_na() %>%
  pivot_wider(id_cols = c(state, name, total_enrollment),
              names_from = category,
              values_from = enrollment) %>% 
  mutate(state = factor(state) %>%
           fct_relevel(sort))

diversity_school_tidy

## # A tibble: 4,574 x 14
##    state  name    total_enrollment  Women `American Indian~ Asian Black Hispanic
##    <fct>  <chr>              <dbl>  <dbl>             <dbl> <dbl> <dbl>    <dbl>
##  1 Arizo~ Univer~           195059 134722               876  1959 31455    13984
##  2 India~ Ivy Te~            91179  53476               357  1369 12370     5533
##  3 Virgi~ Libert~            81459  48329               447   856 14751     1186
##  4 Texas  Lone S~            69395  41268               168  4198 12094    23751
##  5 Flori~ Miami ~            66046  38323                47   655 10722    44870
##  6 Arizo~ Grand ~            62304  46647               586  2446 13856     8933
##  7 Texas  Texas ~            61642  29277               173  3545  1879    11256
##  8 Flori~ Univer~            60767  33482               120  3343  6400    13108
##  9 Ohio   Ohio S~            58322  28658                76  3339  3108     2049
## 10 Texas  Housto~            58276  34007               116  5391 18520    18411
## # ... with 4,564 more rows, and 6 more variables:
## #   Native Hawaiian / Pacific Islander <dbl>, White <dbl>,
## #   Two Or More Races <dbl>, Unknown <dbl>, Non-Resident Foreign <dbl>,
## #   Total Minority <dbl>

Now for the left join.

data <- left_join(tuition_cost, diversity_school_tidy, by = c("name", "state"))

4.2 Step 2: The Candy (geom_point)

Lollipop charts are two parts, the candy (using geom_point) and the stick (using geom_segment).

We will start with the candy using geom_point. Before we start plotting, let’s recall what data we need to plot:

We will include only * 4-year * private or public institutions * in Illinois * whose total_enrollment is higher than the state median.

So we need to filter out schools not in Illinois, as well as any For Profit schools, and 2 year colleges.

The last one requires a quick calculation, as we need to calculate the state median enrollment.

data %>%
  filter(state == "Illinois") %>% # First filter by state
  filter(degree_length == "4 Year") %>% # Now remove 2 year schools
  filter(type == "Public" | type == "Private") %>% # Now only public or private schools
  filter(total_enrollment > median(total_enrollment, na.rm = T)) %>% # MUST include na.rm = T for this to work!!
  print(n = Inf)

## # A tibble: 27 x 22
##    name     state state_code type  degree_length room_and_board in_state_tuition
##    <chr>    <chr> <chr>      <chr> <chr>                  <dbl>            <dbl>
##  1 Aurora ~ Illi~ IL         Priv~ 4 Year                 11700            24260
##  2 Benedic~ Illi~ IL         Priv~ 4 Year                  9480            34290
##  3 Bradley~ Illi~ IL         Priv~ 4 Year                 10620            33760
##  4 Chicago~ Illi~ IL         Publ~ 4 Year                  8724            12177
##  5 Columbi~ Illi~ IL         Priv~ 4 Year                 14012            27176
##  6 Concord~ Illi~ IL         Priv~ 4 Year                  9748            32078
##  7 DePaul ~ Illi~ IL         Priv~ 4 Year                 14235            39975
##  8 Dominic~ Illi~ IL         Priv~ 4 Year                 10241            33434
##  9 Eastern~ Illi~ IL         Publ~ 4 Year                  9882            11803
## 10 Governo~ Illi~ IL         Publ~ 4 Year                 10346            12196
## 11 Illinoi~ Illi~ IL         Priv~ 4 Year                 12762            47296
## 12 Illinoi~ Illi~ IL         Publ~ 4 Year                  9850            14516
## 13 Lewis U~ Illi~ IL         Priv~ 4 Year                 10578            32450
## 14 Loyola ~ Illi~ IL         Priv~ 4 Year                 14480            44048
## 15 Moody B~ Illi~ IL         Priv~ 4 Year                 11820            13565
## 16 Nationa~ Illi~ IL         Priv~ 4 Year                    NA            10710
## 17 Northea~ Illi~ IL         Publ~ 4 Year                 11424            14022
## 18 Norther~ Illi~ IL         Publ~ 4 Year                 10880            12262
## 19 Northwe~ Illi~ IL         Priv~ 4 Year                 16626            54567
## 20 Olivet ~ Illi~ IL         Priv~ 4 Year                  7900            36070
## 21 Rooseve~ Illi~ IL         Priv~ 4 Year                 13223            29832
## 22 Saint X~ Illi~ IL         Priv~ 4 Year                 11340            33880
## 23 School ~ Illi~ IL         Priv~ 4 Year                 15890            49270
## 24 Univers~ Illi~ IL         Priv~ 4 Year                 16350            58230
## 25 Univers~ Illi~ IL         Publ~ 4 Year                 12074            13764
## 26 Univers~ Illi~ IL         Publ~ 4 Year                 11308            15094
## 27 Western~ Illi~ IL         Publ~ 4 Year                  9630            11267
## # ... with 15 more variables: in_state_total <dbl>, out_of_state_tuition <dbl>,
## #   out_of_state_total <dbl>, total_enrollment <dbl>, Women <dbl>,
## #   American Indian / Alaska Native <dbl>, Asian <dbl>, Black <dbl>,
## #   Hispanic <dbl>, Native Hawaiian / Pacific Islander <dbl>, White <dbl>,
## #   Two Or More Races <dbl>, Unknown <dbl>, Non-Resident Foreign <dbl>,
## #   Total Minority <dbl>

# Let's clean up this pipe before we continue...


data %>% 
  filter(type == "Public" | type == "Private") %>% 
  filter(degree_length == "4 Year",
         state == "Illinois",
         total_enrollment > median(total_enrollment, na.rm = T)) %>% # Better!
  mutate(int_student = `Non-Resident Foreign` / total_enrollment) %>%
  ggplot(aes(x = name, y = int_student)) +
  geom_point() + 
  coord_flip()

One thing that is immediately apparent; given how many schools there are, we should consider the color aes for the geom_point, and possibly using that so each school has a unique color. Let’s try this now

data %>% 
  filter(type == "Public" | type == "Private") %>% 
  filter(degree_length == "4 Year",
         state == "Illinois",
         total_enrollment > median(total_enrollment, na.rm = T)) %>% 
  mutate(int_student = `Non-Resident Foreign` / total_enrollment) %>%
  ggplot(aes(x = name, y = int_student)) + # assignment instructions are to plot schools on y axis, but my personal pref is to code variable of interest on y and then just use coord_flip at the end. 
  geom_point(aes(color = name)) + 
  coord_flip() + 
  theme(legend.position = "none") # Since the legend isn't necessary and will take up much of the plot, let's surpress it now

4.3 Step 3: The Stick (geom_segment)

Next we will add the stick using geom_segment

data %>% 
  filter(type == "Public" | type == "Private") %>% 
  filter(degree_length == "4 Year",
         state == "Illinois",
         total_enrollment > median(total_enrollment, na.rm = T)) %>% 
  mutate(int_student = `Non-Resident Foreign` / total_enrollment) %>%
  ggplot(aes(x = name, y = int_student)) + 
  geom_point(aes(color = name)) + 
  geom_segment(aes(xend = name, yend = 0, color = name)) + # assign color again so the segment matches! 
  coord_flip() + 
  theme(legend.position = "none")

Just to expirement, let’s see what happens when I plot the stick before the candy…

data %>% 
  filter(type == "Public" | type == "Private") %>% 
  filter(degree_length == "4 Year",
         state == "Illinois",
         total_enrollment > median(total_enrollment, na.rm = T)) %>% 
  mutate(int_student = `Non-Resident Foreign` / total_enrollment) %>%
  ggplot(aes(x = name, y = int_student)) + 
  geom_segment(aes(xend = name, yend = 0, color = name)) + 
  geom_point(aes(color = name)) + 
  coord_flip() + 
  theme(legend.position = "none")

4.4 Step 4: Refine the Plot

So the immediate things I need to do are:

Reorder based on percentage (use fct_reorder) from highest to lowest,
Make dots bigger!
Titles, labels, etc.

First we will address 1 and 2.

data %>% 
  filter(type == "Public" | type == "Private") %>% 
  filter(degree_length == "4 Year",
         state == "Illinois",
         total_enrollment > median(total_enrollment, na.rm = T)) %>% 
  mutate(int_student = `Non-Resident Foreign` / total_enrollment, # Expand the mutate then use fct_reoder to revise ordering 
         name = factor(name) %>% # Creates a new "name" variable that is a factor instead of character
           fct_reorder(.x = int_student, .fun = sort)) %>% # Then reorders "names" based on the values of int_student
  ggplot(aes(x = name, y = int_student)) + 
  geom_point(aes(color = name), size = 2.5) + # Make the points a bit bigger
  geom_segment(aes(xend = name, yend = 0, color = name)) + # assign color again so the segment matches! 
  coord_flip() + 
  theme(legend.position = "none")

Now let’s add a title and clean up the rest of the graph.

data %>% 
  filter(type == "Public" | type == "Private") %>% 
  filter(degree_length == "4 Year",
         state == "Illinois",
         total_enrollment > median(total_enrollment, na.rm = T)) %>% 
  mutate(int_student = `Non-Resident Foreign` / total_enrollment, 
         name = factor(name) %>% 
           fct_reorder(.x = int_student, .fun = sort)) %>% 
  ggplot(aes(x = name, y = int_student)) + 
  geom_point(aes(color = name), size = 3.5) + 
  geom_segment(aes(xend = name, yend = 0, color = name), size = 1) +  
  scale_y_continuous(labels = scales::percent, limits = c(0, .5)) + # Since the int_student variable is a %, I looked for a way to change the x-axis to express these values as % rather than a value 0 to 1.
  labs(title = "International Student Enrollment in Illinois Universities", # Knock out labs in one handy set of arguments!
       subtitle = "Comparing Large Public and Private Schools", 
       caption = "Data from the Chronicle of Higher Education", 
       x = "University", 
       y = "Percentage of Foreign, Non-Resident Students") +
  coord_flip() + 
  mytheme + 
  theme(legend.position = "none") # replace theme with my specific theme

Beautiful!

5 Cleveland Dot Charts

For this plot we will compare the proportion of foreign, non-resident (international) students with the proportion of White students.

5.1 Step 1: unTidy Data

First, we need to unTidy the data by calculating the percentages for each group and getting them on to their own line, using a pivot_longer

data %>% 
  filter(type == "Public" | type == "Private") %>% 
  filter(degree_length == "4 Year",
         state == "Illinois",
         total_enrollment > median(total_enrollment, na.rm = T)) %>% 
  select(name, total_enrollment, White, `Non-Resident Foreign`) %>% 
  pivot_longer(cols = c(White, `Non-Resident Foreign`), 
               names_to = "Student_Type")

## # A tibble: 56 x 4
##    name                     total_enrollment Student_Type         value
##    <chr>                               <dbl> <chr>                <dbl>
##  1 Aurora University                    5084 White                 3240
##  2 Aurora University                    5084 Non-Resident Foreign     9
##  3 Benedictine University               6307 White                 2811
##  4 Benedictine University               6307 Non-Resident Foreign   132
##  5 Bradley University                   5300 White                 3507
##  6 Bradley University                   5300 Non-Resident Foreign   387
##  7 Chicago State University             5211 White                  271
##  8 Chicago State University             5211 Non-Resident Foreign   360
##  9 Columbia College Chicago             9440 White                 5428
## 10 Columbia College Chicago             9440 Non-Resident Foreign   283
## # ... with 46 more rows

Now we can start experimenting with our plot.

5.2 Step 2: Exploratory Plot

data %>% 
  filter(type == "Public" | type == "Private") %>% 
  filter(degree_length == "4 Year",
         state == "Illinois",
         total_enrollment > median(total_enrollment, na.rm = T)) %>% 
  select(name, total_enrollment, White, `Non-Resident Foreign`) %>% 
  pivot_longer(cols = c(White, `Non-Resident Foreign`), 
               names_to = "Student_Type") %>%
mutate(perc = value / total_enrollment) %>% 
  ggplot(aes(x = name, y = perc)) + 
  geom_point() +
  geom_line(aes(group = name)) + 
  coord_flip()

5.3 Step 3: Colors and Order

First let’s add some color.

data %>% 
  filter(type == "Public" | type == "Private") %>% 
  filter(degree_length == "4 Year",
         state == "Illinois",
         total_enrollment > median(total_enrollment, na.rm = T)) %>% 
  select(name, total_enrollment, White, `Non-Resident Foreign`) %>% 
  pivot_longer(cols = c(White, `Non-Resident Foreign`), 
               names_to = "Student_Type") %>%
mutate(perc = value / total_enrollment) %>% 
  ggplot(aes(x = name, y = perc)) + 
  geom_point(aes(color = Student_Type)) +
  geom_line(aes(group = name)) + 
  scale_y_continuous(labels = scales::percent) + 
  coord_flip() + 
  mytheme

Since most schools (expectedly) have more White students than international students, this data is not going to be able to be shaped into an hour glass. The next logical ordering I can think of is the same as the previous chart, going from greatest to least % of international students, as then we can at least see if there’s any interesting trends as the % of international students change.

Let’s see what this looks like using that ordering.

data %>% 
  filter(type == "Public" | type == "Private") %>% 
  filter(degree_length == "4 Year",
         state == "Illinois",
         total_enrollment > median(total_enrollment, na.rm = T)) %>% 
  select(name, total_enrollment, White, `Non-Resident Foreign`) %>% 
  pivot_longer(cols = c(White, `Non-Resident Foreign`), 
               names_to = "Student_Type") %>%
mutate(perc = value / total_enrollment,
       name = factor(name) %>%
         fct_reorder2(.x = Student_Type,
                      .y = perc,
                      .fun = first2,
                      .desc = F)) %>% 
  ggplot(aes(x = name, y = perc)) + 
  geom_point(aes(color = Student_Type)) +
  geom_line(aes(group = name)) + 
  scale_y_continuous(labels = scales::percent) + 
  coord_flip() + 
  mytheme

5.4 Step 4: Refine the Plot

Now to add labels, titles, and my theme.

data %>% 
  filter(type == "Public" | type == "Private") %>% 
  filter(degree_length == "4 Year",
         state == "Illinois",
         total_enrollment > median(total_enrollment, na.rm = T)) %>% 
  select(name, total_enrollment, White, `Non-Resident Foreign`) %>% 
  pivot_longer(cols = c(White, `Non-Resident Foreign`), 
               names_to = "Student_Type") %>%
mutate(perc = value / total_enrollment,
       
       name = factor(name %>%
                       fct_reorder2(.x = Student_Type,
                                    .y = perc,
                                    .fun = first2,
                                    .desc = F))) %>% 
  ggplot(aes(x = name, y = perc)) + 
  geom_line(aes(group = name), size = 1, color = "#808080") + 
  geom_point(aes(color = Student_Type), size = 3) +
  scale_y_continuous(labels = scales::percent) + 
   labs(title = "Comparing White & International Student Enrollment in Illinois Universities", 
       subtitle = "Comparing Large Public and Private Schools", 
       caption = "Data from the Chronicle of Higher Education", 
       x = "University", 
       y = "Percentage of Student Type",
       color = "Student Type") +
  coord_flip() + 
  mytheme

Assignment 10

Jake da Silva Passos-Hoioos

04/04/2021