# Read the tuition workbook from the project's data folder into a tibble.
data <- read_excel(path = "data/myData.xlsx")
# Auto-print the tibble to preview the first rows and the column types.
data
## # A tibble: 2,973 × 10
## name state state_code type degree_length room_and_board in_state_tuition
## <chr> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 Aaniiih… Mont… MT Publ… 2 Year NA 2380
## 2 Abilene… Texas TX Priv… 4 Year 10350 34850
## 3 Abraham… Geor… GA Publ… 2 Year 8474 4128
## 4 Academy… Minn… MN For … 2 Year NA 17661
## 5 Academy… Cali… CA For … 4 Year 16648 27810
## 6 Adams S… Colo… CO Publ… 4 Year 8782 9440
## 7 Adelphi… New … NY Priv… 4 Year 16030 38660
## 8 Adirond… New … NY Publ… 2 Year 11660 5375
## 9 Adrian … Mich… MI Priv… 4 Year 11318 37087
## 10 Advance… Virg… VA For … 2 Year NA 13680
## # ℹ 2,963 more rows
## # ℹ 3 more variables: in_state_total <dbl>, out_of_state_tuition <dbl>,
## # out_of_state_total <dbl>
Make two bar charts here: one before ordering the states and another after ordering them.
Unordered:
# Baseline chart: number of rows per state, with the state levels reversed
# by fct_rev() rather than ordered by frequency.
data %>%
  mutate(state = fct_rev(state)) %>%
  ggplot(mapping = aes(x = state)) +
  geom_bar()
Ordered:
# Ordered chart: fct_infreq() sorts the state levels by how often each one
# occurs, so the bars run from the most rows to the fewest.
data %>%
  mutate(state = fct_infreq(state)) %>%
  ggplot(mapping = aes(x = state)) +
  geom_bar()
Show examples of three forcats functions for modifying factor levels (recoding, collapsing, and lumping):
# List each unique state / state_code pairing found in the data.
distinct(data, state, state_code)
## # A tibble: 55 × 2
## state state_code
## <chr> <chr>
## 1 Montana MT
## 2 Texas TX
## 3 Georgia GA
## 4 Minnesota MN
## 5 California CA
## 6 Colorado CO
## 7 New York NY
## 8 Michigan MI
## 9 Virginia VA
## 10 Florida FL
## # ℹ 45 more rows
# fct_recode(): relabel the single level "New Hampshire" as "NH", then show
# the original and recoded columns side by side for the affected rows.
data %>%
  mutate(state_rev = fct_recode(state, NH = "New Hampshire")) %>%
  filter(state == "New Hampshire") %>%
  select(state, state_rev)
## # A tibble: 21 × 2
## state state_rev
## <chr> <fct>
## 1 New Hampshire NH
## 2 New Hampshire NH
## 3 New Hampshire NH
## 4 New Hampshire NH
## 5 New Hampshire NH
## 6 New Hampshire NH
## 7 New Hampshire NH
## 8 New Hampshire NH
## 9 New Hampshire NH
## 10 New Hampshire NH
## # ℹ 11 more rows
# fct_collapse(): merge the levels "New Hampshire" and "Vermont" into one
# level labelled "NH"; every other state keeps its own level.
# NOTE(review): the filter only drops Maine, so the collapsed rows are not
# among the first rows printed -- consider filtering to the two merged states
# instead so the example shows the effect.
data %>%
  mutate(
    state_col = fct_collapse(state, NH = c("New Hampshire", "Vermont"))
  ) %>%
  filter(state != "Maine") %>%
  select(state, state_col)
## # A tibble: 2,946 × 2
## state state_col
## <chr> <fct>
## 1 Montana Montana
## 2 Texas Texas
## 3 Georgia Georgia
## 4 Minnesota Minnesota
## 5 California California
## 6 Colorado Colorado
## 7 New York New York
## 8 New York New York
## 9 Michigan Michigan
## 10 Virginia Virginia
## # ℹ 2,936 more rows
# Tally the number of rows recorded for each state.
count(data, state)
## # A tibble: 51 × 2
## state n
## <chr> <int>
## 1 Alabama 54
## 2 Alaska 6
## 3 Arizona 34
## 4 Arkansas 46
## 5 California 254
## 6 Colorado 38
## 7 Connecticut 36
## 8 Delaware 9
## 9 Florida 88
## 10 Georgia 79
## # ℹ 41 more rows
# Lump the least frequent states into "Other" and list the levels that remain.
# fct_lump_lowfreq() is the explicit replacement for the superseded generic
# fct_lump() when neither `n` nor `prop` is supplied: it lumps the rarest
# levels together while keeping "Other" the smallest level.
# NOTE(review): the result still has 51 distinct values, the same number that
# count(state) reports, so little or nothing is lumped for this data; use
# fct_lump_n() or fct_lump_prop() if a visible effect is wanted.
data %>%
  mutate(state_lump = fct_lump_lowfreq(state)) %>%
  distinct(state_lump)
## # A tibble: 51 × 1
## state_lump
## <fct>
## 1 Montana
## 2 Texas
## 3 Georgia
## 4 Minnesota
## 5 California
## 6 Colorado
## 7 New York
## 8 Michigan
## 9 Virginia
## 10 Florida
## # ℹ 41 more rows
No need to do anything here.