Convert the class column in the mpg data set into a factor whose levels are ordered by vehicle size (from small to large): 2seater, subcompact, compact, midsize, suv, minivan, pickup.

# Recode `class` as a factor whose levels run from the smallest to the
# largest vehicle type rather than in the default alphabetical order.
mpg[["class"]] <- factor(
  mpg[["class"]],
  levels = c(
    "2seater", "subcompact", "compact", "midsize", "suv", "minivan", "pickup"
  )
)
# Confirm that the column is now a factor.
class(mpg[["class"]])
## [1] "factor"

What are the levels of marital in the gss_cat data set? Which level is the most common?

# List the levels of `marital` in their stored order.
levels(gss_cat[["marital"]])
## [1] "No answer"     "Never married" "Separated"     "Divorced"     
## [5] "Widowed"       "Married"
# Bar chart of respondent counts per marital status; the tallest bar is the
# most common level. `drop = FALSE` keeps every factor level on the x axis,
# including any level that has no observations.
ggplot(data = gss_cat, mapping = aes(x = marital)) +
  geom_bar() +
  scale_x_discrete(drop = FALSE)

Answer: The most common level is Married.

In the flights data set, create a graph of average arrival delay versus destination airport, with the destinations reordered by their average arrival delay.

# Per-destination summary: mean arrival delay (ignoring missing delays) and
# the number of flights. Destinations whose mean is still missing, because
# they have no recorded arrival delay at all, are dropped.
f_light <- flights %>%
  group_by(dest) %>%
  summarise(ave_arr_delay = mean(arr_delay, na.rm = TRUE), n = n()) %>%
  filter(!is.na(ave_arr_delay))

# Print the summary table.
f_light
## # A tibble: 104 × 3
##    dest  ave_arr_delay     n
##    <chr>         <dbl> <int>
##  1 ABQ            4.38   254
##  2 ACK            4.85   265
##  3 ALB           14.4    439
##  4 ANC           -2.5      8
##  5 ATL           11.3  17215
##  6 AUS            6.02  2439
##  7 AVL            8.00   275
##  8 BDL            7.05   443
##  9 BGR            8.03   375
## 10 BHM           16.9    297
## # ℹ 94 more rows
# Dot plot of mean arrival delay per destination. `dest` is turned into a
# factor ordered by `ave_arr_delay` so the points rise monotonically instead
# of following alphabetical airport codes.
f_light %>%
  mutate(dest = fct_reorder(.f = dest, .x = ave_arr_delay)) %>%
  ggplot(mapping = aes(x = ave_arr_delay, y = dest)) +
  geom_point() +
  # Small y-axis labels so that all destination codes fit on the axis.
  theme(axis.text.y = element_text(size = 5))

Collapse the levels of rincome in gss_cat into three categories: $10000 or more, less than $10000, and Others.

# Inspect the original income levels before collapsing them.
levels(gss_cat[["rincome"]])
##  [1] "No answer"      "Don't know"     "Refused"        "$25000 or more"
##  [5] "$20000 - 24999" "$15000 - 19999" "$10000 - 14999" "$8000 to 9999" 
##  [9] "$7000 to 7999"  "$6000 to 6999"  "$5000 to 5999"  "$4000 to 4999" 
## [13] "$3000 to 3999"  "$1000 to 2999"  "Lt $1000"       "Not applicable"
# Collapse the 16 original income levels into the three requested groups and
# count the respondents in each group.
gss_cat %>%
  mutate(
    rincome = fct_collapse(
      rincome,
      "$10000 or more" = c(
        "$25000 or more", "$20000 - 24999", "$15000 - 19999", "$10000 - 14999"
      ),
      "less than $10000" = c(
        "$8000 to 9999", "$7000 to 7999", "$6000 to 6999", "$5000 to 5999",
        "$4000 to 4999", "$3000 to 3999", "$1000 to 2999", "Lt $1000"
      ),
      # Every level that does not state an income goes into "Others". The
      # label has no trailing period, matching the category name in the task.
      "Others" = c("No answer", "Don't know", "Refused", "Not applicable")
    )
  ) %>%
  count(rincome)
## # A tibble: 3 × 2
##   rincome              n
##   <fct>            <int>
## 1 Others            8468
## 2 $10000 or more   10862
## 3 less than $10000  2153