library(tidyverse)
## -- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
## v dplyr 1.1.4 v readr 2.1.6
## v forcats 1.0.1 v stringr 1.6.0
## v ggplot2 4.0.1 v tibble 3.3.1
## v lubridate 1.9.4 v tidyr 1.3.2
## v purrr 1.2.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
library(dplyr)
library(ggplot2)
library(forcats)
# EX1 ----
# Convert `class` to a factor whose levels follow the explicit order
# listed below.
mpg <- mutate(
  mpg,
  class = factor(
    class,
    levels = c(
      "2seater",
      "subcompact",
      "compact",
      "midsize",
      "suv",
      "minivan",
      "pickup"
    )
  )
)
# EX2 ----
# Show the levels of `marital` in their stored order.
levels(gss_cat[["marital"]])
## [1] "No answer" "Never married" "Separated" "Divorced"
## [5] "Widowed" "Married"
# Count the rows for each marital status, largest count first.
count(gss_cat, marital, sort = TRUE)
## # A tibble: 6 x 2
## marital n
## <fct> <int>
## 1 Married 10117
## 2 Never married 5416
## 3 Divorced 3383
## 4 Widowed 1807
## 5 Separated 743
## 6 No answer 17
# EX3 ----
# Mean arrival delay per destination, sorted from smallest to largest.
#
# A destination whose `arr_delay` values are all NA has a mean of NaN under
# `na.rm = TRUE`. It is filtered out here explicitly so it does not linger
# as an empty factor level and get discarded later by geom_col() with a
# "Removed 1 row containing missing values" warning.
avg_delay <- flights %>%
  group_by(dest) %>%
  # `.groups = "drop"` returns an ungrouped tibble, so no ungroup() is needed.
  summarise(
    avg_arr_delay = mean(arr_delay, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  filter(!is.na(avg_arr_delay)) %>%
  arrange(avg_arr_delay) %>%
  # Freeze the level order to the sorted row order so the bars are drawn by
  # increasing delay rather than alphabetically by airport code.
  mutate(dest = fct_inorder(dest))

# Bar chart of the mean arrival delay, one bar per destination.
ggplot(avg_delay, aes(x = dest, y = avg_arr_delay)) +
  geom_col(fill = "blue") +
  labs(
    x = "Destination Airport",
    y = "Average Arrival Delay (minutes)",
    title = "Average Arrival Delay by Destination Airport"
  ) +
  theme_minimal()
# Show the levels of `rincome` in their stored order.
levels(gss_cat[["rincome"]])
## [1] "No answer" "Don't know" "Refused" "$25000 or more"
## [5] "$20000 - 24999" "$15000 - 19999" "$10000 - 14999" "$8000 to 9999"
## [9] "$7000 to 7999" "$6000 to 6999" "$5000 to 5999" "$4000 to 4999"
## [13] "$3000 to 3999" "$1000 to 2999" "Lt $1000" "Not applicable"
# Add `rincome_cat`, which collapses the levels of `rincome` into three
# groups: "$10000 or more", "less than $10000", and "Others" (the four
# non-income answers).
gss_cat <- mutate(
  gss_cat,
  rincome_cat = fct_collapse(
    rincome,
    "$10000 or more" = c(
      "$10000 - 14999", "$15000 - 19999",
      "$20000 - 24999", "$25000 or more"
    ),
    "less than $10000" = c(
      "$8000 to 9999", "$7000 to 7999",
      "$6000 to 6999", "$5000 to 5999",
      "$4000 to 4999", "$3000 to 3999",
      "$1000 to 2999", "Lt $1000"
    ),
    "Others" = c(
      "No answer", "Don't know",
      "Refused", "Not applicable"
    )
  )
)