# Load the worksheet from the sibling data folder into a tibble.
# NOTE(review): `diagnosed` is read as <chr> even though the values shown
# look numeric -- confirm whether it should be converted before analysis.
data <- read_excel(path = "../00_data/my_data.xlsx")

# Preview the imported rows.
data
## # A tibble: 450 × 5
## service component severity diagnosed year
## <chr> <chr> <chr> <chr> <dbl>
## 1 Army Active Penetrating 189 2006
## 2 Army Active Severe 102 2006
## 3 Army Active Moderate 709 2006
## 4 Army Active Mild 5896 2006
## 5 Army Active Not Classifiable 122 2006
## 6 Army Guard Penetrating 33 2006
## 7 Army Guard Severe 26 2006
## 8 Army Guard Moderate 177 2006
## 9 Army Guard Mild 1332 2006
## 10 Army Guard Not Classifiable 29 2006
## # ℹ 440 more rows
# Number of rows recorded for each service branch.
count(data, service)
## # A tibble: 4 × 2
## service n
## <chr> <int>
## 1 Air Force 135
## 2 Army 135
## 3 Marines 90
## 4 Navy 90
# Manual (non-alphabetical) ordering for the service branches.
service_levels <- c("Navy", "Army", "Marines", "Air Force")

# Copy of `data` in which `service` is a factor using that level order.
data_rev <- mutate(data, service = factor(service, levels = service_levels))
Make two bar charts here: one before reordering the factor levels and another after.
# Mean `year` for each service; grouping on the factor keeps its level order.
summarise(
  group_by(data_rev, service),
  year = mean(year, na.rm = TRUE)
)
## # A tibble: 4 × 2
## service year
## <fct> <dbl>
## 1 Navy 2010
## 2 Army 2010
## 3 Marines 2010
## 4 Air Force 2010
# Print the releveled data; `service` is now a factor (<fct>) column.
data_rev
## # A tibble: 450 × 5
## service component severity diagnosed year
## <fct> <chr> <chr> <chr> <dbl>
## 1 Army Active Penetrating 189 2006
## 2 Army Active Severe 102 2006
## 3 Army Active Moderate 709 2006
## 4 Army Active Mild 5896 2006
## 5 Army Active Not Classifiable 122 2006
## 6 Army Guard Penetrating 33 2006
## 7 Army Guard Severe 26 2006
## 8 Army Guard Moderate 177 2006
## 9 Army Guard Mild 1332 2006
## 10 Army Guard Not Classifiable 29 2006
## # ℹ 440 more rows
# Before reordering: services on the y-axis in the manually set level order.
ggplot(data_rev, aes(x = year, y = service)) +
  geom_point()

# After reordering: service levels sorted by `year` with fct_reorder().
# NOTE(review): the per-service mean year is identical (2010), so this
# reordering likely leaves the axis unchanged -- consider ordering by a
# measure that actually differs between services.
ggplot(data_rev, aes(x = year, y = fct_reorder(service, year))) +
  geom_point()
Show examples of three functions: `fct_recode()`, `fct_collapse()`, and `fct_lump()`.
# fct_recode(): rename each level (new name = "old name"), then tally.
# The renamed factor is only used for this count; `data_rev` is not modified.
data_rev %>%
  mutate(
    service = fct_recode(
      service,
      USNavy = "Navy",
      USArmy = "Army",
      USMarines = "Marines",
      `USAir Force` = "Air Force"
    )
  ) %>%
  count(service)
## # A tibble: 4 × 2
## service n
## <fct> <int>
## 1 USNavy 90
## 2 USArmy 135
## 3 USMarines 90
## 4 USAir Force 135
# fct_collapse(): merge several levels into one (new name = old level names).
# The old names must be levels that exist in `data$service`. The "US"-prefixed
# names from the fct_recode() example were never saved, so referring to them
# here triggered an "Unknown levels" warning and collapsed nothing.
data %>%
  mutate(
    service = fct_collapse(
      service,
      USNavy = "Navy",
      Other = c("Army", "Marines", "Air Force")
    )
  ) %>%
  count(service)
## # A tibble: 2 × 2
## service n
## <fct> <int>
## 1 Other 360
## 2 USNavy 90
# fct_lump_n(): keep the `n` most frequent levels and lump the rest into
# "Other". The result overwrites `service` so that count() reports the lumped
# factor; previously it was stored in an unused `rank` column while the
# untouched `service` column was counted, so no lumping was ever shown.
data %>%
  mutate(service = fct_lump_n(service, n = 2)) %>%
  count(service)
## # A tibble: 3 × 2
## service n
## <fct> <int>
## 1 Air Force 135
## 2 Army 135
## 3 Other 180
No need to do anything here.