# excel file
data <- read_excel("Salaries.xlsx")
data
## # A tibble: 397 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
## 7 Prof B 30 23 Male 175000
## 8 Prof B 45 45 Male 147765
## 9 Prof B 21 20 Male 119250
## 10 Prof B 18 18 Female 129000
## # … with 387 more rows
# The variable with the least amount of unique observations is yrs.service..
data <- data %>%
group_by(yrs.service) %>%
summarise(avg_rank = mean(rank, na.rm = TRUE))
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
## Warning in mean.default(rank, na.rm = TRUE): argument is not numeric or logical:
## returning NA
Make two bar charts here - one before ordering another after
# Transform data: calculate average salary by rank
data <- data %>%
group_by("rank") %>%
summarise(
avg_salary = mean("salary", na.rm = TRUE)
)
## Warning in mean.default("salary", na.rm = TRUE): argument is not numeric or
## logical: returning NA
data
## # A tibble: 1 × 2
## `"rank"` avg_salary
## <chr> <dbl>
## 1 rank NA
# Plot before ordering
data %>%
ggplot(aes(x = "rank", y = "salary")) +
geom_point()
# Plot Before Ordering
data %>%
ggplot(aes(x = "rank", y = "yrs.service")) +
geom_point()
# Plot After Ordering
data %>%
ggplot(aes(x = "rank", y = fct_reorder(.f = "yrs.service", .x = "rank"))) +
geom_point() +
# Labeling
labs(y = NULL, x = "Average Years of Service")
Show examples of three functions:
# fct_recode
data %>% distinct("yrs.service")
## # A tibble: 1 × 1
## `"yrs.service"`
## <chr>
## 1 yrs.service
# fct_collapse
data %>% mutate("yrs.service")
## # A tibble: 1 × 3
## `"rank"` avg_salary `"yrs.service"`
## <chr> <dbl> <chr>
## 1 rank NA yrs.service
# fct_lump
data %>% count("yrs.service")
## # A tibble: 1 × 2
## `"yrs.service"` n
## <chr> <int>
## 1 yrs.service 1
No need to do anything here.