# Import the salary workbook from the sibling data folder (relative path)
data <- read_excel(path = "../00_data/Salaries.xlsx")

# Display the imported tibble
data
## # A tibble: 397 × 6
## rank discipline yrs.since.phd yrs.service sex salary
## <chr> <chr> <dbl> <dbl> <chr> <dbl>
## 1 Prof B 19 18 Male 139750
## 2 Prof B 20 16 Male 173200
## 3 AsstProf B 4 3 Male 79750
## 4 Prof B 45 39 Male 115000
## 5 Prof B 40 41 Male 141500
## 6 AssocProf B 6 6 Male 97000
## 7 Prof B 30 23 Male 175000
## 8 Prof B 45 45 Male 147765
## 9 Prof B 21 20 Male 119250
## 10 Prof B 18 18 Female 129000
## # ℹ 387 more rows
Do male or female professors earn more than the other?
# List the column names available in the imported data
colnames(data)
## [1] "rank" "discipline" "yrs.since.phd" "yrs.service"
## [5] "sex" "salary"
# Per-sex salary statistics: mean, standard deviation, and group size.
# Missing salaries are skipped by mean() and sd(); n counts every row.
salary_summary <- summarise(
  group_by(data, sex),
  mean_salary = mean(salary, na.rm = TRUE),
  sd_salary = sd(salary, na.rm = TRUE),
  n = n()
)

# Show the resulting summary table
salary_summary
## # A tibble: 2 × 4
## sex mean_salary sd_salary n
## <chr> <dbl> <dbl> <int>
## 1 Female 101002. 25952. 39
## 2 Male 115090. 30437. 358
# Side-by-side boxplots of salary for each sex; outliers are drawn in red
ggplot(data = data, mapping = aes(x = sex, y = salary, fill = sex)) +
  labs(
    title = "Salary Comparison by Sex",
    x = "Sex",
    y = "Salary",
    caption = "Each box shows salary distribution for male vs female professors"
  ) +
  geom_boxplot(outlier.color = "red", alpha = 0.7) +
  theme_minimal() +
  # The fill colour repeats what the x axis already shows, so drop the legend
  theme(legend.position = "none")
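Based on the data shown, male professors have a somewhat higher average salary than female professors (about $115,090 vs. $101,002), although the sample contains far fewer women (39) than men (358).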