library("dplyr")
library("DT")
library("psych")
library("ggplot2")
library("forcats")
I decided to use a publicly available data set containing employment data of most Public Officers of the Government of Belize for 2014.
I removed the names of the employees as a matter of courtesy and good will.
# Read the salary records, keeping text columns as plain character vectors
employees <- read.csv(
  "data/salaries.csv",
  sep = ",",
  header = TRUE,
  quote = '"',
  stringsAsFactors = FALSE
)
# Recode the gender codes as a labelled factor; any code other than
# "M" or "F" becomes NA
employees$Gender <- factor(
  employees$Gender,
  levels = c("M", "F"),
  labels = c("Male", "Female")
)
# Keep only the rows whose gender was recognised
employees <- employees %>%
  filter(!is.na(Gender))
# Restrict the analysis to established ("EST") and temporary ("TMPR") posts
employees <- employees %>%
  filter(Employment.Type %in% c("EST", "TMPR"))
# Turn the two remaining employment-type codes into a labelled factor
employees$Employment.Type <- factor(
  employees$Employment.Type,
  levels = c("EST", "TMPR"),
  labels = c("Established", "Temporary")
)
# Encode the education codes as a factor with a fixed level order;
# codes that are not listed become NA
employees$Education <- factor(
  employees$Education,
  levels = c(
    "AS", "BA", "BED", "BS",
    "CER", "CLE", "DIPL", "DR",
    "HS", "LD", "LLB", "LLM",
    "MA", "MD", "MED", "MSC",
    "PG", "PHD", "PSLC", "PGC",
    "PGD", "DIPED", "INTM", "PM",
    "XM", "EXDIP"
  )
)
# Fold the detailed education codes into broader levels, stored in a new
# Educational.Level column alongside the original Education column
employees <- employees %>%
  mutate(
    Educational.Level = fct_collapse(
      Education,
      Primary = "PSLC",
      Secondary = c("DIPL", "HS"),
      Certificate = c("CER", "CLE"),
      Associate = "AS",
      Undergraduate = c("BA", "BED", "BS", "LLB"),
      Diploma = c("DIPED", "EXDIP"),
      Graduate = c("MA", "MED", "MSC", "INTM", "PM", "XM", "LD", "LLM"),
      PostGraduate = c("PG", "PGD", "PGC"),
      Doctorate = c("PHD", "DR", "MD")
    )
  )
# Coerce the pay scale and annual salary columns to numeric values;
# entries that cannot be parsed as numbers become NA
employees <- employees %>%
  mutate(
    Pay.Scale = as.numeric(Pay.Scale),
    Annual.Salary = as.numeric(Annual.Salary)
  )
# We are interested in senior employees. A pay scale strictly greater than 8
# (that is, 9 and up for whole-number scales) is considered senior.
senior.employees <- filter(employees, Pay.Scale > 8)

# Interactive preview of the columns used in the analysis, rows in random order.
# sample() applied directly to a data frame shuffles its columns, not its rows,
# so the rows are reordered with an explicit permutation of the row indices.
# NOTE(review): `filter = FALSE` is passed through as a DataTables option; DT's
# column filters are controlled by the `filter` argument of datatable() itself.
# Confirm which of the two was intended.
senior.employees %>%
  select(Gender, Pay.Scale, Educational.Level, Employment.Type) %>%
  slice(sample.int(n())) %>%
  datatable(options = list(filter = FALSE))
Is there a gender bias in the appointment of senior public officers of Belize?
Each case represents a staff member of the Public Service of Belize. There are 3158 observations in this data set.
Information on employees of the Government of Belize is entered into the HRMIS, a component of the Belize Public Sector Reform Strategy and a modernization initiative of the Government of Belize. This data is collected and managed by the Government of Belize.
This is an observational study of data that was collected by the Government of Belize.
This data is collected by Belize’s Ministry of Finance, Public Service, Energy and Public Utilities.
The response variable is pay scale, which is a formal indicator of seniority as per the Public Service Regulations of Belize.
Pay scale is a numerical variable.
The explanatory variable is gender and it is categorical.
Employment type and level of education may also be considered explanatory variables. It is my intent to consider the overall rate and seniority rate within the two groups of employment types, and also within the different levels of education. Both employment type and level of education are categorical variables.
# Head count by gender across all retained employees
employees$Gender %>% summary()
## Male Female
## 1456 1702
# Head count by gender among the senior employees only
senior.employees$Gender %>% summary()
## Male Female
## 523 823
# Descriptive statistics of senior pay scales for every combination of
# gender and employment type
with(
  senior.employees,
  describeBy(Pay.Scale, group = list(Gender, Employment.Type))
)
##
## Descriptive statistics by group
## : Male
## : Established
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 432 15.61 4.93 16 15.35 7.41 9 26 17 0.27 -1.12
## se
## X1 0.24
## --------------------------------------------------------
## : Female
## : Established
## vars n mean sd median trimmed mad min max range skew kurtosis
## X1 1 684 15.29 4.39 16 14.98 5.93 9 26 17 0.34 -0.86
## se
## X1 0.17
## --------------------------------------------------------
## : Male
## : Temporary
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 91 14.04 4.41 12 13.71 2.97 9 23 14 0.46 -1.36 0.46
## --------------------------------------------------------
## : Female
## : Temporary
## vars n mean sd median trimmed mad min max range skew kurtosis se
## X1 1 139 13.81 4 12 13.44 2.97 9 23 14 0.5 -1.04 0.34
# Bar chart: senior employees of each gender as a share of ALL retained
# employees.
# NOTE(review): the denominator is the total head count, not the head count of
# each gender, so the bars also reflect how many men and women are employed
# overall. Confirm this is the intended definition of "rate".
senior.employees %>%
  count(Gender) %>%
  mutate(Rate = n / nrow(employees)) %>%
  ggplot(aes(x = Gender, y = Rate, fill = Gender)) +
  geom_col() +
  labs(title = "Rate of Seniority by Gender")

# Stacked bar chart: each gender / educational-level combination as a share of
# all senior employees, one bar per gender. count() returns ungrouped data, so
# no grouping is carried into the plot.
senior.employees %>%
  count(Gender, Educational.Level) %>%
  mutate(Rate = n / nrow(senior.employees)) %>%
  ggplot(aes(x = Gender, y = Rate, fill = Educational.Level)) +
  geom_col() +
  labs(title = "Distribution of Seniority by Gender & Education Level")