#install.packages("tidyverse")
#install.packages("tinyvex")
#install.packages("RColorBrewer")
#install.packages("highcharter")
#install.packages("ggplot2")
#install.packages("ggplot1")
#install.packages("ggrepel")
#install.packages("ggthemes")
#install.packages("plotly")
#install.packages("rvest")
#install.packages("shiny")
#install.packages("urltools")
#install.packages("tmap")
#install.packages("tmaptools")
#install.packages("leaflet")
#install.packages("scales")
#install.packages("leaflet.extras")
#install.packages("rio")
#install.packages("htmlwidgets")
#install.packages("sf")
#install.packages("dplyr")
#install.packages("sp")
#tinytex::install_tinytex()
#install.packages("psych")
#install.packages("janitor")
#install.packages("CGPfunctions")
#install.packages("vtree")
#install.packages("ggpubr")
#install.packages("plotly")
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(tinytex)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.7 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(RColorBrewer)
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(ggplot2)
library(ggrepel)
library(ggthemes)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(rvest)
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
library(shiny)
library(urltools)
library(tmap)
library(tmaptools)
library(leaflet)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
library(leaflet.extras)
library(rio)
##
## Attaching package: 'rio'
## The following object is masked from 'package:plotly':
##
## export
library(htmlwidgets)
library(sf)
## Linking to GEOS 3.9.1, GDAL 3.3.2, PROJ 7.2.1; sf_use_s2() is TRUE
library(dplyr)
library(sp)
library(psych)
##
## Attaching package: 'psych'
## The following objects are masked from 'package:scales':
##
## alpha, rescale
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
library(ggpubr)
library(tidyverse)
setwd("C:/Users/tgoode/Desktop")
salaries <- read_csv("employeesalaries2021.csv")
## Rows: 9907 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): Department, Department_Name, Division, Gender, Grade
## dbl (3): Base_Salary, 2021_Overtime_Pay, 2021_Longevity_Pay
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(salaries)
## Rows: 9,907
## Columns: 8
## $ Department <chr> "ABS", "ABS", "ABS", "ABS", "ABS", "ABS", "ABS", …
## $ Department_Name <chr> "Alcohol Beverage Services", "Alcohol Beverage Se…
## $ Division <chr> "Beer Loading", "Liquor and Wine Delivery Operati…
## $ Gender <chr> "M", "M", "M", "M", "M", "M", "M", "M", "M", "M",…
## $ Base_Salary <dbl> 87969.00, 80086.00, 80086.00, 70814.00, 76419.00,…
## $ `2021_Overtime_Pay` <dbl> 32953.25, 32656.04, 31369.15, 29837.85, 21379.22,…
## $ `2021_Longevity_Pay` <dbl> NA, 1105.44, 1591.20, NA, NA, NA, NA, NA, 3022.88…
## $ Grade <chr> "20", "18", "18", "18", "17", "20", "17", "20", "…
str(salaries)
## spec_tbl_df [9,907 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Department : chr [1:9907] "ABS" "ABS" "ABS" "ABS" ...
## $ Department_Name : chr [1:9907] "Alcohol Beverage Services" "Alcohol Beverage Services" "Alcohol Beverage Services" "Alcohol Beverage Services" ...
## $ Division : chr [1:9907] "Beer Loading" "Liquor and Wine Delivery Operations" "Beer Delivery Operations" "Beer Delivery Operations" ...
## $ Gender : chr [1:9907] "M" "M" "M" "M" ...
## $ Base_Salary : num [1:9907] 87969 80086 80086 70814 76419 ...
## $ 2021_Overtime_Pay : num [1:9907] 32953 32656 31369 29838 21379 ...
## $ 2021_Longevity_Pay: num [1:9907] NA 1105 1591 NA NA ...
## $ Grade : chr [1:9907] "20" "18" "18" "18" ...
## - attr(*, "spec")=
## .. cols(
## .. Department = col_character(),
## .. Department_Name = col_character(),
## .. Division = col_character(),
## .. Gender = col_character(),
## .. Base_Salary = col_double(),
## .. `2021_Overtime_Pay` = col_double(),
## .. `2021_Longevity_Pay` = col_double(),
## .. Grade = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
mean(salaries$Base_Salary)
## [1] 81339.53
median(salaries$Base_Salary)
## [1] 78789.38
sd(salaries$Base_Salary)
## [1] 30639.34
var(salaries$Base_Salary)
## [1] 938768931
summary(salaries)
## Department Department_Name Division Gender
## Length:9907 Length:9907 Length:9907 Length:9907
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Base_Salary 2021_Overtime_Pay 2021_Longevity_Pay Grade
## Min. : 11147 Min. : 1 Min. : 19.93 Length:9907
## 1st Qu.: 59939 1st Qu.: 1091 1st Qu.: 1981.28 Class :character
## Median : 78789 Median : 4818 Median : 3164.72 Mode :character
## Mean : 81340 Mean : 10561 Mean : 3674.41
## 3rd Qu.: 96665 3rd Qu.: 12669 3rd Qu.: 5419.68
## Max. :262000 Max. :219326 Max. :12100.40
## NA's :4312 NA's :7494
salaries %>% count(Gender, sort = TRUE)
## # A tibble: 2 × 2
## Gender n
## <chr> <int>
## 1 M 5806
## 2 F 4101
tabyl(salaries, Gender, Department)
## Gender ABS BOA BOE CAT CCL CEC CEX COR CUS DEP DGS DHS DOT DPS ECM FIN FRS
## F 96 3 20 53 66 37 30 197 14 72 66 6 256 61 1 81 109
## M 357 0 6 23 43 57 17 313 10 94 337 8 1036 167 1 38 1265
## HCA HHS HRC IGR LIB MPB NDA OAG OAS OCP OHR OIG OLO OLR OMB ORE PIO POL PRO
## 52 1489 4 4 278 1 11 3 53 9 44 8 10 2 22 4 36 690 13
## 44 288 5 0 79 1 9 6 19 9 16 5 6 6 16 0 31 1186 14
## REC SHF TBS ZAH
## 89 57 51 3
## 66 141 86 1
Used to select department for gender wage gap comparison. The closest to 50% each gender will be considered for further review
tabyl(salaries, Gender, Department) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1)
## Gender ABS BOA BOE CAT CCL CEC CEX COR CUS DEP DGS
## F 21.2% 100.0% 76.9% 69.7% 60.6% 39.4% 63.8% 38.6% 58.3% 43.4% 16.4%
## M 78.8% 0.0% 23.1% 30.3% 39.4% 60.6% 36.2% 61.4% 41.7% 56.6% 83.6%
## DHS DOT DPS ECM FIN FRS HCA HHS HRC IGR LIB MPB NDA
## 42.9% 19.8% 26.8% 50.0% 68.1% 7.9% 54.2% 83.8% 44.4% 100.0% 77.9% 50.0% 55.0%
## 57.1% 80.2% 73.2% 50.0% 31.9% 92.1% 45.8% 16.2% 55.6% 0.0% 22.1% 50.0% 45.0%
## OAG OAS OCP OHR OIG OLO OLR OMB ORE PIO POL PRO REC
## 33.3% 73.6% 50.0% 73.3% 61.5% 62.5% 25.0% 57.9% 100.0% 53.7% 36.8% 48.1% 57.4%
## 66.7% 26.4% 50.0% 26.7% 38.5% 37.5% 75.0% 42.1% 0.0% 46.3% 63.2% 51.9% 42.6%
## SHF TBS ZAH
## 28.8% 37.2% 75.0%
## 71.2% 62.8% 25.0%
Used to select department for gender wage gap comparison. The closest to 50% each gender will be selected
tabyl(salaries, Gender, Department_Name) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1)
## Gender Alcohol Beverage Services Board of Appeals Department
## F 21.2% 100.0%
## M 78.8% 0.0%
## Board of Elections Community Engagement Cluster
## 76.9% 39.4%
## 23.1% 60.6%
## Community Use of Public Facilities Correction and Rehabilitation
## 58.3% 38.6%
## 41.7% 61.4%
## County Attorney's Office County Council Department of Environmental Protection
## 69.7% 60.6% 43.4%
## 30.3% 39.4% 56.6%
## Department of Finance Department of General Services
## 68.1% 16.4%
## 31.9% 83.6%
## Department of Health and Human Services
## 83.8%
## 16.2%
## Department of Housing and Community Affairs Department of Permitting Services
## 54.2% 26.8%
## 45.8% 73.2%
## Department of Police Department of Public Libraries Department of Recreation
## 36.8% 77.9% 57.4%
## 63.2% 22.1% 42.6%
## Department of Technology and Enterprise Business Solutions
## 37.2%
## 62.8%
## Department of Transportation Ethics Commission Fire and Rescue Services
## 19.8% 50.0% 7.9%
## 80.2% 50.0% 92.1%
## Merit System Protection Board Department Non-Departmental Account
## 50.0% 55.0%
## 50.0% 45.0%
## Office of Agriculture Office of Animal Services Office of Consumer Protection
## 33.3% 73.6% 50.0%
## 66.7% 26.4% 50.0%
## Office of Emergency Management and Homeland Security Office of Human Resources
## 42.9% 73.3%
## 57.1% 26.7%
## Office of Human Rights Office of Intergovernmental Relations Department
## 44.4% 100.0%
## 55.6% 0.0%
## Office of Labor Relations Office of Legislative Oversight
## 25.0% 62.5%
## 75.0% 37.5%
## Office of Management and Budget Office of Procurement
## 57.9% 48.1%
## 42.1% 51.9%
## Office of Public Information Office of Racial Equity and Social Justice
## 53.7% 100.0%
## 46.3% 0.0%
## Office of the Inspector General Office of Zoning and Administrative Hearings
## 61.5% 75.0%
## 38.5% 25.0%
## Offices of the County Executive Sheriff's Office
## 63.8% 28.8%
## 36.2% 71.2%
library(CGPfunctions)
PlotXTabs(salaries, Gender, Department)
## Plotted dataset salaries variables Gender by Department
###### The departments with an even distribution by gender are too small
to use for a comparison of wages by gender with a department. For
example, the Merit System Protection Board (MPB) has two employees
working within different pay bands.
library(vtree)
vtree(salaries, c("Gender", "Department"),
fillcolor = c( Department = "#e7d4e8", Gender = "#99d8c9"),
horiz = FALSE)
tabyl(salaries, Gender, Grade)
## Gender 0 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27
## F 33 164 9 72 179 50 380 324 82 250 64 200 306 58 295 477 251 98 14
## M 52 84 18 138 79 163 723 106 89 175 206 117 233 78 205 119 159 96 21
## 28 29 30 31 32 33 34 35 36 38 39 40 5 7 8 9 A1 A2 A3 B1 B2 B3 B4 B6 C1
## 76 2 10 0 31 0 0 0 0 0 0 2 4 0 31 1 15 8 3 6 8 3 3 0 7
## 94 2 7 2 23 1 3 1 1 1 3 0 11 2 12 29 135 29 17 120 144 27 10 3 16
## C2 C3 C4 C5 C6 D1 D2 D3 D4 F1 F2 F3 F3P F4 G1 G2 G3 G4 M1 M2 M3 MD2 MD3
## 1 6 10 42 11 6 4 1 0 5 22 18 1 10 0 3 2 12 12 71 133 1 3
## 3 14 62 107 36 25 9 4 1 58 205 439 0 206 3 9 20 61 14 69 132 0 2
## MD4 P1 P2 P3 P4 P4P P5
## 2 6 16 18 153 8 8
## 2 19 59 59 571 1 62
Used to select department for gender wage gap comparison. The closest to 50% each gender will be considered for further review
tabyl(salaries, Gender, Grade) %>%
adorn_percentages("col") %>%
adorn_pct_formatting(digits = 1)
## Gender 0 10 11 12 13 14 15 16 17 18 19 20
## F 38.8% 66.1% 33.3% 34.3% 69.4% 23.5% 34.5% 75.3% 48.0% 58.8% 23.7% 63.1%
## M 61.2% 33.9% 66.7% 65.7% 30.6% 76.5% 65.5% 24.7% 52.0% 41.2% 76.3% 36.9%
## 21 22 23 24 25 26 27 28 29 30 31 32
## 56.8% 42.6% 59.0% 80.0% 61.2% 50.5% 40.0% 44.7% 50.0% 58.8% 0.0% 57.4%
## 43.2% 57.4% 41.0% 20.0% 38.8% 49.5% 60.0% 55.3% 50.0% 41.2% 100.0% 42.6%
## 33 34 35 36 38 39 40 5 7 8 9
## 0.0% 0.0% 0.0% 0.0% 0.0% 0.0% 100.0% 26.7% 0.0% 72.1% 3.3%
## 100.0% 100.0% 100.0% 100.0% 100.0% 100.0% 0.0% 73.3% 100.0% 27.9% 96.7%
## A1 A2 A3 B1 B2 B3 B4 B6 C1 C2 C3 C4 C5
## 10.0% 21.6% 15.0% 4.8% 5.3% 10.0% 23.1% 0.0% 30.4% 25.0% 30.0% 13.9% 28.2%
## 90.0% 78.4% 85.0% 95.2% 94.7% 90.0% 76.9% 100.0% 69.6% 75.0% 70.0% 86.1% 71.8%
## C6 D1 D2 D3 D4 F1 F2 F3 F3P F4 G1 G2
## 23.4% 19.4% 30.8% 20.0% 0.0% 7.9% 9.7% 3.9% 100.0% 4.6% 0.0% 25.0%
## 76.6% 80.6% 69.2% 80.0% 100.0% 92.1% 90.3% 96.1% 0.0% 95.4% 100.0% 75.0%
## G3 G4 M1 M2 M3 MD2 MD3 MD4 P1 P2 P3 P4 P4P
## 9.1% 16.4% 46.2% 50.7% 50.2% 100.0% 60.0% 50.0% 24.0% 21.3% 23.4% 21.1% 88.9%
## 90.9% 83.6% 53.8% 49.3% 49.8% 0.0% 40.0% 50.0% 76.0% 78.7% 76.6% 78.9% 11.1%
## P5
## 11.4%
## 88.6%
library(CGPfunctions)
PlotXTabs(salaries, Gender, Grade)
## Plotted dataset salaries variables Gender by Grade
salaries %>%
filter(Grade == 17)
## # A tibble: 171 × 8
## Department Department_Name Division Gender Base_Salary `2021_Overtime…`
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 ABS Alcohol Beverage Ser… Stock L… M 76419 21379.
## 2 ABS Alcohol Beverage Ser… Liquor … M 76419 17666.
## 3 ABS Alcohol Beverage Ser… Special… M 58301. 15625.
## 4 ABS Alcohol Beverage Ser… Beer Wa… M 55152. 11309.
## 5 CCL County Council Council… F 69643. 7357.
## 6 CCL County Council Council… F 76419 82.7
## 7 CCL County Council Council… M 61428 NA
## 8 CCL County Council Council… F 64513. NA
## 9 CCL County Council CCL 01 … F 76419 NA
## 10 CCL County Council Council… F 76419 NA
## # … with 161 more rows, and 2 more variables: `2021_Longevity_Pay` <dbl>,
## # Grade <chr>
salaries %>%
filter(Grade == 17) %>%
ggplot() +
geom_point(aes(x=Base_Salary, y=Grade, color=Gender)) +
ggtitle("2021 Salaries in Pay Grade 17") +
ylab("Pay Grade 17") +
xlab("2021 Salaries") +
labs(fill = "Gender")
salaries %>%
filter(Grade == 26)
## # A tibble: 194 × 8
## Department Department_Name Division Gender Base_Salary `2021_Overtime…`
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 ABS Alcohol Beverage Ser… IT Admi… F 116825 NA
## 2 ABS Alcohol Beverage Ser… Adminis… M 116825 NA
## 3 CCL County Council CCL 01 … F 17963 NA
## 4 CCL County Council CCL 01 … F 48000 NA
## 5 CCL County Council CCL 01 … M 52816. NA
## 6 CCL County Council CCL 01 … F 55825 NA
## 7 CCL County Council CCL 01 … F 63497. NA
## 8 CCL County Council CCL 01 … M 68000 NA
## 9 CCL County Council CCL 01 … M 68284. NA
## 10 CCL County Council CCL 01 … M 78189. NA
## # … with 184 more rows, and 2 more variables: `2021_Longevity_Pay` <dbl>,
## # Grade <chr>
salaries %>%
filter(Grade == 26) %>%
ggplot() +
geom_point(aes(x=Base_Salary, y=Grade, color=Gender)) +
ggtitle("2021 Salaries in Pay Grade 26") +
ylab("Pay Grade 26") +
xlab("2021 Salaries") +
labs(fill = "Gender")
salaries %>%
filter(Grade == 29)
## # A tibble: 4 × 8
## Department Department_Name Division Gender Base_Salary `2021_Overtime…`
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 FIN Department of Finance Divisio… M 134750 NA
## 2 FRS Fire and Rescue Servi… Behavio… F 105560 NA
## 3 HHS Department of Health … Childre… F 134750 NA
## 4 OAS Office of Animal Serv… Animal … M 131316. 931.
## # … with 2 more variables: `2021_Longevity_Pay` <dbl>, Grade <chr>
salaries %>%
filter(Grade == 29) %>%
ggplot() +
geom_point(aes(x=Base_Salary, y=Grade, color=Gender)) +
ggtitle("2021 Salaries in Pay Grade 29") +
ylab("Pay Grade 29") +
xlab("2021 Salaries") +
labs(fill = "Gender")
salaries %>%
filter(Grade == "M2")
## # A tibble: 140 × 8
## Department Department_Name Division Gender Base_Salary `2021_Overtime…`
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 ABS Alcohol Beverage Ser… Licensu… F 147755. NA
## 2 ABS Alcohol Beverage Ser… ABS 85 … F 166656. NA
## 3 ABS Alcohol Beverage Ser… Retail … M 166729. NA
## 4 ABS Alcohol Beverage Ser… Adminis… M 169856 NA
## 5 ABS Alcohol Beverage Ser… Wholesa… M 169856 NA
## 6 CAT County Attorney's Of… Health … M 169853. NA
## 7 CAT County Attorney's Of… Insuran… F 169854. NA
## 8 CAT County Attorney's Of… Human R… F 169856 NA
## 9 CAT County Attorney's Of… Governm… M 169856 NA
## 10 CAT County Attorney's Of… Zoning,… M 169856 NA
## # … with 130 more rows, and 2 more variables: `2021_Longevity_Pay` <dbl>,
## # Grade <chr>
salaries %>%
filter(Grade == "M2") %>%
ggplot() +
geom_point(aes(x=Base_Salary, y=Grade, color=Gender)) +
ggtitle("2021 Salaries in Pay Grade M2") +
ylab("Pay Grade M2") +
xlab("2021 Salaries") +
labs(fill = "Gender")
salaries %>%
filter(Grade == "M3")
## # A tibble: 265 × 8
## Department Department_Name Division Gender Base_Salary `2021_Overtime…`
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 ABS Alcohol Beverage Ser… Wholesa… M 126976. NA
## 2 ABS Alcohol Beverage Ser… Adminis… M 129102. NA
## 3 ABS Alcohol Beverage Ser… Wholesa… M 133219. NA
## 4 ABS Alcohol Beverage Ser… Adminis… F 143447. NA
## 5 ABS Alcohol Beverage Ser… IT Admi… M 146923 NA
## 6 ABS Alcohol Beverage Ser… Adminis… M 146923 NA
## 7 BOA Board of Appeals Dep… Executi… F 146923 NA
## 8 BOE Board of Elections Directo… F 146917. NA
## 9 CAT County Attorney's Of… County … F 146921. NA
## 10 CCL County Council Council… F 99274. NA
## # … with 255 more rows, and 2 more variables: `2021_Longevity_Pay` <dbl>,
## # Grade <chr>
salaries %>%
filter(Grade == "M3") %>%
ggplot() +
geom_point(aes(x=Base_Salary, y=Grade, color=Gender)) +
ggtitle("2021 Salaries in Pay Grade M3") +
ylab("Pay Grade M3") +
xlab("2021 Salaries") +
labs(fill = "Gender")
salaries %>%
filter(Grade == "MD4")
## # A tibble: 4 × 8
## Department Department_Name Division Gender Base_Salary `2021_Overtime…`
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 HHS Department of Health … Behavio… F 110683. 6661.
## 2 HHS Department of Health … Behavio… M 113656 NA
## 3 HHS Department of Health … Public … F 204581. NA
## 4 HHS Department of Health … Behavio… M 227312 NA
## # … with 2 more variables: `2021_Longevity_Pay` <dbl>, Grade <chr>
salaries %>%
filter(Grade == "MD4") %>%
ggplot() +
geom_point(aes(x=Base_Salary, y=Grade, color=Gender)) +
ggtitle("2021 Salaries in Pay Grade MD4") +
ylab("Pay Grade MD4") +
xlab("2021 Salaries") +
labs(fill = "Gender")
salaries2 <- salaries %>%
filter(Grade %in% c("17", "26", "29", "M2", "M3", "MD4"))
salaries2
## # A tibble: 778 × 8
## Department Department_Name Division Gender Base_Salary `2021_Overtime…`
## <chr> <chr> <chr> <chr> <dbl> <dbl>
## 1 ABS Alcohol Beverage Ser… Stock L… M 76419 21379.
## 2 ABS Alcohol Beverage Ser… Liquor … M 76419 17666.
## 3 ABS Alcohol Beverage Ser… Special… M 58301. 15625.
## 4 ABS Alcohol Beverage Ser… Beer Wa… M 55152. 11309.
## 5 ABS Alcohol Beverage Ser… IT Admi… F 116825 NA
## 6 ABS Alcohol Beverage Ser… Adminis… M 116825 NA
## 7 ABS Alcohol Beverage Ser… Wholesa… M 126976. NA
## 8 ABS Alcohol Beverage Ser… Adminis… M 129102. NA
## 9 ABS Alcohol Beverage Ser… Wholesa… M 133219. NA
## 10 ABS Alcohol Beverage Ser… Adminis… F 143447. NA
## # … with 768 more rows, and 2 more variables: `2021_Longevity_Pay` <dbl>,
## # Grade <chr>
str(salaries2)
## spec_tbl_df [778 × 8] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Department : chr [1:778] "ABS" "ABS" "ABS" "ABS" ...
## $ Department_Name : chr [1:778] "Alcohol Beverage Services" "Alcohol Beverage Services" "Alcohol Beverage Services" "Alcohol Beverage Services" ...
## $ Division : chr [1:778] "Stock Liquor and Wine Warehouse Operations" "Liquor and WIne Loading" "Special Order Liquor and Wine Warehouse Operations" "Beer Warehouse Operations" ...
## $ Gender : chr [1:778] "M" "M" "M" "M" ...
## $ Base_Salary : num [1:778] 76419 76419 58301 55152 116825 ...
## $ 2021_Overtime_Pay : num [1:778] 21379 17666 15625 11309 NA ...
## $ 2021_Longevity_Pay: num [1:778] NA NA NA NA 3480 ...
## $ Grade : chr [1:778] "17" "17" "17" "17" ...
## - attr(*, "spec")=
## .. cols(
## .. Department = col_character(),
## .. Department_Name = col_character(),
## .. Division = col_character(),
## .. Gender = col_character(),
## .. Base_Salary = col_double(),
## .. `2021_Overtime_Pay` = col_double(),
## .. `2021_Longevity_Pay` = col_double(),
## .. Grade = col_character()
## .. )
## - attr(*, "problems")=<externalptr>
salaries2 %>%
filter(Grade %in% c("17", "26", "29", "M2", "M3", "MD4")) %>%
ggplot() +
geom_point(aes(x=Base_Salary, y=Grade, color=Gender)) +
ggtitle("2021 Salaries in Select Pay Grades by Gender") +
ylab("Select Pay Grades") +
xlab("2021 Salaries") +
labs(fill = "Gender")
The salaries noted in grade 0 are spread across divisions. They are primarily director level or higher positions.
salaries %>%
ggplot() +
geom_point(aes(x=Base_Salary/100, y=Grade, color=Gender)) +
ggtitle("2021 Salaries by Pay Grade") +
ylab("Pay Grade") +
xlab("2021 Salaries (000s)") +
labs(fill = "Gender")
Some sources calculate the gender wage gap using the median income for each gender. Other sources use the average income for each gender. All the sources divide women’s wages by men’s wages to identity the gender wage gap.
76809.92/80086
## [1] 0.959093
aggregate(x = salaries$Base_Salary,
by = list(salaries$Gender),
FUN = median)
## Group.1 x
## 1 F 76809.92
## 2 M 80086.00
79709.61/82490.81
## [1] 0.9662847
aggregate(x = salaries$Base_Salary,
by = list(salaries$Gender),
FUN = mean)
## Group.1 x
## 1 F 79709.61
## 2 M 82490.81
salaries2 %>%
ggplot() +
geom_boxplot(aes(x=Base_Salary/100, y=Grade, color=Gender, outcol="red")) +
ggtitle("2021 Salaries by Select Pay Grade") +
ylab("Select Pay Grade") +
xlab("2021 Salaries (000s)") +
labs(fill = "Gender")
## Warning: Ignoring unknown aesthetics: outcol
library(dplyr)
library(ggplot2)
library(plotly)
(ggplot_object <- salaries2 %>%
ggplot(aes(x = `2021_Overtime_Pay`, y = Grade)) +
geom_point(mapping = aes(color = Gender)) +
geom_smooth())+
ggtitle("Overtime Pay Distribution by Select Grades")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## Warning: Removed 647 rows containing non-finite values (stat_smooth).
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : span too small. fewer data values than degrees of freedom.
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : at 56.683
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : radius 16.137
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : pseudoinverse used at 56.683
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : neighborhood radius 4.017
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : reciprocal condition number 1
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : at 868.13
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : radius 16.137
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : all data on boundary of neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : There are other near singularities as well. 16.137
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## Warning in simpleLoess(y, x, w, span, degree = degree, parametric =
## parametric, : zero-width neighborhood. make span bigger
## Warning: Computation failed in `stat_smooth()`:
## NA/NaN/Inf in foreign function call (arg 5)
## Warning: Removed 647 rows containing missing values (geom_point).
#### Overtime Pay Distribution by Select Grades ##### Using Plotly
salaries2 %>%
plot_ly(x = ~`2021_Overtime_Pay`, y = ~Grade, color = ~Gender, type = 'scatter')%>%
layout(title = 'Overtime Pay Distribution by Select Grades', plot_bgcolor = "#e5ecf6")
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## Warning: Ignoring 647 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
(ggplot_object <- salaries %>%
ggplot(aes(x = `2021_Overtime_Pay`, y = Grade)) +
geom_point(mapping = aes(color = Gender)) +
geom_smooth())+
ggtitle("Overtime Pay Distribution by Pay Grades")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## Warning: Removed 4312 rows containing non-finite values (stat_smooth).
## Warning: Computation failed in `stat_smooth()`:
## NA/NaN/Inf in foreign function call (arg 3)
## Warning: Removed 4312 rows containing missing values (geom_point).
#### Overtime Pay Distribution by Pay Grades ##### Using Plotly
salaries %>%
plot_ly(x = ~`2021_Overtime_Pay`, y = ~Grade, color = ~Gender, type = 'scatter')%>%
layout(title = 'Overtime Pay Distribution by Pay Grades', plot_bgcolor = "#e5ecf6")
## No scatter mode specifed:
## Setting the mode to markers
## Read more about this attribute -> https://plotly.com/r/reference/#scatter-mode
## Warning: Ignoring 4312 observations
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels
## Warning in RColorBrewer::brewer.pal(N, "Set2"): minimal value for n is 3, returning requested palette with 3 different levels