Import, clean, preliminary statistics
Import payroll and titles datasets
#import titles
dft <- data.frame(read.csv("20180305titles.csv"),stringsAsFactors = FALSE)
#import payroll
df <- data.frame(read.csv("20180425CivilList.csv"),stringsAsFactors = FALSE)
#payroll change factors to character
df %>% mutate_if(is.factor, as.character) -> df
## Warning: package 'bindrcpp' was built under R version 3.4.4
#payroll show head
df %>%
head(5)%>%
kable
| 2014 |
868 |
B J SANDIFORD |
DEPARTMENT OF CITYWIDE ADM |
12702 |
X |
$ 5.00 |
| 2014 |
868 |
C A WIGFALL |
DEPARTMENT OF CITYWIDE ADM |
12702 |
X |
$ 5.00 |
| 2014 |
69 |
A E A-AWOSOGBA |
HRA/DEPARTMENT OF SOCIAL S |
52311 |
A |
$ 51,955.00 |
| 2014 |
868 |
K D AABY |
DEPARTMENT OF CITYWIDE ADM |
10209 |
X |
$ 12.00 |
| 2014 |
56 |
I D AADIL |
POLICE DEPARTMENT |
71012 |
A |
$ 46,953.00 |
#titles show head
dft %>%
head(5)%>%
kable
| 56099 |
FIELD SUPERVISOR (SUMMER YOUTH |
1 |
14 |
14 |
ValidSalary |
SingleTitle |
HourlySalary |
Seasonal |
172 |
SEASONALS |
DC37 |
District Council 37 |
ActiveTitle |
| 56101 |
SENIOR FIELD SUPERVISOR (SUMME |
1 |
15 |
15 |
ValidSalary |
SingleTitle |
HourlySalary |
Seasonal |
172 |
SEASONALS |
DC37 |
District Council 37 |
ActiveTitle |
| 47 |
BOARD OF EDUCATION |
1 |
0 |
0 |
ZeroMinMax |
SingleTitle |
NoSalListed |
Annual |
998 |
PENDING ASSIGNMENT TO CBU |
N/U |
Non-Union |
ActiveTitle |
| 91406 |
CITY SEASONAL AIDE |
21 |
11 |
16 |
ValidSalary |
MultipleTitles |
HourlySalary |
Seasonal |
172 |
SEASONALS |
DC37 |
District Council 37 |
ActiveTitle |
| 71205 |
LIFE GUARD |
4 |
109 |
131 |
ValidSalary |
MultipleTitles |
HourlySalary |
Seasonal |
172 |
SEASONALS |
DC37 |
District Council 37 |
ActiveTitle |
Show structure and set column names
#payroll show structure
str(df)
## 'data.frame': 1161078 obs. of 7 variables:
## $ CALENDAR.YEAR: int 2014 2014 2014 2014 2014 2014 2014 2014 2014 2014 ...
## $ AGENCY.CODE : int 868 868 69 868 56 69 464 998 998 826 ...
## $ EMPLOYEE.NAME: chr "B J SANDIFORD" "C A WIGFALL" "A E A-AWOSOGBA" "K D AABY" ...
## $ AGENCY.NAME : chr "DEPARTMENT OF CITYWIDE ADM" "DEPARTMENT OF CITYWIDE ADM" "HRA/DEPARTMENT OF SOCIAL S" "DEPARTMENT OF CITYWIDE ADM" ...
## $ TITLE.CODE : chr "12702" "12702" "52311" "10209" ...
## $ PAY.CLASS : chr "X" "X" "A" "X" ...
## $ SALARY.RATE : chr "$ 5.00" "$ 5.00" "$ 51,955.00" "$ 12.00" ...
#title set column names
setnames(dft, "UniqueTitle", "title")
#payroll set column names
setnames(df, "CALENDAR.YEAR", "year")
setnames(df, "AGENCY.CODE", "agencycode")
setnames(df, "EMPLOYEE.NAME", "empname")
setnames(df, "AGENCY.NAME", "agency")
setnames(df, "TITLE.CODE", "title")
setnames(df, "PAY.CLASS", "payclass")
setnames(df, "SALARY.RATE", "sal")
Show count of records by year
#payroll show count by year
df %>%
group_by(year) %>%
summarise(total.count=n()) %>%
arrange((year))
## # A tibble: 4 x 2
## year total.count
## <int> <int>
## 1 2014 269838
## 2 2015 300653
## 3 2016 278869
## 4 2017 311718
Join Payroll and Titles tables
#left outer join to match by title
dfm<-merge(x = df, y = dft, by = "title", all.x = TRUE)
dfm %>% mutate_if(is.factor, as.character) -> dfm
#show structure of merged dataset. Should have same number of observations as payroll dataset -- 1161078
str(dfm)
## 'data.frame': 1161078 obs. of 20 variables:
## $ title : chr "" "" "" "" ...
## $ year : int 2014 2014 2014 2014 2014 2014 2015 2014 2014 2016 ...
## $ agencycode : int 993 993 993 993 993 993 993 993 998 998 ...
## $ empname : chr "L A CUBANO" "C DIELE" "R O ODIDI" "S J PUGLIESE" ...
## $ agency : chr "TRIBOROUGH BRIDGE AND TUNN" "TRIBOROUGH BRIDGE AND TUNN" "TRIBOROUGH BRIDGE AND TUNN" "TRIBOROUGH BRIDGE AND TUNN" ...
## $ payclass : chr "WK" "BW" "BW" "WK" ...
## $ sal : chr "$ 12.86" "$ 44,488.00" "$ 37,722.00" "$ 12.86" ...
## $ Description : chr NA NA NA NA ...
## $ CountSubTitles : int NA NA NA NA NA NA NA NA NA NA ...
## $ Min.MinRate : int NA NA NA NA NA NA NA NA NA NA ...
## $ Max.MaxRate : int NA NA NA NA NA NA NA NA NA NA ...
## $ SalaryMinMaxValid: chr NA NA NA NA ...
## $ MultipleSubtitles: chr NA NA NA NA ...
## $ PayType : chr NA NA NA NA ...
## $ PayPeriod : chr NA NA NA NA ...
## $ UnionCode : int NA NA NA NA NA NA NA NA NA NA ...
## $ UnionName : chr NA NA NA NA ...
## $ BargUnit : chr NA NA NA NA ...
## $ BargDesc : chr NA NA NA NA ...
## $ TitleStatus : chr NA NA NA NA ...
All years combined - show most common descriptions
dfm %>%
group_by(Description) %>%
summarise(total.count=n()) %>%
arrange(desc(total.count))
## # A tibble: 1,297 x 2
## Description total.count
## <chr> <int>
## 1 <NA> 313043
## 2 POLICE OFFICER (RECURRING NIGH 94447
## 3 MONITOR 43711
## 4 CORRECTION OFFICER 35845
## 5 FIREFIGHTER 33088
## 6 SANITATION WORKER 26144
## 7 COMMUNITY ASSOCIATE 20395
## 8 SCHOOL SAFETY AGENT 20208
## 9 COLLEGE ASSISTANT (NO LEAVE AC 19617
## 10 CLERICAL ASSOCIATE 18815
## # ... with 1,287 more rows
All years combined - show incomplete cases to be excluded
dfna <- dfm[is.na(dfm$Description),]
dfna %>%
group_by(agency) %>%
summarise(total.count=n()) %>%
arrange(desc(total.count))%>%
head(70)%>%
kable
| N.Y.C. TRANSIT AUTHORITY |
164234 |
| N.Y.C. TRANSIT AUTHORITY |
55587 |
| CUNY KINGSBOROUGH COMMMUNI |
12143 |
| CUNY LAGUARDIA COMMUNITY C |
11787 |
| CUNY MANHATTAN COMMUNITY C |
10557 |
| DEPARTMENT OF EDUCATION |
8969 |
| CUNY QUEENSBOROUGH COMMUNI |
7802 |
| CUNY BRONX COMMUNITY COLLE |
6320 |
| TRIBOROUGH BRIDGE AND TUNN |
6261 |
| CUNY HOSTOS COMMUNITY COLL |
4508 |
| DEPARTMENT OF EDUCATION |
3883 |
| DEPARTMENT OF PARKS & RECR |
2305 |
| POLICE DEPARTMENT |
1546 |
| POLICE DEPARTMENT |
1438 |
| OFFICE OF MANAGEMENT AND B |
1165 |
| CUNY HUNTER COLLEGE HIGH S |
1139 |
| CUNY COLLEGE OF STATEN ISL |
1137 |
| DEPARTMENT FOR THE AGING |
1135 |
| ADMINISTRATION FOR CHILDRE |
818 |
| N.Y.C. HOUSING AUTHORITY |
694 |
| HRA/DEPARTMENT OF SOCIAL S |
686 |
| DEPARTMENT OF ENVIRONMENTA |
628 |
| DEPARTMENT OF HEALTH AND M |
620 |
| OFFICE OF EMERGENCY MANAGE |
589 |
| DEPARTMENT OF TRANSPORTATI |
587 |
| DEPARTMENT FOR THE AGING |
509 |
| DEPARTMENT OF CITYWIDE ADM |
480 |
| DEPARTMENT OF DESIGN AND C |
430 |
| OFFICE OF THE MAYOR |
425 |
| DEPARTMENT OF CORRECTION |
304 |
| DEPARTMENT OF BUILDINGS |
299 |
| HOUSING PRESERVATION & DEV |
292 |
| N.Y.C. HOUSING AUTHORITY |
224 |
| DEPARTMENT OF INFORMATION |
201 |
| FIRE DEPARTMENT |
201 |
| DEPARTMENT OF HOMELESS SER |
186 |
| DEPARTMENT OF FINANCE |
177 |
| DEPARTMENT OF INFORMATION |
161 |
| DEPARTMENT OF BUILDINGS |
151 |
| CAMPAIGN FINANCE BOARD |
150 |
| DEPARTMENT OF CORRECTION |
136 |
| OFFICE OF THE COMPTROLLER |
118 |
| DISTRICT ATTORNEY-BRONX CO |
108 |
| DEPARTMENT OF SANITATION |
105 |
| DEPARTMENT OF YOUTH AND CO |
105 |
| OFFICE OF THE MAYOR |
100 |
| FIRE DEPARTMENT |
97 |
| DISTRICT ATTORNEY - SPECIA |
91 |
| BOROUGH PRESIDENT-RICHMOND |
82 |
| MAYOR’S OFFICE OF CONTRACT |
81 |
| INDEPENDENT BUDGET OFFICE |
76 |
| OFFICE OF THE COMPTROLLER |
73 |
| BOROUGH PRESIDENT-BROOKLYN |
69 |
| LAW DEPARTMENT |
69 |
| OFFICE OF LABOR RELATIONS |
69 |
| CAMPAIGN FINANCE BOARD |
68 |
| DEPARTMENT OF FINANCE |
68 |
| DEPARTMENT OF SANITATION |
60 |
| FINANCIAL INFORMATION SERV |
56 |
| DEPARTMENT OF PROBATION |
37 |
| OFFICE OF PAYROLL ADMINIST |
37 |
| MUNICIPAL WATER FINANCE AU |
36 |
| LAW DEPARTMENT |
35 |
| DISTRICT ATTORNEY-QUEENS C |
31 |
| CUNY CENTRAL OFFICE |
30 |
| BOROUGH PRESIDENT-QUEENS |
28 |
| DEPARTMENT OF BUSINESS SER |
28 |
| CITY CLERK |
27 |
| BOROUGH PRESIDENT-BRONX |
26 |
| INDEPENDENT BUDGET OFFICE |
24 |
dfna %>%
group_by(year) %>%
summarise(total.count=n()) %>%
head(5)%>%
kable
| 2014 |
75607 |
| 2015 |
77878 |
| 2016 |
76009 |
| 2017 |
83549 |
View joined table; complete cases only
#show complete cases only-- do not include NA rows
dfc<-dfm[complete.cases(dfm), ]
#convert salary to numeric, remove currency symbol
dfc$sal <- as.numeric(gsub('[$,]', '', dfc$sal))
#show head by descending salary
dfc %>%
arrange(desc(sal))%>%
head(5)%>%
kable
| 12707 |
2015 |
15 |
S C EVANS |
OFFICE OF THE COMPTROLLER |
A |
350000 |
PENSION INVESTMENT ADVISOR |
1 |
53373 |
212614 |
ValidSalary |
SingleTitle |
AnnualSalary |
Annual |
772 |
NON COMPETITIVE MANAGER |
N/U |
Non-Union |
ActiveTitle |
| 12707 |
2017 |
15 |
S C EVANS |
OFFICE OF THE COMPTROLLER |
A |
350000 |
PENSION INVESTMENT ADVISOR |
1 |
53373 |
212614 |
ValidSalary |
SingleTitle |
AnnualSalary |
Annual |
772 |
NON COMPETITIVE MANAGER |
N/U |
Non-Union |
ActiveTitle |
| 12707 |
2016 |
15 |
S C EVANS |
OFFICE OF THE COMPTROLLER |
A |
350000 |
PENSION INVESTMENT ADVISOR |
1 |
53373 |
212614 |
ValidSalary |
SingleTitle |
AnnualSalary |
Annual |
772 |
NON COMPETITIVE MANAGER |
N/U |
Non-Union |
ActiveTitle |
| 40735 |
2016 |
8 |
S S CHAN |
OFFICE OF THE ACTUARY |
A |
290331 |
CHIEF ACTUARY |
1 |
53373 |
212614 |
ValidSalary |
SingleTitle |
AnnualSalary |
Annual |
890 |
MGR-WELFARE FUND/MGR LEAVE/NON |
N/U |
Non-Union |
ActiveTitle |
| 40735 |
2017 |
8 |
S S CHAN |
OFFICE OF THE ACTUARY |
A |
290331 |
CHIEF ACTUARY |
1 |
53373 |
212614 |
ValidSalary |
SingleTitle |
AnnualSalary |
Annual |
890 |
MGR-WELFARE FUND/MGR LEAVE/NON |
N/U |
Non-Union |
ActiveTitle |
str(dfc)
## 'data.frame': 848035 obs. of 20 variables:
## $ title : chr "0527A" "0527A" "0527A" "0527A" ...
## $ year : int 2014 2016 2015 2016 2015 2017 2014 2017 2017 2017 ...
## $ agencycode : int 2 2 2 2 2 2 2 82 2 2 ...
## $ empname : chr "K H GREIG" "J CHIU" "S KAJEEPETA" "A MCLAUCHLAN" ...
## $ agency : chr "OFFICE OF THE MAYOR" "OFFICE OF THE MAYOR" "OFFICE OF THE MAYOR" "OFFICE OF THE MAYOR" ...
## $ payclass : chr "A" "A" "A" "A" ...
## $ sal : num 125000 165609 65000 56650 128125 ...
## $ Description : chr "Research Projects Coor(MA)-MGR" "Research Projects Coor(MA)-MGR" "Research Projects Coor(MA)-MGR" "Research Projects Coor(MA)-MGR" ...
## $ CountSubTitles : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Min.MinRate : int 53373 53373 53373 53373 53373 53373 53373 53373 53373 53373 ...
## $ Max.MaxRate : int 212614 212614 212614 212614 212614 212614 212614 212614 212614 212614 ...
## $ SalaryMinMaxValid: chr "ValidSalary" "ValidSalary" "ValidSalary" "ValidSalary" ...
## $ MultipleSubtitles: chr "SingleTitle" "SingleTitle" "SingleTitle" "SingleTitle" ...
## $ PayType : chr "AnnualSalary" "AnnualSalary" "AnnualSalary" "AnnualSalary" ...
## $ PayPeriod : chr "Annual" "Annual" "Annual" "Annual" ...
## $ UnionCode : int 883 883 883 883 883 883 883 883 883 883 ...
## $ UnionName : chr "MGRL TEMP - PROP EX" "MGRL TEMP - PROP EX" "MGRL TEMP - PROP EX" "MGRL TEMP - PROP EX" ...
## $ BargUnit : chr "N/U" "N/U" "N/U" "N/U" ...
## $ BargDesc : chr "Non-Union" "Non-Union" "Non-Union" "Non-Union" ...
## $ TitleStatus : chr "ActiveTitle" "ActiveTitle" "ActiveTitle" "ActiveTitle" ...
Export complete cases
#changed to eval=FALSE after first run.
write.csv(dfc, file = "dfc.csv")
Investigate Complete Cases
The following analysis relates to complete observations only. These observations are where the title code in the payroll list had a matching title code in the titles list. The incomplete cases were dropped from this analysis.
Incomplete cases primarily include CUNY, Triboro Bridge and Tunnel Authority, MTA, HHC, and other “non-mayoral” agencies. A small percentage of incomplete cases fall into mayoral agencies.
Pay Types
bypaytype<- dfc %>% count(PayType, year) %>% ungroup() %>% arrange(desc(n))
kable(head(bypaytype,10))
| AnnualSalary |
2017 |
190871 |
| AnnualSalary |
2015 |
183806 |
| AnnualSalary |
2016 |
174258 |
| AnnualSalary |
2014 |
165513 |
| HourlySalary |
2015 |
38107 |
| HourlySalary |
2017 |
36286 |
| HourlySalary |
2014 |
27901 |
| HourlySalary |
2016 |
27773 |
| EitherAnnualHourly |
2017 |
1007 |
| EitherAnnualHourly |
2015 |
857 |
EitherAnnualHourly indicates the min and max for the title are a broad range encompassing both hourly rates and annual rates. For example, the min salary may be $10, and the max salary may be $30,000. This indicates there may be multiple subtitles within the broader title, where some subtitles are paid hourly, and some are paid annually. Alternately, the data may have errors or may be outdated. It is not allowable to list a salary range in a job posting that is both hourly and annual.
Unique values in the SalaryMinMaxValid Column
byvalidsal<- dfc %>% count(SalaryMinMaxValid, year) %>% ungroup() %>% arrange(desc(n))
kable(head(byvalidsal,10))
| ValidSalary |
2017 |
228164 |
| ValidSalary |
2015 |
222770 |
| ValidSalary |
2016 |
202856 |
| ValidSalary |
2014 |
194226 |
| ZeroMinMax |
2014 |
5 |
| ZeroMinMax |
2015 |
5 |
| ZeroMinMax |
2017 |
5 |
| ZeroMinMax |
2016 |
4 |
We see that the majority of these complete cases have a valid salary listed from the Titles dataset. A very small percentage have $0 listed in the min and max possible for the salary as determined by the title designation. Since these employees received a non-zero salary, this indicates an error in the Titles dataset for these 10 observations.
Unique values in the TitleStatus Column
bytitlestatus<- dfc %>% count(TitleStatus, year) %>% ungroup() %>% arrange(desc(n))
kable(head(bytitlestatus,10))
| ActiveTitle |
2017 |
217765 |
| ActiveTitle |
2015 |
211673 |
| ActiveTitle |
2016 |
192687 |
| ActiveTitle |
2014 |
183452 |
| DeletedTitle |
2015 |
11102 |
| DeletedTitle |
2014 |
10779 |
| DeletedTitle |
2017 |
10404 |
| DeletedTitle |
2016 |
10173 |
According to the Titles dataset, some of the employees in the payroll have titles that are considered deleted titles. This could be due to the Titles dataset being more current than the Payroll dataset, in which case the employees in the defunct titles will likely have been transfered to equivalent active titles. It could also indicate errors in the datasets.
Unique values in the UnionName Column
byunion<- dfc %>% count(UnionName, year) %>% ungroup() %>% arrange(desc(n))
kable(head(byunion,10))
| POLICE OFFICERS PBA |
2017 |
24443 |
| POLICE OFFICERS PBA |
2015 |
24049 |
| POLICE OFFICERS PBA |
2016 |
23768 |
| SOCIAL SERVICE TITLES DC 37L37 |
2017 |
22669 |
| POLICE OFFICERS PBA |
2014 |
22197 |
| SOCIAL SERVICE TITLES DC 37L37 |
2015 |
20640 |
| SOCIAL SERVICE TITLES DC 37L37 |
2016 |
19846 |
| SOCIAL SERVICE TITLES DC 37L37 |
2014 |
17860 |
| CLERICAL TITLES DC37 |
2015 |
13723 |
| CLERICAL TITLES DC37 |
2014 |
13069 |
Salaries Under 10K
Split dataset into Over 10K and under 10K groups
For easier analysis, the dataset is split into salaries under 10K and over 10K.
df10<-subset(dfc, sal>=10000)
dfsub10<-subset(dfc, sal<10000)
The following section relates to salaries under 10K only. This includes trades workers such as carpenters, painters, and mechanics, which are full-time, but are listed with an hourly salary instead of annual. This causes these employees to appear as low-wage workers when they may actually earn a mid- or high-range annual salary. For clarity, these have been separated instead of attempting to calculate an equivalent annual rate.
Union Representation Under 10K
byunionsub10<- dfsub10 %>% count(UnionName, year) %>% ungroup() %>% arrange(desc(n))
kable(head(byunionsub10,10))
| COMPETITIVE OJ |
2015 |
11418 |
| COMPETITIVE OJ |
2016 |
11286 |
| COMPETITIVE OJ |
2017 |
11258 |
| COMPETITIVE OJ |
2014 |
9792 |
| JOB TRAINING PARTICIPANTS DC37 |
2017 |
6998 |
| JOB TRAINING PARTICIPANTS DC37 |
2015 |
6101 |
| PART TIME OJ |
2015 |
5920 |
| UNCLASSIFIED NON OJ & NON MANA |
2017 |
5675 |
| UNCLASSIFIED NON OJ & NON MANA |
2016 |
5129 |
| SEASONALS |
2015 |
5059 |
Agency Representation Under 10K
byagencysub10<- dfsub10 %>% count(agency, year) %>% ungroup() %>% arrange(desc(n))
kable(head(byagencysub10,10))
| DEPARTMENT OF CITYWIDE ADM |
2015 |
16061 |
| DEPARTMENT OF CITYWIDE ADM |
2014 |
14135 |
| DEPARTMENT OF CITYWIDE ADM |
2017 |
12664 |
| DEPARTMENT OF CITYWIDE ADM |
2016 |
12378 |
| DEPARTMENT OF PARKS & RECR |
2017 |
9955 |
| DEPARTMENT OF PARKS & RECR |
2015 |
9793 |
| POLICE DEPARTMENT |
2017 |
4013 |
| DEPARTMENT OF PARKS & RECR |
2016 |
3821 |
| DEPARTMENT OF PARKS & RECR |
2014 |
3528 |
| POLICE DEPARTMENT |
2016 |
3505 |
Title Representation Under 10K
bytitlesub10<- dfsub10 %>% count(Description, year) %>% ungroup() %>% arrange(desc(n))
kable(head(bytitlesub10,10))
| MONITOR |
2015 |
11400 |
| MONITOR |
2016 |
11277 |
| MONITOR |
2017 |
11251 |
| MONITOR |
2014 |
9783 |
| JOB TRAINING PARTICIPANT |
2017 |
6998 |
| JOB TRAINING PARTICIPANT |
2015 |
6101 |
| COLLEGE ASSISTANT (NO LEAVE AC |
2017 |
5641 |
| COLLEGE ASSISTANT (NO LEAVE AC |
2016 |
5097 |
| COLLEGE ASSISTANT (NO LEAVE AC |
2015 |
4996 |
| COLLEGE AIDE (ALL CITY DEPTS) |
2015 |
4963 |
Salaries over 10K
The following section relates to salaries over 10K only.
Union Representation Over 10K
byunion10plus<- df10 %>% count(UnionName, year) %>% ungroup() %>% arrange(desc(n))
kable(head(byunion10plus,10))
| POLICE OFFICERS PBA |
2017 |
24443 |
| POLICE OFFICERS PBA |
2015 |
24049 |
| POLICE OFFICERS PBA |
2016 |
23768 |
| POLICE OFFICERS PBA |
2014 |
22197 |
| SOCIAL SERVICE TITLES DC 37L37 |
2017 |
22033 |
| SOCIAL SERVICE TITLES DC 37L37 |
2015 |
19718 |
| SOCIAL SERVICE TITLES DC 37L37 |
2016 |
19404 |
| SOCIAL SERVICE TITLES DC 37L37 |
2014 |
17333 |
| CLERICAL TITLES DC37 |
2015 |
13218 |
| CLERICAL TITLES DC37 |
2014 |
12641 |
Agency Representation Over 10K
byagency10plus<- df10 %>% count(agency, year) %>% ungroup() %>% arrange(desc(n))
kable(head(byagency10plus,10))
| POLICE DEPARTMENT |
2017 |
52200 |
| POLICE DEPARTMENT |
2015 |
51051 |
| POLICE DEPARTMENT |
2016 |
49815 |
| POLICE DEPARTMENT |
2014 |
48451 |
| FIRE DEPARTMENT |
2017 |
17552 |
| FIRE DEPARTMENT |
2015 |
16839 |
| FIRE DEPARTMENT |
2016 |
16717 |
| FIRE DEPARTMENT |
2014 |
15400 |
| HRA/DEPARTMENT OF SOCIAL S |
2015 |
14023 |
| HRA/DEPARTMENT OF SOCIAL S |
2017 |
13681 |
Title Representation Over 10K
bytitle10plus<- df10 %>% count(Description, year) %>% ungroup() %>% arrange(desc(n))
kable(head(bytitle10plus,10))
| POLICE OFFICER (RECURRING NIGH |
2017 |
24443 |
| POLICE OFFICER (RECURRING NIGH |
2015 |
24044 |
| POLICE OFFICER (RECURRING NIGH |
2016 |
23768 |
| POLICE OFFICER (RECURRING NIGH |
2014 |
22192 |
| CORRECTION OFFICER |
2017 |
10159 |
| CORRECTION OFFICER |
2016 |
9235 |
| FIREFIGHTER |
2017 |
8720 |
| CORRECTION OFFICER |
2015 |
8693 |
| FIREFIGHTER |
2015 |
8424 |
| FIREFIGHTER |
2016 |
8325 |
Agency and Title Distribution Over 10K
summary10<- df10 %>% count(agency, Description) %>% ungroup() %>% arrange(desc(n))
kable(head(summary10,10))
| POLICE DEPARTMENT |
POLICE OFFICER (RECURRING NIGH |
70004 |
| DEPARTMENT OF CORRECTION |
CORRECTION OFFICER |
25686 |
| POLICE DEPARTMENT |
POLICE OFFICER (RECURRING NIGH |
24443 |
| FIRE DEPARTMENT |
FIREFIGHTER |
24368 |
| DEPARTMENT OF SANITATION |
SANITATION WORKER |
19474 |
| POLICE DEPARTMENT |
SCHOOL SAFETY AGENT |
14835 |
| POLICE DEPARTMENT |
SERGEANT (RECURRING NIGHT SHIF |
12428 |
| DEPARTMENT OF CORRECTION |
CORRECTION OFFICER |
10159 |
| HRA/DEPARTMENT OF SOCIAL S |
ELIGIBILITY SPECIALIST |
9963 |
| N.Y.C. HOUSING AUTHORITY |
CARETAKER (HA) |
9319 |
Average Salary by Title Over 10K
10 highest and 10 lowest average salaries by title.
salbytitle<-df10 %>%
group_by(Description) %>%
mutate(Count = n()) %>%
group_by(title, Description, Count) %>%
summarise_at(vars(sal), funs(mean(., na.rm=TRUE) ))%>%
arrange(desc(sal))
kable(head(salbytitle,10))
| 12707 |
PENSION INVESTMENT ADVISOR |
4 |
318644.5 |
| 40735 |
CHIEF ACTUARY |
3 |
287512.3 |
| 12942 |
FIRST DEPUTY MAYOR |
4 |
262817.8 |
| 94450 |
CHIEF OF STAFF (CITY COUNCIL) |
3 |
234282.3 |
| 12995 |
MAYOR |
4 |
233437.5 |
| 12940 |
DEPUTY MAYOR |
13 |
227925.8 |
| 94465 |
EXECUTIVE DIRECTOR (CAMPAIGN F |
4 |
226882.0 |
| 94488 |
CHAIR (HA) |
4 |
224576.0 |
| 94310 |
DIRECTOR OF THE OFFICE OF COLL |
3 |
224168.3 |
| 94468 |
EXECUTIVE DIRECTOR (FISA) |
3 |
224168.3 |
kable(tail(salbytitle,10))
| 10232 |
SUMMER GRADUATE INTERN |
5 |
27524.80 |
| 52406 |
COMMUNITY SERVICE AIDE |
653 |
27183.45 |
| 54501 |
SCHOOL LUNCH HELPER |
2 |
27031.00 |
| 10234 |
SUMMER COLLEGE INTERN |
2 |
26596.00 |
| 51800 |
PROBATION ASSISTANT |
14 |
26513.36 |
| 1299A |
COMMISSIONER (NOT FULL-TIME) |
16 |
25677.00 |
| 94367 |
TEMPORARY CLERK (BOARD OF ELEC |
51 |
25645.98 |
| 91405 |
CITY SERVICE AIDE |
103 |
23149.38 |
| 91406 |
CITY SEASONAL AIDE |
1144 |
18255.26 |
| 10209 |
COLLEGE AIDE (ALL CITY DEPTS) |
14 |
18067.57 |
Average Salary by Title Over 10K, 10 Most Common Titles
salbytitle2<-df10 %>%
group_by(Description) %>%
mutate(Count = n()) %>%
group_by(title, Description, Count) %>%
summarise_at(vars(sal), funs(round(mean(., na.rm=TRUE),0) ))%>%
arrange(desc(Count))
kable(head(salbytitle2,10))
| 70210 |
POLICE OFFICER (RECURRING NIGH |
94447 |
66208 |
| 70410 |
CORRECTION OFFICER |
35845 |
65330 |
| 70310 |
FIREFIGHTER |
33088 |
71896 |
| 70112 |
SANITATION WORKER |
26144 |
62442 |
| 60817 |
SCHOOL SAFETY AGENT |
20208 |
37925 |
| 56057 |
COMMUNITY ASSOCIATE |
19440 |
44111 |
| 10251 |
CLERICAL ASSOCIATE |
17609 |
41081 |
| 10124 |
PRINCIPAL ADMINISTRATIVE ASSOC |
16987 |
55415 |
| 70235 |
SERGEANT (RECURRING NIGHT SHIF |
16603 |
95804 |
| 90645 |
CARETAKER (HA) |
12881 |
38218 |
Export complete cases over 10K
#changed to eval=FALSE after first run.
write.csv(df10, file = "df10.csv")