#install.packages("tidyr")
# install.packages("dplyr")
# install.packages("ggplot2")
# install.packages("readr")
# install.packages("lubridate")
# install.packages("Plotly")
# install.packages("hrbrthemes")
# install.packages("viridis")
# install.packages("ggridges")
# install.packages("highcharter")
# install.packages("rio")
# install.packages("purrr")
library(readr)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
## Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
## if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
## Loading required package: viridisLite
library(ggridges)
library(highcharter)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Highcharts (www.highcharts.com) is a Highsoft software product which is
## not free for commercial and Governmental use
library(rio)
##
## Attaching package: 'rio'
## The following object is masked from 'package:plotly':
##
## export
library(purrr)
library(stringr)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:viridis':
##
## viridis_pal
## The following object is masked from 'package:readr':
##
## col_factor
library(stringr)
df <- read_csv("HRdataset_v14.csv")
## Rows: 311 Columns: 36
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (19): Employee_Name, Position, State, Zip, DOB, Sex, MaritalDesc, Citize...
## dbl (17): EmpID, MarriedID, MaritalStatusID, GenderID, EmpStatusID, DeptID, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(df)
## Rows: 311
## Columns: 36
## $ Employee_Name <chr> "Adinolfi, Wilson K", "Ait Sidi, Karthikey…
## $ EmpID <dbl> 10026, 10084, 10196, 10088, 10069, 10002, 1…
## $ MarriedID <dbl> 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0…
## $ MaritalStatusID <dbl> 0, 1, 1, 1, 2, 0, 0, 4, 0, 2, 1, 1, 2, 0, 2…
## $ GenderID <dbl> 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1…
## $ EmpStatusID <dbl> 1, 5, 5, 1, 5, 1, 1, 1, 3, 1, 5, 5, 1, 1, 5…
## $ DeptID <dbl> 5, 3, 5, 5, 5, 5, 4, 5, 5, 3, 5, 5, 3, 5, 5…
## $ PerfScoreID <dbl> 4, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 4, 3, 3…
## $ FromDiversityJobFairID <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0, 0…
## $ Salary <dbl> 62506, 104437, 64955, 64991, 50825, 57568, …
## $ Termd <dbl> 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1…
## $ PositionID <dbl> 19, 27, 20, 19, 19, 19, 24, 19, 19, 14, 19,…
## $ Position <chr> "Production Technician I", "Sr. DBA", "Prod…
## $ State <chr> "MA", "MA", "MA", "MA", "MA", "MA", "MA", "…
## $ Zip <chr> "01960", "02148", "01810", "01886", "02169"…
## $ DOB <chr> "07/10/83", "05/05/75", "09/19/88", "09/27/…
## $ Sex <chr> "M", "M", "F", "F", "F", "F", "F", "M", "F"…
## $ MaritalDesc <chr> "Single", "Married", "Married", "Married", …
## $ CitizenDesc <chr> "US Citizen", "US Citizen", "US Citizen", "…
## $ HispanicLatino <chr> "No", "No", "No", "No", "No", "No", "No", "…
## $ RaceDesc <chr> "White", "White", "White", "White", "White"…
## $ DateofHire <chr> "7/5/2011", "3/30/2015", "7/5/2011", "1/7/2…
## $ DateofTermination <chr> NA, "6/16/2016", "9/24/2012", NA, "9/6/2016…
## $ TermReason <chr> "N/A-StillEmployed", "career change", "hour…
## $ EmploymentStatus <chr> "Active", "Voluntarily Terminated", "Volunt…
## $ Department <chr> "Production", "IT/IS", "Production", "Produ…
## $ ManagerName <chr> "Michael Albert", "Simon Roup", "Kissy Sull…
## $ ManagerID <dbl> 22, 4, 20, 16, 39, 11, 10, 19, 12, 7, 14, 2…
## $ RecruitmentSource <chr> "LinkedIn", "Indeed", "LinkedIn", "Indeed",…
## $ PerformanceScore <chr> "Exceeds", "Fully Meets", "Fully Meets", "F…
## $ EngagementSurvey <dbl> 4.60, 4.96, 3.02, 4.84, 5.00, 5.00, 3.04, 5…
## $ EmpSatisfaction <dbl> 5, 3, 3, 5, 4, 5, 3, 4, 3, 5, 4, 3, 4, 4, 5…
## $ SpecialProjectsCount <dbl> 0, 6, 0, 0, 0, 0, 4, 0, 0, 6, 0, 0, 5, 0, 0…
## $ LastPerformanceReview_Date <chr> "1/17/2019", "2/24/2016", "5/15/2012", "1/3…
## $ DaysLateLast30 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Absences <dbl> 1, 17, 3, 15, 2, 15, 19, 19, 4, 16, 12, 15,…
class(df$CitizenDesc)
## [1] "character"
unique(df$CitizenDesc)
## [1] "US Citizen" "Eligible NonCitizen" "Non-Citizen"
#drop columns 1. ZIP 2. HispanicLatino 3. RaceDesc
df_clean <- df %>%
mutate_at(.vars = c("RecruitmentSource", "PerformanceScore", "EmpSatisfaction", "Department", "EmploymentStatus", "TermReason", "Sex","CitizenDesc" ),
.funs = as.factor) %>%
mutate_at(.vars = c("MarriedID", "FromDiversityJobFairID"),
.funs = as.logical)
df_clean$DateofHire <- mdy(df$DateofHire)
df_clean$DOB <- mdy(df$DOB)
df_clean$DateofTermination <- mdy(df$DateofTermination)
df_clean$LastPerformanceReview_Date <- mdy(df$LastPerformanceReview_Date)
df_clean <- select(df_clean, -Zip, -HispanicLatino, -RaceDesc)
glimpse(df_clean)
## Rows: 311
## Columns: 33
## $ Employee_Name <chr> "Adinolfi, Wilson K", "Ait Sidi, Karthikey…
## $ EmpID <dbl> 10026, 10084, 10196, 10088, 10069, 10002, 1…
## $ MarriedID <lgl> FALSE, TRUE, TRUE, TRUE, FALSE, FALSE, FALS…
## $ MaritalStatusID <dbl> 0, 1, 1, 1, 2, 0, 0, 4, 0, 2, 1, 1, 2, 0, 2…
## $ GenderID <dbl> 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1…
## $ EmpStatusID <dbl> 1, 5, 5, 1, 5, 1, 1, 1, 3, 1, 5, 5, 1, 1, 5…
## $ DeptID <dbl> 5, 3, 5, 5, 5, 5, 4, 5, 5, 3, 5, 5, 3, 5, 5…
## $ PerfScoreID <dbl> 4, 3, 3, 3, 3, 4, 3, 3, 3, 3, 3, 3, 4, 3, 3…
## $ FromDiversityJobFairID <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, F…
## $ Salary <dbl> 62506, 104437, 64955, 64991, 50825, 57568, …
## $ Termd <dbl> 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1…
## $ PositionID <dbl> 19, 27, 20, 19, 19, 19, 24, 19, 19, 14, 19,…
## $ Position <chr> "Production Technician I", "Sr. DBA", "Prod…
## $ State <chr> "MA", "MA", "MA", "MA", "MA", "MA", "MA", "…
## $ DOB <date> 1983-07-10, 1975-05-05, 1988-09-19, 1988-0…
## $ Sex <fct> M, M, F, F, F, F, F, M, F, M, F, M, M, M, M…
## $ MaritalDesc <chr> "Single", "Married", "Married", "Married", …
## $ CitizenDesc <fct> US Citizen, US Citizen, US Citizen, US Citi…
## $ DateofHire <date> 2011-07-05, 2015-03-30, 2011-07-05, 2008-0…
## $ DateofTermination <date> NA, 2016-06-16, 2012-09-24, NA, 2016-09-06…
## $ TermReason <fct> "N/A-StillEmployed", "career change", "hour…
## $ EmploymentStatus <fct> Active, Voluntarily Terminated, Voluntarily…
## $ Department <fct> Production, IT/IS, Production, Production, …
## $ ManagerName <chr> "Michael Albert", "Simon Roup", "Kissy Sull…
## $ ManagerID <dbl> 22, 4, 20, 16, 39, 11, 10, 19, 12, 7, 14, 2…
## $ RecruitmentSource <fct> LinkedIn, Indeed, LinkedIn, Indeed, Google …
## $ PerformanceScore <fct> Exceeds, Fully Meets, Fully Meets, Fully Me…
## $ EngagementSurvey <dbl> 4.60, 4.96, 3.02, 4.84, 5.00, 5.00, 3.04, 5…
## $ EmpSatisfaction <fct> 5, 3, 3, 5, 4, 5, 3, 4, 3, 5, 4, 3, 4, 4, 5…
## $ SpecialProjectsCount <dbl> 0, 6, 0, 0, 0, 0, 4, 0, 0, 6, 0, 0, 5, 0, 0…
## $ LastPerformanceReview_Date <date> 2019-01-17, 2016-02-24, 2012-05-15, 2019-0…
## $ DaysLateLast30 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ Absences <dbl> 1, 17, 3, 15, 2, 15, 19, 19, 4, 16, 12, 15,…
df_clean
## # A tibble: 311 × 33
## Employe…¹ EmpID Marri…² Marit…³ Gende…⁴ EmpSt…⁵ DeptID PerfS…⁶ FromD…⁷ Salary
## <chr> <dbl> <lgl> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <dbl>
## 1 Adinolfi… 10026 FALSE 0 1 1 5 4 FALSE 62506
## 2 Ait Sidi… 10084 TRUE 1 1 5 3 3 FALSE 104437
## 3 Akinkuol… 10196 TRUE 1 0 5 5 3 FALSE 64955
## 4 Alagbe,T… 10088 TRUE 1 0 1 5 3 FALSE 64991
## 5 Anderson… 10069 FALSE 2 0 5 5 3 FALSE 50825
## 6 Anderson… 10002 FALSE 0 0 1 5 4 FALSE 57568
## 7 Andreola… 10194 FALSE 0 0 1 4 3 FALSE 95660
## 8 Athwal, … 10062 FALSE 4 1 1 5 3 FALSE 59365
## 9 Bachioch… 10114 FALSE 0 0 3 5 3 TRUE 47837
## 10 Bacong, … 10250 FALSE 2 1 1 3 3 FALSE 50178
## # … with 301 more rows, 23 more variables: Termd <dbl>, PositionID <dbl>,
## # Position <chr>, State <chr>, DOB <date>, Sex <fct>, MaritalDesc <chr>,
## # CitizenDesc <fct>, DateofHire <date>, DateofTermination <date>,
## # TermReason <fct>, EmploymentStatus <fct>, Department <fct>,
## # ManagerName <chr>, ManagerID <dbl>, RecruitmentSource <fct>,
## # PerformanceScore <fct>, EngagementSurvey <dbl>, EmpSatisfaction <fct>,
## # SpecialProjectsCount <dbl>, LastPerformanceReview_Date <date>, …
plot1 <-
ggplot(data= df_clean, mapping = aes(x = Department, y = EngagementSurvey)) +
geom_boxplot(mapping = aes(fill = Department))
ggplotly(plot1)
plot5 <-
ggplot(data = df_clean, mapping = aes(x = df_clean$EngagementSurvey, y = Salary))+
geom_line()
ggplotly(plot5)
## Warning: Use of `df_clean$EngagementSurvey` is discouraged. Use
## `EngagementSurvey` instead.
hc <- hchart(
density(df_clean$Salary), type = "area",
color = "steelblue", name = "Male"
)
hc
p2 <- ggplot(data=df_clean, aes(x=Salary, group=EmpSatisfaction, fill=EmpSatisfaction)) +
geom_density(adjust=1.5, alpha=.4) +
theme_ipsum()
ggplotly(p2)
hchart(df_clean,
type = "column",
hcaes(x = PerformanceScore,
y = Salary,
group = Department))
hchart(df_clean,
type = "column",
hcaes(x = Department,
y = Salary,
group = EmpSatisfaction))%>%
hc_tooltip(
useHTML = TRUE
) %>% hc_exporting(enabled = TRUE)
# %>%
# hc_tooltip(crosshairs = TRUE,
# shared = TRUE, borderWidth = 2)
hc <- hchart(df_clean, "packedbubble", hcaes(name = Position,value = EngagementSurvey,group = RecruitmentSource))
q95 <- as.numeric(quantile(df_clean$EngagementSurvey, .95))
hc %>%
hc_tooltip(
useHTML = TRUE,
pointFormat = "<b>{point.name}:</b> {point.value}"
) %>%
hc_plotOptions(
packedbubble = list(
maxSize = "50%",
zMin = 0,
layoutAlgorithm = list(
gravitationalConstant = 0.05,
splitSeries = TRUE, # TRUE to group points
seriesInteraction = TRUE,
dragBetweenSeries = TRUE,
parentNodeLimit = TRUE
),
dataLabels = list(
enabled = TRUE,
format = "{point.name}",
filter = list(
property = "y",
operator = ">",
value = q95
),
style = list(
color = "black",
textOutline = "none",
fontWeight = "normal"
)
)
)
)
options(dplyr.summarise.inform = FALSE)
df_baru <-
select(df_clean, Department, Position, Salary) %>%
arrange(desc(Salary))
df_baru_2 <- df_baru %>%
group_by(Department) %>%
summarise(dailyAvg = mean(Salary))
drilldown <- df_baru %>%
# group_by(Department, Position, .add = T) %>%
group_nest(Department) %>%
mutate(
id = Department,
type = "column",
data = map(data, ~ .x %>%
mutate(
name = Position,
y = Salary
) %>%
list_parse())
)
mygraph <- hchart(
df_baru_2,
"column",
hcaes(x = Department, y = dailyAvg, name = Department, drilldown = Department),
name = "Salary",
colorByPoint = TRUE
)
mygraph <- mygraph %>%
hc_drilldown(
allowPointDrilldown = TRUE,
series = list_parse(drilldown)
)
mygraph <- mygraph %>%
hc_tooltip(
useHTML = TRUE
) %>%
hc_yAxis( title = "") %>%
hc_xAxis( title = "" ) %>%
hc_title(text = "Salary" )
mygraph
unique(df_clean$Department)
## [1] Production IT/IS Software Engineering
## [4] Admin Offices Sales Executive Office
## 6 Levels: Admin Offices Executive Office IT/IS Production ... Software Engineering
hchart(df_clean, "scatter", hcaes(x= EngagementSurvey,y= SpecialProjectsCount, name = Position, group = Department)) %>%
hc_chart(type = "scatter") %>%
hc_tooltip(
headerFormat = "",
pointFormat = "<b>{point.Position}</b> is in <b>{point.Department}</b>"
)
unique(df_clean$ManagerName)
## [1] "Michael Albert" "Simon Roup" "Kissy Sullivan"
## [4] "Elijiah Gray" "Webster Butler" "Amy Dunn"
## [7] "Alex Sweetwater" "Ketsia Liebig" "Brannon Miller"
## [10] "Peter Monroe" "David Stanley" "Kelley Spirea"
## [13] "Brandon R. LeBlanc" "Janet King" "John Smith"
## [16] "Jennifer Zamora" "Lynn Daneault" "Eric Dougall"
## [19] "Debra Houlihan" "Brian Champaigne" "Board of Directors"
hchart(df_clean, "organization", hcaes(from = ManagerName, to = Employee_Name)) %>%
hc_chart(inverted = TRUE)
hchart(df_clean, "lollipop", hcaes(name = Employee_Name, low = Absences ),name = "Absences") %>%
hc_xAxis(type = "category") %>%
hc_yAxis(labels = list(format = "{value}"))
df_clean_2 <- df_clean %>% mutate_at(.vars = c("EmpSatisfaction"), .funs = as.numeric)
hchart(df_clean_2, "lollipop", hcaes(name = Employee_Name, low = EmpSatisfaction),name = "EmpSatisfaction") %>%
hc_xAxis(type = "category") %>%
hc_yAxis(labels = list(format = "{value}")) %>%
hc_title(text="Employee Satisfaction under {var}",align="center")
mean(df$Absences)
## [1] 10.23794
# https://rafif.shinyapps.io/HumanResource/