This is a programming assignment.

Part A

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)
library(xtable)
library(ggplot2)
## [1] 14999    10
## 'data.frame':    14999 obs. of  10 variables:
##  $ satisfaction_level   : num  0.38 0.8 0.11 0.72 0.37 0.41 0.1 0.92 0.89 0.42 ...
##  $ last_evaluation      : num  0.53 0.86 0.88 0.87 0.52 0.5 0.77 0.85 1 0.53 ...
##  $ number_project       : int  2 5 7 5 2 2 6 5 5 2 ...
##  $ average_montly_hours : int  157 262 272 223 159 153 247 259 224 142 ...
##  $ time_spend_company   : int  3 6 4 5 3 3 4 5 5 3 ...
##  $ Work_accident        : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ left                 : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ promotion_last_5years: int  0 0 0 0 0 0 0 0 0 0 ...
##  $ sales                : Factor w/ 10 levels "accounting","hr",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ salary               : Factor w/ 3 levels "high","low","medium": 2 3 3 2 2 2 2 2 2 2 ...
## % latex table generated in R 3.4.1 by xtable 1.8-2 package
## % Sun Oct 01 03:12:18 2017
## \begin{table}[ht]
## \centering
## \begin{tabular}{rrrrrrrrr}
##   \hline
##  & satisfaction\_level & last\_evaluation & number\_project & average\_montly\_hours & time\_spend\_company & Work\_accident & left & promotion\_last\_5years \\ 
##   \hline
## satisfaction\_level & 1.00 & 0.11 & -0.14 & -0.02 & -0.10 & 0.06 & -0.39 & 0.03 \\ 
##   last\_evaluation & 0.11 & 1.00 & 0.35 & 0.34 & 0.13 & -0.01 & 0.01 & -0.01 \\ 
##   number\_project & -0.14 & 0.35 & 1.00 & 0.42 & 0.20 & -0.00 & 0.02 & -0.01 \\ 
##   average\_montly\_hours & -0.02 & 0.34 & 0.42 & 1.00 & 0.13 & -0.01 & 0.07 & -0.00 \\ 
##   time\_spend\_company & -0.10 & 0.13 & 0.20 & 0.13 & 1.00 & 0.00 & 0.14 & 0.07 \\ 
##   Work\_accident & 0.06 & -0.01 & -0.00 & -0.01 & 0.00 & 1.00 & -0.15 & 0.04 \\ 
##   left & -0.39 & 0.01 & 0.02 & 0.07 & 0.14 & -0.15 & 1.00 & -0.06 \\ 
##   promotion\_last\_5years & 0.03 & -0.01 & -0.01 & -0.00 & 0.07 & 0.04 & -0.06 & 1.00 \\ 
##    \hline
## \end{tabular}
## \end{table}
library(reshape2)
# Pairwise correlations of every column except the two factor columns
# (`sales`, `salary`), reshaped to long form (Var1, Var2, value) so the
# matrix can be drawn as a heatmap.
melted_cormat <- melt(
  cor(mydf[, !(names(mydf) %in% c("sales", "salary")), drop = FALSE])
)
head(melted_cormat, n = 6L)
##                   Var1               Var2       value
## 1   satisfaction_level satisfaction_level  1.00000000
## 2      last_evaluation satisfaction_level  0.10502121
## 3       number_project satisfaction_level -0.14296959
## 4 average_montly_hours satisfaction_level -0.02004811
## 5   time_spend_company satisfaction_level -0.10086607
## 6        Work_accident satisfaction_level  0.05869724
# Heatmap of the melted correlation matrix: one raster cell per variable
# pair, filled by the correlation value. The x-axis labels are rotated so
# the long variable names do not overlap.
g <- ggplot(melted_cormat, aes(Var1, Var2, fill = value)) +
  geom_raster() +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))
print(g)

# Readable attrition label derived from the 0/1 `left` flag.
mydf[["status"]] <- ifelse(mydf[["left"]] == 1, "Left", "Stay")

# Three-way contingency table: status x salary, one slice per department.
table(mydf[["status"]], mydf[["salary"]], mydf[["sales"]])
## , ,  = accounting
## 
##       
##        high  low medium
##   Left    5   99    100
##   Stay   69  259    235
## 
## , ,  = hr
## 
##       
##        high  low medium
##   Left    6   92    117
##   Stay   39  243    242
## 
## , ,  = IT
## 
##       
##        high  low medium
##   Left    4  172     97
##   Stay   79  437    438
## 
## , ,  = management
## 
##       
##        high  low medium
##   Left    1   59     31
##   Stay  224  121    194
## 
## , ,  = marketing
## 
##       
##        high  low medium
##   Left    9  126     68
##   Stay   71  276    308
## 
## , ,  = product_mng
## 
##       
##        high  low medium
##   Left    6  105     87
##   Stay   62  346    296
## 
## , ,  = RandD
## 
##       
##        high  low medium
##   Left    4   55     62
##   Stay   47  309    310
## 
## , ,  = sales
## 
##       
##        high  low medium
##   Left   14  697    303
##   Stay  255 1402   1469
## 
## , ,  = support
## 
##       
##        high  low medium
##   Left    8  389    158
##   Stay  133  757    784
## 
## , ,  = technical
## 
##       
##        high  low medium
##   Left   25  378    294
##   Stay  176  994    853
# Bar chart: number of employees who left, per department (`sales` column).
# Every bar has the same status, so the fill legend is suppressed.
# `guides(fill = "none")` replaces `guides(fill = FALSE)`, which is
# deprecated in ggplot2 >= 3.3.4.
ggplot(subset(mydf, status == "Left"), aes(x = sales, fill = status)) +
  geom_bar() +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 90, hjust = 1))

- Complete this for employees who stay in the company by department

# Bar chart: number of employees who stayed, per department (`sales` column).
# Every bar has the same status, so the fill legend is suppressed.
# `guides(fill = "none")` replaces `guides(fill = FALSE)`, which is
# deprecated in ggplot2 >= 3.3.4.
ggplot(subset(mydf, status == "Stay"), aes(x = sales, fill = status)) +
  geom_bar() +
  labs(x = "DEPARTMENT", y = "STAY") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

Part B

# Re-level salary as low, medium, high so axes and facets follow that order
# instead of the alphabetical factor order (high, low, medium).
mydf$salary <- factor(mydf$salary, levels = c("low", "medium", "high"))

# Bar chart: number of employees who left, per salary band.
# `guides(fill = "none")` replaces `guides(fill = FALSE)`, which is
# deprecated in ggplot2 >= 3.3.4.
ggplot(subset(mydf, status == "Left"), aes(x = salary, fill = status)) +
  geom_bar() +
  labs(x = "SALARY", y = "LEFT") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

# Bar chart: number of employees who stayed, per salary band.
# `guides(fill = "none")` replaces `guides(fill = FALSE)`, which is
# deprecated in ggplot2 >= 3.3.4.
ggplot(subset(mydf, status == "Stay"), aes(x = salary, fill = status)) +
  geom_bar() +
  labs(x = "SALARY", y = "STAY") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.5))

# Convert tenure to a factor so ggplot2 draws it on a discrete x axis.
mydf$time_spend_company <- as.factor(mydf$time_spend_company)

# Bar chart: number of employees who left, per time spent in the company.
# `guides(fill = "none")` replaces `guides(fill = FALSE)`, which is
# deprecated in ggplot2 >= 3.3.4.
ggplot(
  subset(mydf, status == "Left"),
  aes(x = time_spend_company, fill = status)
) +
  geom_bar() +
  labs(x = "Time Spent in the company", y = "LEFT") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3))

# Employees who left, counted per time spent in the company, in a grid of
# panels: one row per salary band, one column per department (`sales`).
# `guides(fill = "none")` replaces `guides(fill = FALSE)`, which is
# deprecated in ggplot2 >= 3.3.4.
ggplot(
  subset(mydf, status == "Left"),
  aes(x = time_spend_company, fill = status)
) +
  geom_bar() +
  labs(x = "Time Spent in the company", y = "LEFT") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3)) +
  facet_grid(salary ~ sales)

# Employees who stayed, counted per time spent in the company, in a grid of
# panels: one row per salary band, one column per department (`sales`).
# `guides(fill = "none")` replaces `guides(fill = FALSE)`, which is
# deprecated in ggplot2 >= 3.3.4.
ggplot(
  subset(mydf, status == "Stay"),
  aes(x = time_spend_company, fill = status)
) +
  geom_bar() +
  labs(x = "Time Spent in the company", y = "STAY") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3)) +
  facet_grid(salary ~ sales)

Part C

# Make sure tenure is a factor (discrete x axis). as.factor() returns an
# existing factor unchanged, so repeating the conversion here is harmless
# and lets this part run on its own.
mydf$time_spend_company <- as.factor(mydf$time_spend_company)

# Bar chart: number of employees who left, per `time_spend_company` value.
# `guides(fill = "none")` replaces `guides(fill = FALSE)`, which is
# deprecated in ggplot2 >= 3.3.4.
ggplot(
  subset(mydf, status == "Left"),
  aes(x = time_spend_company, fill = status)
) +
  geom_bar() +
  labs(x = "Year stayed in the company", y = "LEFT") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3))

# Employees who left, counted per `time_spend_company` value, with one
# panel per department (`sales`) laid out in a single row.
# `guides(fill = "none")` replaces `guides(fill = FALSE)`, which is
# deprecated in ggplot2 >= 3.3.4.
ggplot(
  subset(mydf, status == "Left"),
  aes(x = time_spend_company, fill = status)
) +
  geom_bar() +
  labs(x = "Year stayed in the company", y = "LEFT") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3)) +
  facet_grid(. ~ sales)

# Employees who left, counted per `time_spend_company` value, with one
# panel per salary band laid out in a single row.
# `guides(fill = "none")` replaces `guides(fill = FALSE)`, which is
# deprecated in ggplot2 >= 3.3.4.
ggplot(
  subset(mydf, status == "Left"),
  aes(x = time_spend_company, fill = status)
) +
  geom_bar() +
  labs(x = "Year stayed in the company", y = "LEFT") +
  guides(fill = "none") +
  theme(axis.text.x = element_text(angle = 0, hjust = 0.3)) +
  facet_grid(. ~ salary)

Write-up