CRISP-DM Process ###My Shiny Web:https://jingwang123.shinyapps.io/Hw1_jing/ ###upload data set from computer and install packages #### Data Exploration and Descriptive Statistics CRISP-DM Process *clean and upload data set

getwd()
## [1] "/Users/jingwang/Desktop"
setwd("~/Desktop")
employee <-read.csv(file="employee_attrition.csv",header=T, stringsAsFactors = T)

cleanSurvey<- na.omit(employee)

library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
## Loading required package: grid
library(grid)

cleanSurvey$Age <-discretize(cleanSurvey$Age, method = "frequency", breaks = 3, lablels= c("low","medium","high"), order=T)
cleanSurvey$DailyRate<-discretize(cleanSurvey$DailyRate, method = "frequency", breaks = 3, lablels= c("low","medium","high"), order=T)
cleanSurvey$DistanceFromHome <-as.factor(c("near","median","far"))
cleanSurvey$Education <-as.factor(c("low","median","high"))
cleanSurvey[["EmployeeNumber"]] <-NULL
cleanSurvey[["EmployeeCount"]] <-NULL
cleanSurvey[["Over18"]]<- NULL
cleanSurvey[["OverTime"]]<-NULL
cleanSurvey[["NumCompaniesWorked"]] <- NULL
cleanSurvey[["StandardHoures"]] <-NULL
cleanSurvey$EnvironmentSatisfaction[cleanSurvey$EnvironmentSatisfaction >= 2] ="satisfaction" 
cleanSurvey$EnvironmentSatisfaction[cleanSurvey$EnvironmentSatisfaction <2] = "unsatisfaction"
cleanSurvey$HourlyRate [cleanSurvey$HourlyRate >= 60]="high"
cleanSurvey$HourlyRate[cleanSurvey$HourlyRate <69]="low"
cleanSurvey$JobInvolvement <-as.factor(c("low","median","high"))
cleanSurvey$JobLevel <-as.factor(c("low","median","high"))
cleanSurvey$JobSatisfaction[cleanSurvey$JobSatisfaction <=3]="unsatisfaction"
cleanSurvey$JobSatisfaction[cleanSurvey$JobSatisfaction >3]="satisfaction"
cleanSurvey$MonthlyIncome[cleanSurvey$MonthlyIncome< 4941]="lower income"
cleanSurvey$MonthlyIncome[cleanSurvey$MonthlyIncome> 4941  | cleanSurvey$MonthlyIncome <8363] ="median income"
cleanSurvey$MonthlyIncome[cleanSurvey$MonthlyIncome > 8363]= "high income"
cleanSurvey$MonthlyRate <- discretize(cleanSurvey$MonthlyRate,method="frequency", breaks=3, labels=c("short","mediun","high"), order=T)
cleanSurvey$StockOptionLevel<- as.factor(c("low","median","high"))
cleanSurvey$TotalWorkingYears [cleanSurvey$TotalWorkingYears <10]="short"
cleanSurvey$TotalWorkingYears[cleanSurvey$TotalWorkingYears >11 | cleanSurvey$TotalWorkingYears <17]="median"
cleanSurvey$TotalWorkingYears[cleanSurvey$TotalWorkingYears >17]= "long"
cleanSurvey$TrainingTimesLastYear <-discretize(cleanSurvey$TrainingTimesLastYear, method = "frequency", breaks = 3, lablels= c("low","medium","high"), order=T)

cleanSurvey$YearsAtCompany <-discretize(cleanSurvey$YearsAtCompany,method="frequency", breaks=3, labels=c("short","mediun","high"), order=T)
cleanSurvey$YearsInCurrentRole <-discretize(cleanSurvey$YearsInCurrentRole,method="frequency", breaks=3, labels=c("short","mediun","high"), order=T)                                        
   
cleanSurvey$YearsWithCurrManager<- discretize(cleanSurvey$YearsWithCurrManager,method="frequency", breaks=3, labels=c("short","mediun","high"), order=T)
cleanSurvey$WorkLifeBalance[cleanSurvey$WorkLifeBalance == 1] = "Min"
cleanSurvey$WorkLifeBalance[cleanSurvey$WorkLifeBalance == 2] = "1st Quantile"
cleanSurvey$WorkLifeBalance[cleanSurvey$WorkLifeBalance == 3] = "Medium"
cleanSurvey$WorkLifeBalance[cleanSurvey$WorkLifeBalance == 4] = "3th Quantile"
cleanSurvey$WorkLifeBalance[cleanSurvey$WorkLifeBalance == 5] = "Max"                                              
                                              
cleanSurvey$YearsSinceLastPromotion[cleanSurvey$YearsSinceLastPromotion <= 1] = "Min"
cleanSurvey$YearsSinceLastPromotion[cleanSurvey$YearsSinceLastPromotion > 2 | cleanSurvey$YearsSinceLastPromotion < 5] = "Medium"
cleanSurvey$YearsSinceLastPromotion[cleanSurvey$YearsSinceLastPromotion > 5 ] = "Medium"


cleanSurvey <- as(data.frame(lapply(cleanSurvey, as.character), stringsAsFactors=T), "transactions")

Association Rules Mining

frequent_items <- eclat(cleanSurvey, parameter = list(support = 0.7, minlen = 2))
## Eclat
## 
## parameter specification:
##  tidLists support minlen maxlen            target   ext
##     FALSE     0.7      2     10 frequent itemsets FALSE
## 
## algorithmic control:
##  sparse sort verbose
##       7   -2    TRUE
## 
## Absolute minimum support count: 816 
## 
## create itemset ... 
## set transactions ...[99 item(s), 1167 transaction(s)] done [0.00s].
## sorting and recoding items ... [9 item(s)] done [0.00s].
## creating bit matrix ... [9 row(s), 1167 column(s)] done [0.00s].
## writing  ... [182 set(s)] done [0.00s].
## Creating S4 object  ... done [0.00s].
inspect(head(frequent_items, 2))
##     items                              support count
## [1] {BusinessTravel=Travel_Rarely,                  
##      JobSatisfaction=satisfaction,                  
##      MonthlyIncome=high income,                     
##      StandardHours=80,                              
##      TotalWorkingYears=long,                        
##      YearsSinceLastPromotion=Medium} 0.7137961   833
## [2] {BusinessTravel=Travel_Rarely,                  
##      JobSatisfaction=satisfaction,                  
##      MonthlyIncome=high income,                     
##      StandardHours=80,                              
##      TotalWorkingYears=long}         0.7137961   833
itemFrequencyPlot(cleanSurvey, topN = 10, type = "absolute", main = "Item frequency")

rules <- apriori(cleanSurvey, parameter = list(support = 0.01, confidence = 0.5,minlen=3), appearance = list(default="lhs",rhs=c("Attrition=Yes")),control = list(verbose=F))
quality(head(rules, 3))
##      support confidence     lift count
## 1 0.01285347  0.7142857 4.555035    15
## 2 0.01285347  0.5769231 3.679067    15
## 3 0.01371037  0.5714286 3.644028    16
inspect(head(sort(rules, by = "lift", decreasing = T), 7))
##     lhs                                   rhs                support confidence     lift count
## [1] {Age=[18,32),                                                                             
##      MaritalStatus=Single,                                                                    
##      MonthlyRate=mediun,                                                                      
##      YearsInCurrentRole=short}         => {Attrition=Yes} 0.01028278          1 6.377049    12
## [2] {Age=[18,32),                                                                             
##      MaritalStatus=Single,                                                                    
##      StockOptionLevel=high,                                                                   
##      YearsInCurrentRole=short}         => {Attrition=Yes} 0.01028278          1 6.377049    12
## [3] {Age=[18,32),                                                                             
##      JobLevel=high,                                                                           
##      MaritalStatus=Single,                                                                    
##      YearsInCurrentRole=short}         => {Attrition=Yes} 0.01028278          1 6.377049    12
## [4] {Age=[18,32),                                                                             
##      JobInvolvement=high,                                                                     
##      MaritalStatus=Single,                                                                    
##      YearsInCurrentRole=short}         => {Attrition=Yes} 0.01028278          1 6.377049    12
## [5] {Age=[18,32),                                                                             
##      Education=high,                                                                          
##      MaritalStatus=Single,                                                                    
##      YearsInCurrentRole=short}         => {Attrition=Yes} 0.01028278          1 6.377049    12
## [6] {Age=[18,32),                                                                             
##      DistanceFromHome=far,                                                                    
##      MaritalStatus=Single,                                                                    
##      YearsInCurrentRole=short}         => {Attrition=Yes} 0.01028278          1 6.377049    12
## [7] {Age=[18,32),                                                                             
##      BusinessTravel=Travel_Frequently,                                                        
##      PerformanceRating=3,                                                                     
##      YearsInCurrentRole=short,                                                                
##      YearsWithCurrManager=short}       => {Attrition=Yes} 0.01113967          1 6.377049    13
subset_rules1 <- which(colSums(is.subset(rules)) > 1)
rules <- sort(rules[-subset_rules1], by = "lift", descreasing = T)
inspect(head(rules, 5))
##     lhs                                   rhs                support confidence     lift count
## [1] {Age=[18,32),                                                                             
##      EducationField=Technical Degree,                                                         
##      MaritalStatus=Single}             => {Attrition=Yes} 0.01199657  0.7368421 4.698878    14
## [2] {BusinessTravel=Travel_Frequently,                                                        
##      JobRole=Sales Representative}     => {Attrition=Yes} 0.01285347  0.7142857 4.555035    15
## [3] {Age=[18,32),                                                                             
##      BusinessTravel=Travel_Frequently,                                                        
##      YearsAtCompany=short}             => {Attrition=Yes} 0.02056555  0.7058824 4.501446    24
## [4] {Age=[18,32),                                                                             
##      BusinessTravel=Travel_Frequently,                                                        
##      YearsWithCurrManager=short}       => {Attrition=Yes} 0.01628106  0.7037037 4.487553    19
## [5] {Age=[18,32),                                                                             
##      MaritalStatus=Single,                                                                    
##      YearsWithCurrManager=short}       => {Attrition=Yes} 0.02570694  0.6818182 4.347988    30
plot(rules, measure = c("support", "lift"), shading = "confidence")
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.


library(shiny) run_example(“01_hello”) ui <- fluidPage( titlePanel(“Dataset exploration”),

sidebarLayout( sidebarPanel( selectInput(inputId = “cleanSurvey”, label = “cleanSurvey”, choices = ls(name = “cleanSurvey”), selected = “Attrition”) ),

mainPanel(
  textOutput(outputId = "DatasetName")
)

) )

server <- function(input, output){} shinyApp(ui = ui, server = server)
ui

Report
###Analysis Report

####According to the data analysis part, several factors are significant related with “Attrition=Yes” and “Attrition = No”, I have found that: ####which influences “Attrition = Yes”: ####"Age=[18,32),
##EducationField=Technical Degree,
##MaritalStatus=Single}
##They like to change the job

####Which influences “Attrition = No” ####“StockOptionLevel=1st Quantile” ####“YearsSinceLastPromotion=Medium” ####They do not like change the job