options(repos = c(CRAN = "https://cloud.r-project.org"))
#Packages to install
install.packages("car")
## Installing package into 'C:/Users/chris/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'car' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\chris\AppData\Local\Temp\Rtmpee2ldJ\downloaded_packages
install.packages("rcompanion")
## Installing package into 'C:/Users/chris/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
##
## There is a binary version available but the source version is later:
## binary source needs_compilation
## rcompanion 2.5.0 2.5.1 FALSE
## installing the source package 'rcompanion'
install.packages("lmtest")
## Installing package into 'C:/Users/chris/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'lmtest' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\chris\AppData\Local\Temp\Rtmpee2ldJ\downloaded_packages
install.packages("pastecs")
## Installing package into 'C:/Users/chris/AppData/Local/R/win-library/4.5'
## (as 'lib' is unspecified)
## package 'pastecs' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\chris\AppData\Local\Temp\Rtmpee2ldJ\downloaded_packages
library("car")
## Warning: package 'car' was built under R version 4.5.2
## Loading required package: carData
library(rcompanion)
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.5.2
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library("pastecs")
## Warning: package 'pastecs' was built under R version 4.5.2
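# Optional sketch (not part of the original run): install each package only when it
# is missing, then load it. The package names are the same four used above.
# pkgs <- c("car", "rcompanion", "lmtest", "pastecs")
# for (pkg in pkgs) {
#   if (!requireNamespace(pkg, quietly = TRUE)) install.packages(pkg)
#   library(pkg, character.only = TRUE)
# }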
# Load the data from a CSV file (choose.files() opens an interactive file picker on Windows)
Data <- read.csv(choose.files(), header = TRUE)
attach(Data)
options(scipen = 999)
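# Reproducible alternative (sketch): read the CSV from a fixed path instead of the
# interactive picker. "HR_data.csv" is a placeholder file name, not the path used above.
# Data <- read.csv("HR_data.csv", header = TRUE)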
#Create data frame with assignment 4 variables
DataFrameLR <- Data[, c("left", "satisfaction_level", "last_evaluation", "average_monthly_hours")]
#Descriptive Statistics (mean and SD) and Correlation
stat.desc(DataFrameLR)
## left satisfaction_level last_evaluation
## nbr.val 14999.000000000 14999.000000000 14999.000000000
## nbr.null 11428.000000000 0.000000000 0.000000000
## nbr.na 0.000000000 0.000000000 0.000000000
## min 0.000000000 0.090000000 0.360000000
## max 1.000000000 1.000000000 1.000000000
## range 1.000000000 0.910000000 0.640000000
## sum 3571.000000000 9191.890000000 10740.810000000
## median 0.000000000 0.640000000 0.720000000
## mean 0.238082539 0.612833522 0.716101740
## SE.mean 0.003477772 0.002030128 0.001397637
## CI.mean.0.95 0.006816857 0.003979300 0.002739538
## var 0.181411338 0.061817201 0.029298864
## std.dev 0.425924099 0.248630651 0.171169111
## coef.var 1.788976636 0.405706676 0.239029039
## average_monthly_hours
## nbr.val 14999.0000000
## nbr.null 0.0000000
## nbr.na 0.0000000
## min 96.0000000
## max 310.0000000
## range 214.0000000
## sum 3015554.0000000
## median 200.0000000
## mean 201.0503367
## SE.mean 0.4077973
## CI.mean.0.95 0.7993325
## var 2494.3131748
## std.dev 49.9430994
## coef.var 0.2484109
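# Sketch: pull out just the mean and standard deviation of each variable, since those
# are the descriptives of interest noted in the comment above.
sapply(DataFrameLR, function(x) c(mean = mean(x), sd = sd(x)))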
cor(DataFrameLR)
## left satisfaction_level last_evaluation
## left 1.00000000 -0.38837498 0.00656712
## satisfaction_level -0.38837498 1.00000000 0.10502121
## last_evaluation 0.00656712 0.10502121 1.00000000
## average_monthly_hours 0.07128718 -0.02004811 0.33974180
## average_monthly_hours
## left 0.07128718
## satisfaction_level -0.02004811
## last_evaluation 0.33974180
## average_monthly_hours 1.00000000
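# Sketch: a single correlation can also be tested for significance with base R's
# cor.test(), e.g. turnover (left) versus satisfaction_level.
cor.test(Data$left, Data$satisfaction_level)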
# Linearity assumption: create x * log(x) interaction terms for each continuous predictor
Data$Log_Sat <- log(satisfaction_level) * satisfaction_level
Data$Log_Eval <- log(last_evaluation) * last_evaluation
Data$Log_Avg_Hours <- log(average_monthly_hours) * average_monthly_hours
attach(Data)
## The following objects are masked from Data (pos = 3):
##
## average_monthly_hours, department, last_evaluation, left,
## number_project, promotion_last_5years, salary, satisfaction_level,
## time_spend_company, Work_accident
names(Data)
## [1] "satisfaction_level" "last_evaluation" "number_project"
## [4] "average_monthly_hours" "time_spend_company" "Work_accident"
## [7] "left" "promotion_last_5years" "department"
## [10] "salary" "Log_Sat" "Log_Eval"
## [13] "Log_Avg_Hours"
# Test for violations of the linearity-of-the-logit assumption
model_assumption <- glm(left ~ satisfaction_level + last_evaluation + average_monthly_hours +
                          Log_Sat + Log_Eval + Log_Avg_Hours,
                        data = Data, family = binomial(link = "logit"))
summary(model_assumption)
##
## Call:
## glm(formula = left ~ satisfaction_level + last_evaluation + average_monthly_hours +
## Log_Sat + Log_Eval + Log_Avg_Hours, family = binomial(link = "logit"),
## data = Data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 30.979109 1.029126 30.102 < 0.0000000000000002 ***
## satisfaction_level -3.906234 0.128646 -30.364 < 0.0000000000000002 ***
## last_evaluation -15.837704 0.730589 -21.678 < 0.0000000000000002 ***
## average_monthly_hours -0.426574 0.022868 -18.654 < 0.0000000000000002 ***
## Log_Sat 2.108074 0.355440 5.931 0.00000000301 ***
## Log_Eval 25.001119 1.120784 22.307 < 0.0000000000000002 ***
## Log_Avg_Hours 0.068116 0.003643 18.700 < 0.0000000000000002 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 16465 on 14998 degrees of freedom
## Residual deviance: 13185 on 14992 degrees of freedom
## AIC: 13199
##
## Number of Fisher Scoring iterations: 5
# Logistic regression model with the three predictors
model <- glm(left ~ satisfaction_level + last_evaluation + average_monthly_hours,
             data = Data, family = binomial(link = "logit"))
summary(model)
##
## Call:
## glm(formula = left ~ satisfaction_level + last_evaluation + average_monthly_hours,
## family = binomial(link = "logit"), data = Data)
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.3992286 0.1104972 3.613 0.000303 ***
## satisfaction_level -3.8139121 0.0879776 -43.351 < 0.0000000000000002 ***
## last_evaluation 0.2838230 0.1320874 2.149 0.031654 *
## average_monthly_hours 0.0018329 0.0004414 4.153 0.0000329 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 16465 on 14998 degrees of freedom
## Residual deviance: 14163 on 14995 degrees of freedom
## AIC: 14171
##
## Number of Fisher Scoring iterations: 4
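# Sketch: lmtest's lrtest() can formally compare the main model with the nested
# assumption-check model fitted above (which adds the x * log(x) terms); a significant
# difference would point to a violation of the linearity-of-the-logit assumption.
lrtest(model, model_assumption)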
# Chi-squared difference test: improvement of the model over the null (intercept-only) model
model_chi <- model$null.deviance - model$deviance
model_chi
## [1] 2302.049
chidf <- model$df.null - model$df.residual
chidf
## [1] 3
chisq.prob <- 1 - pchisq(model_chi, chidf)
chisq.prob
## [1] 0
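# Sketch: 1 - pchisq() loses precision for very large chi-square values, which is why the
# p-value prints as 0. The upper-tail form avoids the subtraction, and log.p = TRUE
# reports the natural-log p-value when the probability itself underflows to zero.
pchisq(model_chi, chidf, lower.tail = FALSE)
pchisq(model_chi, chidf, lower.tail = FALSE, log.p = TRUE)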
# Wald statistics for the individual predictors (Type II tests)
Anova(model, type="II", test="Wald")
## Analysis of Deviance Table (Type II tests)
##
## Response: left
## Df Chisq Pr(>Chisq)
## satisfaction_level 1 1879.3047 < 0.00000000000000022 ***
## last_evaluation 1 4.6171 0.03165 *
## average_monthly_hours 1 17.2440 0.00003287 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
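# Sketch: car's vif() gives (generalized) variance inflation factors for the fitted glm,
# a quick multicollinearity check on the three predictors alongside the correlation matrix.
vif(model)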
#Odds Ratios
exp(model$coefficients)
## (Intercept) satisfaction_level last_evaluation
## 1.4906744 0.0220617 1.3281978
## average_monthly_hours
## 1.0018346
exp(confint(model))
## Waiting for profiling to be done...
## 2.5 % 97.5 %
## (Intercept) 1.20028074 1.85101188
## satisfaction_level 0.01855121 0.02619142
## last_evaluation 1.02522851 1.72070465
## average_monthly_hours 1.00096846 1.00270196
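# Sketch: combine the odds ratios and their profile-likelihood confidence intervals
# into a single table for reporting.
exp(cbind(OR = coef(model), confint(model)))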
#Pseudo-R-squared
nagelkerke(model)
## $Models
##
## Model: "glm, left ~ satisfaction_level + last_evaluation + average_monthly_hours, binomial(link = \"logit\"), Data"
## Null: "glm, left ~ 1, binomial(link = \"logit\"), Data"
##
## $Pseudo.R.squared.for.model.vs.null
## Pseudo.R.squared
## McFadden 0.139817
## Cox and Snell (ML) 0.142282
## Nagelkerke (Cragg and Uhler) 0.213519
##
## $Likelihood.ratio.test
## Df.diff LogLik.diff Chisq p.value
## -3 -1151 2302 0
##
## $Number.of.observations
##
## Model: 14999
## Null: 14999
##
## $Messages
## [1] "Note: For models fit with REML, these statistics are based on refitting with ML"
##
## $Warnings
## [1] "None"
# Stay odds ratios: the reciprocal (1 / odds ratio) of each predictor's leave odds ratio;
# see the example walk-through for details. A sketch is shown below.
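# Sketch: 1 / exp(coefficient), equivalently exp(-coefficient), for each predictor.
1 / exp(model$coefficients)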