# --- HR attrition: fit on the training data, then score the test data -------
# NOTE(review): the training file is read from a machine-specific absolute
# path, whereas the test file is read relative to the working directory.
# The path is kept as-is; confirm it is intentional.
trainingHR <- read.table(
  'C:/Classes/Stratigic Business Analysis/foundation/week 2/DATA_3.02_HR2.csv',
  header = TRUE, # TRUE rather than T: T is an ordinary variable and can be reassigned
  sep = ','
)
testHR <- read.table('DATA_4.02_HR3.csv', header = TRUE, sep = ',')

# Logistic regression of `left` on every other column of the training data.
fitted <- glm(left ~ ., family = binomial(logit), data = trainingHR)

# Response-scale predictions (probabilities) for each row of the test data.
probaToLeave <- predict(fitted, newdata = testHR, type = "response")
predattrition <- data.frame(probaToLeave)

# Row index of the test observation with the smallest predicted probability.
which.min(predattrition[[1]])
## [1] 572
# Attach the LPE column of the test data to the predictions as `performance`.
predattrition$performance <- testHR$LPE

# Row indices of the observations whose performance exceeds 0.9.
x <- which(predattrition$performance > 0.9)

# which.min() on the subset returns a position *within x* (46 in the recorded
# run), not a row of predattrition. Index back into x to get the actual row
# instead of looking it up by hand.
best_row <- x[which.min(predattrition[x, 1])]
best_row
## [1] 322  (expected row index; the recorded run printed the within-subset position 46)
predattrition[best_row, ]
## probaToLeave performance
## 322 0.005690086 0.96
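# The ID is 322: among the observations with performance > 0.9, row 322 has the lowest predicted probability of leaving.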
# --- Maintenance data: linear and survival regressions on lifetime ----------
# Surv() and survreg() are provided by the survival package; load it here so
# this section runs even if nothing earlier attached it (no-op if it is
# already attached).
library(survival)

data <- read.table('DATA_4.03_MNT.csv', sep = ',', header = TRUE)

# Ordinary linear regression of lifetime on every column except `broken`.
linregmodel <- lm(lifetime ~ . - broken, data = data)

# Survival response built from lifetime and the `broken` column as the
# status argument of Surv().
dependantvars <- Surv(data$lifetime, data$broken)

# Gaussian survival regression on the three indicators plus team and provider.
# The result is stored under the name `survreg`, which shadows the function's
# name; later survreg(...) calls still work because R skips non-function
# bindings when resolving a call.
survreg <- survreg(
  dependantvars ~ pressureInd + moistureInd + temperatureInd + team + provider,
  dist = "gaussian",
  data = data
)
summary(survreg)
##
## Call:
## survreg(formula = dependantvars ~ pressureInd + moistureInd +
## temperatureInd + team + provider, data = data, dist = "gaussian")
## Value Std. Error z p
## (Intercept) 8.04e+01 0.29371 273.574 0.00e+00
## pressureInd -7.14e-04 0.00122 -0.587 5.57e-01
## moistureInd 6.01e-03 0.00240 2.505 1.22e-02
## temperatureInd -1.04e-02 0.00121 -8.593 8.49e-18
## teamTeamB -5.67e-02 0.05882 -0.964 3.35e-01
## teamTeamC -6.22e+00 0.06132 -101.392 0.00e+00
## providerProvider2 1.25e+01 0.06665 187.464 0.00e+00
## providerProvider3 -1.44e+01 0.06275 -229.241 0.00e+00
## providerProvider4 7.92e+00 0.07056 112.233 0.00e+00
## Log(scale) -7.43e-01 0.03540 -20.998 6.86e-98
##
## Scale= 0.476
##
## Gaussian distribution
## Loglik(model)= -270.1 Loglik(intercept only)= -1557
## Chisq= 2573.75 on 8 degrees of freedom, p= 0
## Number of Newton-Raphson Iterations: 12
## n= 1000
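# Significance of pressureInd, moistureInd, temperatureInd in the model with team and provider: NS, NS, S (NS = not significant, S = significant). Note that moistureInd (p = 0.0122) is NS only at the 1% level; it would be significant at the 5% level.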
# Refit the Gaussian survival regression with only the three *Ind covariates
# (team and provider left out); the result replaces the previous `survreg`.
survreg <- survreg(
  dependantvars ~ pressureInd + moistureInd + temperatureInd,
  dist = "gaussian",
  data = data
)
summary(survreg)
##
## Call:
## survreg(formula = dependantvars ~ pressureInd + moistureInd +
## temperatureInd, data = data, dist = "gaussian")
## Value Std. Error z p
## (Intercept) 75.8525 5.9187 12.816 1.34e-37
## pressureInd 0.0228 0.0249 0.913 3.61e-01
## moistureInd 0.0394 0.0495 0.796 4.26e-01
## temperatureInd -0.0223 0.0251 -0.889 3.74e-01
## Log(scale) 2.3528 0.0340 69.187 0.00e+00
##
## Scale= 10.5
##
## Gaussian distribution
## Loglik(model)= -1555.9 Loglik(intercept only)= -1557
## Chisq= 2.19 on 3 degrees of freedom, p= 0.53
## Number of Newton-Raphson Iterations: 6
## n= 1000
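# Significance of pressureInd, moistureInd, temperatureInd in the model without team and provider: NS, NS, NS (none is significant; all p-values exceed 0.36).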
# Restore the model with team and provider, since `survreg` was overwritten
# by the reduced fit above.
survreg <- survreg(
  dependantvars ~ pressureInd + moistureInd + temperatureInd + team + provider,
  dist = "gaussian",
  data = data
)

# Median (p = 0.5 quantile) predicted time for every row of the data.
Ebreak <- predict(survreg, newdata = data, type = "quantile", p = .5)

# Collect the prediction next to the observed lifetime and broken columns,
# then derive the remaining lifetime as predicted minus observed.
Forecast <- data.frame(Ebreak)
Forecast[["lifetime"]] <- data[["lifetime"]]
Forecast[["broken"]] <- data[["broken"]]
Forecast[["RemainingLT"]] <- Forecast[["Ebreak"]] - data[["lifetime"]]

# Row index with the largest remaining lifetime.
which.max(Forecast[["RemainingLT"]])
## [1] 53