Q1

# Load the HR training and test sets, fit a logistic regression of `left` on
# every other column of the training set, and score the test set.
# NOTE(review): the training file is read from an absolute Windows path while
# the test file is read relative to the working directory -- confirm both
# resolve on the machine that runs this script.
trainingHR <- read.table('C:/Classes/Stratigic Business Analysis/foundation/week 2/DATA_3.02_HR2.csv', header = TRUE, sep = ',')
testHR <- read.table('DATA_4.02_HR3.csv', header = TRUE, sep = ',')

# Binomial GLM with a logit link; `left ~ .` uses all remaining columns as
# predictors.
fitted <- glm(left ~ ., family = binomial(logit), data = trainingHR)

# type = "response" returns predicted probabilities rather than log-odds.
probaToLeave <- predict(fitted, newdata = testHR, type = "response")
predattrition <- data.frame(probaToLeave)

# Row position (in testHR order) of the smallest predicted probability.
which.min(predattrition[[1]])
## [1] 572

Q2

# Attach the test-set LPE score so predictions can be filtered on it.
predattrition$performance <- testHR$LPE

# Row positions of the observations whose LPE exceeds 0.9.
x <- which(predattrition$performance > 0.9)

# which.min() on the subset returns a position *within the subset* (46 in the
# recorded run), not a row of predattrition. Index back through `x` to get the
# actual row, instead of looking it up by hand and hard-coding the result.
best_id <- x[which.min(predattrition[x, 1])]

# Show the selected row: lowest predicted probability among LPE > 0.9.
predattrition[best_id, ]
##     probaToLeave performance
## 322  0.005690086        0.96

The ID is 322 (the row with the lowest predicted probability of leaving among those with LPE > 0.9).

Q3

# Surv() and survreg() come from the survival package; load it here so the
# script does not depend on the package already being attached in the session.
library(survival)

# Load the maintenance data set.
data <- read.table('DATA_4.03_MNT.csv', sep = ',', header = TRUE)

# Plain linear regression of lifetime on every column except `broken`.
# It is not used by the survival analysis below.
linregmodel <- lm(lifetime ~ . - broken, data = data)

# Survival response built from the `lifetime` and `broken` columns.
dependantvars <- Surv(data$lifetime, data$broken)

# Gaussian survival regression on the three indicators plus team and provider.
# The fitted object deliberately keeps the name `survreg`: R still finds the
# function on later calls because function lookup skips non-function objects.
survreg <- survreg(dependantvars ~ pressureInd + moistureInd + temperatureInd + team + provider, dist = "gaussian", data = data)
summary(survreg)
## 
## Call:
## survreg(formula = dependantvars ~ pressureInd + moistureInd + 
##     temperatureInd + team + provider, data = data, dist = "gaussian")
##                       Value Std. Error        z        p
## (Intercept)        8.04e+01    0.29371  273.574 0.00e+00
## pressureInd       -7.14e-04    0.00122   -0.587 5.57e-01
## moistureInd        6.01e-03    0.00240    2.505 1.22e-02
## temperatureInd    -1.04e-02    0.00121   -8.593 8.49e-18
## teamTeamB         -5.67e-02    0.05882   -0.964 3.35e-01
## teamTeamC         -6.22e+00    0.06132 -101.392 0.00e+00
## providerProvider2  1.25e+01    0.06665  187.464 0.00e+00
## providerProvider3 -1.44e+01    0.06275 -229.241 0.00e+00
## providerProvider4  7.92e+00    0.07056  112.233 0.00e+00
## Log(scale)        -7.43e-01    0.03540  -20.998 6.86e-98
## 
## Scale= 0.476 
## 
## Gaussian distribution
## Loglik(model)= -270.1   Loglik(intercept only)= -1557
##  Chisq= 2573.75 on 8 degrees of freedom, p= 0 
## Number of Newton-Raphson Iterations: 12 
## n= 1000

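pressureInd: NS, moistureInd: NS, temperatureInd: S (NS = not significant, S = significant). Note: moistureInd has p = 0.0122, so it is NS only at the 1% level; at the 5% level it would be significant.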

Q4

# Reduced model: the same Gaussian survival regression, but with only the
# three sensor indicators as predictors (team and provider dropped).
survreg <- survreg(
  dependantvars ~ pressureInd + moistureInd + temperatureInd,
  data = data,
  dist = "gaussian"
)
summary(survreg)
## 
## Call:
## survreg(formula = dependantvars ~ pressureInd + moistureInd + 
##     temperatureInd, data = data, dist = "gaussian")
##                  Value Std. Error      z        p
## (Intercept)    75.8525     5.9187 12.816 1.34e-37
## pressureInd     0.0228     0.0249  0.913 3.61e-01
## moistureInd     0.0394     0.0495  0.796 4.26e-01
## temperatureInd -0.0223     0.0251 -0.889 3.74e-01
## Log(scale)      2.3528     0.0340 69.187 0.00e+00
## 
## Scale= 10.5 
## 
## Gaussian distribution
## Loglik(model)= -1555.9   Loglik(intercept only)= -1557
##  Chisq= 2.19 on 3 degrees of freedom, p= 0.53 
## Number of Newton-Raphson Iterations: 6 
## n= 1000

pressureInd: NS, moistureInd: NS, temperatureInd: NS (NS = not significant; all three p-values exceed 0.3).

Q5

# Fit the full model (indicators + team + provider) for forecasting.
survreg <- survreg(
  dependantvars ~ pressureInd + moistureInd + temperatureInd + team + provider,
  data = data,
  dist = "gaussian"
)

# Predicted median (p = 0.5 quantile) for every row of the data set.
Ebreak <- predict(survreg, newdata = data, type = "quantile", p = 0.5)

# Collect the prediction next to the observed lifetime and broken flag.
Forecast <- data.frame(
  Ebreak = Ebreak,
  lifetime = data$lifetime,
  broken = data$broken
)

# Remaining lifetime = predicted value minus the lifetime observed so far.
Forecast$RemainingLT <- Forecast$Ebreak - Forecast$lifetime

# Row position with the largest remaining lifetime.
which.max(Forecast$RemainingLT)
## [1] 53