# Load the raw export and derive the modelling data set `data2`.
# NOTE(review): setwd() with an absolute path ties this script to one
# machine; it is kept because later steps may rely on the working directory.
setwd("C:/Job/ARS Data Science Test/Data")
data1 <- read.csv("inout data.csv")
view(data1)

# Drop rows 1001-2154, then overwrite every cell holding the text "NULL"
# with 0.
data <- data1[-(1001:2154), ]
data[data == "NULL"] <- 0

# Upper-case the values of the 13th column
# (presumably the state code, col12 -- TODO confirm).
data[, 13] <- toupper(data[, 13])

# The response must be numeric for lm().
data$Referrals <- as.numeric(data$Referrals)

# Remove the columns that are not used as predictors.
data2 <- subset(
  data,
  select = -c(
    col21:col26, col6:col7, col28, col10, col11, col13, col15:col17
  )
)
view(data2)
# write.csv(data2, "Finalinout.csv")

# Total number of missing cells left in the modelling data.
sum(is.na(data2))
## [1] 0
# glimpse(data)

# Distribution of the untransformed response.
hist(data2$Referrals)

# Full main-effects model: every column of data2 except col4, the id
# column and col20 is used as a predictor of Referrals.
model <- lm(
  formula = Referrals ~ . - col4 - ï..Id - col20,
  data = data2
)
summary(model)
##
## Call:
## lm(formula = Referrals ~ . - col4 - ï..Id - col20, data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -17.356 -3.347 -0.485 1.848 117.586
##
## Coefficients:
## Estimate
## (Intercept) -8.271e-01
## col1Attorney / Judge / Probation / Legal Support -1.636e-01
## col1Behavioral Health -2.941e+00
## col1Counseling Group / Private Practice 1.776e+00
## col1Employer / EAP / Union 2.656e+00
## col1Hospital 5.766e+00
## col1Insurance Company 7.039e+00
## col1Interventionist -1.440e+00
## col1Media 7.876e-01
## col1Monitoring programs -4.159e-01
## col1Other 2.180e+00
## col1Sober Living 1.556e+00
## col1Treatment Program 6.659e-01
## col2Development -8.497e+00
## col2Prospect -2.844e+00
## col3Cherry Hill 4.655e+00
## col3Columbus 2.536e+00
## col3NGV 1.963e+00
## col3ORC 4.663e-01
## col3Palmer Lake 3.847e+00
## col3Recovery Village Palm Beach 3.267e+00
## col3Ridgefield 5.267e+00
## col3TRV-MD 3.919e+00
## col3Umatilla 3.824e+00
## col5 -6.202e-04
## col8 3.089e+00
## col9Development -6.983e+00
## col9Prospect -3.127e-01
## col12AZ 1.317e+00
## col12CA -3.212e-01
## col12CO 2.167e+00
## col12CT -6.611e+00
## col12DE 4.468e+00
## col12FL 4.994e+00
## col12GA 9.384e-01
## col12ID 1.671e+00
## col12IL -3.147e-01
## col12IN 1.924e+00
## col12LA 1.319e+00
## col12MA -3.774e+00
## col12MD 2.813e+00
## col12MI -1.306e-01
## col12MO -7.995e-01
## col12MT -5.273e+00
## col12NC -1.425e+00
## col12NJ 1.300e+00
## col12NV -5.463e-01
## col12NY -2.113e+00
## col12OH 2.498e+00
## col12OK 4.004e+00
## col12OR 3.163e+00
## col12PA 1.772e-02
## col12SC -5.501e+00
## col12TN -1.276e+00
## col12TX -1.158e+00
## col12UT -7.650e+00
## col12WA 1.481e+00
## col12WV 2.183e+00
## col12WY 6.404e+00
## col14 8.308e-05
## col18Clinical/Psychiatric -5.319e-01
## col18Detox 5.223e+00
## col18General -4.591e+00
## col18Inpatient Psych -3.482e+00
## col18Not applicable 1.114e+00
## col18Not Applicable 1.844e+00
## col18Outpatient 2.794e+00
## col18Physician -1.247e+00
## col18Psychiatry -8.016e-01
## col18Residential treatment 6.913e+00
## col18Specialty -7.464e+00
## col19Addiction;Eating Disorders -2.404e+00
## col19Addiction;Eating Disorders;Mental Health -9.333e-01
## col19Addiction;Eating Disorders;Mental Health;Gambling -7.680e+00
## col19Addiction;Eating Disorders;Mental Health;Other 9.471e-01
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling 3.872e+00
## col19Addiction;Eating Disorders;Other 1.567e+00
## col19Addiction;Mental Health -5.739e-03
## col19Addiction;Mental Health;Gambling 4.051e+00
## col19Addiction;Mental Health;Other 3.125e+00
## col19Addiction;Other 1.880e-01
## col19Eating Disorders 1.783e+00
## col19Eating Disorders;Mental Health -2.720e+00
## col19Eating Disorders;Mental Health;Other 3.455e+00
## col19Mental Health 7.553e-01
## col19Mental Health;Other -8.494e-01
## col19Not Applicable -4.894e-01
## col19Other 1.051e+00
## col27 1.067e-01
## Std. Error t value
## (Intercept) 8.112e+00 -0.102
## col1Attorney / Judge / Probation / Legal Support 6.080e+00 -0.027
## col1Behavioral Health 7.443e+00 -0.395
## col1Counseling Group / Private Practice 6.971e+00 0.255
## col1Employer / EAP / Union 1.044e+01 0.254
## col1Hospital 6.999e+00 0.824
## col1Insurance Company 8.807e+00 0.799
## col1Interventionist 6.605e+00 -0.218
## col1Media 8.245e+00 0.096
## col1Monitoring programs 7.139e+00 -0.058
## col1Other 5.593e+00 0.390
## col1Sober Living 5.518e+00 0.282
## col1Treatment Program 6.886e+00 0.097
## col2Development 1.654e+00 -5.138
## col2Prospect 9.869e-01 -2.881
## col3Cherry Hill 2.695e+00 1.727
## col3Columbus 2.554e+00 0.993
## col3NGV 5.006e+00 0.392
## col3ORC 1.754e+00 0.266
## col3Palmer Lake 2.955e+00 1.302
## col3Recovery Village Palm Beach 1.777e+00 1.838
## col3Ridgefield 4.337e+00 1.215
## col3TRV-MD 9.658e+00 0.406
## col3Umatilla 1.673e+00 2.286
## col5 5.363e-04 -1.157
## col8 7.601e-01 4.064
## col9Development 1.766e+00 -3.954
## col9Prospect 1.604e+00 -0.195
## col12AZ 7.159e+00 0.184
## col12CA 5.938e+00 -0.054
## col12CO 4.748e+00 0.456
## col12CT 7.758e+00 -0.852
## col12DE 1.022e+01 0.437
## col12FL 4.164e+00 1.199
## col12GA 5.858e+00 0.160
## col12ID 1.403e+01 0.119
## col12IL 7.106e+00 -0.044
## col12IN 6.414e+00 0.300
## col12LA 1.014e+01 0.130
## col12MA 7.757e+00 -0.487
## col12MD 5.591e+00 0.503
## col12MI 1.022e+01 -0.013
## col12MO 1.015e+01 -0.079
## col12MT 1.032e+01 -0.511
## col12NC 1.087e+01 -0.131
## col12NJ 4.417e+00 0.294
## col12NV 1.025e+01 -0.053
## col12NY 6.913e+00 -0.306
## col12OH 4.487e+00 0.557
## col12OK 1.039e+01 0.385
## col12OR 5.967e+00 0.530
## col12PA 4.574e+00 0.004
## col12SC 9.979e+00 -0.551
## col12TN 5.420e+00 -0.235
## col12TX 6.718e+00 -0.172
## col12UT 1.039e+01 -0.737
## col12WA 5.901e+00 0.251
## col12WV 7.994e+00 0.273
## col12WY 8.102e+00 0.790
## col14 4.797e-02 0.002
## col18Clinical/Psychiatric 1.783e+00 -0.298
## col18Detox 3.923e+00 1.331
## col18General 4.382e+00 -1.048
## col18Inpatient Psych 4.454e+00 -0.782
## col18Not applicable 3.113e+00 0.358
## col18Not Applicable 4.169e+00 0.442
## col18Outpatient 3.804e+00 0.734
## col18Physician 2.331e+00 -0.535
## col18Psychiatry 2.600e+00 -0.308
## col18Residential treatment 3.801e+00 1.819
## col18Specialty 6.239e+00 -1.196
## col19Addiction;Eating Disorders 6.631e+00 -0.362
## col19Addiction;Eating Disorders;Mental Health 1.479e+00 -0.631
## col19Addiction;Eating Disorders;Mental Health;Gambling 9.273e+00 -0.828
## col19Addiction;Eating Disorders;Mental Health;Other 2.019e+00 0.469
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling 9.182e+00 0.422
## col19Addiction;Eating Disorders;Other 9.241e+00 0.170
## col19Addiction;Mental Health 9.966e-01 -0.006
## col19Addiction;Mental Health;Gambling 6.565e+00 0.617
## col19Addiction;Mental Health;Other 1.566e+00 1.995
## col19Addiction;Other 2.625e+00 0.072
## col19Eating Disorders 9.257e+00 0.193
## col19Eating Disorders;Mental Health 3.782e+00 -0.719
## col19Eating Disorders;Mental Health;Other 9.163e+00 0.377
## col19Mental Health 1.723e+00 0.438
## col19Mental Health;Other 2.572e+00 -0.330
## col19Not Applicable 2.542e+00 -0.193
## col19Other 1.570e+00 0.669
## col27 1.939e-02 5.501
## Pr(>|t|)
## (Intercept) 0.91881
## col1Attorney / Judge / Probation / Legal Support 0.97854
## col1Behavioral Health 0.69280
## col1Counseling Group / Private Practice 0.79897
## col1Employer / EAP / Union 0.79931
## col1Hospital 0.41022
## col1Insurance Company 0.42435
## col1Interventionist 0.82744
## col1Media 0.92392
## col1Monitoring programs 0.95355
## col1Other 0.69673
## col1Sober Living 0.77808
## col1Treatment Program 0.92298
## col2Development 3.39e-07 ***
## col2Prospect 0.00405 **
## col3Cherry Hill 0.08445 .
## col3Columbus 0.32097
## col3NGV 0.69510
## col3ORC 0.79038
## col3Palmer Lake 0.19328
## col3Recovery Village Palm Beach 0.06634 .
## col3Ridgefield 0.22482
## col3TRV-MD 0.68500
## col3Umatilla 0.02251 *
## col5 0.24776
## col8 5.24e-05 ***
## col9Development 8.27e-05 ***
## col9Prospect 0.84543
## col12AZ 0.85403
## col12CA 0.95687
## col12CO 0.64822
## col12CT 0.39430
## col12DE 0.66217
## col12FL 0.23072
## col12GA 0.87277
## col12ID 0.90518
## col12IL 0.96468
## col12IN 0.76426
## col12LA 0.89648
## col12MA 0.62672
## col12MD 0.61498
## col12MI 0.98980
## col12MO 0.93725
## col12MT 0.60944
## col12NC 0.89570
## col12NJ 0.76857
## col12NV 0.95752
## col12NY 0.75994
## col12OH 0.57783
## col12OK 0.70001
## col12OR 0.59616
## col12PA 0.99691
## col12SC 0.58156
## col12TN 0.81388
## col12TX 0.86313
## col12UT 0.46159
## col12WA 0.80186
## col12WV 0.78488
## col12WY 0.42946
## col14 0.99862
## col18Clinical/Psychiatric 0.76554
## col18Detox 0.18344
## col18General 0.29503
## col18Inpatient Psych 0.43456
## col18Not applicable 0.72067
## col18Not Applicable 0.65841
## col18Outpatient 0.46286
## col18Physician 0.59297
## col18Psychiatry 0.75794
## col18Residential treatment 0.06926 .
## col18Specialty 0.23191
## col19Addiction;Eating Disorders 0.71710
## col19Addiction;Eating Disorders;Mental Health 0.52812
## col19Addiction;Eating Disorders;Mental Health;Gambling 0.40780
## col19Addiction;Eating Disorders;Mental Health;Other 0.63905
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling 0.67336
## col19Addiction;Eating Disorders;Other 0.86538
## col19Addiction;Mental Health 0.99541
## col19Addiction;Mental Health;Gambling 0.53735
## col19Addiction;Mental Health;Other 0.04630 *
## col19Addiction;Other 0.94293
## col19Eating Disorders 0.84734
## col19Eating Disorders;Mental Health 0.47211
## col19Eating Disorders;Mental Health;Other 0.70623
## col19Mental Health 0.66121
## col19Mental Health;Other 0.74127
## col19Not Applicable 0.84735
## col19Other 0.50335
## col27 4.91e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.016 on 911 degrees of freedom
## Multiple R-squared: 0.258, Adjusted R-squared: 0.1863
## F-statistic: 3.6 on 88 and 911 DF, p-value: < 2.2e-16
# Draw diagnostic plots 1 to 4 of the full model on a 2 x 2 grid.
# The layout is not reset afterwards, so it stays active for later plots
# on the same device.
par(mfrow = c(2, 2))
plot(model, which = seq_len(4))
## Warning: not plotting observations with leverage one:
## 63, 120, 289, 404, 420, 455, 508, 615, 749, 852, 866, 921, 928, 950, 956, 999
# Model selection: p-value based stepwise search (olsrr) starting from the
# full model, with 0.05 passed for both `pent` and `prem`.
# NOTE(review): recent olsrr releases appear to call these arguments
# p_enter / p_remove -- confirm against the installed version.
model.stepwise <- ols_step_both_p(
  model,
  pent = 0.05,
  prem = 0.05,
  details = FALSE
)
model.stepwise
##
## Stepwise Selection Summary
## ---------------------------------------------------------------------------------------
## Added/ Adj.
## Step Variable Removed R-Square R-Square C(p) AIC RMSE
## ---------------------------------------------------------------------------------------
## 1 col2 addition 0.081 0.079 132.9010 7365.1441 9.5940
## 2 col9 addition 0.126 0.122 79.3040 7318.6411 9.3641
## 3 col27 addition 0.155 0.151 45.1320 7286.3589 9.2096
## 4 col18 addition 0.203 0.190 -11.9360 7249.7194 8.9934
## 5 col8 addition 0.216 0.202 -25.2360 7235.9527 8.9274
## ---------------------------------------------------------------------------------------
# Reduced model on the untransformed response, using the five predictors
# col2, col8, col9, col18 and col27.
final <- lm(
  formula = Referrals ~ col2 + col8 + col9 + col18 + col27,
  data = data2
)
summary(final)
##
## Call:
## lm(formula = Referrals ~ col2 + col8 + col9 + col18 + col27,
## data = data2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -14.100 -3.597 -0.510 1.830 119.768
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.69218 1.89513 3.531 0.000433 ***
## col2Development -8.02323 1.56700 -5.120 3.67e-07 ***
## col2Prospect -2.87904 0.84608 -3.403 0.000694 ***
## col8 2.65698 0.67259 3.950 8.36e-05 ***
## col9Development -6.51998 1.67332 -3.896 0.000104 ***
## col9Prospect -0.47425 1.52160 -0.312 0.755351
## col18Clinical/Psychiatric -0.41284 1.73359 -0.238 0.811823
## col18Detox 3.95293 1.41673 2.790 0.005370 **
## col18General -0.33847 1.14959 -0.294 0.768495
## col18Inpatient Psych 0.70887 1.50908 0.470 0.638649
## col18Not applicable 1.80479 1.56745 1.151 0.249843
## col18Not Applicable 1.15980 1.13556 1.021 0.307341
## col18Outpatient 1.60309 1.06836 1.501 0.133802
## col18Physician -0.40786 2.09785 -0.194 0.845889
## col18Psychiatry -0.07544 2.50483 -0.030 0.975979
## col18Residential treatment 5.09019 1.02254 4.978 7.59e-07 ***
## col18Specialty -2.52020 4.54696 -0.554 0.579527
## col27 0.10316 0.01643 6.280 5.08e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.927 on 982 degrees of freedom
## Multiple R-squared: 0.2158, Adjusted R-squared: 0.2023
## F-statistic: 15.9 on 17 and 982 DF, p-value: < 2.2e-16
# Diagnostic plots 1 to 4 for the reduced model.
plot(final, which = 1:4)

# Final model without data transformation: same formula as `final`,
# refitted with rows 110, 236 and 434 removed (treated as outliers).
final2 <- lm(
  formula = Referrals ~ col2 + col8 + col9 + col18 + col27,
  data = data2[-c(110, 236, 434), ]
)
summary(final2)
##
## Call:
## lm(formula = Referrals ~ col2 + col8 + col9 + col18 + col27,
## data = data2[-c(110, 236, 434), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -11.548 -3.025 -0.505 1.362 81.702
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.92350 1.36300 4.346 1.53e-05 ***
## col2Development -6.57727 1.12782 -5.832 7.44e-09 ***
## col2Prospect -3.04597 0.60848 -5.006 6.59e-07 ***
## col8 1.58503 0.48493 3.269 0.00112 **
## col9Development -4.89945 1.20442 -4.068 5.13e-05 ***
## col9Prospect -0.79101 1.09419 -0.723 0.46990
## col18Clinical/Psychiatric -0.37065 1.24657 -0.297 0.76627
## col18Detox 1.75955 1.02480 1.717 0.08630 .
## col18General -0.59247 0.82669 -0.717 0.47375
## col18Inpatient Psych 0.65196 1.08515 0.601 0.54811
## col18Not applicable 1.86287 1.12710 1.653 0.09869 .
## col18Not Applicable 0.81386 0.81662 0.997 0.31919
## col18Outpatient 1.65336 0.76822 2.152 0.03163 *
## col18Physician -0.27669 1.50849 -0.183 0.85450
## col18Psychiatry 0.24200 1.80117 0.134 0.89315
## col18Residential treatment 4.19143 0.73646 5.691 1.66e-08 ***
## col18Specialty -1.94705 3.26962 -0.595 0.55165
## col27 0.09741 0.01182 8.242 5.40e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.419 on 979 degrees of freedom
## Multiple R-squared: 0.2648, Adjusted R-squared: 0.252
## F-statistic: 20.74 on 17 and 979 DF, p-value: < 2.2e-16
# Log-transform the response and attach it to a copy of data2 as the new
# last column `log.Referrals`.
#
# log(0) is -Inf, which lm() cannot use, so infinite values are stored as
# NA in `logdata` (the stand-alone vector `log.Referrals` is left
# untouched). Only the new column is inspected: comparing the whole data
# frame against the string "-Inf" would coerce every column and could
# overwrite unrelated cells.
#
# NOTE(review): rows with Referrals == 0 therefore become NA here; lm()
# reports them as deleted due to missingness. If zero-referral accounts
# should stay in the model, consider log1p() instead -- that changes every
# downstream result, so it is not done here.
log.Referrals <- log(data2$Referrals)
logdata <- data2
logdata$log.Referrals <- replace(
  log.Referrals,
  is.infinite(log.Referrals),
  NA
)

# Total number of missing cells introduced by the transformation.
sum(is.na(logdata))
## [1] 178
# Number of missing values per column of logdata. vapply() pins the result
# to one integer per column, whereas sapply() changes its return type on
# empty or irregular input.
vapply(logdata, function(x) sum(is.na(x)), integer(1))
## ï..Id col1 col2 col3 col4
## 0 0 0 0 0
## col5 col8 col9 col12 col14
## 0 0 0 0 0
## col18 col19 col20 col27 Referrals
## 0 0 0 0 0
## log.Referrals
## 178
# Full main-effects model on the log scale: every column of logdata except
# col4, the id column, col20 and the untransformed Referrals is used as a
# predictor of log.Referrals.
model2 <- lm(
  formula = log.Referrals ~ . - col4 - ï..Id - col20 - Referrals,
  data = logdata
)
summary(model2)
##
## Call:
## lm(formula = log.Referrals ~ . - col4 - ï..Id - col20 - Referrals,
## data = logdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.25380 -0.48268 -0.01704 0.42072 2.68233
##
## Coefficients:
## Estimate
## (Intercept) 1.154e+00
## col1Attorney / Judge / Probation / Legal Support -9.007e-02
## col1Behavioral Health 1.184e-01
## col1Counseling Group / Private Practice -2.007e-01
## col1Employer / EAP / Union 5.862e-01
## col1Hospital 5.612e-01
## col1Insurance Company 5.720e-01
## col1Interventionist -2.350e-01
## col1Media -1.722e-01
## col1Monitoring programs -2.496e-01
## col1Other 3.654e-01
## col1Sober Living 4.237e-01
## col1Treatment Program -8.858e-01
## col2Development -1.061e+00
## col2Prospect -8.437e-01
## col3Cherry Hill 5.303e-01
## col3Columbus 4.602e-01
## col3NGV 2.331e-01
## col3ORC 2.007e-01
## col3Palmer Lake 4.263e-01
## col3Recovery Village Palm Beach 4.247e-01
## col3Ridgefield 6.780e-01
## col3TRV-MD 3.122e-01
## col3Umatilla 4.642e-01
## col5 -3.719e-05
## col8 2.270e-01
## col9Development -5.705e-01
## col9Prospect -1.297e-01
## col12AZ -4.984e-01
## col12CA -2.733e-01
## col12CO -2.705e-01
## col12CT -1.336e+00
## col12FL -3.493e-02
## col12GA -5.511e-01
## col12ID -8.356e-02
## col12IL -8.217e-01
## col12IN -8.065e-01
## col12MA -1.103e+00
## col12MD 1.811e-01
## col12MO -8.945e-01
## col12MT -1.681e+00
## col12NC -9.304e-01
## col12NJ -5.867e-01
## col12NV -4.327e-01
## col12NY -1.303e+00
## col12OH -4.441e-01
## col12OK -5.392e-01
## col12OR -3.875e-01
## col12PA -7.380e-01
## col12SC -3.867e-01
## col12TN -4.563e-01
## col12TX -4.664e-01
## col12UT -1.873e+00
## col12WA -4.257e-01
## col12WV -5.501e-01
## col12WY 6.153e-02
## col14 -5.016e-04
## col18Clinical/Psychiatric 7.459e-04
## col18Detox 1.289e+00
## col18General -7.052e-01
## col18Inpatient Psych -3.156e-01
## col18Not applicable 1.049e-01
## col18Not Applicable -1.804e-01
## col18Outpatient 1.181e+00
## col18Physician 1.047e-01
## col18Psychiatry 1.459e-01
## col18Residential treatment 1.431e+00
## col18Specialty -7.039e-01
## col19Addiction;Eating Disorders -6.321e-01
## col19Addiction;Eating Disorders;Mental Health -1.486e-01
## col19Addiction;Eating Disorders;Mental Health;Other 1.525e-01
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling 3.516e-02
## col19Addiction;Mental Health -4.301e-02
## col19Addiction;Mental Health;Gambling 4.899e-01
## col19Addiction;Mental Health;Other 8.724e-02
## col19Addiction;Other 1.584e-01
## col19Eating Disorders;Mental Health -6.024e-01
## col19Mental Health -1.258e-02
## col19Mental Health;Other -4.023e-01
## col19Not Applicable -1.189e-01
## col19Other 4.457e-02
## col27 1.163e-02
## Std. Error t value
## (Intercept) 9.151e-01 1.261
## col1Attorney / Judge / Probation / Legal Support 5.505e-01 -0.164
## col1Behavioral Health 6.878e-01 0.172
## col1Counseling Group / Private Practice 8.430e-01 -0.238
## col1Employer / EAP / Union 9.413e-01 0.623
## col1Hospital 7.452e-01 0.753
## col1Insurance Company 8.804e-01 0.650
## col1Interventionist 6.324e-01 -0.372
## col1Media 9.424e-01 -0.183
## col1Monitoring programs 7.524e-01 -0.332
## col1Other 5.153e-01 0.709
## col1Sober Living 5.072e-01 0.835
## col1Treatment Program 8.132e-01 -1.089
## col2Development 2.307e-01 -4.601
## col2Prospect 1.251e-01 -6.745
## col3Cherry Hill 3.251e-01 1.631
## col3Columbus 4.326e-01 1.064
## col3NGV 6.277e-01 0.371
## col3ORC 2.026e-01 0.990
## col3Palmer Lake 3.771e-01 1.130
## col3Recovery Village Palm Beach 2.028e-01 2.094
## col3Ridgefield 5.354e-01 1.266
## col3TRV-MD 9.097e-01 0.343
## col3Umatilla 2.008e-01 2.311
## col5 5.361e-05 -0.694
## col8 7.612e-02 2.982
## col9Development 2.378e-01 -2.399
## col9Prospect 2.253e-01 -0.576
## col12AZ 7.688e-01 -0.648
## col12CA 7.054e-01 -0.387
## col12CO 5.984e-01 -0.452
## col12CT 8.059e-01 -1.658
## col12FL 5.362e-01 -0.065
## col12GA 7.136e-01 -0.772
## col12ID 1.342e+00 -0.062
## col12IL 8.844e-01 -0.929
## col12IN 8.166e-01 -0.988
## col12MA 7.944e-01 -1.389
## col12MD 6.521e-01 0.278
## col12MO 1.050e+00 -0.852
## col12MT 1.022e+00 -1.645
## col12NC 1.107e+00 -0.841
## col12NJ 5.899e-01 -0.995
## col12NV 1.014e+00 -0.427
## col12NY 7.593e-01 -1.716
## col12OH 6.549e-01 -0.678
## col12OK 1.026e+00 -0.526
## col12OR 7.478e-01 -0.518
## col12PA 6.067e-01 -1.217
## col12SC 9.794e-01 -0.395
## col12TN 6.319e-01 -0.722
## col12TX 7.189e-01 -0.649
## col12UT 1.023e+00 -1.832
## col12WA 7.492e-01 -0.568
## col12WV 8.883e-01 -0.619
## col12WY 1.035e+00 0.059
## col14 4.613e-03 -0.109
## col18Clinical/Psychiatric 1.826e-01 0.004
## col18Detox 5.736e-01 2.247
## col18General 5.944e-01 -1.187
## col18Inpatient Psych 5.994e-01 -0.526
## col18Not applicable 5.082e-01 0.206
## col18Not Applicable 6.854e-01 -0.263
## col18Outpatient 5.680e-01 2.079
## col18Physician 2.522e-01 0.415
## col18Psychiatry 3.155e-01 0.462
## col18Residential treatment 5.659e-01 2.529
## col18Specialty 7.524e-01 -0.936
## col19Addiction;Eating Disorders 8.260e-01 -0.765
## col19Addiction;Eating Disorders;Mental Health 1.482e-01 -1.003
## col19Addiction;Eating Disorders;Mental Health;Other 1.942e-01 0.785
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling 8.286e-01 0.042
## col19Addiction;Mental Health 9.719e-02 -0.443
## col19Addiction;Mental Health;Gambling 5.967e-01 0.821
## col19Addiction;Mental Health;Other 1.544e-01 0.565
## col19Addiction;Other 2.691e-01 0.589
## col19Eating Disorders;Mental Health 3.959e-01 -1.521
## col19Mental Health 1.868e-01 -0.067
## col19Mental Health;Other 2.498e-01 -1.611
## col19Not Applicable 2.624e-01 -0.453
## col19Other 1.529e-01 0.291
## col27 1.790e-03 6.495
## Pr(>|t|)
## (Intercept) 0.20766
## col1Attorney / Judge / Probation / Legal Support 0.87008
## col1Behavioral Health 0.86336
## col1Counseling Group / Private Practice 0.81192
## col1Employer / EAP / Union 0.53360
## col1Hospital 0.45164
## col1Insurance Company 0.51607
## col1Interventionist 0.71030
## col1Media 0.85506
## col1Monitoring programs 0.74017
## col1Other 0.47847
## col1Sober Living 0.40382
## col1Treatment Program 0.27640
## col2Development 4.95e-06 ***
## col2Prospect 3.08e-11 ***
## col3Cherry Hill 0.10329
## col3Columbus 0.28777
## col3NGV 0.71049
## col3ORC 0.32234
## col3Palmer Lake 0.25866
## col3Recovery Village Palm Beach 0.03658 *
## col3Ridgefield 0.20574
## col3TRV-MD 0.73158
## col3Umatilla 0.02110 *
## col5 0.48807
## col8 0.00295 **
## col9Development 0.01669 *
## col9Prospect 0.56498
## col12AZ 0.51699
## col12CA 0.69854
## col12CO 0.65135
## col12CT 0.09780 .
## col12FL 0.94808
## col12GA 0.44017
## col12ID 0.95038
## col12IL 0.35318
## col12IN 0.32369
## col12MA 0.16535
## col12MD 0.78132
## col12MO 0.39473
## col12MT 0.10047
## col12NC 0.40080
## col12NJ 0.32024
## col12NV 0.66979
## col12NY 0.08667 .
## col12OH 0.49798
## col12OK 0.59922
## col12OR 0.60452
## col12PA 0.22418
## col12SC 0.69309
## col12TN 0.47041
## col12TX 0.51673
## col12UT 0.06739 .
## col12WA 0.57005
## col12WV 0.53594
## col12WY 0.95259
## col14 0.91345
## col18Clinical/Psychiatric 0.99674
## col18Detox 0.02495 *
## col18General 0.23580
## col18Inpatient Psych 0.59872
## col18Not applicable 0.83648
## col18Not Applicable 0.79248
## col18Outpatient 0.03795 *
## col18Physician 0.67797
## col18Psychiatry 0.64389
## col18Residential treatment 0.01163 *
## col18Specialty 0.34983
## col19Addiction;Eating Disorders 0.44434
## col19Addiction;Eating Disorders;Mental Health 0.31642
## col19Addiction;Eating Disorders;Mental Health;Other 0.43250
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling 0.96617
## col19Addiction;Mental Health 0.65823
## col19Addiction;Mental Health;Gambling 0.41187
## col19Addiction;Mental Health;Other 0.57234
## col19Addiction;Other 0.55621
## col19Eating Disorders;Mental Health 0.12858
## col19Mental Health 0.94633
## col19Mental Health;Other 0.10770
## col19Not Applicable 0.65048
## col19Other 0.77083
## col27 1.52e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8117 on 740 degrees of freedom
## (178 observations deleted due to missingness)
## Multiple R-squared: 0.4133, Adjusted R-squared: 0.3491
## F-statistic: 6.436 on 81 and 740 DF, p-value: < 2.2e-16
# Distribution of the log-transformed response.
hist(logdata$log.Referrals)

# Diagnostic plots 1 to 4 of the log-scale model on a 2 x 2 grid.
par(mfrow = c(2, 2))
plot(model2, which = seq_len(4))
## Warning: not plotting observations with leverage one:
## 63, 119, 137, 287, 363, 401, 417, 437, 452, 503, 607, 673, 674, 741
# Model selection on the transformed data: p-value based stepwise search
# (olsrr) starting from model2, with 0.05 passed for both `pent` and `prem`.
# NOTE(review): recent olsrr releases appear to call these arguments
# p_enter / p_remove -- confirm against the installed version.
model2.stepwise <- ols_step_both_p(
  model2,
  pent = 0.05,
  prem = 0.05,
  details = FALSE
)
model2.stepwise
##
## Stepwise Selection Summary
## ---------------------------------------------------------------------------------------
## Added/ Adj.
## Step Variable Removed R-Square R-Square C(p) AIC RMSE
## ---------------------------------------------------------------------------------------
## 1 col2 addition 0.186 0.184 209.3460 2181.1070 0.9091
## 2 col18 addition 0.263 0.251 113.5900 2120.9145 0.8707
## 3 col27 addition 0.317 0.305 47.5430 2060.4277 0.8387
## 4 col9 addition 0.338 0.325 22.4940 2038.2063 0.8265
## 5 col3 addition 0.361 0.341 -4.4210 2027.2188 0.8166
## 6 col8 addition 0.369 0.348 -11.7890 2019.6043 0.8123
## ---------------------------------------------------------------------------------------
# Final log-scale model with predictors col2, col18, col27, col9, col3 and
# col8, fitted with rows 237 and 136 of logdata removed.
FinalModel <- lm(
  formula = log.Referrals ~ col2 + col18 + col27 + col9 + col3 + col8,
  data = logdata[-c(237, 136), ]
)
summary(FinalModel)
##
## Call:
## lm(formula = log.Referrals ~ col2 + col18 + col27 + col9 + col3 +
## col8, data = logdata[-c(237, 136), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.07346 -0.50751 -0.04967 0.43371 2.73841
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.804763 0.288781 2.787 0.00545 **
## col2Development -1.026684 0.223028 -4.603 4.84e-06 ***
## col2Prospect -0.840069 0.119040 -7.057 3.71e-12 ***
## col18Clinical/Psychiatric -0.013239 0.180158 -0.073 0.94144
## col18Detox 0.614603 0.143631 4.279 2.11e-05 ***
## col18General 0.082093 0.120322 0.682 0.49526
## col18Inpatient Psych 0.415699 0.155486 2.674 0.00766 **
## col18Not applicable 0.550695 0.200627 2.745 0.00619 **
## col18Not Applicable 0.218098 0.122137 1.786 0.07453 .
## col18Outpatient 0.463321 0.115057 4.027 6.20e-05 ***
## col18Physician 0.182575 0.236474 0.772 0.44030
## col18Psychiatry 0.165011 0.301059 0.548 0.58378
## col18Residential treatment 0.679216 0.111694 6.081 1.86e-09 ***
## col18Specialty 0.115145 0.480769 0.240 0.81078
## col27 0.011682 0.001618 7.222 1.20e-12 ***
## col9Development -0.541317 0.228917 -2.365 0.01829 *
## col9Prospect -0.153750 0.217495 -0.707 0.47983
## col3Cherry Hill -0.086209 0.208756 -0.413 0.67974
## col3Columbus 0.060550 0.200195 0.302 0.76239
## col3NGV 1.016626 0.839675 1.211 0.22636
## col3ORC 0.175211 0.194904 0.899 0.36895
## col3Palmer Lake 0.170806 0.195618 0.873 0.38284
## col3Recovery Village Palm Beach 0.365855 0.195622 1.870 0.06182 .
## col3Ridgefield 0.336324 0.203147 1.656 0.09820 .
## col3TRV-MD 0.038435 0.603291 0.064 0.94922
## col3Umatilla 0.398254 0.192630 2.067 0.03902 *
## col8 0.216468 0.069231 3.127 0.00183 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8113 on 793 degrees of freedom
## (178 observations deleted due to missingness)
## Multiple R-squared: 0.3706, Adjusted R-squared: 0.3499
## F-statistic: 17.96 on 26 and 793 DF, p-value: < 2.2e-16
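1: After the log transformation, the residuals of the final model (fitted on log(Referrals)) appear approximately normally distributed, and the model explains about 37% of the variance in log(Referrals) (Multiple R-squared = 0.3706, Adjusted R-squared = 0.3499), which is a reasonable fit.
The F-statistic is 17.96 (on 26 and 793 degrees of freedom) and its p-value is very small (< 2.2e-16), so the model as a whole is statistically significant: taken together, the predictors explain a significant share of the variation in the response variable.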
col18Detox 0.614603 0.143631 4.279 2.11e-05 ***
col18Inpatient Psych 0.415699 0.155486 2.674 0.00766 **
col18Outpatient 0.463321 0.115057 4.027 6.20e-05 ***
col18Residential treatment 0.679216 0.111694 6.081 1.86e-09 ***
col27 0.011682 0.001618 7.222 1.20e-12 ***
col3Umatilla 0.398254 0.192630 2.067 0.03902 *
col8 0.216468 0.069231 3.127 0.00183 **
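Prioritization of accounts can be as follows:
Because the response is log(Referrals), a coefficient b corresponds to multiplying the expected referrals by about exp(b), holding the other predictors fixed; it is not a change of b x 100 percent. Accounts that provide Residential treatment (coefficient 0.68, about +97%), Detox (0.61, about +85%), Outpatient (0.46, about +59%) and Inpatient Psych (0.42, about +52%) services, each relative to the reference category of col18, should be prioritized. The factors col8 (0.22, about +24% per unit) and col27 (0.012, about +1.2% per unit) can also be considered, and accounts at the Umatilla location are associated with more referrals (0.40, about +49%) than the reference location of col3.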