# Load the raw export and prepare it for the missing-value analysis.
# NOTE(review): the script uses %>%, view(), mice(), drop_na() and
# ols_step_both_p() but no library() call is visible; the packages are
# presumably attached elsewhere - confirm.
# NOTE(review): the absolute path ties the script to a single machine.
setwd("C:/Job/ARS Data Science Test/Data")
data1 <- read.csv("inout data.csv")
view(data1)

# Drop rows 1001-2154 and turn the literal text "NULL" into a real NA.
data <- data1[-c(1001:2154), ] %>% replace(. == "NULL", NA)

# Upper-case the 13th column.
data[, 13] <- toupper(data[, 13])

# Convert through as.character() first: when read.csv() returns a factor
# (the default before R 4.0), as.numeric() alone yields the internal level
# codes rather than the recorded values.
data$Referrals <- as.numeric(as.character(data$Referrals))

# Total number of missing cells in the cleaned data.
sum(is.na(data))
## [1] 5249
#glimpse(data)
# Number of missing values in each column of `data`.
vapply(data, function(column) sum(is.na(column)), integer(1))
## ï..Id col1 col2 col3 col4 col5 col6 col7
## 0 0 0 0 0 0 721 721
## col8 col9 col10 col11 col12 col13 col14 col15
## 0 0 7 8 7 8 0 0
## col16 col17 col18 col19 col20 col21 col22 col23
## 0 0 0 0 0 601 601 601
## col24 col25 col26 col27 col28 Referrals
## 601 601 602 0 0 170
# Per-column tally of missing values in `data` (repeats the earlier check).
vapply(data, function(column) sum(is.na(column)), integer(1))
## ï..Id col1 col2 col3 col4 col5 col6 col7
## 0 0 0 0 0 0 721 721
## col8 col9 col10 col11 col12 col13 col14 col15
## 0 0 7 8 7 8 0 0
## col16 col17 col18 col19 col20 col21 col22 col23
## 0 0 0 0 0 601 601 601
## col24 col25 col26 col27 col28 Referrals
## 601 601 602 0 0 170
# Logical matrix flagging every missing cell of `data`.
na.obs <- is.na(data)

# Multiple imputation (m = 5) on the columns that remain after dropping
# positions 7-8, 11-14, 17-18 and 21-29.
# A fixed seed makes the imputations, and every model fitted on them later,
# reproducible; without it each run draws different values.
# NOTE(review): the console output recorded in this file came from an
# unseeded run, so re-running will not reproduce those numbers exactly.
imp_data <- mice(
  data[, -c(7:8, 11:14, 17:18, 21:29)],
  m = 5,
  seed = 123
)
##
## iter imp variable
## 1 1 Referrals
## 1 2 Referrals
## 1 3 Referrals
## 1 4 Referrals
## 1 5 Referrals
## 2 1 Referrals
## 2 2 Referrals
## 2 3 Referrals
## 2 4 Referrals
## 2 5 Referrals
## 3 1 Referrals
## 3 2 Referrals
## 3 3 Referrals
## 3 4 Referrals
## 3 5 Referrals
## 4 1 Referrals
## 4 2 Referrals
## 4 3 Referrals
## 4 4 Referrals
## 4 5 Referrals
## 5 1 Referrals
## 5 2 Referrals
## 5 3 Referrals
## 5 4 Referrals
## 5 5 Referrals
## Warning: Number of logged events: 8
# Pull a filled-in data set out of the mice result (default `action`).
complete.data <- complete(imp_data)

# Confirm column by column that no missing values remain.
vapply(complete.data, function(column) sum(is.na(column)), integer(1))
## ï..Id col1 col2 col3 col4 col5 col8 col9
## 0 0 0 0 0 0 0 0
## col14 col15 col18 col19 Referrals
## 0 0 0 0 0
#na.omit(complete.data)
# Overall number of missing cells left after imputation.
sum(is.na(complete.data))
## [1] 0
# Per-column breakdown of the same count.
vapply(complete.data, function(column) sum(is.na(column)), integer(1))
## ï..Id col1 col2 col3 col4 col5 col8 col9
## 0 0 0 0 0 0 0 0
## col14 col15 col18 col19 Referrals
## 0 0 0 0 0
#write.csv(complete.data,"inoutdata.csv")
#view(complete.data)
#counts <- table(complete.data$col3,complete.data$col12)
#barplot(counts, main="Statewise Distribution of centers",
# xlab="States",
# legend = rownames(complete.data$Referrals))
# Baseline model: ordinary least squares of the raw Referrals count on
# col1, col2, col3, col5, col8, col9, col18, col19 and col14, fitted on the
# imputed data. The formula is left inline because summary() echoes the call
# and the coefficient table follows the term order.
lmRefer <- lm(Referrals ~ col1+col2+col3+col5+col8+col9+col18+col19+col14, data= complete.data)
# Coefficient table, residual summary and fit statistics.
summary(lmRefer)
##
## Call:
## lm(formula = Referrals ~ col1 + col2 + col3 + col5 + col8 + col9 +
## col18 + col19 + col14, data = complete.data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -16.938 -3.670 -0.839 1.831 119.679
##
## Coefficients:
## Estimate
## (Intercept) -0.7027062
## col1Attorney / Judge / Probation / Legal Support -1.2384183
## col1Behavioral Health -3.8723880
## col1Counseling Group / Private Practice 0.7326752
## col1Employer / EAP / Union 0.4902806
## col1Hospital 5.9447072
## col1Insurance Company 7.4503163
## col1Interventionist -2.7750108
## col1Media -0.1081827
## col1Monitoring programs -2.7466375
## col1Other 0.1916368
## col1Sober Living -0.1367038
## col1Treatment Program 0.0332272
## col2Development -5.2329346
## col2Prospect -1.9038824
## col3Cherry Hill 1.0191401
## col3Columbus 0.5976424
## col3NGV 5.2669217
## col3ORC 1.5170182
## col3Palmer Lake 1.6395726
## col3Recovery Village Palm Beach 3.2226990
## col3Ridgefield 2.7858313
## col3TRV-MD 1.4942374
## col3Umatilla 3.9895017
## col5 0.0003215
## col8 3.3364798
## col9Development -2.6602705
## col9Prospect 3.3540096
## col18Clinical/Psychiatric -0.9379108
## col18Detox 4.7329108
## col18General -4.4592846
## col18Inpatient Psych -2.8040853
## col18Not applicable -0.2777963
## col18Not Applicable 1.7117871
## col18Outpatient 1.7708124
## col18Physician -2.4277486
## col18Psychiatry -0.4690872
## col18Residential treatment 5.4718596
## col18Specialty -7.6973649
## col19Addiction;Eating Disorders -3.9087300
## col19Addiction;Eating Disorders;Mental Health -1.0463625
## col19Addiction;Eating Disorders;Mental Health;Gambling 1.1602280
## col19Addiction;Eating Disorders;Mental Health;Other 0.5250376
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling 2.9601317
## col19Addiction;Eating Disorders;Other 7.5533210
## col19Addiction;Mental Health -0.1294519
## col19Addiction;Mental Health;Gambling 4.4877488
## col19Addiction;Mental Health;Other 2.6067869
## col19Addiction;Other -0.3806990
## col19Eating Disorders -0.2772918
## col19Eating Disorders;Mental Health -4.6116695
## col19Eating Disorders;Mental Health;Other 4.0900000
## col19Mental Health 0.7086089
## col19Mental Health;Other 0.1106400
## col19Not Applicable -0.6711313
## col19Other 1.8552807
## col14 0.0308334
## Std. Error t value
## (Intercept) 7.9579060 -0.088
## col1Attorney / Judge / Probation / Legal Support 6.2789724 -0.197
## col1Behavioral Health 7.6649827 -0.505
## col1Counseling Group / Private Practice 7.1442694 0.103
## col1Employer / EAP / Union 10.7886459 0.045
## col1Hospital 7.1510367 0.831
## col1Insurance Company 9.0424713 0.824
## col1Interventionist 6.8087330 -0.408
## col1Media 8.5219933 -0.013
## col1Monitoring programs 7.1962858 -0.382
## col1Other 5.7443761 0.033
## col1Sober Living 5.6917640 -0.024
## col1Treatment Program 7.0025372 0.005
## col2Development 1.6727474 -3.128
## col2Prospect 0.9692309 -1.964
## col3Cherry Hill 1.9346800 0.527
## col3Columbus 1.8316335 0.326
## col3NGV 5.1416989 1.024
## col3ORC 1.7625653 0.861
## col3Palmer Lake 1.7946611 0.914
## col3Recovery Village Palm Beach 1.7963730 1.794
## col3Ridgefield 1.9100796 1.458
## col3TRV-MD 6.9708000 0.214
## col3Umatilla 1.6774704 2.378
## col5 0.0005146 0.625
## col8 0.7666409 4.352
## col9Development 1.7896392 -1.486
## col9Prospect 1.6325246 2.054
## col18Clinical/Psychiatric 1.8334818 -0.512
## col18Detox 3.9487622 1.199
## col18General 4.4405908 -1.004
## col18Inpatient Psych 4.5175062 -0.621
## col18Not applicable 3.1510414 -0.088
## col18Not Applicable 4.2246248 0.405
## col18Outpatient 3.8265191 0.463
## col18Physician 2.3934988 -1.014
## col18Psychiatry 2.6796518 -0.175
## col18Residential treatment 3.8039484 1.438
## col18Specialty 6.3930015 -1.204
## col19Addiction;Eating Disorders 6.8461083 -0.571
## col19Addiction;Eating Disorders;Mental Health 1.4756151 -0.709
## col19Addiction;Eating Disorders;Mental Health;Gambling 9.5749221 0.121
## col19Addiction;Eating Disorders;Mental Health;Other 2.0422158 0.257
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling 9.4880534 0.312
## col19Addiction;Eating Disorders;Other 9.5417450 0.792
## col19Addiction;Mental Health 1.0069162 -0.129
## col19Addiction;Mental Health;Gambling 6.7833351 0.662
## col19Addiction;Mental Health;Other 1.5951212 1.634
## col19Addiction;Other 2.6553931 -0.143
## col19Eating Disorders 9.5558243 -0.029
## col19Eating Disorders;Mental Health 3.7397775 -1.233
## col19Eating Disorders;Mental Health;Other 9.4704427 0.432
## col19Mental Health 1.7595460 0.403
## col19Mental Health;Other 2.5729083 0.043
## col19Not Applicable 2.5821648 -0.260
## col19Other 1.5891525 1.167
## col14 0.0453789 0.679
## Pr(>|t|)
## (Intercept) 0.92965
## col1Attorney / Judge / Probation / Legal Support 0.84369
## col1Behavioral Health 0.61353
## col1Counseling Group / Private Practice 0.91834
## col1Employer / EAP / Union 0.96376
## col1Hospital 0.40601
## col1Insurance Company 0.41019
## col1Interventionist 0.68368
## col1Media 0.98987
## col1Monitoring programs 0.70279
## col1Other 0.97339
## col1Sober Living 0.98084
## col1Treatment Program 0.99622
## col2Development 0.00181 **
## col2Prospect 0.04979 *
## col3Cherry Hill 0.59847
## col3Columbus 0.74428
## col3NGV 0.30593
## col3ORC 0.38963
## col3Palmer Lake 0.36117
## col3Recovery Village Palm Beach 0.07313 .
## col3Ridgefield 0.14504
## col3TRV-MD 0.83032
## col3Umatilla 0.01759 *
## col5 0.53227
## col8 1.5e-05 ***
## col9Development 0.13749
## col9Prospect 0.04020 *
## col18Clinical/Psychiatric 0.60909
## col18Detox 0.23099
## col18General 0.31554
## col18Inpatient Psych 0.53494
## col18Not applicable 0.92977
## col18Not Applicable 0.68543
## col18Outpatient 0.64363
## col18Physician 0.31070
## col18Psychiatry 0.86107
## col18Residential treatment 0.15063
## col18Specialty 0.22888
## col19Addiction;Eating Disorders 0.56818
## col19Addiction;Eating Disorders;Mental Health 0.47844
## col19Addiction;Eating Disorders;Mental Health;Gambling 0.90358
## col19Addiction;Eating Disorders;Mental Health;Other 0.79716
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling 0.75512
## col19Addiction;Eating Disorders;Other 0.42879
## col19Addiction;Mental Health 0.89773
## col19Addiction;Mental Health;Gambling 0.50840
## col19Addiction;Mental Health;Other 0.10255
## col19Addiction;Other 0.88603
## col19Eating Disorders 0.97686
## col19Eating Disorders;Mental Health 0.21783
## col19Eating Disorders;Mental Health;Other 0.66593
## col19Mental Health 0.68724
## col19Mental Health;Other 0.96571
## col19Not Applicable 0.79499
## col19Other 0.24332
## col14 0.49701
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.324 on 943 degrees of freedom
## Multiple R-squared: 0.1846, Adjusted R-squared: 0.1362
## F-statistic: 3.813 on 56 and 943 DF, p-value: < 2.2e-16
# Draw the first four lm diagnostic plots for the baseline model on a
# 2 x 2 grid. The grid layout is not reset afterwards, so it also applies
# to plots drawn later.
par(mfrow = c(2, 2))
plot(lmRefer, which = seq_len(4))
## Warning: not plotting observations with leverage one:
## 615, 866, 921, 928, 956
# Log transformation of the response variable.
# log(0) is -Inf, so rows with zero referrals cannot enter the log model.
log.Referrals <- log(complete.data$Referrals)
newdata <- cbind(complete.data, log.Referrals)

# Keep only rows with a finite log response and no missing cell. This is an
# explicit numeric test instead of comparing every cell of the data frame
# with the string "-Inf".
newdata <- newdata[
  is.finite(newdata$log.Referrals) & complete.cases(newdata),
  ,
  drop = FALSE
]

# Renumber rows 1..n so that positional indices and the row labels shown on
# diagnostic plots refer to the same observations.
rownames(newdata) <- NULL

# Sanity check: no missing cells should remain.
sum(is.na(newdata))
## [1] 0
# Open the log-transformed data in the viewer, then list the number of
# missing values per column.
view(newdata)
vapply(newdata, function(column) sum(is.na(column)), integer(1))
## ï..Id col1 col2 col3 col4
## 0 0 0 0 0
## col5 col8 col9 col14 col15
## 0 0 0 0 0
## col18 col19 Referrals log.Referrals
## 0 0 0 0
#write.csv(newdata, "logdata.csv")
# Same predictors as the baseline model, but with the log-transformed
# response and fitted on `newdata`. The formula stays inline because
# summary() echoes the call verbatim.
log.lmRefer <- lm(log.Referrals~ col1+col2+col3+col5+col8+col9+col18+col19+col14, data= newdata)
# Coefficient table, residual summary and fit statistics.
summary(log.lmRefer)
##
## Call:
## lm(formula = log.Referrals ~ col1 + col2 + col3 + col5 + col8 +
## col9 + col18 + col19 + col14, data = newdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.03689 -0.57092 -0.08289 0.44937 2.94913
##
## Coefficients:
## Estimate
## (Intercept) 4.306e-02
## col1Attorney / Judge / Probation / Legal Support -1.187e-01
## col1Behavioral Health 1.386e-01
## col1Counseling Group / Private Practice 5.312e-02
## col1Employer / EAP / Union 3.953e-01
## col1Hospital 4.888e-01
## col1Insurance Company 7.028e-01
## col1Interventionist -3.092e-01
## col1Media -1.605e-01
## col1Monitoring programs 4.009e-03
## col1Other 2.578e-01
## col1Sober Living 3.097e-01
## col1Treatment Program -2.835e-01
## col2Development -4.309e-01
## col2Prospect -4.646e-01
## col3Cherry Hill -5.955e-02
## col3Columbus -9.896e-03
## col3NGV 7.686e-01
## col3ORC 1.587e-01
## col3Palmer Lake 7.494e-02
## col3Recovery Village Palm Beach 2.853e-01
## col3Ridgefield 2.325e-01
## col3TRV-MD 2.967e-02
## col3Umatilla 3.042e-01
## col5 8.728e-05
## col8 1.974e-01
## col9Development 1.654e-01
## col9Prospect 4.892e-01
## col18Clinical/Psychiatric -3.101e-02
## col18Detox 8.426e-01
## col18General -3.312e-01
## col18Inpatient Psych 9.147e-02
## col18Not applicable 1.611e-01
## col18Not Applicable -5.855e-02
## col18Outpatient 6.746e-01
## col18Physician -1.054e-02
## col18Psychiatry 2.192e-02
## col18Residential treatment 8.936e-01
## col18Specialty -8.814e-02
## col19Addiction;Eating Disorders -6.518e-01
## col19Addiction;Eating Disorders;Mental Health -7.875e-02
## col19Addiction;Eating Disorders;Mental Health;Gambling 1.133e+00
## col19Addiction;Eating Disorders;Mental Health;Other 1.311e-01
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling -2.776e-05
## col19Addiction;Eating Disorders;Other 1.864e+00
## col19Addiction;Mental Health -8.573e-03
## col19Addiction;Mental Health;Gambling 4.395e-01
## col19Addiction;Mental Health;Other 4.086e-02
## col19Addiction;Other 4.677e-02
## col19Eating Disorders -3.927e-01
## col19Eating Disorders;Mental Health -5.111e-01
## col19Eating Disorders;Mental Health;Other 8.933e-01
## col19Mental Health 2.631e-02
## col19Mental Health;Other -1.076e-01
## col19Not Applicable -4.013e-02
## col19Other 2.107e-01
## col14 3.811e-03
## Std. Error t value
## (Intercept) 7.458e-01 0.058
## col1Attorney / Judge / Probation / Legal Support 5.839e-01 -0.203
## col1Behavioral Health 7.132e-01 0.194
## col1Counseling Group / Private Practice 6.682e-01 0.080
## col1Employer / EAP / Union 1.003e+00 0.394
## col1Hospital 6.690e-01 0.731
## col1Insurance Company 8.430e-01 0.834
## col1Interventionist 6.334e-01 -0.488
## col1Media 7.925e-01 -0.202
## col1Monitoring programs 6.697e-01 0.006
## col1Other 5.343e-01 0.483
## col1Sober Living 5.293e-01 0.585
## col1Treatment Program 6.551e-01 -0.433
## col2Development 1.560e-01 -2.763
## col2Prospect 9.066e-02 -5.125
## col3Cherry Hill 1.814e-01 -0.328
## col3Columbus 1.718e-01 -0.058
## col3NGV 4.787e-01 1.605
## col3ORC 1.655e-01 0.959
## col3Palmer Lake 1.683e-01 0.445
## col3Recovery Village Palm Beach 1.685e-01 1.694
## col3Ridgefield 1.794e-01 1.296
## col3TRV-MD 6.488e-01 0.046
## col3Umatilla 1.573e-01 1.933
## col5 4.809e-05 1.815
## col8 7.159e-02 2.757
## col9Development 1.667e-01 0.992
## col9Prospect 1.519e-01 3.221
## col18Clinical/Psychiatric 1.706e-01 -0.182
## col18Detox 3.677e-01 2.292
## col18General 4.130e-01 -0.802
## col18Inpatient Psych 4.202e-01 0.218
## col18Not applicable 2.931e-01 0.550
## col18Not Applicable 3.969e-01 -0.147
## col18Outpatient 3.560e-01 1.895
## col18Physician 2.229e-01 -0.047
## col18Psychiatry 2.495e-01 0.088
## col18Residential treatment 3.538e-01 2.525
## col18Specialty 5.945e-01 -0.148
## col19Addiction;Eating Disorders 6.367e-01 -1.024
## col19Addiction;Eating Disorders;Mental Health 1.383e-01 -0.569
## col19Addiction;Eating Disorders;Mental Health;Gambling 8.908e-01 1.272
## col19Addiction;Eating Disorders;Mental Health;Other 1.903e-01 0.689
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling 8.825e-01 0.000
## col19Addiction;Eating Disorders;Other 8.876e-01 2.101
## col19Addiction;Mental Health 9.415e-02 -0.091
## col19Addiction;Mental Health;Gambling 6.310e-01 0.697
## col19Addiction;Mental Health;Other 1.486e-01 0.275
## col19Addiction;Other 2.471e-01 0.189
## col19Eating Disorders 8.890e-01 -0.442
## col19Eating Disorders;Mental Health 3.479e-01 -1.469
## col19Eating Disorders;Mental Health;Other 8.807e-01 1.014
## col19Mental Health 1.647e-01 0.160
## col19Mental Health;Other 2.395e-01 -0.449
## col19Not Applicable 2.406e-01 -0.167
## col19Other 1.488e-01 1.417
## col14 4.226e-03 0.902
## Pr(>|t|)
## (Intercept) 0.95397
## col1Attorney / Judge / Probation / Legal Support 0.83892
## col1Behavioral Health 0.84595
## col1Counseling Group / Private Practice 0.93665
## col1Employer / EAP / Union 0.69366
## col1Hospital 0.46521
## col1Insurance Company 0.40470
## col1Interventionist 0.62558
## col1Media 0.83958
## col1Monitoring programs 0.99522
## col1Other 0.62949
## col1Sober Living 0.55863
## col1Treatment Program 0.66527
## col2Development 0.00585 **
## col2Prospect 3.62e-07 ***
## col3Cherry Hill 0.74281
## col3Columbus 0.95407
## col3NGV 0.10873
## col3ORC 0.33797
## col3Palmer Lake 0.65612
## col3Recovery Village Palm Beach 0.09069 .
## col3Ridgefield 0.19522
## col3TRV-MD 0.96354
## col3Umatilla 0.05354 .
## col5 0.06983 .
## col8 0.00595 **
## col9Development 0.32143
## col9Prospect 0.00132 **
## col18Clinical/Psychiatric 0.85580
## col18Detox 0.02215 *
## col18General 0.42283
## col18Inpatient Psych 0.82771
## col18Not applicable 0.58266
## col18Not Applicable 0.88277
## col18Outpatient 0.05839 .
## col18Physician 0.96229
## col18Psychiatry 0.93002
## col18Residential treatment 0.01172 *
## col18Specialty 0.88218
## col19Addiction;Eating Disorders 0.30625
## col19Addiction;Eating Disorders;Mental Health 0.56921
## col19Addiction;Eating Disorders;Mental Health;Gambling 0.20373
## col19Addiction;Eating Disorders;Mental Health;Other 0.49113
## col19Addiction;Eating Disorders;Mental Health;Other;Gambling 0.99997
## col19Addiction;Eating Disorders;Other 0.03595 *
## col19Addiction;Mental Health 0.92746
## col19Addiction;Mental Health;Gambling 0.48626
## col19Addiction;Mental Health;Other 0.78342
## col19Addiction;Other 0.84991
## col19Eating Disorders 0.65879
## col19Eating Disorders;Mental Health 0.14214
## col19Eating Disorders;Mental Health;Other 0.31069
## col19Mental Health 0.87317
## col19Mental Health;Other 0.65332
## col19Not Applicable 0.86759
## col19Other 0.15696
## col14 0.36743
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8671 on 934 degrees of freedom
## Multiple R-squared: 0.2694, Adjusted R-squared: 0.2256
## F-statistic: 6.151 on 56 and 934 DF, p-value: < 2.2e-16
# Draw the first four lm diagnostic plots for the log-response model on a
# 2 x 2 grid. The grid layout is not reset afterwards.
par(mfrow = c(2, 2))
plot(log.lmRefer, which = seq_len(4))
## Warning: not plotting observations with leverage one:
## 607, 858, 912, 919, 947
# Histograms of the response variable (Referrals) before and after the
# log transformation. hist() builds each plot title from the argument
# expression, so the calls are written out in full.
# Referrals on the full imputed data.
hist(complete.data$Referrals)
# Referrals on the rows kept for the log model.
hist(newdata$Referrals)
# Log-transformed Referrals.
hist(newdata$log.Referrals)
# Model selection: p-value based stepwise search starting from the full
# log-response model, with 0.05 passed as both `pent` and `prem`.
model.stepwise <- ols_step_both_p(
  log.lmRefer,
  pent = 0.05,
  prem = 0.05,
  details = FALSE
)
# Stepwise selection result (for the output layout see class notes, p. 26).
model.stepwise
##
## Stepwise Selection Summary
## ---------------------------------------------------------------------------------------
## Added/ Adj.
## Step Variable Removed R-Square R-Square C(p) AIC RMSE
## ---------------------------------------------------------------------------------------
## 1 col2 addition 0.135 0.134 118.2360 2645.7456 0.9171
## 2 col18 addition 0.189 0.178 52.0660 2604.6556 0.8933
## 3 col5 addition 0.206 0.195 31.8460 2585.1918 0.8842
## 4 col9 addition 0.226 0.213 8.2660 2563.8923 0.8738
## 5 col8 addition 0.232 0.218 3.3620 2558.9525 0.8712
## 6 col3 addition 0.247 0.227 -14.1360 2557.0855 0.8666
## ---------------------------------------------------------------------------------------
# Reduced model on the log response using col2, col5, col8, col9 and col18.
# NOTE(review): the stepwise summary printed earlier in this file also adds
# col3 at step 6, but col3 is not in this formula - confirm the omission is
# intentional.
final = lm (log.Referrals~col2+col5+col8+col9+col18 , newdata)
# Coefficient table, residual summary and fit statistics.
summary(final)
##
## Call:
## lm(formula = log.Referrals ~ col2 + col5 + col8 + col9 + col18,
## data = newdata)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9283 -0.5877 -0.1407 0.4925 3.1322
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.779e-01 1.904e-01 2.510 0.012249 *
## col2Development -4.834e-01 1.531e-01 -3.157 0.001644 **
## col2Prospect -4.188e-01 8.244e-02 -5.080 4.51e-07 ***
## col5 1.595e-04 3.666e-05 4.351 1.50e-05 ***
## col8 1.753e-01 6.705e-02 2.615 0.009063 **
## col9Development 9.364e-02 1.638e-01 0.572 0.567627
## col9Prospect 4.187e-01 1.489e-01 2.813 0.005009 **
## col18Clinical/Psychiatric -2.434e-02 1.693e-01 -0.144 0.885714
## col18Detox 4.638e-01 1.393e-01 3.328 0.000906 ***
## col18General 1.700e-01 1.080e-01 1.574 0.115802
## col18Inpatient Psych 5.289e-01 1.429e-01 3.702 0.000226 ***
## col18Not applicable 3.541e-01 1.543e-01 2.295 0.021930 *
## col18Not Applicable 9.225e-02 1.110e-01 0.831 0.406127
## col18Outpatient 3.160e-01 1.050e-01 3.011 0.002674 **
## col18Physician 1.451e-01 2.048e-01 0.708 0.479001
## col18Psychiatry 3.672e-02 2.445e-01 0.150 0.880654
## col18Residential treatment 5.560e-01 1.010e-01 5.504 4.75e-08 ***
## col18Specialty 4.318e-01 4.431e-01 0.974 0.330076
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8712 on 973 degrees of freedom
## Multiple R-squared: 0.2316, Adjusted R-squared: 0.2182
## F-statistic: 17.25 on 17 and 973 DF, p-value: < 2.2e-16
# First four lm diagnostic plots for the reduced model.
plot(final, which = 1:4)
# Final linear model, refitted after removing outliers.
# Rows 919, 876, 882, 953, 847 and 834 of `newdata` are excluded by position.
# NOTE(review): these positions are hard-coded; presumably they were read off
# the diagnostic plots of `final` - confirm they still identify the intended
# observations if the upstream data or imputation changes.
final2 = lm (log.Referrals~col2+col5+col8+col9+col18 , newdata[-c(919,876,882,953,847, 834),] )
# Coefficient table, residual summary and fit statistics.
summary(final2)
##
## Call:
## lm(formula = log.Referrals ~ col2 + col5 + col8 + col9 + col18,
## data = newdata[-c(919, 876, 882, 953, 847, 834), ])
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.9263 -0.5821 -0.1327 0.4824 3.1893
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.205e-01 1.937e-01 2.688 0.007315 **
## col2Development -5.273e-01 1.573e-01 -3.352 0.000833 ***
## col2Prospect -4.169e-01 8.269e-02 -5.042 5.49e-07 ***
## col5 1.577e-04 3.669e-05 4.298 1.89e-05 ***
## col8 1.644e-01 6.722e-02 2.446 0.014635 *
## col9Development 4.945e-02 1.676e-01 0.295 0.767964
## col9Prospect 3.746e-01 1.532e-01 2.446 0.014638 *
## col18Clinical/Psychiatric -1.429e-02 1.693e-01 -0.084 0.932728
## col18Detox 4.703e-01 1.393e-01 3.377 0.000763 ***
## col18General 1.747e-01 1.081e-01 1.617 0.106247
## col18Inpatient Psych 5.381e-01 1.429e-01 3.766 0.000176 ***
## col18Not applicable 3.822e-01 1.556e-01 2.457 0.014187 *
## col18Not Applicable 9.312e-02 1.114e-01 0.836 0.403246
## col18Outpatient 3.254e-01 1.051e-01 3.096 0.002017 **
## col18Physician 1.496e-01 2.046e-01 0.731 0.464740
## col18Psychiatry -2.987e-02 2.529e-01 -0.118 0.906018
## col18Residential treatment 5.699e-01 1.015e-01 5.617 2.55e-08 ***
## col18Specialty 4.380e-01 4.424e-01 0.990 0.322439
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8697 on 967 degrees of freedom
## Multiple R-squared: 0.2337, Adjusted R-squared: 0.2202
## F-statistic: 17.35 on 17 and 967 DF, p-value: < 2.2e-16
# First four lm diagnostic plots for the outlier-trimmed model.
plot(final2, which = 1:4)
# Conclusion: accounts with higher referral potential are characterised by
# the following factors:
#   col18 (ordered by estimated coefficient: Residential treatment >
#   Inpatient Psych > Detox > Outpatient) + Development and Prospect + col5