Read Data / Map and Impute Missing Values
# Load the raw data set from disk, then draw its missingness map.
mydata <- read.csv(file = "output.csv")
missmap(mydata)

# Keep the original column names; they are reused further down the script.
mynames <- colnames(mydata)
# Check columns for missing values: a[j] is the fraction of non-missing
# entries in column j. Computed in one vectorized pass instead of an index
# loop, which also avoids the 1:ncol() trap on a zero-column data frame.
a <- 1 - colSums(is.na(mydata)) / nrow(mydata)
names(a) <- mynames
# Completeness of the worst (most incomplete) column.
min(a)
## [1] 0.8897638
# Check rows for missing values: b[i] is the fraction of non-missing
# entries in row i. rowSums() replaces the per-row data-frame subsetting loop
# (slow, and 1:nrow() misbehaves on zero rows); unname() keeps b a plain
# unnamed numeric vector, as before.
b <- 1 - unname(rowSums(is.na(mydata))) / ncol(mydata)
# Completeness of the worst (most incomplete) row.
min(b)
## [1] 0.875
# Impute: replace the missing values of every numeric column with that
# column's median; non-numeric columns are left untouched.
# Columns are processed with lapply() instead of apply(): apply() first
# coerces the data frame to a matrix, so a single non-numeric column would
# turn every column into character and the is.numeric() test would then skip
# the imputation for all of them.
mydata[] <- lapply(mydata, function(x) {
  if (is.numeric(x)) {
    x[is.na(x)] <- median(x, na.rm = TRUE)
  }
  x
})
colnames(mydata) <- mynames
# Re-draw the missingness map to confirm the effect of the imputation.
missmap(mydata)

10-Fold CV for Each Linear Model
Income
set.seed(1234)
# 10-fold cross-validation (a single pass, not repeated)
train.control <- trainControl(method = "cv", number = 10)
# Fit newInc on all remaining columns of mydata2 after dropping columns 2-6
# (presumably the other outcome variables -- confirm), tuning nvmax over 1..20
step.model <- train(
  newInc ~ .,
  data = mydata2[, -(2:6)],
  method = "leapSeq",
  tuneGrid = data.frame(nvmax = seq_len(20)),
  trControl = train.control
)
# Cross-validated performance for each candidate nvmax
step.model$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.06337933 0.05088840 0.02508772 0.01351051 0.02750524 0.004089769
## 2 2 0.06254159 0.07649263 0.02652763 0.01340660 0.03392418 0.003997721
## 3 3 0.06218674 0.09067744 0.02704355 0.01319934 0.03963595 0.003831080
## 4 4 0.06186542 0.10206007 0.02705059 0.01350053 0.04815626 0.003318599
## 5 5 0.06185193 0.10679781 0.02734402 0.01277887 0.07821420 0.003713093
## 6 6 0.06138884 0.11960984 0.02708057 0.01293413 0.07056798 0.003522467
## 7 7 0.06124917 0.12366546 0.02739086 0.01280936 0.07062142 0.003504298
## 8 8 0.06149263 0.11410057 0.02724758 0.01248966 0.07050069 0.003357073
## 9 9 0.06113702 0.11972154 0.02687074 0.01272441 0.08061468 0.003755214
## 10 10 0.06068929 0.13876630 0.02698294 0.01259605 0.07140145 0.003319137
## 11 11 0.06038730 0.14569580 0.02678205 0.01260708 0.07049279 0.003373226
## 12 12 0.06113337 0.13050370 0.02692345 0.01350216 0.06948921 0.003221270
## 13 13 0.06067845 0.13975979 0.02708276 0.01245987 0.06808505 0.003132562
## 14 14 0.06069373 0.13858708 0.02684828 0.01236375 0.06878853 0.003320041
## 15 15 0.06078112 0.13815382 0.02723188 0.01223712 0.06650399 0.002966820
## 16 16 0.06130175 0.12111546 0.02693621 0.01200045 0.07188331 0.003323332
## 17 17 0.06079103 0.13876457 0.02732045 0.01235390 0.06277989 0.003187588
## 18 18 0.06127276 0.12263323 0.02715091 0.01211386 0.07238264 0.003134451
## 19 19 0.06149454 0.11898218 0.02745652 0.01192686 0.06865479 0.003180263
## 20 20 0.06094786 0.13342239 0.02741879 0.01210056 0.06729249 0.003234711
# nvmax value selected by cross-validation
step.model[["bestTune"]]
## nvmax
## 11 11
# Coefficients of the final model at the selected subset size
coef(step.model$finalModel, id = step.model$bestTune[["nvmax"]])
## (Intercept) Beds newSM CMI
## 0.342043769 -0.005771568 0.026661254 0.450321545
## newPM `Beds x Rural` `Beds x Reg9` `newSM x Reg9`
## -0.015545662 0.004405962 0.014170739 -0.167438735
## `ALOS x Rural` `ALOS x Reg9` `newPM x Reg9` `WageIndex x Reg9`
## -0.041138495 -0.386075316 0.065567102 0.275822395
TPS
set.seed(1234)
# 10-fold cross-validation (a single pass, not repeated)
train.control <- trainControl(method = "cv", number = 10)
# Fit TPS on all remaining columns of mydata2 after dropping columns 1 and 3-6
# (presumably the other outcome variables -- confirm), tuning nvmax over 1..20
step.model <- train(
  TPS ~ .,
  data = mydata2[, -c(1, 3:6)],
  method = "leapSeq",
  tuneGrid = data.frame(nvmax = seq_len(20)),
  trControl = train.control
)
# Cross-validated performance for each candidate nvmax
step.model$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.1049875 0.1588237 0.08393728 0.003982696 0.03555325 0.003739611
## 2 2 0.1044702 0.1686340 0.08322572 0.003888003 0.04289840 0.003844049
## 3 3 0.1037726 0.1799394 0.08257683 0.004013501 0.04668131 0.004121590
## 4 4 0.1028722 0.1949184 0.08157032 0.003807809 0.04925535 0.003907439
## 5 5 0.1022623 0.2035423 0.08108620 0.003671524 0.04577021 0.003651414
## 6 6 0.1016187 0.2137826 0.08064635 0.003964305 0.05248264 0.003526745
## 7 7 0.1005286 0.2305791 0.07973453 0.003864008 0.05390842 0.003768836
## 8 8 0.1004624 0.2317096 0.07966492 0.003918622 0.05357196 0.003818184
## 9 9 0.1006727 0.2285872 0.07989693 0.004007623 0.05502750 0.003802153
## 10 10 0.1006933 0.2278324 0.07978928 0.003866641 0.05451018 0.003675523
## 11 11 0.1005056 0.2313238 0.07972890 0.003954991 0.05562014 0.003790393
## 12 12 0.1008647 0.2254434 0.07995371 0.004046001 0.05439820 0.003954410
## 13 13 0.1007141 0.2276032 0.07982335 0.003813179 0.05222027 0.003822405
## 14 14 0.1005985 0.2292819 0.07972021 0.004080492 0.05572930 0.003836121
## 15 15 0.1007358 0.2275879 0.07990736 0.003718521 0.05202006 0.003690953
## 16 16 0.1006151 0.2294832 0.07979739 0.003685875 0.04988525 0.003611988
## 17 17 0.1000671 0.2372322 0.07927840 0.003715256 0.04482358 0.003685937
## 18 18 0.1004648 0.2319298 0.07965504 0.003768595 0.04980976 0.003683276
## 19 19 0.1005432 0.2310629 0.07975866 0.003875286 0.05035351 0.003713640
## 20 20 0.1004355 0.2326718 0.07980223 0.003632418 0.04687676 0.003613942
# Coefficients of the final TPS model at the CV-selected subset size
coef(step.model$finalModel, id = step.model$bestTune[["nvmax"]])
## (Intercept) Beds newSM
## -0.418387053 0.007307671 0.091663288
## ALOS CMI newPM
## 0.482669757 0.488774050 -0.049040894
## WageIndex `Beds x Meditech` `Beds x Rural`
## -0.063990657 0.001092747 0.007631338
## `Beds x Reg3` `newSM x ForProfit` `newSM x Rural`
## 0.002642905 -0.027946812 -0.077922633
## `newSM x Reg9` `ALOS x Reg6` `newPM x Government`
## 0.050814508 0.354450897 -0.021466604
## `WageIndex x Reg6` `MCI x Epic` `OccRate x Reg5`
## -0.280453248 0.011872304 0.030826596
Experience
set.seed(1234)
# 10-fold cross-validation (a single pass, not repeated)
train.control <- trainControl(method = "cv", number = 10)
# Fit newExp on all remaining columns of mydata2 after dropping columns 1-2
# and 4-6 (presumably the other outcome variables -- confirm), tuning nvmax
# over 1..20
step.model <- train(
  newExp ~ .,
  data = mydata2[, -c(1:2, 4:6)],
  method = "leapSeq",
  tuneGrid = data.frame(nvmax = seq_len(20)),
  trControl = train.control
)
# Cross-validated performance for each candidate nvmax
step.model$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.02018444 0.1712626 0.01647977 0.0005127054 0.04288217 0.0005840029
## 2 2 0.01976529 0.2052349 0.01605004 0.0005508138 0.05213223 0.0004730902
## 3 3 0.01971466 0.2097451 0.01609437 0.0006048868 0.04262881 0.0005529930
## 4 4 0.01933531 0.2386309 0.01581346 0.0006850437 0.04729766 0.0007034550
## 5 5 0.01914721 0.2534828 0.01562834 0.0005781421 0.03372460 0.0005133457
## 6 6 0.01873984 0.2850548 0.01525457 0.0005765316 0.04347826 0.0005749302
## 7 7 0.01877641 0.2821126 0.01535509 0.0004944802 0.03974461 0.0004652419
## 8 8 0.01869113 0.2889294 0.01523926 0.0006113331 0.04389080 0.0005172970
## 9 9 0.01845900 0.3070218 0.01504761 0.0006023304 0.04292051 0.0005028743
## 10 10 0.01843801 0.3084245 0.01502899 0.0006452034 0.03993803 0.0005444998
## 11 11 0.01839990 0.3116028 0.01499386 0.0006471406 0.04110672 0.0005447250
## 12 12 0.01836105 0.3147364 0.01499998 0.0006278322 0.04064790 0.0005220325
## 13 13 0.01833622 0.3165657 0.01495287 0.0005789362 0.03801570 0.0005042727
## 14 14 0.01832275 0.3176548 0.01493798 0.0005943150 0.03797132 0.0004978840
## 15 15 0.01832475 0.3181169 0.01493779 0.0006359453 0.04023735 0.0005150009
## 16 16 0.01830755 0.3190170 0.01490814 0.0006435693 0.03865990 0.0005304033
## 17 17 0.01826973 0.3214542 0.01489155 0.0006513821 0.03751527 0.0005283820
## 18 18 0.01821644 0.3252658 0.01484081 0.0006819822 0.04048289 0.0005516012
## 19 19 0.01825776 0.3220651 0.01488852 0.0006203112 0.03730098 0.0005137048
## 20 20 0.01831175 0.3185673 0.01490143 0.0006366642 0.03867286 0.0005710548
# Coefficients of the final newExp model at the CV-selected subset size
coef(step.model$finalModel, id = step.model$bestTune[["nvmax"]])
## (Intercept) Beds newSM
## 1.5046868410 -0.0018617217 -0.0205850649
## ALOS CMI newPM
## -0.0878971378 -0.3427621276 0.0193497270
## WageIndex `Beds x Teaching` `Beds x Reg9`
## -0.0155805048 -0.0008012096 0.0009901013
## `newSM x Reg2` `newSM x Reg6` `newSM x Reg8`
## 0.0101229751 0.0073256732 0.0137595717
## `ALOS x Epic` `newPM x Teaching` `MCI x Meditech`
## -0.0288076315 -0.0130804232 -0.0024135614
## `FacYears x Epic` `FacYears x Teaching` `OccRate x ForProfit`
## 0.0225230614 0.0140383289 0.0129826413
## `OccRate x Reg3`
## 0.0068731235
Clinical
set.seed(1234)
# 10-fold cross-validation (a single pass, not repeated)
train.control <- trainControl(method = "cv", number = 10)
# Fit newClin on all remaining columns of mydata2 after dropping columns 1-3
# and 5-6 (presumably the other outcome variables -- confirm), tuning nvmax
# over 1..20
step.model <- train(
  newClin ~ .,
  data = mydata2[, -c(1:3, 5:6)],
  method = "leapSeq",
  tuneGrid = data.frame(nvmax = seq_len(20)),
  trControl = train.control
)
# Cross-validated performance for each candidate nvmax
step.model$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.2008758 0.02616511 0.1635243 0.005500577 0.017329456 0.003959042
## 2 2 0.2009884 0.02476968 0.1637870 0.005456628 0.013863242 0.003750064
## 3 3 0.2001447 0.03268036 0.1628341 0.005185448 0.016496440 0.003517651
## 4 4 0.1998494 0.03574490 0.1626405 0.004992899 0.015855497 0.003436334
## 5 5 0.2007366 0.02966408 0.1631567 0.005855216 0.016123321 0.004066201
## 6 6 0.2004992 0.03150236 0.1629887 0.005674274 0.014877188 0.004015944
## 7 7 0.2005045 0.03132168 0.1629272 0.005067188 0.008150529 0.003424087
## 8 8 0.2013579 0.02643455 0.1636698 0.005109488 0.009964339 0.003822056
## 9 9 0.1999964 0.03679491 0.1626962 0.005048849 0.013142670 0.003833792
## 10 10 0.2006043 0.03198104 0.1629438 0.005257756 0.017120556 0.003406679
## 11 11 0.2001371 0.03675210 0.1626568 0.005040284 0.016985619 0.003295027
## 12 12 0.2008149 0.03074924 0.1631446 0.005480125 0.015143833 0.003877588
## 13 13 0.2004002 0.03629325 0.1627043 0.005246304 0.020500867 0.003666473
## 14 14 0.1999211 0.04101404 0.1623026 0.005355077 0.020653788 0.003517661
## 15 15 0.1992772 0.04374448 0.1618034 0.004911238 0.015831531 0.003658389
## 16 16 0.1999777 0.03956874 0.1624418 0.005427388 0.015879184 0.003701248
## 17 17 0.1994765 0.04329568 0.1618934 0.005192588 0.017106923 0.004057182
## 18 18 0.1997738 0.04374339 0.1621731 0.004981008 0.019987326 0.003900872
## 19 19 0.1996767 0.04350515 0.1624131 0.005137622 0.021451426 0.004209486
## 20 20 0.1996012 0.04600585 0.1622475 0.005231439 0.021493128 0.004221769
# Coefficients of the final newClin model at the CV-selected subset size
coef(step.model$finalModel, id = step.model$bestTune[["nvmax"]])
## (Intercept) Beds newPM
## 0.692225874 0.006299859 -0.055797931
## `Beds x Reg5` `Beds x Reg8` `newSM x Cerner`
## 0.004615025 -0.003563778 0.052325831
## `newSM x Meditech` `ALOS x ForProfit` `CMI x Meditech`
## -0.167690759 0.505760127 0.157672079
## `newPM x Government` `newPM x Reg4` `MCI x Epic`
## -0.043115738 0.039003599 0.119443284
## `FacYears x ForProfit` `OccRate x Epic` `OccRate x System`
## -0.410706879 -0.153512291 0.043904739
## `OccRate x Reg3`
## 0.071129445
Efficiency
set.seed(1234)
# 10-fold cross-validation (a single pass, not repeated)
train.control <- trainControl(method = "cv", number = 10)
# Fit newEff on all remaining columns of mydata2 after dropping columns 1-4
# and 6 (presumably the other outcome variables -- confirm), tuning nvmax
# over 1..20
step.model <- train(
  newEff ~ .,
  data = mydata2[, -c(1:4, 6)],
  method = "leapSeq",
  tuneGrid = data.frame(nvmax = seq_len(20)),
  trControl = train.control
)
# Cross-validated performance for each candidate nvmax
step.model$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.2896881 0.1461183 0.2518221 0.004597757 0.03985004 0.004190876
## 2 2 0.2791169 0.2089165 0.2395452 0.007483415 0.04614344 0.005798035
## 3 3 0.2758219 0.2257484 0.2362237 0.008238920 0.03699060 0.005755572
## 4 4 0.2787384 0.2071226 0.2387260 0.006733130 0.05908776 0.006666165
## 5 5 0.2710157 0.2523813 0.2313515 0.008125217 0.04112329 0.005786787
## 6 6 0.2681152 0.2684379 0.2285577 0.007646833 0.03391608 0.005736668
## 7 7 0.2644257 0.2879875 0.2243439 0.008120642 0.04431906 0.006252417
## 8 8 0.2623097 0.2994571 0.2217370 0.007017347 0.03712658 0.005462761
## 9 9 0.2637761 0.2908695 0.2231125 0.007020246 0.04605848 0.005929322
## 10 10 0.2621553 0.3010400 0.2210045 0.007322946 0.04129590 0.004980015
## 11 11 0.2611356 0.3056395 0.2196128 0.008120073 0.04543594 0.005982147
## 12 12 0.2599665 0.3127366 0.2181540 0.008347903 0.04274689 0.005946791
## 13 13 0.2608161 0.3076404 0.2189445 0.006952045 0.04329522 0.005302009
## 14 14 0.2586713 0.3193464 0.2167223 0.006516284 0.03462023 0.005493978
## 15 15 0.2602066 0.3115068 0.2185924 0.008125777 0.03769146 0.007228833
## 16 16 0.2589276 0.3178559 0.2172832 0.008155533 0.03326989 0.008175711
## 17 17 0.2570889 0.3285036 0.2152999 0.006543176 0.03166285 0.004780642
## 18 18 0.2577514 0.3236899 0.2170187 0.007177841 0.04458697 0.006382691
## 19 19 0.2573275 0.3269796 0.2162180 0.007655942 0.03807273 0.006235361
## 20 20 0.2591979 0.3166704 0.2181587 0.007721064 0.03840690 0.007097640
# Coefficients of the final newEff model at the CV-selected subset size
coef(step.model$finalModel, id = step.model$bestTune[["nvmax"]])
## (Intercept) Beds newSM
## 2.805720633 -0.011980125 -0.260982612
## ALOS newPM MCI
## -1.032551033 0.160350286 0.184034621
## `Beds x Reg3` `Beds x Reg6` `Beds x Reg8`
## -0.026091801 -0.014801077 -0.009382342
## `newSM x Reg3` `newSM x Reg9` `newPM x Cerner`
## 0.190703529 -0.465441186 -0.062073946
## `newPM x Reg2` `FacYears x Reg2` `OccRate x Epic`
## -0.270028926 0.139925023 -0.083224043
## `OccRate x Rural` `OccRate x Community` `OccRate x Reg7`
## -0.088878214 -0.115834318 0.138361167
Safety
set.seed(1234)
# 10-fold cross-validation (a single pass, not repeated)
train.control <- trainControl(method = "cv", number = 10)
# Fit newSaf on all remaining columns of mydata2 after dropping columns 1-5
# (presumably the other outcome variables -- confirm), tuning nvmax over 1..20
step.model <- train(
  newSaf ~ .,
  data = mydata2[, -(1:5)],
  method = "leapSeq",
  tuneGrid = data.frame(nvmax = seq_len(20)),
  trControl = train.control
)
# Cross-validated performance for each candidate nvmax
step.model$results
## nvmax RMSE Rsquared MAE RMSESD RsquaredSD MAESD
## 1 1 0.08775669 0.2226421 0.07067807 0.003532428 0.03405369 0.003069550
## 2 2 0.08696297 0.2369158 0.07017783 0.003667345 0.03747332 0.003228776
## 3 3 0.08673580 0.2407750 0.06995282 0.003667016 0.03602798 0.002952153
## 4 4 0.08650140 0.2450084 0.06970781 0.003744367 0.03779273 0.002870363
## 5 5 0.08623889 0.2496304 0.06944704 0.003816576 0.03938725 0.002861463
## 6 6 0.08646313 0.2458932 0.06952543 0.003809662 0.04064248 0.002756662
## 7 7 0.08652457 0.2447902 0.06960816 0.003736983 0.03928872 0.002686125
## 8 8 0.08670762 0.2419512 0.06978644 0.003899450 0.03996981 0.002652550
## 9 9 0.08671010 0.2417094 0.06964333 0.003782482 0.03759326 0.002626920
## 10 10 0.08652062 0.2448073 0.06959240 0.003670674 0.03657175 0.002630034
## 11 11 0.08658466 0.2440191 0.06951006 0.003813752 0.04102740 0.002570868
## 12 12 0.08639254 0.2474105 0.06930046 0.004079982 0.04341994 0.002949906
## 13 13 0.08637376 0.2473713 0.06943799 0.003570458 0.03594095 0.002414970
## 14 14 0.08660754 0.2434167 0.06954976 0.003680074 0.03625526 0.002620454
## 15 15 0.08662135 0.2432990 0.06972208 0.003767579 0.03703409 0.002638549
## 16 16 0.08654742 0.2449088 0.06957657 0.003773152 0.03594186 0.002430941
## 17 17 0.08649161 0.2456478 0.06946083 0.003769825 0.03471866 0.002414587
## 18 18 0.08642627 0.2468937 0.06940148 0.003801986 0.03561557 0.002636054
## 19 19 0.08667860 0.2435339 0.06960308 0.003935021 0.04137298 0.002698640
## 20 20 0.08636362 0.2483559 0.06937176 0.003889155 0.03954342 0.002716226
# Coefficients of the final newSaf model at the CV-selected subset size
coef(step.model$finalModel, id = step.model$bestTune[["nvmax"]])
## (Intercept) Beds ALOS CMI
## 1.236123911 0.007098488 0.363654562 -0.893612308
## OccRate `WageIndex x Rural`
## -0.045735686 0.011917750