Read Data and Convert Strings to Factors

setwd("C:/users/lfult/Desktop/CAHME")
mydata=read.csv("cleanedupdate.csv", fileEncoding="UTF-8-BOM")

for (i in 7:14){mydata[,i]=as.factor(mydata[,i])}

for (i in 15:17){
mydata[,i]=as.factor(mydata[,i])
mydata[,i]=factor(mydata[,i],levels(mydata[,i])[c(1,3,4,2,5)])
}

Basic Missing Analysis

missmap(mydata)

Basic Descriptives

library(psych)
describe(mydata[, c(6:34)])
##              vars   n       mean         sd    median   trimmed       mad
## TPS             1 199      36.06       8.61     35.50     35.76      8.27
## CEO_CAHME*      2 199       1.77       0.92      1.00      1.72      0.00
## COO_CAHME*      3 199       1.93       0.67      2.00      1.92      0.00
## CFO_CAHME*      4 199       1.45       0.62      1.00      1.35      0.00
## CEO_FACHE*      5 199       1.87       0.96      1.00      1.84      0.00
## COO_FACHE*      6 199       1.76       0.64      2.00      1.71      0.00
## CFO_FACHE*      7 199       1.38       0.62      1.00      1.27      0.00
## Any_CAHME*      8 199       1.49       0.50      1.00      1.49      0.00
## Any_FACHE*      9 199       1.50       0.50      2.00      1.50      0.00
## CEO_Tenure*    10 199       2.04       1.34      1.00      1.83      0.00
## COO_Tenure*    11 199       3.56       1.74      5.00      3.69      0.00
## CFO_Tenure*    12 199       2.94       1.68      3.00      2.93      2.97
## NumCAHME       13 199       0.59       0.67      0.00      0.50      0.00
## NumFACHE       14 199       0.58       0.64      1.00      0.50      1.48
## Any            15 199       0.76       0.43      1.00      0.83      0.00
## Pop2020        16 199 1196559.63 1493919.49 423163.00 919768.66 584105.85
## Native         17 199       0.00       0.00      0.00      0.00      0.00
## Hispanic       18 199       0.38       0.22      0.34      0.34      0.15
## Black          19 199       0.11       0.07      0.09      0.11      0.09
## Prop65         20 199       0.13       0.04      0.12      0.13      0.02
## PopDensity     21 199   22299.12   38486.13   5453.71  13412.09   6696.45
## UE2019         22 199       3.57       0.94      3.30      3.41      0.59
## HH2018         23 199   59678.80   15568.71  59838.00  58018.46  14418.28
## AdultObesity   24 199      31.12       4.64     30.00     30.82      3.41
## Cancer         25 199       7.58       0.92      7.70      7.67      1.00
## COPD           26 199      11.45       2.63     10.76     11.22      2.44
## Diabetes       27 199      29.54       4.85     28.68     28.86      2.48
## HeartFail      28 199      15.86       2.87     15.59     15.67      1.96
## CMI            29 199       1.74       0.31      1.82      1.76      0.29
##                   min        max      range  skew kurtosis        se
## TPS             15.50      66.67      51.17  0.39     0.10      0.61
## CEO_CAHME*       1.00       3.00       2.00  0.46    -1.66      0.07
## COO_CAHME*       1.00       3.00       2.00  0.07    -0.77      0.05
## CFO_CAHME*       1.00       3.00       2.00  1.04     0.00      0.04
## CEO_FACHE*       1.00       3.00       2.00  0.26    -1.87      0.07
## COO_FACHE*       1.00       3.00       2.00  0.24    -0.68      0.05
## CFO_FACHE*       1.00       3.00       2.00  1.36     0.71      0.04
## Any_CAHME*       1.00       2.00       1.00  0.03    -2.01      0.04
## Any_FACHE*       1.00       2.00       1.00 -0.01    -2.01      0.04
## CEO_Tenure*      1.00       5.00       4.00  1.01    -0.40      0.10
## COO_Tenure*      1.00       5.00       4.00 -0.53    -1.56      0.12
## CFO_Tenure*      1.00       5.00       4.00  0.06    -1.69      0.12
## NumCAHME         0.00       2.00       2.00  0.68    -0.64      0.05
## NumFACHE         0.00       2.00       2.00  0.62    -0.61      0.05
## Any              0.00       1.00       1.00 -1.23    -0.48      0.03
## Pop2020       7306.00 4713325.00 4706019.00  1.34     0.62 105901.14
## Native           0.00       0.01       0.01  2.39     7.07      0.00
## Hispanic         0.07       0.99       0.92  1.20     0.85      0.02
## Black            0.00       0.34       0.34  0.33    -0.80      0.01
## Prop65           0.10       0.30       0.21  1.82     4.15      0.00
## PopDensity     856.99  246914.13  246057.14  2.95    10.33   2728.21
## UE2019           2.10       9.80       7.70  2.70    11.10      0.07
## HH2018       30490.00  102858.00   72368.00  0.91     0.21   1103.64
## AdultObesity    21.80      47.30      25.50  0.79     0.91      0.33
## Cancer           4.38       9.09       4.71 -0.85     0.44      0.07
## COPD             7.29      18.89      11.60  0.78     0.04      0.19
## Diabetes        19.65      47.15      27.51  1.49     2.56      0.34
## HeartFail       10.35      27.96      17.62  0.81     1.78      0.20
## CMI              0.98       2.23       1.25 -0.59    -0.54      0.02

Stacked Bar Plots for FACHE / CAHME by Leader

a=table(mydata$CEO_CAHME)/length(mydata$CEO_CAHME)
for (i in 8:12){ a=c(a,table(mydata[,i])/length(mydata[,i]))}
Proportion=as.vector(a)
Level=rep(c("No", "Unknown","Yes"),6)
Question=c(rep(c("CEO CAHME"),3), rep(c("COO CAHME"),3),
    rep(c("CFO CAHME"),3), rep(c("CEO FACHE"),3),
    rep(c("COO FACHE"),3), rep(c("CFO FACHE"),3))
mydf=data.frame(Proportion, Level, Question)

mydf$Proportion=round(mydf$Proportion,2)

ggplot(mydf, aes(fill=Level, y=Proportion, x=Question, label=Proportion))+
    geom_bar(position="stack", stat="identity")+
    geom_text(size = 3, position = position_stack(vjust = 0.5))

  #plot(myt, col=terrain.colors(5), las=3, main=myname)

Stacked Bar Plots for Tenure by Leader

a=table(mydata$CEO_Tenure)/length(mydata$CEO_Tenure)
for (i in 16:17){ a=c(a,table(mydata[,i])/length(mydata[,i]))}
Proportion=as.vector(a)
Level=rep(c("0 to 3 Y", "4 to 6 Y","7 to 9 Y", "10+ Y", "Unknown"),3)
Question=c(rep(c("CEO Tenure"),5), rep(c("COO Tenure"),5),
    rep(c("CFO Tenure"),5))
mydf=data.frame(Proportion, Level, Question)
mydf$Proportion=round(mydf$Proportion, 2)


ggplot(mydf, aes(fill=Level, y=Proportion, x=Question, label=Proportion))+
    geom_bar(position="stack", stat="identity")+
    scale_fill_brewer(palette="Dark2")+
    geom_text(size = 3, position = position_stack(vjust = 0.5))

  #plot(myt, col=terrain.colors(5), las=3, main=myname)

a
##     0 to 3     4 to 6     7 to 9 10 or more    Unknown     0 to 3     4 to 6 
## 0.52261307 0.20100503 0.07035176 0.13065327 0.07537688 0.23618090 0.12562814 
##     7 to 9 10 or more    Unknown     0 to 3     4 to 6     7 to 9 10 or more 
## 0.03015075 0.06030151 0.54773869 0.33165829 0.14070352 0.08542714 0.14070352 
##    Unknown 
## 0.30150754

Pairwise Boxplots of TPS / PM by Any FACHE or CAHME

par(mfrow=c(2,2))
boxplot(mydata$ProfitMargin~mydata$Any_FACHE, horizontal=TRUE, ylab="Any FACHE Leader?", xlab="Profit Margin", main="Profit Margin without Extreme Outliers", ylim=c(-.5,.5), notch=TRUE, col=c("red", "blue", "green"))
boxplot(mydata$ProfitMargin~mydata$Any_CAHME, horizontal=TRUE, ylab="CAHME Program?", xlab="Profit Margin", main="Profit Margin without Extreme Outliers", ylim=c(-.5,.5), notch=TRUE, col=c("red", "blue", "green"))


boxplot(mydata$TPS~mydata$Any_FACHE, horizontal=TRUE, ylab="Any FACHE Leader?", xlab="TPS", main="Total Performance Score (TPS)", notch=TRUE, col=c("red", "blue", "green"))
boxplot(mydata$TPS~mydata$Any_CAHME, horizontal=TRUE, ylab="CAHME Program?", xlab="TPS", main="Total Performance Score (TPS)", notch=TRUE, col=c("red", "blue", "green"))

Pairwise Boxplots of TPS / PM by Number FACHE or CAHME

par(mfrow=c(2,2))
boxplot(mydata$ProfitMargin~mydata$NumFACHE, horizontal=TRUE, ylab="Number of FACHE Leaders", xlab="Profit Margin", main="Profit Margin without Extreme Outliers", ylim=c(-.5,.5), notch=TRUE, col=c("red", "blue", "green"))
boxplot(mydata$ProfitMargin~mydata$NumCAHME, horizontal=TRUE, ylab="CAHME Program?", xlab="Profit Margin", main="Profit Margin without Extreme Outliers", ylim=c(-.5,.5), notch=TRUE, col=c("red", "blue", "green"))
boxplot(mydata$TPS~mydata$NumFACHE, horizontal=TRUE, ylab="Number of FACHE Leaders", xlab="TPS", main="Total Performance Score (TPS)", notch=TRUE, col=c("red", "blue", "green"))
boxplot(mydata$TPS~mydata$NumCAHME, horizontal=TRUE, ylab="CAHME Program?", xlab="TPS", main="Total Performance Score (TPS)", notch=TRUE, col=c("red", "blue", "green"))
## Warning in bxp(list(stats = structure(c(15.5, 31.67, 36.04, 43.33, 56.67, : some
## notches went outside hinges ('box'): maybe set notch=FALSE

# Pairwise Boxplots of TPS / PM by Any Program / Individual Certification

par(mfrow=c(2,1))
boxplot(mydata$ProfitMargin~mydata$Any, horizontal=TRUE, ylab="Any Program/Personal Cert?", xlab="Profit Margin", main="Profit Margin without Extreme Outliers", ylim=c(-.5,.5), notch=TRUE, col=c("red", "blue", "green"))


boxplot(mydata$TPS~mydata$Any, horizontal=TRUE, ylab="Any Program/Personal Cert?", xlab="TPS", main="Total Performance Score (TPS)", notch=TRUE, col=c("red", "blue", "green"))
## Warning in bxp(list(stats = structure(c(20, 35.375, 37.75, 46.23, 56.67, : some
## notches went outside hinges ('box'): maybe set notch=FALSE

# Normality test for PM and TPS (minus outliers)

shapiro.test(mydata[-121,6])
## 
##  Shapiro-Wilk normality test
## 
## data:  mydata[-121, 6]
## W = 0.98737, p-value = 0.07526
shapiro.test(mydata[-100,6])
## 
##  Shapiro-Wilk normality test
## 
## data:  mydata[-100, 6]
## W = 0.98627, p-value = 0.05161

Data Prep 1

set.seed(1234)
mydata1=mydata[,-c(1:4,6)] #ProfitMargin
mydata2=mydata[,-c(1:5)] #TPS
mydata1=dummy_cols(mydata1)
mydata2=dummy_cols(mydata2)
mydata1$CEO_CAHME_Unknown=mydata1$CEO_FACHE_Unknown=
  mydata1$COO_CAHME_Unknown=mydata1$COO_FACHE_Unknown=
  mydata1$CFO_CAHME_Unknown=mydata1$CFO_FACHE_Unknown=
  mydata1$CEO_Tenure_Unknown=mydata1$COO_Tenure_Unknown=
  mydata1$CFO_Tenure_Unknown=NULL

mydata2$CEO_CAHME_Unknown=mydata2$CEO_FACHE_Unknown=
  mydata2$COO_CAHME_Unknown=mydata2$COO_FACHE_Unknown=
  mydata2$CFO_CAHME_Unknown=mydata2$CFO_FACHE_Unknown=
  mydata2$CEO_Tenure_Unknown=mydata2$COO_Tenure_Unknown=
  mydata2$CFO_Tenure_Unknown=NULL

Data Prep 2

mytrain1=model.matrix(ProfitMargin~., mydata1)[,-1]
mytrain2=model.matrix(TPS~., mydata2)[,-1]
mytrain1=mytrain1[, -c(44:72)]
mytrain2=mytrain2[, -c(44:72)]

for (i in 27:43){
  
  mytrain1[,i]=as.numeric(scale(mytrain1[,i]))
  mytrain2[,i]=as.numeric(scale(mytrain2[,i]))
  }

y1=as.numeric(scale(mydata1$ProfitMargin))
y2=as.numeric(scale(mydata2$TPS))

Model 1 through Model 3, Profit Margin (EN, Lasso, Ridge)

#Elastic Net

model1 <- train(
  x=mytrain1, y=y1,
  method = "glmnet",
  trControl = trainControl("LOOCV"),
  tuneLength = 10
)

lambda <- 10^seq(-3, 3, length = 100)

#Lasso
model2<- train(
  x=mytrain1, y=y1, method = "glmnet",
  trControl = trainControl("LOOCV"),
  tuneGrid = expand.grid(alpha = 1, lambda = lambda)
  )

#Ridge
model3 <- train(
  x=mytrain1, y=y1,method = "glmnet",
  trControl = trainControl("LOOCV"),
  tuneGrid = expand.grid(alpha = 0, lambda = lambda)
  )


# Make predictions on the test data
predictions1 =predict(model1, mytrain1)
predictions2 <- predict(model2, mytrain1)
predictions3 <- predict(model3, mytrain1)
COEF1=coef(model1$finalModel,model1$bestTune$lambda)
COEF2=coef(model2$finalModel,model2$bestTune$lambda)
COEF3=coef(model3$finalModel,model3$bestTune$lambda)


EN1=c(RMSE(predictions1,y1), 1-(1-R2(predictions1,y1))*
        (length(y1)-1)/(length(y1)-sum(COEF1@p)-1))
LASSO1=c(RMSE(predictions2,y1), 1-(1-R2(predictions2,y1))*
        (length(y1)-1)/(length(y1)-sum(COEF2@p)-1))
## Warning in cor(obs, pred, use = ifelse(na.rm, "complete.obs", "everything")):
## the standard deviation is zero
RIDGE1=c(RMSE(predictions3,y1), 1-(1-R2(predictions3,y1))*
        (length(y1)-1)/(length(y1)-sum(COEF3@p)-1))

FIN1=rbind(EN1,LASSO1, RIDGE1)
rownames(FIN1)=c("EN1", "LASSO1", "RIDGE1")
colnames(FIN1)=c("RMSE", "R2")
FIN1
##             RMSE          R2
## EN1    0.9945111  0.01380349
## LASSO1 0.9974843          NA
## RIDGE1 0.9913740 -0.18741613
mydf=cbind(COEF1, COEF2, round(COEF3,2))
mydf
## 44 x 3 sparse Matrix of class "dgCMatrix"
##                                 1            1     1
## (Intercept)           0.006309159 1.029992e-17  0.01
## CEO_CAHMEUnknown      .           .             0.00
## CEO_CAHMEYes          .           .             0.00
## COO_CAHMEUnknown      .           .             0.00
## COO_CAHMEYes          .           .             0.00
## CFO_CAHMEUnknown      .           .             0.01
## CFO_CAHMEYes          .           .             0.00
## CEO_FACHEUnknown      .           .             0.00
## CEO_FACHEYes          .           .            -0.01
## COO_FACHEUnknown      .           .             0.00
## COO_FACHEYes          .           .             0.00
## CFO_FACHEUnknown      .           .             0.01
## CFO_FACHEYes          .           .             0.01
## Any_CAHMEYes          .           .             0.00
## Any_FACHEYes          .           .            -0.01
## CEO_Tenure4 to 6      .           .             0.00
## CEO_Tenure7 to 9      .           .             0.01
## CEO_Tenure10 or more  .           .             0.00
## CEO_TenureUnknown     .           .             0.00
## COO_Tenure4 to 6      .           .             0.00
## COO_Tenure7 to 9      .           .             0.01
## COO_Tenure10 or more  .           .             0.00
## COO_TenureUnknown     .           .             0.00
## CFO_Tenure4 to 6      .           .             0.00
## CFO_Tenure7 to 9     -0.073854270 .            -0.03
## CFO_Tenure10 or more  .           .            -0.01
## CFO_TenureUnknown     .           .             0.01
## NumCAHME              .           .             0.00
## NumFACHE              .           .             0.00
## Any                   .           .             0.00
## Pop2020               .           .             0.00
## Native                .           .             0.00
## Hispanic              .           .             0.00
## Black                 .           .             0.00
## Prop65                .           .             0.00
## PopDensity            .           .             0.00
## UE2019                .           .             0.00
## HH2018                .           .             0.00
## AdultObesity          .           .             0.00
## Cancer                .           .             0.00
## COPD                  .           .             0.00
## Diabetes              .           .             0.00
## HeartFail             .           .             0.00
## CMI                   .           .             0.00
#Elastic

Model 1 Final

No model other than the mean is appropriate for profit margin. To verify marginal effects of zero, we use the following model.

attach(as.data.frame(mytrain1))
mylmf=lm(y1~`CFO_Tenure7 to 9`)
summary(mylmf)
## 
## Call:
## lm(formula = y1 ~ `CFO_Tenure7 to 9`)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.0012  -0.0367   0.1073   0.2217   5.3862 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         0.04700    0.07342    0.64   0.5229  
## `CFO_Tenure7 to 9` -0.55013    0.25121   -2.19   0.0297 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9906 on 197 degrees of freedom
## Multiple R-squared:  0.02377,    Adjusted R-squared:  0.01881 
## F-statistic: 4.796 on 1 and 197 DF,  p-value: 0.02971

Model 1a through Model 3a, TPS (EN, Lasso, Ridge)

#Elastic Net
model1a <- train(
  x=mytrain2, y=y2,
  method = "glmnet",
  trControl = trainControl("LOOCV"),
  tuneLength = 10
)

lambda <- 10^seq(-3, 3, length = 100)

#Lasso
model2a <- train(
  x=mytrain2, y=y2,method = "glmnet",
  trControl = trainControl("LOOCV"),
  tuneGrid = expand.grid(alpha = 1, lambda = lambda)
  )

#Ridge
model3a <- train(
  x=mytrain2, y=y2,method = "glmnet",
  trControl = trainControl("LOOCV"),
  tuneGrid = expand.grid(alpha = 0, lambda = lambda)
  )


# Make predictions on the test data
predictions1a =predict(model1a, mytrain2)
predictions2a <- predict(model2a, mytrain2)
predictions3a <- predict(model3a, mytrain2)
COEF1a=coef(model1a$finalModel,model1a$bestTune$lambda)
COEF2a=coef(model2a$finalModel,model2a$bestTune$lambda)
COEF3a=coef(model3a$finalModel,model3a$bestTune$lambda)


EN1a=c(RMSE(predictions1a,y2), 1-(1-R2(predictions1a,y2))*
          (length(y2)-1)/(length(y2)-sum(COEF1a@p)-1))
LASSO1a=c(RMSE(predictions2a,y2), 1-(1-R2(predictions2a,y2))*
          (length(y2)-1)/(length(y2)-sum(COEF2a@p)-1))
RIDGE1a=c(RMSE(predictions3a,y2), 1-(1-R2(predictions3a,y2))*
          (length(y2)-1)/(length(y2)-sum(COEF3a@p)-1))

FIN1a=rbind(EN1a,LASSO1a, RIDGE1a)
rownames(FIN1a)=c("EN1", "LASSO1", "RIDGE1")
colnames(FIN1a)=c("RMSE", "R2")
FIN1a
##             RMSE          R2
## EN1    0.9430260  0.09789687
## LASSO1 0.9414182  0.09480411
## RIDGE1 0.9623162 -0.11672857
mydfa=cbind(COEF1a, COEF2a, round(COEF3a,2))
mydfa
## 44 x 3 sparse Matrix of class "dgCMatrix"
##                                 1            1     1
## (Intercept)          -0.027888928 -0.022990539 -0.04
## CEO_CAHMEUnknown      .            .            0.02
## CEO_CAHMEYes          .            .            0.00
## COO_CAHMEUnknown      0.035224206  0.027759940  0.03
## COO_CAHMEYes          .            .           -0.01
## CFO_CAHMEUnknown      .            .            0.01
## CFO_CAHMEYes         -0.133795814 -0.126722896 -0.07
## CEO_FACHEUnknown      .            .            0.02
## CEO_FACHEYes          .            .           -0.01
## COO_FACHEUnknown      0.030913458  0.030251396  0.03
## COO_FACHEYes          .            .           -0.01
## CFO_FACHEUnknown      .            .            0.01
## CFO_FACHEYes          .            .            0.02
## Any_CAHMEYes          .            .           -0.02
## Any_FACHEYes          .            .           -0.01
## CEO_Tenure4 to 6      .            .           -0.02
## CEO_Tenure7 to 9      .            .           -0.02
## CEO_Tenure10 or more  0.008066225  0.001095907  0.03
## CEO_TenureUnknown     .            .            0.01
## COO_Tenure4 to 6      .            .            0.02
## COO_Tenure7 to 9      .            .            0.02
## COO_Tenure10 or more  .            .           -0.02
## COO_TenureUnknown     .            .            0.03
## CFO_Tenure4 to 6      .            .            0.02
## CFO_Tenure7 to 9      .            .           -0.01
## CFO_Tenure10 or more  .            .            0.03
## CFO_TenureUnknown     .            .            0.00
## NumCAHME              .            .           -0.01
## NumFACHE              .            .            0.00
## Any                  -0.121750416 -0.135233701 -0.03
## Pop2020               .            .           -0.01
## Native               -0.053322125 -0.058530511 -0.02
## Hispanic              .            .            0.01
## Black                -0.064430002 -0.068853726 -0.02
## Prop65                .            .            0.01
## PopDensity            0.044014880  0.044550414  0.02
## UE2019                0.031931189  0.032663249  0.01
## HH2018                .            .            0.00
## AdultObesity          .            .           -0.01
## Cancer                .            .            0.00
## COPD                  .            .            0.00
## Diabetes              .            .            0.01
## HeartFail             .            .           -0.01
## CMI                   .            .           -0.01

Final Model

attach(as.data.frame(mytrain2))
## The following objects are masked from as.data.frame(mytrain1):
## 
##     AdultObesity, Any, Any_CAHMEYes, Any_FACHEYes, Black, Cancer,
##     CEO_CAHMEUnknown, CEO_CAHMEYes, CEO_FACHEUnknown, CEO_FACHEYes,
##     CEO_Tenure10 or more, CEO_Tenure4 to 6, CEO_Tenure7 to 9,
##     CEO_TenureUnknown, CFO_CAHMEUnknown, CFO_CAHMEYes,
##     CFO_FACHEUnknown, CFO_FACHEYes, CFO_Tenure10 or more, CFO_Tenure4
##     to 6, CFO_Tenure7 to 9, CFO_TenureUnknown, CMI, COO_CAHMEUnknown,
##     COO_CAHMEYes, COO_FACHEUnknown, COO_FACHEYes, COO_Tenure10 or more,
##     COO_Tenure4 to 6, COO_Tenure7 to 9, COO_TenureUnknown, COPD,
##     Diabetes, HeartFail, HH2018, Hispanic, Native, NumCAHME, NumFACHE,
##     Pop2020, PopDensity, Prop65, UE2019
mylmTPS=lm(y2~COO_CAHMEUnknown+CFO_CAHMEYes+COO_FACHEUnknown+
          `CEO_Tenure10 or more`+Any+Native+Black+PopDensity+UE2019)
summary(mylmTPS)
## 
## Call:
## lm(formula = y2 ~ COO_CAHMEUnknown + CFO_CAHMEYes + COO_FACHEUnknown + 
##     `CEO_Tenure10 or more` + Any + Native + Black + PopDensity + 
##     UE2019)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.11035 -0.60931 -0.04564  0.57065  2.58824 
## 
## Coefficients:
##                         Estimate Std. Error t value Pr(>|t|)   
## (Intercept)            -0.119059   0.105813  -1.125  0.26194   
## COO_CAHMEUnknown        0.007217   0.347099   0.021  0.98343   
## CFO_CAHMEYes           -0.345939   0.269505  -1.284  0.20085   
## COO_FACHEUnknown        0.180941   0.343791   0.526  0.59929   
## `CEO_Tenure10 or more`  0.315399   0.199701   1.579  0.11593   
## Any                    -0.189294   0.070174  -2.698  0.00762 **
## Native                 -0.157821   0.069255  -2.279  0.02379 * 
## Black                  -0.135142   0.070578  -1.915  0.05703 . 
## PopDensity              0.114610   0.073384   1.562  0.12001   
## UE2019                  0.120804   0.067807   1.782  0.07642 . 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.937 on 189 degrees of freedom
## Multiple R-squared:  0.162,  Adjusted R-squared:  0.122 
## F-statistic: 4.058 on 9 and 189 DF,  p-value: 8.934e-05

Final Model for TPS

myfinallm=lm(y2 ~ Any+Native+PopDensity)
summary(myfinallm)
## 
## Call:
## lm(formula = y2 ~ Any + Native + PopDensity)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.1655 -0.6785 -0.0804  0.6691  2.8133 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -2.780e-16  6.755e-02   0.000  1.00000   
## Any         -2.250e-01  6.874e-02  -3.273  0.00126 **
## Native      -1.725e-01  6.876e-02  -2.509  0.01293 * 
## PopDensity   1.631e-01  6.967e-02   2.341  0.02025 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9529 on 195 degrees of freedom
## Multiple R-squared:  0.1057, Adjusted R-squared:  0.09193 
## F-statistic: 7.682 on 3 and 195 DF,  p-value: 7.054e-05
hist(residuals(myfinallm), main="Residuals, TPS Final Model", xlab="Residuals, TPS Final Model")

shapiro.test(residuals(myfinallm))
## 
##  Shapiro-Wilk normality test
## 
## data:  residuals(myfinallm)
## W = 0.99098, p-value = 0.2503