Several Packages and Plots

pre

library("ranger")
library(data.table)
setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/MY_Papers_V2/TRB 2021/EScotter_BayesianRule/")
it01 <- fread("IT_aadtMaster.csv")

mn= subset(it01, State=="MN")
dim(mn)
## [1] 11498    86
mn01 <- mn[, c("FC_RU", "Default_AADT", "HU", "Pop" , "WAC", "RAC", "Agg_Inc", "Agg_Veh", "Empl" )]
mn02= na.omit(mn01)
mn02$FC_RU= as.factor(mn02$FC_RU)

library(flexCWM)
attach(mn02)
fit1 <- cwm(Default_AADT~ HU+ Pop+ WAC+ RAC+Empl, data=mn02,
            familyY = list(gaussian), k = 1:3,
            initialization = "kmeans")
## 
## Estimating model with k=1, familyY=gaussian *
## Estimating model with k=2, familyY=gaussian ***
## Estimating model with k=3, familyY=gaussian *******************************************************************************************************************************************************************************
## 
## Best model according to AIC, AICc, AICu, AIC3, AWE, BIC, CAIC, ICL is obtained with k = 3 group(s) and family gaussian(identity)
summary(fit1, criterion = "BIC", concomitant = TRUE)
## ----------------------------------
## Best fitted model according to BIC 
## ----------------------------------
##  loglikelihood     n df     BIC
##         -58165 10780 23 -116543
## 
## Clustering table:
##    1    2    3 
## 7061   44 3675 
## 
## Prior: comp.1 = 0.6545442, comp.2 = 0.0066591, comp.3 = 0.3387967
## 
## Distribution used for GLM: gaussian(identity). Parameters:
## 
## Component 1
##                Estimate  Std. Error  t value  Pr(>|t|)    
## (Intercept) 52.59072847  0.35624782 147.6240 < 2.2e-16 ***
## HU          -0.01344872  0.00074439 -18.0668 < 2.2e-16 ***
## Pop         -0.00937205  0.00145266  -6.4516 1.154e-10 ***
## WAC         -0.00240664  0.00039640  -6.0712 1.312e-09 ***
## RAC          0.03039062  0.00196542  15.4627 < 2.2e-16 ***
## Empl         0.00779145  0.00237323   3.2831   0.00103 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## sigma = 15.366
## 
## Component 2
##                Estimate  Std. Error   t value  Pr(>|t|)    
## (Intercept) 229.6335016   1.3040939  176.0866 < 2.2e-16 ***
## HU           -0.3037524   0.0017730 -171.3215 < 2.2e-16 ***
## Pop           0.3658807   0.0041641   87.8651 < 2.2e-16 ***
## WAC           0.3901486   0.0014537  268.3883 < 2.2e-16 ***
## RAC           0.0126317   0.0039069    3.2332  0.001228 ** 
## Empl         -0.4008450   0.0065391  -61.2994 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## sigma = 57.899
## 
## Component 3
##                Estimate  Std. Error  t value  Pr(>|t|)    
## (Intercept)  7.1084e+02  1.7081e+00 416.1556 < 2.2e-16 ***
## HU           2.9122e-02  5.7979e-03   5.0228 5.174e-07 ***
## Pop         -5.8776e-02  4.1038e-03 -14.3223 < 2.2e-16 ***
## WAC          4.8798e-03  2.9331e-04  16.6375 < 2.2e-16 ***
## RAC         -2.5498e-02  4.9986e-03  -5.1011 3.435e-07 ***
## Empl         9.7786e-02  6.2887e-03  15.5495 < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## sigma = 82.693
plot(fit1)

fit2clust <- getCluster(fit1)
table(fit2clust, FC_RU)
##          FC_RU
## fit2clust   6R   7R   7U
##         1 3132 3693  236
##         2    5   18   21
##         3   22   93 3560