pre
library("ranger")
library(data.table)
setwd("C:/Users/subas/Syncplicity/MyProjects_IMP/MY_Papers_V2/TRB 2021/EScotter_BayesianRule/")
it01 <- fread("IT_aadtMaster.csv")
mn= subset(it01, State=="MN")
dim(mn)
## [1] 11498 86
mn01 <- mn[, c("FC_RU", "Default_AADT", "HU", "Pop" , "WAC", "RAC", "Agg_Inc", "Agg_Veh", "Empl" )]
mn02= na.omit(mn01)
mn02$FC_RU= as.factor(mn02$FC_RU)
library(flexCWM)
attach(mn02)
fit1 <- cwm(Default_AADT~ HU+ Pop+ WAC+ RAC+Empl, data=mn02,
familyY = list(gaussian), k = 1:3,
initialization = "kmeans")
##
## Estimating model with k=1, familyY=gaussian *
## Estimating model with k=2, familyY=gaussian ***
## Estimating model with k=3, familyY=gaussian *******************************************************************************************************************************************************************************
##
## Best model according to AIC, AICc, AICu, AIC3, AWE, BIC, CAIC, ICL is obtained with k = 3 group(s) and family gaussian(identity)
summary(fit1, criterion = "BIC", concomitant = TRUE)
## ----------------------------------
## Best fitted model according to BIC
## ----------------------------------
## loglikelihood n df BIC
## -58165 10780 23 -116543
##
## Clustering table:
## 1 2 3
## 7061 44 3675
##
## Prior: comp.1 = 0.6545442, comp.2 = 0.0066591, comp.3 = 0.3387967
##
## Distribution used for GLM: gaussian(identity). Parameters:
##
## Component 1
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 52.59072847 0.35624782 147.6240 < 2.2e-16 ***
## HU -0.01344872 0.00074439 -18.0668 < 2.2e-16 ***
## Pop -0.00937205 0.00145266 -6.4516 1.154e-10 ***
## WAC -0.00240664 0.00039640 -6.0712 1.312e-09 ***
## RAC 0.03039062 0.00196542 15.4627 < 2.2e-16 ***
## Empl 0.00779145 0.00237323 3.2831 0.00103 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## sigma = 15.366
##
## Component 2
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 229.6335016 1.3040939 176.0866 < 2.2e-16 ***
## HU -0.3037524 0.0017730 -171.3215 < 2.2e-16 ***
## Pop 0.3658807 0.0041641 87.8651 < 2.2e-16 ***
## WAC 0.3901486 0.0014537 268.3883 < 2.2e-16 ***
## RAC 0.0126317 0.0039069 3.2332 0.001228 **
## Empl -0.4008450 0.0065391 -61.2994 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## sigma = 57.899
##
## Component 3
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.1084e+02 1.7081e+00 416.1556 < 2.2e-16 ***
## HU 2.9122e-02 5.7979e-03 5.0228 5.174e-07 ***
## Pop -5.8776e-02 4.1038e-03 -14.3223 < 2.2e-16 ***
## WAC 4.8798e-03 2.9331e-04 16.6375 < 2.2e-16 ***
## RAC -2.5498e-02 4.9986e-03 -5.1011 3.435e-07 ***
## Empl 9.7786e-02 6.2887e-03 15.5495 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## sigma = 82.693

fit2clust <- getCluster(fit1)
table(fit2clust, FC_RU)
## FC_RU
## fit2clust 6R 7R 7U
## 1 3132 3693 236
## 2 5 18 21
## 3 22 93 3560