Pre-Analysis

Distribution of response variables


# Restore the objects saved in test.RData into the workspace, then draw a
# histogram of the response `rtr` (presumably one of the restored objects).
load(file = "test.RData")
hist(x = rtr)




Distribution fitting of response variables

library(fitdistrplus)
## Warning: package 'fitdistrplus' was built under R version 3.2.5
## Loading required package: MASS
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.2.5
# Fit four candidate distributions to the response with fitdist().
fw <- fitdist(rtr, distr = "weibull")
fn <- fitdist(rtr, distr = "norm")
fg <- fitdist(rtr, distr = "gamma")
fexp <- fitdist(rtr, distr = "exp")

# Legend labels follow the order of the fits in the list used for plotting.
plot.legend <- c("Weibull", "normal", "gamma", "exponential")
fits_for_plots <- list(fw, fn, fg, fexp)

# Fitted densities drawn over the histogram of the data.
denscomp(fits_for_plots, legendtext = plot.legend)

# Theoretical versus empirical quantiles for the same fits.
qqcomp(fits_for_plots, legendtext = plot.legend)

# Goodness-of-fit statistics and information criteria; the names follow the
# order of this second list.
gofstat(
  list(fg, fn, fw, fexp),
  fitnames = c("gamma", "normal", "weibull", "exponential")
)
## Goodness-of-fit statistics
##                                     gamma       normal      weibull
## Kolmogorov-Smirnov statistic 6.723598e-02 7.154486e-02   0.04479031
## Cramer-von Mises statistic   2.275419e+02 1.952990e+02  93.33560539
## Anderson-Darling statistic   1.238148e+03 1.467677e+03 571.21238303
##                               exponential
## Kolmogorov-Smirnov statistic    0.1284472
## Cramer-von Mises statistic   1083.4527136
## Anderson-Darling statistic   5602.7473720
## 
## Goodness-of-fit criteria
##                                  gamma  normal weibull exponential
## Aikake's Information Criterion 1845122 1874357 1838626     1864937
## Bayesian Information Criterion 1845142 1874377 1838646     1864947




Let’s cluster the response variable into 10 clusters.

# Partition the response into 10 groups with k-means.
# kmeans() chooses its starting centres at random, so the RNG seed is fixed to
# make the cluster labels (and everything derived from them) reproducible.
# NOTE(review): the variable name `c` shadows base::c; it is kept because the
# generated data frame column name `c.cluster` is derived from it.
set.seed(42)
c <- kmeans(rtr, centers = 10)

# Pair every response value with the id of the cluster it was assigned to.
rtrc <- data.frame(rtr, c$cluster)

# Print the fitted cluster centres.
c$centers
##        [,1]
## 1  255.3909
## 2  109.5000
## 3   48.0000
## 4  208.2817
## 5  417.2315
## 6  139.7988
## 7   79.0000
## 8  171.6865
## 9   16.5000
## 10 318.1274




Assign labels to the response variable: record the minimum and maximum response value within each cluster

# Per-cluster range of the response: for every cluster id present in
# rtrc$c.cluster take the smallest and the largest rtr value. tapply() returns
# the results ordered by cluster id, so with ids 1..K row k belongs to
# cluster k. This works for any number of clusters instead of a fixed 1:10
# and avoids growing the matrices with rbind() inside a loop.
mn <- matrix(as.vector(tapply(rtrc$rtr, rtrc$c.cluster, min)), ncol = 1)
mx <- matrix(as.vector(tapply(rtrc$rtr, rtrc$c.cluster, max)), ncol = 1)

# One row per cluster: lower bound, upper bound. The column expressions are
# kept as they were so the generated column names do not change.
c_b <- data.frame(mn[, 1], mx[, 1])



Let's build a classification tree

# Modelling frame: the cluster label as response plus columns 3..26 of `train`
# as predictors. k-means labels are arbitrary ids, so they are stored as a
# factor; left as integers, ctree() would treat them as a numeric response and
# fit a regression tree rather than a classification tree. The column name is
# the one data.frame() generated for the original unnamed argument.
trc <- data.frame(rtrc.c.cluster = factor(rtrc$c.cluster), train[, 3:26])
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: sandwich
# Model formula: the cluster label explained by the predictors V3 through V26.
frmla <- reformulate(paste0("V", 3:26), response = "rtrc.c.cluster")

# Fit the conditional inference tree on trc and draw it.
ct <- ctree(formula = frmla, data = trc)
plot(ct)