Distribution of response variables
# Restore the saved workspace image. `rtr` (and `train`, used further down)
# are not created anywhere in this script, so they are presumably among the
# objects stored in this file -- TODO confirm.
load(file = "test.RData")

# First look at the shape of the response.
hist(x = rtr)
Distribution fitting of response variables
library(fitdistrplus)
## Warning: package 'fitdistrplus' was built under R version 3.2.5
## Loading required package: MASS
## Loading required package: survival
## Warning: package 'survival' was built under R version 3.2.5
# Fit four candidate distributions to the response with fitdist()'s default
# estimation method.
fw <- fitdist(rtr, distr = "weibull")
fn <- fitdist(rtr, distr = "norm")
fg <- fitdist(rtr, distr = "gamma")
fexp <- fitdist(rtr, distr = "exp")

# Visual comparison: fitted densities and Q-Q plots. The legend labels follow
# the order of the fits in the list.
plot.legend <- c("Weibull", "normal", "gamma", "exponential")
fits_for_plots <- list(fw, fn, fg, fexp)
denscomp(fits_for_plots, legendtext = plot.legend)
qqcomp(fits_for_plots, legendtext = plot.legend)

# Numerical comparison: goodness-of-fit statistics and information criteria.
# `fitnames` is given in the same order as the list of fits.
gofstat(
  list(fg, fn, fw, fexp),
  fitnames = c("gamma", "normal", "weibull", "exponential")
)
## Goodness-of-fit statistics
## gamma normal weibull
## Kolmogorov-Smirnov statistic 6.723598e-02 7.154486e-02 0.04479031
## Cramer-von Mises statistic 2.275419e+02 1.952990e+02 93.33560539
## Anderson-Darling statistic 1.238148e+03 1.467677e+03 571.21238303
## exponential
## Kolmogorov-Smirnov statistic 0.1284472
## Cramer-von Mises statistic 1083.4527136
## Anderson-Darling statistic 5602.7473720
##
## Goodness-of-fit criteria
## gamma normal weibull exponential
## Aikake's Information Criterion 1845122 1874357 1838626 1864937
## Bayesian Information Criterion 1845142 1874377 1838646 1864947
Let’s cluster the response variable into 10 clusters with k-means.
# k-means chooses its starting centres at random, so without a fixed seed the
# partition and the (arbitrary) cluster numbering change from run to run.
# Seed the RNG so the clusters, and everything derived from them, are
# reproducible.
set.seed(1)

# Split the response into 10 clusters.
# NOTE: the object name `c` and the expression `c$cluster` are kept as they
# are on purpose: data.frame() derives the column name `c.cluster` from that
# expression, and later steps look the column up by that name.
c <- kmeans(rtr, centers = 10)

# Response values side by side with the cluster id of each observation.
rtrc <- data.frame(rtr, c$cluster)

# Show the cluster centres.
c$centers
## [,1]
## 1 255.3909
## 2 109.5000
## 3 48.0000
## 4 208.2817
## 5 417.2315
## 6 139.7988
## 7 79.0000
## 8 171.6865
## 9 16.5000
## 10 318.1274
Assign labels to the response variable: find the minimum and maximum response value in each cluster
# Smallest and largest response value inside each cluster.
# tapply() groups by the cluster ids actually present in rtrc$c.cluster and
# returns one value per id in increasing id order, so the number of clusters
# is not hard-coded and no object is grown row by row inside a loop.
cluster_min <- tapply(rtrc$rtr, rtrc$c.cluster, min)
cluster_max <- tapply(rtrc$rtr, rtrc$c.cluster, max)

# mn / mx stay one-column matrices with one row per cluster.
mn <- matrix(cluster_min, ncol = 1)
mx <- matrix(cluster_max, ncol = 1)

# Table of cluster bounds: one row per cluster id (in increasing order),
# first column the minimum, second column the maximum. The column names are
# derived by data.frame() from the two expressions below.
c_b <- data.frame(mn[, 1], mx[, 1])
Let's build classification trees
# Modelling frame for the tree: the cluster label as the response, followed by
# columns 3 to 26 of `train` as predictors (`train` is not created in this
# script; presumably it comes from the loaded workspace -- TODO confirm).
# The label is stored as a factor: cluster ids are arbitrary category codes,
# and with an integer response ctree() would fit a regression tree on those
# code numbers instead of a classification tree. The column keeps the name
# `rtrc.c.cluster`, which is what data.frame() derived automatically before.
trc <- data.frame(rtrc.c.cluster = factor(rtrc$c.cluster), train[, 3:26])
library(party)
## Loading required package: grid
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
# Model formula: the cluster label explained by predictors V3, V4, ..., V26.
predictor_names <- paste0("V", 3:26)
frmla <- reformulate(predictor_names, response = "rtrc.c.cluster")

# Fit the conditional inference tree on the modelling frame and draw it.
ct <- ctree(formula = frmla, data = trc)
plot(ct)