library(rpart)
fit <- rpart(Species~., data=iris)
library(rpart.plot)
rpart.plot(fit, main="Classification and Regression Trees")
library(vcrpart)
## Warning: package 'vcrpart' was built under R version 3.4.2
## Loading required package: parallel
## Loading required package: partykit
## Loading required package: grid

UCBA <- as.data.frame(UCBAdmissions)
UCBA$Admit <- 1 * (UCBA$Admit == "Admitted")
UCBA$Female <- 1 * (UCBA$Gender == "Female")
head(UCBA, 3)
## Admit Gender Dept Freq Female
## 1 1 Male A 512 0
## 2 0 Male A 313 0
## 3 1 Female A 89 1
glmS.UCBA <- glm(formula = Admit ~ Female, data = UCBA,
family = binomial(), weights = UCBA$Freq)
summary(glmS.UCBA)$coefficients[, -4]
## Estimate Std. Error z value
## (Intercept) -0.2201340 0.03878810 -5.675297
## Female -0.6103524 0.06389106 -9.553017
glmL.UCBA <- glm(formula = Admit ~ -1 + Dept + Dept:Female,
data = UCBA, family = binomial(),
weights = UCBA$Freq)
summary(glmL.UCBA)$coefficients[, -4]
## Estimate Std. Error z value
## DeptA 0.49212143 0.07174966 6.8588682
## DeptB 0.53374926 0.08754301 6.0969945
## DeptC -0.53551824 0.11494078 -4.6590797
## DeptD -0.70395810 0.10407020 -6.7642621
## DeptE -0.95696177 0.16159920 -5.9218225
## DeptF -2.76974377 0.21978055 -12.6023154
## DeptA:Female 1.05207596 0.26270810 4.0047336
## DeptB:Female 0.22002254 0.43759263 0.5028022
## DeptC:Female -0.12492163 0.14394243 -0.8678583
## DeptD:Female 0.08198719 0.15020837 0.5458231
## DeptE:Female -0.20018702 0.20024255 -0.9997227
## DeptF:Female 0.18889583 0.30516342 0.6189989
vcmL.UCBA <- tvcglm(formula = Admit ~ -1 + vc(Dept) + vc(Dept, by = Female),
data = UCBA, family = binomial(), weights = UCBA$Freq,
control = tvcglm_control(minsize = 30, mindev = 0.0, cv = FALSE))
plot(vcmL.UCBA, type = "coef", part = "A")

plot(vcmL.UCBA, type = "coef", part = "B")

fit <- rpart(Admit ~., data=UCBA)
library(rpart.plot)
rpart.plot(fit, main="Classification and Regression Trees")

library("Ecdat")
## Loading required package: Ecfun
##
## Attaching package: 'Ecfun'
## The following object is masked from 'package:base':
##
## sign
##
## Attaching package: 'Ecdat'
## The following object is masked from 'package:datasets':
##
## Orange
data("Schooling")
head(Schooling)
## smsa66 smsa76 nearc2 nearc4 nearc4a nearc4b ed76 ed66 age76 daded
## 1 yes yes no no no no 7 5 29 9.94
## 2 yes yes no no no no 12 11 27 8.00
## 3 yes yes no no no no 12 12 34 14.00
## 4 yes yes yes yes yes no 11 11 27 11.00
## 5 yes yes yes yes yes no 12 12 34 8.00
## 6 yes yes yes yes yes no 12 11 26 9.00
## nodaded momed nomomed momdad14 sinmom14 step14 south66 south76 lwage76
## 1 yes 10.25 yes yes no no no no 6.306275
## 2 no 8.00 no yes no no no no 6.175867
## 3 no 12.00 no yes no no no no 6.580639
## 4 no 12.00 no yes no no no no 5.521461
## 5 no 7.00 no yes no no no no 6.591674
## 6 no 12.00 no yes no no no no 6.214608
## famed black wage76 enroll76 kww iqscore mar76 libcrd14 exp76
## 1 9 yes 548 no 15 NA yes no 16
## 2 8 no 481 no 35 93 yes yes 9
## 3 2 no 721 no 42 103 yes yes 16
## 4 6 no 250 no 25 88 yes yes 10
## 5 8 no 729 no 34 108 yes no 16
## 6 6 no 500 no 38 85 yes yes 8
Schooling <- Schooling[c(19, 21, 7, 28, 9, 14, 17, 18, 20, 23, 2, 4)]
head(Schooling)
## lwage76 black ed76 exp76 age76 momdad14 south66 south76 famed enroll76
## 1 6.306275 yes 7 16 29 yes no no 9 no
## 2 6.175867 no 12 9 27 yes no no 8 no
## 3 6.580639 no 12 16 34 yes no no 2 no
## 4 5.521461 no 11 10 27 yes no no 6 no
## 5 6.591674 no 12 16 34 yes no no 8 no
## 6 6.214608 no 12 8 26 yes no no 6 no
## smsa76 nearc4
## 1 yes no
## 2 yes no
## 3 yes no
## 4 yes yes
## 5 yes yes
## 6 yes yes
Schooling$black <- 1 * (Schooling$black == "yes")
Schooling$ed76.IV <- fitted(lm(ed76 ~ nearc4, Schooling))
lm.School <- lm(lwage76 ~ ed76.IV + exp76 + I(exp76^2) + black, data = Schooling)
f.School <- lwage76 ~ -1 + ed76.IV + exp76 + I(exp76^2) +
vc(age76, momdad14, south66, south76, famed, enroll76, smsa76) +
vc(ed76.IV, exp76, age76, momdad14, south66,
south76, famed, enroll76, smsa76, by = black)
vcm.School <- tvcglm(formula = f.School, data = Schooling,
family = gaussian())
plot(vcm.School, type = "coef", part = "A")

plot(vcm.School, type = "coef", part = "B")

library(wsrf)
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 3.4.3
## wsrf: An R Package for Scalable Weighted Subspace Random Forests.
## Version 1.7.17
## Use C++ standard thread library for parallel computing
library("rattle.data")
## Warning: package 'rattle.data' was built under R version 3.4.1
ds <- weather
dim(ds)
## [1] 366 24
target <- "RainTomorrow"
ignore <- c("Date", "Location", "RISK_MM")
(vars <- setdiff(names(ds), ignore))
## [1] "MinTemp" "MaxTemp" "Rainfall" "Evaporation"
## [5] "Sunshine" "WindGustDir" "WindGustSpeed" "WindDir9am"
## [9] "WindDir3pm" "WindSpeed9am" "WindSpeed3pm" "Humidity9am"
## [13] "Humidity3pm" "Pressure9am" "Pressure3pm" "Cloud9am"
## [17] "Cloud3pm" "Temp9am" "Temp3pm" "RainToday"
## [21] "RainTomorrow"
library("randomForest")
## randomForest 4.6-12
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following objects are masked from 'package:wsrf':
##
## combine, importance
if (sum(is.na(ds[vars]))) ds[vars] <- na.roughfix(ds[vars])
ds[target] <- as.factor(ds[[target]])
(tt <- table(ds[target]))
##
## No Yes
## 300 66
(form <- as.formula(paste(target, "~ .")))
## RainTomorrow ~ .
## RainTomorrow ~ .
seed <- 42
set.seed(seed)
length(train <- sample(nrow(ds), 0.7*nrow(ds)))
## [1] 256
#head(ds)
#library("wsrf")
#model.wsrf.1 <- wsrf(form, data=ds[train, vars], parallel=FALSE)
#print(model.wsrf.1)
data("Titanic", package = "datasets")
ttnc <- as.data.frame(Titanic)
ttnc <- ttnc[rep(1:nrow(ttnc), ttnc$Freq), 1:4]
names(ttnc)[2] <- "Gender"
head(ttnc)
## Class Gender Age Survived
## 3 3rd Male Child No
## 3.1 3rd Male Child No
## 3.2 3rd Male Child No
## 3.3 3rd Male Child No
## 3.4 3rd Male Child No
## 3.5 3rd Male Child No
library("rpart")
(rp <- rpart(Survived ~ ., data = ttnc))
## n= 2201
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 2201 711 No (0.6769650 0.3230350)
## 2) Gender=Male 1731 367 No (0.7879838 0.2120162)
## 4) Age=Adult 1667 338 No (0.7972406 0.2027594) *
## 5) Age=Child 64 29 No (0.5468750 0.4531250)
## 10) Class=3rd 48 13 No (0.7291667 0.2708333) *
## 11) Class=1st,2nd 16 0 Yes (0.0000000 1.0000000) *
## 3) Gender=Female 470 126 Yes (0.2680851 0.7319149)
## 6) Class=3rd 196 90 No (0.5408163 0.4591837) *
## 7) Class=1st,2nd,Crew 274 20 Yes (0.0729927 0.9270073) *
plot(rp)
text(rp)

(party_rp <- as.party(rp))
##
## Model formula:
## Survived ~ Class + Gender + Age
##
## Fitted party:
## [1] root
## | [2] Gender in Male
## | | [3] Age in Adult: No (n = 1667, err = 20.3%)
## | | [4] Age in Child
## | | | [5] Class in 3rd: No (n = 48, err = 27.1%)
## | | | [6] Class in 1st, 2nd: Yes (n = 16, err = 0.0%)
## | [7] Gender in Female
## | | [8] Class in 3rd: No (n = 196, err = 45.9%)
## | | [9] Class in 1st, 2nd, Crew: Yes (n = 274, err = 7.3%)
##
## Number of inner nodes: 4
## Number of terminal nodes: 5
plot(party_rp)

#library(RWeka)
#(j48 <- J48(Survived ~ ., data = ttnc))
## (party_j48 <- as.party(j48))
## plot(party_j48)
library(party)
## Loading required package: mvtnorm
## Loading required package: modeltools
## Loading required package: stats4
## Loading required package: strucchange
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: sandwich
##
## Attaching package: 'party'
## The following objects are masked from 'package:partykit':
##
## cforest, ctree, ctree_control, edge_simple, mob, mob_control,
## node_barplot, node_bivplot, node_boxplot, node_inner,
## node_surv, node_terminal
data("treepipit", package = "coin")
head(treepipit)
## counts age coverstorey coverregen meanregen coniferous deadtree cbpiles
## 1 0 120 80 60 7 20 0 4
## 2 0 120 70 90 3 25 1 2
## 3 0 120 90 70 6 40 0 7
## 4 0 120 90 20 7 5 0 11
## 5 0 120 90 20 4 1 0 11
## 6 2 200 80 80 5 0 1 3
## ivytree fdist
## 1 0 100
## 2 0 10
## 3 0 110
## 4 0 10
## 5 0 110
## 6 0 30
tptree <- ctree(counts ~ ., data = treepipit)
plot(tptree, terminal_panel = node_hist(tptree, breaks = 0:6-0.5, ymax = 65,
horizontal = FALSE, freq = TRUE))

data("GlaucomaM", package = "TH.data")
head(GlaucomaM)
## ag at as an ai eag eat eas ean eai abrg abrt
## 2 2.220 0.354 0.580 0.686 0.601 1.267 0.336 0.346 0.255 0.331 0.479 0.260
## 43 2.681 0.475 0.672 0.868 0.667 2.053 0.440 0.520 0.639 0.454 1.090 0.377
## 25 1.979 0.343 0.508 0.624 0.504 1.200 0.299 0.396 0.259 0.246 0.465 0.209
## 65 1.747 0.269 0.476 0.525 0.476 0.612 0.147 0.017 0.044 0.405 0.170 0.062
## 70 2.990 0.599 0.686 1.039 0.667 2.513 0.543 0.607 0.871 0.492 1.800 0.431
## 16 2.917 0.483 0.763 0.901 0.770 2.200 0.462 0.637 0.504 0.597 1.311 0.394
## abrs abrn abri hic mhcg mhct mhcs mhcn mhci phcg
## 2 0.107 0.014 0.098 0.214 0.111 0.412 0.036 0.105 -0.022 -0.139
## 43 0.257 0.212 0.245 0.382 0.140 0.338 0.104 0.080 0.109 -0.015
## 25 0.112 0.041 0.103 0.195 0.062 0.356 0.045 -0.009 -0.048 -0.149
## 65 0.000 0.000 0.108 -0.030 -0.015 0.074 -0.084 -0.050 0.035 -0.182
## 70 0.494 0.601 0.274 0.383 0.089 0.233 0.145 0.023 0.007 -0.131
## 16 0.365 0.251 0.301 0.442 0.128 0.375 0.049 0.111 0.052 -0.088
## phct phcs phcn phci hvc vbsg vbst vbss vbsn vbsi vasg
## 2 0.242 -0.053 0.010 -0.139 0.613 0.303 0.103 0.088 0.022 0.090 0.062
## 43 0.296 -0.015 -0.015 0.036 0.382 0.676 0.181 0.186 0.141 0.169 0.029
## 25 0.206 -0.092 -0.081 -0.149 0.557 0.300 0.084 0.088 0.046 0.082 0.036
## 65 -0.097 -0.125 -0.138 -0.182 0.373 0.048 0.011 0.000 0.000 0.036 0.070
## 70 0.163 0.055 -0.131 -0.115 0.405 0.889 0.151 0.253 0.330 0.155 0.020
## 16 0.281 -0.067 -0.062 -0.088 0.507 0.972 0.213 0.316 0.197 0.246 0.043
## vast vass vasn vasi vbrg vbrt vbrs vbrn vbri varg vart vars
## 2 0.000 0.011 0.032 0.018 0.075 0.039 0.021 0.002 0.014 0.756 0.009 0.209
## 43 0.001 0.007 0.011 0.010 0.370 0.127 0.099 0.050 0.093 0.410 0.006 0.105
## 25 0.002 0.004 0.016 0.013 0.081 0.034 0.019 0.007 0.021 0.565 0.014 0.132
## 65 0.005 0.030 0.033 0.002 0.005 0.001 0.000 0.000 0.004 0.380 0.032 0.147
## 70 0.001 0.004 0.008 0.007 0.532 0.103 0.173 0.181 0.075 0.228 0.011 0.026
## 16 0.001 0.005 0.028 0.009 0.467 0.136 0.148 0.078 0.104 0.540 0.008 0.133
## varn vari mdg mdt mds mdn mdi tmg tmt tms tmn
## 2 0.298 0.240 0.705 0.637 0.738 0.596 0.691 -0.236 -0.018 -0.230 -0.510
## 43 0.181 0.117 0.898 0.850 0.907 0.771 0.940 -0.211 -0.014 -0.165 -0.317
## 25 0.243 0.177 0.687 0.643 0.689 0.684 0.700 -0.185 -0.097 -0.235 -0.337
## 65 0.151 0.050 0.207 0.171 0.022 0.046 0.221 -0.148 -0.035 -0.449 -0.217
## 70 0.105 0.087 0.721 0.638 0.730 0.730 0.640 -0.052 -0.105 0.084 -0.012
## 16 0.232 0.167 0.927 0.842 0.953 0.906 0.898 -0.040 0.087 0.018 -0.094
## tmi mr rnf mdic emd mv Class
## 2 -0.158 0.841 0.410 0.137 0.239 0.035 normal
## 43 -0.192 0.924 0.256 0.252 0.329 0.022 normal
## 25 -0.020 0.795 0.378 0.152 0.250 0.029 normal
## 65 -0.091 0.746 0.200 0.027 0.078 0.023 normal
## 70 -0.054 0.977 0.193 0.297 0.354 0.034 normal
## 16 -0.051 0.965 0.339 0.333 0.442 0.028 normal
gtree <- ctree(Class ~ ., data = GlaucomaM)
data("GBSG2", package = "TH.data")
head(GBSG2)
## horTh age menostat tsize tgrade pnodes progrec estrec time cens
## 1 no 70 Post 21 II 3 48 66 1814 1
## 2 yes 56 Post 12 II 7 61 77 2018 1
## 3 yes 58 Post 35 II 9 52 271 712 1
## 4 yes 59 Post 17 II 4 60 29 1807 1
## 5 no 73 Post 35 II 1 26 65 772 1
## 6 no 32 Pre 57 III 24 0 13 448 1
stree <- ctree(Surv(time, cens) ~ ., data = GBSG2)
plot(stree)
