NEW1

library(rpart)
fit <- rpart(Species~., data=iris)
library(rpart.plot)
rpart.plot(fit, main="Classification and Regression Trees")
library(vcrpart)

## Warning: package 'vcrpart' was built under R version 3.4.2

## Loading required package: parallel

## Loading required package: partykit

## Loading required package: grid

UCBA <- as.data.frame(UCBAdmissions)
UCBA$Admit <- 1 * (UCBA$Admit == "Admitted")
UCBA$Female <- 1 * (UCBA$Gender == "Female")
head(UCBA, 3)

##   Admit Gender Dept Freq Female
## 1     1   Male    A  512      0
## 2     0   Male    A  313      0
## 3     1 Female    A   89      1

glmS.UCBA <- glm(formula = Admit ~ Female, data = UCBA,
family = binomial(), weights = UCBA$Freq)
summary(glmS.UCBA)$coefficients[, -4]

##               Estimate Std. Error   z value
## (Intercept) -0.2201340 0.03878810 -5.675297
## Female      -0.6103524 0.06389106 -9.553017

glmL.UCBA <- glm(formula = Admit ~ -1 + Dept + Dept:Female,
data = UCBA, family = binomial(),
weights = UCBA$Freq)
summary(glmL.UCBA)$coefficients[, -4]

##                 Estimate Std. Error     z value
## DeptA         0.49212143 0.07174966   6.8588682
## DeptB         0.53374926 0.08754301   6.0969945
## DeptC        -0.53551824 0.11494078  -4.6590797
## DeptD        -0.70395810 0.10407020  -6.7642621
## DeptE        -0.95696177 0.16159920  -5.9218225
## DeptF        -2.76974377 0.21978055 -12.6023154
## DeptA:Female  1.05207596 0.26270810   4.0047336
## DeptB:Female  0.22002254 0.43759263   0.5028022
## DeptC:Female -0.12492163 0.14394243  -0.8678583
## DeptD:Female  0.08198719 0.15020837   0.5458231
## DeptE:Female -0.20018702 0.20024255  -0.9997227
## DeptF:Female  0.18889583 0.30516342   0.6189989

vcmL.UCBA <- tvcglm(formula = Admit ~ -1 + vc(Dept) + vc(Dept, by = Female),
       data = UCBA, family = binomial(), weights = UCBA$Freq,
       control = tvcglm_control(minsize = 30, mindev = 0.0, cv = FALSE))
plot(vcmL.UCBA, type = "coef", part = "A")

plot(vcmL.UCBA, type = "coef", part = "B")

fit <- rpart(Admit ~., data=UCBA)
library(rpart.plot)
rpart.plot(fit, main="Classification and Regression Trees")

library("Ecdat")

## Loading required package: Ecfun

## 
## Attaching package: 'Ecfun'

## The following object is masked from 'package:base':
## 
##     sign

## 
## Attaching package: 'Ecdat'

## The following object is masked from 'package:datasets':
## 
##     Orange

data("Schooling")
head(Schooling)

##   smsa66 smsa76 nearc2 nearc4 nearc4a nearc4b ed76 ed66 age76 daded
## 1    yes    yes     no     no      no      no    7    5    29  9.94
## 2    yes    yes     no     no      no      no   12   11    27  8.00
## 3    yes    yes     no     no      no      no   12   12    34 14.00
## 4    yes    yes    yes    yes     yes      no   11   11    27 11.00
## 5    yes    yes    yes    yes     yes      no   12   12    34  8.00
## 6    yes    yes    yes    yes     yes      no   12   11    26  9.00
##   nodaded momed nomomed momdad14 sinmom14 step14 south66 south76  lwage76
## 1     yes 10.25     yes      yes       no     no      no      no 6.306275
## 2      no  8.00      no      yes       no     no      no      no 6.175867
## 3      no 12.00      no      yes       no     no      no      no 6.580639
## 4      no 12.00      no      yes       no     no      no      no 5.521461
## 5      no  7.00      no      yes       no     no      no      no 6.591674
## 6      no 12.00      no      yes       no     no      no      no 6.214608
##   famed black wage76 enroll76 kww iqscore mar76 libcrd14 exp76
## 1     9   yes    548       no  15      NA   yes       no    16
## 2     8    no    481       no  35      93   yes      yes     9
## 3     2    no    721       no  42     103   yes      yes    16
## 4     6    no    250       no  25      88   yes      yes    10
## 5     8    no    729       no  34     108   yes       no    16
## 6     6    no    500       no  38      85   yes      yes     8

Schooling <- Schooling[c(19, 21, 7, 28, 9, 14, 17, 18, 20, 23, 2, 4)]
head(Schooling)

##    lwage76 black ed76 exp76 age76 momdad14 south66 south76 famed enroll76
## 1 6.306275   yes    7    16    29      yes      no      no     9       no
## 2 6.175867    no   12     9    27      yes      no      no     8       no
## 3 6.580639    no   12    16    34      yes      no      no     2       no
## 4 5.521461    no   11    10    27      yes      no      no     6       no
## 5 6.591674    no   12    16    34      yes      no      no     8       no
## 6 6.214608    no   12     8    26      yes      no      no     6       no
##   smsa76 nearc4
## 1    yes     no
## 2    yes     no
## 3    yes     no
## 4    yes    yes
## 5    yes    yes
## 6    yes    yes

Schooling$black <- 1 * (Schooling$black == "yes")
Schooling$ed76.IV <- fitted(lm(ed76 ~ nearc4, Schooling))
lm.School <- lm(lwage76 ~ ed76.IV + exp76 + I(exp76^2) + black, data = Schooling)
f.School <- lwage76 ~ -1 + ed76.IV + exp76 + I(exp76^2) + 
vc(age76, momdad14, south66, south76, famed, enroll76, smsa76) +
vc(ed76.IV, exp76, age76, momdad14, south66,
   south76, famed, enroll76, smsa76, by = black)

vcm.School <- tvcglm(formula = f.School, data = Schooling,
family = gaussian())
plot(vcm.School, type = "coef", part = "A")

plot(vcm.School, type = "coef", part = "B")

library(wsrf)

## Loading required package: Rcpp

## Warning: package 'Rcpp' was built under R version 3.4.3

## wsrf: An R Package for Scalable Weighted Subspace Random Forests.

## Version 1.7.17

## Use C++ standard thread library for parallel computing

library("rattle.data")

## Warning: package 'rattle.data' was built under R version 3.4.1

ds <- weather
dim(ds)

## [1] 366  24

target <- "RainTomorrow"
ignore <- c("Date", "Location", "RISK_MM")
(vars <- setdiff(names(ds), ignore))

##  [1] "MinTemp"       "MaxTemp"       "Rainfall"      "Evaporation"  
##  [5] "Sunshine"      "WindGustDir"   "WindGustSpeed" "WindDir9am"   
##  [9] "WindDir3pm"    "WindSpeed9am"  "WindSpeed3pm"  "Humidity9am"  
## [13] "Humidity3pm"   "Pressure9am"   "Pressure3pm"   "Cloud9am"     
## [17] "Cloud3pm"      "Temp9am"       "Temp3pm"       "RainToday"    
## [21] "RainTomorrow"

library("randomForest")

## randomForest 4.6-12

## Type rfNews() to see new features/changes/bug fixes.

## 
## Attaching package: 'randomForest'

## The following objects are masked from 'package:wsrf':
## 
##     combine, importance

if (sum(is.na(ds[vars]))) ds[vars] <- na.roughfix(ds[vars])
ds[target] <- as.factor(ds[[target]])
(tt <- table(ds[target]))

## 
##  No Yes 
## 300  66

(form <- as.formula(paste(target, "~ .")))

## RainTomorrow ~ .

## RainTomorrow ~ .
seed <- 42
set.seed(seed)
length(train <- sample(nrow(ds), 0.7*nrow(ds)))

## [1] 256

#head(ds)
#library("wsrf")
#model.wsrf.1 <- wsrf(form, data=ds[train, vars], parallel=FALSE)
#print(model.wsrf.1)

data("Titanic", package = "datasets")
ttnc <- as.data.frame(Titanic)
ttnc <- ttnc[rep(1:nrow(ttnc), ttnc$Freq), 1:4]
names(ttnc)[2] <- "Gender"
head(ttnc)

##     Class Gender   Age Survived
## 3     3rd   Male Child       No
## 3.1   3rd   Male Child       No
## 3.2   3rd   Male Child       No
## 3.3   3rd   Male Child       No
## 3.4   3rd   Male Child       No
## 3.5   3rd   Male Child       No

library("rpart")
(rp <- rpart(Survived ~ ., data = ttnc))

## n= 2201 
## 
## node), split, n, loss, yval, (yprob)
##       * denotes terminal node
## 
##  1) root 2201 711 No (0.6769650 0.3230350)  
##    2) Gender=Male 1731 367 No (0.7879838 0.2120162)  
##      4) Age=Adult 1667 338 No (0.7972406 0.2027594) *
##      5) Age=Child 64  29 No (0.5468750 0.4531250)  
##       10) Class=3rd 48  13 No (0.7291667 0.2708333) *
##       11) Class=1st,2nd 16   0 Yes (0.0000000 1.0000000) *
##    3) Gender=Female 470 126 Yes (0.2680851 0.7319149)  
##      6) Class=3rd 196  90 No (0.5408163 0.4591837) *
##      7) Class=1st,2nd,Crew 274  20 Yes (0.0729927 0.9270073) *

plot(rp)
text(rp)

(party_rp <- as.party(rp))

## 
## Model formula:
## Survived ~ Class + Gender + Age
## 
## Fitted party:
## [1] root
## |   [2] Gender in Male
## |   |   [3] Age in Adult: No (n = 1667, err = 20.3%)
## |   |   [4] Age in Child
## |   |   |   [5] Class in 3rd: No (n = 48, err = 27.1%)
## |   |   |   [6] Class in 1st, 2nd: Yes (n = 16, err = 0.0%)
## |   [7] Gender in Female
## |   |   [8] Class in 3rd: No (n = 196, err = 45.9%)
## |   |   [9] Class in 1st, 2nd, Crew: Yes (n = 274, err = 7.3%)
## 
## Number of inner nodes:    4
## Number of terminal nodes: 5

plot(party_rp)

#library(RWeka)
#(j48 <- J48(Survived ~ ., data = ttnc))

## (party_j48 <- as.party(j48))
## plot(party_j48)


library(party)

## Loading required package: mvtnorm

## Loading required package: modeltools

## Loading required package: stats4

## Loading required package: strucchange

## Loading required package: zoo

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: sandwich

## 
## Attaching package: 'party'

## The following objects are masked from 'package:partykit':
## 
##     cforest, ctree, ctree_control, edge_simple, mob, mob_control,
##     node_barplot, node_bivplot, node_boxplot, node_inner,
##     node_surv, node_terminal

data("treepipit", package = "coin")
head(treepipit)

##   counts age coverstorey coverregen meanregen coniferous deadtree cbpiles
## 1      0 120          80         60         7         20        0       4
## 2      0 120          70         90         3         25        1       2
## 3      0 120          90         70         6         40        0       7
## 4      0 120          90         20         7          5        0      11
## 5      0 120          90         20         4          1        0      11
## 6      2 200          80         80         5          0        1       3
##   ivytree fdist
## 1       0   100
## 2       0    10
## 3       0   110
## 4       0    10
## 5       0   110
## 6       0    30

tptree <- ctree(counts ~ ., data = treepipit)

plot(tptree, terminal_panel = node_hist(tptree, breaks = 0:6-0.5, ymax = 65,
horizontal = FALSE, freq = TRUE))

data("GlaucomaM", package = "TH.data")
head(GlaucomaM)

##       ag    at    as    an    ai   eag   eat   eas   ean   eai  abrg  abrt
## 2  2.220 0.354 0.580 0.686 0.601 1.267 0.336 0.346 0.255 0.331 0.479 0.260
## 43 2.681 0.475 0.672 0.868 0.667 2.053 0.440 0.520 0.639 0.454 1.090 0.377
## 25 1.979 0.343 0.508 0.624 0.504 1.200 0.299 0.396 0.259 0.246 0.465 0.209
## 65 1.747 0.269 0.476 0.525 0.476 0.612 0.147 0.017 0.044 0.405 0.170 0.062
## 70 2.990 0.599 0.686 1.039 0.667 2.513 0.543 0.607 0.871 0.492 1.800 0.431
## 16 2.917 0.483 0.763 0.901 0.770 2.200 0.462 0.637 0.504 0.597 1.311 0.394
##     abrs  abrn  abri    hic   mhcg  mhct   mhcs   mhcn   mhci   phcg
## 2  0.107 0.014 0.098  0.214  0.111 0.412  0.036  0.105 -0.022 -0.139
## 43 0.257 0.212 0.245  0.382  0.140 0.338  0.104  0.080  0.109 -0.015
## 25 0.112 0.041 0.103  0.195  0.062 0.356  0.045 -0.009 -0.048 -0.149
## 65 0.000 0.000 0.108 -0.030 -0.015 0.074 -0.084 -0.050  0.035 -0.182
## 70 0.494 0.601 0.274  0.383  0.089 0.233  0.145  0.023  0.007 -0.131
## 16 0.365 0.251 0.301  0.442  0.128 0.375  0.049  0.111  0.052 -0.088
##      phct   phcs   phcn   phci   hvc  vbsg  vbst  vbss  vbsn  vbsi  vasg
## 2   0.242 -0.053  0.010 -0.139 0.613 0.303 0.103 0.088 0.022 0.090 0.062
## 43  0.296 -0.015 -0.015  0.036 0.382 0.676 0.181 0.186 0.141 0.169 0.029
## 25  0.206 -0.092 -0.081 -0.149 0.557 0.300 0.084 0.088 0.046 0.082 0.036
## 65 -0.097 -0.125 -0.138 -0.182 0.373 0.048 0.011 0.000 0.000 0.036 0.070
## 70  0.163  0.055 -0.131 -0.115 0.405 0.889 0.151 0.253 0.330 0.155 0.020
## 16  0.281 -0.067 -0.062 -0.088 0.507 0.972 0.213 0.316 0.197 0.246 0.043
##     vast  vass  vasn  vasi  vbrg  vbrt  vbrs  vbrn  vbri  varg  vart  vars
## 2  0.000 0.011 0.032 0.018 0.075 0.039 0.021 0.002 0.014 0.756 0.009 0.209
## 43 0.001 0.007 0.011 0.010 0.370 0.127 0.099 0.050 0.093 0.410 0.006 0.105
## 25 0.002 0.004 0.016 0.013 0.081 0.034 0.019 0.007 0.021 0.565 0.014 0.132
## 65 0.005 0.030 0.033 0.002 0.005 0.001 0.000 0.000 0.004 0.380 0.032 0.147
## 70 0.001 0.004 0.008 0.007 0.532 0.103 0.173 0.181 0.075 0.228 0.011 0.026
## 16 0.001 0.005 0.028 0.009 0.467 0.136 0.148 0.078 0.104 0.540 0.008 0.133
##     varn  vari   mdg   mdt   mds   mdn   mdi    tmg    tmt    tms    tmn
## 2  0.298 0.240 0.705 0.637 0.738 0.596 0.691 -0.236 -0.018 -0.230 -0.510
## 43 0.181 0.117 0.898 0.850 0.907 0.771 0.940 -0.211 -0.014 -0.165 -0.317
## 25 0.243 0.177 0.687 0.643 0.689 0.684 0.700 -0.185 -0.097 -0.235 -0.337
## 65 0.151 0.050 0.207 0.171 0.022 0.046 0.221 -0.148 -0.035 -0.449 -0.217
## 70 0.105 0.087 0.721 0.638 0.730 0.730 0.640 -0.052 -0.105  0.084 -0.012
## 16 0.232 0.167 0.927 0.842 0.953 0.906 0.898 -0.040  0.087  0.018 -0.094
##       tmi    mr   rnf  mdic   emd    mv  Class
## 2  -0.158 0.841 0.410 0.137 0.239 0.035 normal
## 43 -0.192 0.924 0.256 0.252 0.329 0.022 normal
## 25 -0.020 0.795 0.378 0.152 0.250 0.029 normal
## 65 -0.091 0.746 0.200 0.027 0.078 0.023 normal
## 70 -0.054 0.977 0.193 0.297 0.354 0.034 normal
## 16 -0.051 0.965 0.339 0.333 0.442 0.028 normal

gtree <- ctree(Class ~ ., data = GlaucomaM)

data("GBSG2", package = "TH.data")
head(GBSG2)

##   horTh age menostat tsize tgrade pnodes progrec estrec time cens
## 1    no  70     Post    21     II      3      48     66 1814    1
## 2   yes  56     Post    12     II      7      61     77 2018    1
## 3   yes  58     Post    35     II      9      52    271  712    1
## 4   yes  59     Post    17     II      4      60     29 1807    1
## 5    no  73     Post    35     II      1      26     65  772    1
## 6    no  32      Pre    57    III     24       0     13  448    1

stree <- ctree(Surv(time, cens) ~ ., data = GBSG2)
plot(stree)