# DEATH PENALTY ANALYSIS ----

# tree(), cv.tree() and prune.tree() used throughout this script come from the
# 'tree' package, so it must be attached before the first model is fitted.
library(tree)

# Load the death-penalty data from the working directory.
deathpenalty <- read.csv("./DeathPenalty.csv")

# Fit a tree for Death using every other column as a candidate predictor.
# The printed fit reports fractional yval values, i.e. Death is modelled as a
# numeric response.
dp <- tree(Death ~ ., data = deathpenalty)
dp
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 362 49.380 0.1630
## 2) Agg < 3.5 307 13.360 0.0456
## 4) Agg < 2.5 283 5.873 0.0212 *
## 5) Agg > 2.5 24 5.333 0.3333 *
## 3) Agg > 3.5 55 8.182 0.8182
## 6) VRace < 0.5 17 4.118 0.5882
## 12) Agg < 4.5 6 1.333 0.3333 *
## 13) Agg > 4.5 11 2.182 0.7273 *
## 7) VRace > 0.5 38 2.763 0.9211
## 14) Agg < 4.5 12 2.250 0.7500 *
## 15) Agg > 4.5 26 0.000 1.0000 *
# Cross-validate the cost-complexity pruning sequence of the full tree.
# cv.tree() assigns observations to folds at random, so the seed is fixed to
# make the deviance-vs-size plot (and the size chosen from it) reproducible.
set.seed(1)
cvdp <- cv.tree(dp)
plot(cvdp)

## 3 terminal nodes is the tree size chosen from the cross-validation plot
# `best` is the number of terminal nodes (leaves) kept in the returned subtree.
dpcut <- prune.tree(dp, best = 3)
dpcut
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 362 49.380 0.1630
## 2) Agg < 3.5 307 13.360 0.0456
## 4) Agg < 2.5 283 5.873 0.0212 *
## 5) Agg > 2.5 24 5.333 0.3333 *
## 3) Agg > 3.5 55 8.182 0.8182 *
# Draw the pruned death-penalty tree, then add its text labels to the plot.
plot(x = dpcut)
text(x = dpcut)

# HOUSING PRICE ANALYSIS ----
house <- read.csv("./HousePrices.csv")
house <- house[-1] # drop the first column (house ID); it is not a predictor

# Replace the categorical columns with 0/1 dummy variables.
# Comparing against the level of interest and converting the logical result
# to numeric gives 1 where it matches and 0 otherwise (NA stays NA).
house$BrickYes <- as.numeric(house$Brick == "Yes")
house$NeighborhoodEast <- as.numeric(house$Neighborhood == "East")
house$NeighborhoodNorth <- as.numeric(house$Neighborhood == "North")

# Drop the original categorical columns by name rather than by position, so
# the right columns are removed even if the CSV's column order changes.
house <- house[setdiff(names(house), c("Brick", "Neighborhood"))]

# Fit a regression tree for Price on all remaining columns.
hp <- tree(Price ~ ., data = house)
hp
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 128 9.169e+10 130400
## 2) NeighborhoodNorth < 0.5 84 5.316e+10 141000
## 4) NeighborhoodEast < 0.5 39 1.487e+10 159300
## 8) BrickYes < 0.5 23 4.024e+09 148200
## 16) SqFt < 2010 9 3.002e+08 137000 *
## 17) SqFt > 2010 14 1.844e+09 155500 *
## 9) BrickYes > 0.5 16 3.983e+09 175200
## 18) Bedrooms < 3.5 8 4.316e+08 164100 *
## 19) Bedrooms > 3.5 8 1.580e+09 186300 *
## 5) NeighborhoodEast > 0.5 45 1.404e+10 125200
## 10) SqFt < 2035 25 4.970e+09 115600
## 20) BrickYes < 0.5 15 1.648e+09 108100 *
## 21) BrickYes > 0.5 10 1.234e+09 126800 *
## 11) SqFt > 2035 20 3.831e+09 137300
## 22) BrickYes < 0.5 11 2.498e+09 130900 *
## 23) BrickYes > 0.5 9 3.284e+08 145100 *
## 3) NeighborhoodNorth > 0.5 44 1.097e+10 110200
## 6) SqFt < 1695 7 1.310e+09 93130 *
## 7) SqFt > 1695 37 7.249e+09 113400
## 14) SqFt < 2145 32 5.299e+09 111000
## 28) Offers < 3.5 22 2.942e+09 115700
## 56) SqFt < 1910 10 5.153e+08 108100 *
## 57) SqFt > 1910 12 1.366e+09 122000 *
## 29) Offers > 3.5 10 7.866e+08 100600 *
## 15) SqFt > 2145 5 5.809e+08 128800 *
# Cross-validate the pruning sequence of the housing tree.
# cv.tree() assigns observations to folds at random, so the seed is fixed to
# make the plot (and the size chosen from it) reproducible.
set.seed(1)
cvhp <- cv.tree(hp)
plot(cvhp, pch = 21, bg = 8, type = "p", cex = 1.5)

## The tree size chosen from the cross-validation plot is 11 terminal nodes
# `best` is the number of terminal nodes (leaves) kept in the returned subtree.
hpcut <- prune.tree(hp, best = 11)
hpcut
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 128 9.169e+10 130400
## 2) NeighborhoodNorth < 0.5 84 5.316e+10 141000
## 4) NeighborhoodEast < 0.5 39 1.487e+10 159300
## 8) BrickYes < 0.5 23 4.024e+09 148200
## 16) SqFt < 2010 9 3.002e+08 137000 *
## 17) SqFt > 2010 14 1.844e+09 155500 *
## 9) BrickYes > 0.5 16 3.983e+09 175200
## 18) Bedrooms < 3.5 8 4.316e+08 164100 *
## 19) Bedrooms > 3.5 8 1.580e+09 186300 *
## 5) NeighborhoodEast > 0.5 45 1.404e+10 125200
## 10) SqFt < 2035 25 4.970e+09 115600
## 20) BrickYes < 0.5 15 1.648e+09 108100 *
## 21) BrickYes > 0.5 10 1.234e+09 126800 *
## 11) SqFt > 2035 20 3.831e+09 137300 *
## 3) NeighborhoodNorth > 0.5 44 1.097e+10 110200
## 6) SqFt < 1695 7 1.310e+09 93130 *
## 7) SqFt > 1695 37 7.249e+09 113400
## 14) SqFt < 2145 32 5.299e+09 111000
## 28) Offers < 3.5 22 2.942e+09 115700 *
## 29) Offers > 3.5 10 7.866e+08 100600 *
## 15) SqFt > 2145 5 5.809e+08 128800 *
# Draw the pruned housing tree in colour 10, then add slightly smaller
# (80 % size) text labels to the plot.
plot(x = hpcut, col = 10)
text(x = hpcut, cex = 0.8)

# SPAM ANALYSIS ----
# Fetch the UCI spambase data once; later runs reuse the local copy instead of
# downloading it again.
spam_file <- "./spambase.data"
if (!file.exists(spam_file)) {
  download.file(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data",
    spam_file
  )
}

# spambase.data has no header line. read.csv() defaults to header = TRUE,
# which turns the first e-mail into column names (X0, X0.64, ...) and drops it
# from the data. Read with header = FALSE to keep every record, but reuse
# those auto-generated names so code that refers to them (e.g. the response
# column X1) keeps working.
# NOTE: printed results recorded from the earlier header = TRUE read show
# n = 4600; re-run to refresh them.
legacy_names <- names(read.csv(spam_file, nrows = 1))
spam <- read.csv(spam_file, header = FALSE, col.names = legacy_names)
head(spam)
## X0 X0.64 X0.64.1 X0.1 X0.32 X0.2 X0.3 X0.4 X0.5 X0.6 X0.7 X0.64.2 X0.8
## 1 0.21 0.28 0.50 0 0.14 0.28 0.21 0.07 0.00 0.94 0.21 0.79 0.65
## 2 0.06 0.00 0.71 0 1.23 0.19 0.19 0.12 0.64 0.25 0.38 0.45 0.12
## 3 0.00 0.00 0.00 0 0.63 0.00 0.31 0.63 0.31 0.63 0.31 0.31 0.31
## 4 0.00 0.00 0.00 0 0.63 0.00 0.31 0.63 0.31 0.63 0.31 0.31 0.31
## 5 0.00 0.00 0.00 0 1.85 0.00 0.00 1.85 0.00 0.00 0.00 0.00 0.00
## 6 0.00 0.00 0.00 0 1.92 0.00 0.00 0.00 0.00 0.64 0.96 1.28 0.00
## X0.9 X0.10 X0.32.1 X0.11 X1.29 X1.93 X0.12 X0.96 X0.13 X0.14 X0.15 X0.16
## 1 0.21 0.14 0.14 0.07 0.28 3.47 0.00 1.59 0 0.43 0.43 0
## 2 0.00 1.75 0.06 0.06 1.03 1.36 0.32 0.51 0 1.16 0.06 0
## 3 0.00 0.00 0.31 0.00 0.00 3.18 0.00 0.31 0 0.00 0.00 0
## 4 0.00 0.00 0.31 0.00 0.00 3.18 0.00 0.31 0 0.00 0.00 0
## 5 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0.00 0 0.00 0.00 0
## 6 0.00 0.00 0.96 0.00 0.32 3.85 0.00 0.64 0 0.00 0.00 0
## X0.17 X0.18 X0.19 X0.20 X0.21 X0.22 X0.23 X0.24 X0.25 X0.26 X0.27 X0.28
## 1 0 0 0 0 0 0 0 0 0 0 0 0.07
## 2 0 0 0 0 0 0 0 0 0 0 0 0.00
## 3 0 0 0 0 0 0 0 0 0 0 0 0.00
## 4 0 0 0 0 0 0 0 0 0 0 0 0.00
## 5 0 0 0 0 0 0 0 0 0 0 0 0.00
## 6 0 0 0 0 0 0 0 0 0 0 0 0.00
## X0.29 X0.30 X0.31 X0.33 X0.34 X0.35 X0.36 X0.37 X0.38 X0.39 X0.40 X0.41
## 1 0 0 0.00 0 0 0.00 0 0.00 0.00 0 0 0.00
## 2 0 0 0.06 0 0 0.12 0 0.06 0.06 0 0 0.01
## 3 0 0 0.00 0 0 0.00 0 0.00 0.00 0 0 0.00
## 4 0 0 0.00 0 0 0.00 0 0.00 0.00 0 0 0.00
## 5 0 0 0.00 0 0 0.00 0 0.00 0.00 0 0 0.00
## 6 0 0 0.00 0 0 0.00 0 0.00 0.00 0 0 0.00
## X0.42 X0.43 X0.778 X0.44 X0.45 X3.756 X61 X278 X1
## 1 0.132 0 0.372 0.180 0.048 5.114 101 1028 1
## 2 0.143 0 0.276 0.184 0.010 9.821 485 2259 1
## 3 0.137 0 0.137 0.000 0.000 3.537 40 191 1
## 4 0.135 0 0.135 0.000 0.000 3.537 40 191 1
## 5 0.223 0 0.000 0.000 0.000 3.000 15 54 1
## 6 0.054 0 0.164 0.054 0.000 1.671 4 112 1
## Response: X1 is the spam indicator (1 = spam); every other column is a
## candidate predictor.
sp <- tree(formula = X1 ~ ., data = spam)
sp
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 4600 1098.00 0.39390
## 2) X0.44 < 0.0555 3470 623.60 0.23490
## 4) X0.3 < 0.055 3140 430.50 0.16400
## 8) X0.778 < 0.378 2737 247.40 0.10050
## 16) X0.32.1 < 0.2 2507 168.80 0.07260
## 32) X0.15 < 0.01 2439 135.50 0.05904 *
## 33) X0.15 > 0.01 68 16.76 0.55880 *
## 17) X0.32.1 > 0.2 230 55.40 0.40430 *
## 9) X0.778 > 0.378 403 97.07 0.59550
## 18) X278 < 55.5 182 37.14 0.28570 *
## 19) X278 > 55.5 221 28.07 0.85070 *
## 5) X0.3 > 0.055 330 27.27 0.90910
## 10) X0.18 < 0.14 317 16.09 0.94640 *
## 11) X0.18 > 0.14 13 0.00 0.00000 *
## 3) X0.44 > 0.0555 1130 117.30 0.88230
## 6) X0.16 < 0.4 1060 65.38 0.93400
## 12) X0.38 < 0.49 1045 52.11 0.94740 *
## 13) X0.38 > 0.49 15 0.00 0.00000 *
## 7) X0.16 > 0.4 70 6.30 0.10000 *
# Cross-validate the pruning sequence of the spam tree.
# cv.tree() assigns observations to folds at random, so the seed is fixed to
# make the plot reproducible.
set.seed(1)
cvsp <- cv.tree(sp)
plot(cvsp)

# Prune back to the subtree with 9 terminal nodes (`best` counts leaves).
# NOTE(review): 9 is presumably the size read off the plot above - confirm.
spcut <- prune.tree(sp, best = 9)
spcut
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 4600 1098.00 0.39390
## 2) X0.44 < 0.0555 3470 623.60 0.23490
## 4) X0.3 < 0.055 3140 430.50 0.16400
## 8) X0.778 < 0.378 2737 247.40 0.10050
## 16) X0.32.1 < 0.2 2507 168.80 0.07260
## 32) X0.15 < 0.01 2439 135.50 0.05904 *
## 33) X0.15 > 0.01 68 16.76 0.55880 *
## 17) X0.32.1 > 0.2 230 55.40 0.40430 *
## 9) X0.778 > 0.378 403 97.07 0.59550
## 18) X278 < 55.5 182 37.14 0.28570 *
## 19) X278 > 55.5 221 28.07 0.85070 *
## 5) X0.3 > 0.055 330 27.27 0.90910 *
## 3) X0.44 > 0.0555 1130 117.30 0.88230
## 6) X0.16 < 0.4 1060 65.38 0.93400
## 12) X0.38 < 0.49 1045 52.11 0.94740 *
## 13) X0.38 > 0.49 15 0.00 0.00000 *
## 7) X0.16 > 0.4 70 6.30 0.10000 *
# Draw the pruned spam tree, then add its text labels to the plot.
plot(x = spcut)
text(x = spcut)
