## DEATH PENALTY ANALYSIS

## tree(), cv.tree() and prune.tree() all come from the `tree` package; load
## it up front so the script runs in a fresh R session.
library(tree)

## Read the death-penalty data from the current working directory.
deathpenalty <- read.csv("./DeathPenalty.csv")

## Grow a tree for Death using every other column as a candidate predictor.
## The yval column of the printed tree is fractional, i.e. Death is modelled
## as a numeric response (regression tree), not as a factor.
dp <- tree(Death ~ ., data = deathpenalty)
dp
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 362 49.380 0.1630  
##    2) Agg < 3.5 307 13.360 0.0456  
##      4) Agg < 2.5 283  5.873 0.0212 *
##      5) Agg > 2.5 24  5.333 0.3333 *
##    3) Agg > 3.5 55  8.182 0.8182  
##      6) VRace < 0.5 17  4.118 0.5882  
##       12) Agg < 4.5 6  1.333 0.3333 *
##       13) Agg > 4.5 11  2.182 0.7273 *
##      7) VRace > 0.5 38  2.763 0.9211  
##       14) Agg < 4.5 12  2.250 0.7500 *
##       15) Agg > 4.5 26  0.000 1.0000 *
## cv.tree() assigns observations to cross-validation folds at random, so the
## deviance-vs-size curve changes from run to run. Fixing the seed makes the
## plot (and the tree size read off it) reproducible.
set.seed(1)
cvdp <- cv.tree(dp)
plot(cvdp)

## The original analysis read a best size of 3 off the CV plot. `best` is the
## number of terminal nodes (leaves) to keep, not the number of branches.
## Re-check this choice against the seeded plot above.
dpcut <- prune.tree(dp, best = 3)
dpcut
## node), split, n, deviance, yval
##       * denotes terminal node
## 
## 1) root 362 49.380 0.1630  
##   2) Agg < 3.5 307 13.360 0.0456  
##     4) Agg < 2.5 283  5.873 0.0212 *
##     5) Agg > 2.5 24  5.333 0.3333 *
##   3) Agg > 3.5 55  8.182 0.8182 *

## Draw the pruned tree, then label its splits and leaf values.
plot(dpcut)
text(dpcut)

## HOUSING PRICE ANALYSIS

## Read the housing data and drop the first column (the house ID).
house <- read.csv("./HousePrices.csv")
house <- house[-1]

## Make dummy variables in order to eliminate the categorical variables.
## Comparing a column with a level yields TRUE/FALSE (NA stays NA) and
## as.numeric() turns that into 1/0. Values that match none of the levels
## tested here end up as the all-zero baseline.
house$BrickYes <- as.numeric(house$Brick == "Yes")
house$NeighborhoodEast <- as.numeric(house$Neighborhood == "East")
house$NeighborhoodNorth <- as.numeric(house$Neighborhood == "North")

## Drop the original categorical columns by name instead of by position
## (previously columns 6 and 7), so a change in the CSV's column order cannot
## silently remove the wrong variables.
house <- house[setdiff(names(house), c("Brick", "Neighborhood"))]

## Fit a tree for Price using all remaining columns as candidate predictors.
hp <- tree(Price ~ ., data = house)
hp
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 128 9.169e+10 130400  
##    2) NeighborhoodNorth < 0.5 84 5.316e+10 141000  
##      4) NeighborhoodEast < 0.5 39 1.487e+10 159300  
##        8) BrickYes < 0.5 23 4.024e+09 148200  
##         16) SqFt < 2010 9 3.002e+08 137000 *
##         17) SqFt > 2010 14 1.844e+09 155500 *
##        9) BrickYes > 0.5 16 3.983e+09 175200  
##         18) Bedrooms < 3.5 8 4.316e+08 164100 *
##         19) Bedrooms > 3.5 8 1.580e+09 186300 *
##      5) NeighborhoodEast > 0.5 45 1.404e+10 125200  
##       10) SqFt < 2035 25 4.970e+09 115600  
##         20) BrickYes < 0.5 15 1.648e+09 108100 *
##         21) BrickYes > 0.5 10 1.234e+09 126800 *
##       11) SqFt > 2035 20 3.831e+09 137300  
##         22) BrickYes < 0.5 11 2.498e+09 130900 *
##         23) BrickYes > 0.5 9 3.284e+08 145100 *
##    3) NeighborhoodNorth > 0.5 44 1.097e+10 110200  
##      6) SqFt < 1695 7 1.310e+09  93130 *
##      7) SqFt > 1695 37 7.249e+09 113400  
##       14) SqFt < 2145 32 5.299e+09 111000  
##         28) Offers < 3.5 22 2.942e+09 115700  
##           56) SqFt < 1910 10 5.153e+08 108100 *
##           57) SqFt > 1910 12 1.366e+09 122000 *
##         29) Offers > 3.5 10 7.866e+08 100600 *
##       15) SqFt > 2145 5 5.809e+08 128800 *
## cv.tree() assigns observations to cross-validation folds at random; fix the
## seed so the deviance-vs-size plot is reproducible between runs.
set.seed(1)
cvhp <- cv.tree(hp)
plot(cvhp, pch = 21, bg = 8, type = "p", cex = 1.5)

## The original analysis read a best size of 11 off the CV plot. `best` is the
## number of terminal nodes (leaves) to keep, not the number of branches.
## Re-check this choice against the seeded plot above.
hpcut <- prune.tree(hp, best = 11)
hpcut
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 128 9.169e+10 130400  
##    2) NeighborhoodNorth < 0.5 84 5.316e+10 141000  
##      4) NeighborhoodEast < 0.5 39 1.487e+10 159300  
##        8) BrickYes < 0.5 23 4.024e+09 148200  
##         16) SqFt < 2010 9 3.002e+08 137000 *
##         17) SqFt > 2010 14 1.844e+09 155500 *
##        9) BrickYes > 0.5 16 3.983e+09 175200  
##         18) Bedrooms < 3.5 8 4.316e+08 164100 *
##         19) Bedrooms > 3.5 8 1.580e+09 186300 *
##      5) NeighborhoodEast > 0.5 45 1.404e+10 125200  
##       10) SqFt < 2035 25 4.970e+09 115600  
##         20) BrickYes < 0.5 15 1.648e+09 108100 *
##         21) BrickYes > 0.5 10 1.234e+09 126800 *
##       11) SqFt > 2035 20 3.831e+09 137300 *
##    3) NeighborhoodNorth > 0.5 44 1.097e+10 110200  
##      6) SqFt < 1695 7 1.310e+09  93130 *
##      7) SqFt > 1695 37 7.249e+09 113400  
##       14) SqFt < 2145 32 5.299e+09 111000  
##         28) Offers < 3.5 22 2.942e+09 115700 *
##         29) Offers > 3.5 10 7.866e+08 100600 *
##       15) SqFt > 2145 5 5.809e+08 128800 *

## Draw the pruned tree, then label its splits and leaf values with smaller
## text so the labels fit.
plot(hpcut, col = 10)
text(hpcut, cex = .8)

## SPAM ANALYSIS

## Fetch the Spambase data only when no local copy exists, so re-running the
## script does not download the file again every time.
if (!file.exists("spambase.data")) {
  download.file(
    "http://archive.ics.uci.edu/ml/machine-learning-databases/spambase/spambase.data",
    "spambase.data"
  )
}

## spambase.data has no header row. With read.csv()'s default header = TRUE
## the first observation was consumed as column names (hence names such as X0
## and X0.64, and a root node of only 4600 rows, in the recorded output).
## header = FALSE keeps every row as data and names the columns V1, V2, ...
spam <- read.csv("./spambase.data", header = FALSE)

## The last column is the spam indicator (1 is yes); give it a readable name.
names(spam)[ncol(spam)] <- "is_spam"
head(spam)
## NOTE: every block of recorded output in this section was produced BEFORE
## the header fix above. In it the response appears as X1 (now is_spam), the
## predictors carry names derived from the first data row (now V1, V2, ...),
## and the first observation is missing. Re-run the script to refresh it.
##     X0 X0.64 X0.64.1 X0.1 X0.32 X0.2 X0.3 X0.4 X0.5 X0.6 X0.7 X0.64.2 X0.8
## 1 0.21  0.28    0.50    0  0.14 0.28 0.21 0.07 0.00 0.94 0.21    0.79 0.65
## 2 0.06  0.00    0.71    0  1.23 0.19 0.19 0.12 0.64 0.25 0.38    0.45 0.12
## 3 0.00  0.00    0.00    0  0.63 0.00 0.31 0.63 0.31 0.63 0.31    0.31 0.31
## 4 0.00  0.00    0.00    0  0.63 0.00 0.31 0.63 0.31 0.63 0.31    0.31 0.31
## 5 0.00  0.00    0.00    0  1.85 0.00 0.00 1.85 0.00 0.00 0.00    0.00 0.00
## 6 0.00  0.00    0.00    0  1.92 0.00 0.00 0.00 0.00 0.64 0.96    1.28 0.00
##   X0.9 X0.10 X0.32.1 X0.11 X1.29 X1.93 X0.12 X0.96 X0.13 X0.14 X0.15 X0.16
## 1 0.21  0.14    0.14  0.07  0.28  3.47  0.00  1.59     0  0.43  0.43     0
## 2 0.00  1.75    0.06  0.06  1.03  1.36  0.32  0.51     0  1.16  0.06     0
## 3 0.00  0.00    0.31  0.00  0.00  3.18  0.00  0.31     0  0.00  0.00     0
## 4 0.00  0.00    0.31  0.00  0.00  3.18  0.00  0.31     0  0.00  0.00     0
## 5 0.00  0.00    0.00  0.00  0.00  0.00  0.00  0.00     0  0.00  0.00     0
## 6 0.00  0.00    0.96  0.00  0.32  3.85  0.00  0.64     0  0.00  0.00     0
##   X0.17 X0.18 X0.19 X0.20 X0.21 X0.22 X0.23 X0.24 X0.25 X0.26 X0.27 X0.28
## 1     0     0     0     0     0     0     0     0     0     0     0  0.07
## 2     0     0     0     0     0     0     0     0     0     0     0  0.00
## 3     0     0     0     0     0     0     0     0     0     0     0  0.00
## 4     0     0     0     0     0     0     0     0     0     0     0  0.00
## 5     0     0     0     0     0     0     0     0     0     0     0  0.00
## 6     0     0     0     0     0     0     0     0     0     0     0  0.00
##   X0.29 X0.30 X0.31 X0.33 X0.34 X0.35 X0.36 X0.37 X0.38 X0.39 X0.40 X0.41
## 1     0     0  0.00     0     0  0.00     0  0.00  0.00     0     0  0.00
## 2     0     0  0.06     0     0  0.12     0  0.06  0.06     0     0  0.01
## 3     0     0  0.00     0     0  0.00     0  0.00  0.00     0     0  0.00
## 4     0     0  0.00     0     0  0.00     0  0.00  0.00     0     0  0.00
## 5     0     0  0.00     0     0  0.00     0  0.00  0.00     0     0  0.00
## 6     0     0  0.00     0     0  0.00     0  0.00  0.00     0     0  0.00
##   X0.42 X0.43 X0.778 X0.44 X0.45 X3.756 X61 X278 X1
## 1 0.132     0  0.372 0.180 0.048  5.114 101 1028  1
## 2 0.143     0  0.276 0.184 0.010  9.821 485 2259  1
## 3 0.137     0  0.137 0.000 0.000  3.537  40  191  1
## 4 0.135     0  0.135 0.000 0.000  3.537  40  191  1
## 5 0.223     0  0.000 0.000 0.000  3.000  15   54  1
## 6 0.054     0  0.164 0.054 0.000  1.671   4  112  1
##x1 is spam variable 1 is yes

## Grow a tree for the spam indicator using all other columns as candidate
## predictors. The indicator is numeric, so this is a regression tree whose
## yval is the fitted spam proportion in each node.
sp <- tree(is_spam ~ ., data = spam)
sp
## (recorded before the header fix; see the NOTE above)
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 4600 1098.00 0.39390  
##    2) X0.44 < 0.0555 3470  623.60 0.23490  
##      4) X0.3 < 0.055 3140  430.50 0.16400  
##        8) X0.778 < 0.378 2737  247.40 0.10050  
##         16) X0.32.1 < 0.2 2507  168.80 0.07260  
##           32) X0.15 < 0.01 2439  135.50 0.05904 *
##           33) X0.15 > 0.01 68   16.76 0.55880 *
##         17) X0.32.1 > 0.2 230   55.40 0.40430 *
##        9) X0.778 > 0.378 403   97.07 0.59550  
##         18) X278 < 55.5 182   37.14 0.28570 *
##         19) X278 > 55.5 221   28.07 0.85070 *
##      5) X0.3 > 0.055 330   27.27 0.90910  
##       10) X0.18 < 0.14 317   16.09 0.94640 *
##       11) X0.18 > 0.14 13    0.00 0.00000 *
##    3) X0.44 > 0.0555 1130  117.30 0.88230  
##      6) X0.16 < 0.4 1060   65.38 0.93400  
##       12) X0.38 < 0.49 1045   52.11 0.94740 *
##       13) X0.38 > 0.49 15    0.00 0.00000 *
##      7) X0.16 > 0.4 70    6.30 0.10000 *

## cv.tree() assigns observations to cross-validation folds at random; fix the
## seed so the deviance-vs-size plot is reproducible between runs.
set.seed(1)
cvsp <- cv.tree(sp)
plot(cvsp)

## Keep the subtree with 9 terminal nodes (leaves), the size used in the
## original analysis. Re-check this choice against the seeded plot above.
spcut <- prune.tree(sp, best = 9)
spcut
## (recorded before the header fix; see the NOTE above)
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 4600 1098.00 0.39390  
##    2) X0.44 < 0.0555 3470  623.60 0.23490  
##      4) X0.3 < 0.055 3140  430.50 0.16400  
##        8) X0.778 < 0.378 2737  247.40 0.10050  
##         16) X0.32.1 < 0.2 2507  168.80 0.07260  
##           32) X0.15 < 0.01 2439  135.50 0.05904 *
##           33) X0.15 > 0.01 68   16.76 0.55880 *
##         17) X0.32.1 > 0.2 230   55.40 0.40430 *
##        9) X0.778 > 0.378 403   97.07 0.59550  
##         18) X278 < 55.5 182   37.14 0.28570 *
##         19) X278 > 55.5 221   28.07 0.85070 *
##      5) X0.3 > 0.055 330   27.27 0.90910 *
##    3) X0.44 > 0.0555 1130  117.30 0.88230  
##      6) X0.16 < 0.4 1060   65.38 0.93400  
##       12) X0.38 < 0.49 1045   52.11 0.94740 *
##       13) X0.38 > 0.49 15    0.00 0.00000 *
##      7) X0.16 > 0.4 70    6.30 0.10000 *

## Draw the pruned tree, then label its splits and leaf values.
plot(spcut)
text(spcut)