mushroom <- read.csv(url("http://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"), header = FALSE, sep = ",")
str(mushroom)
'data.frame': 8124 obs. of 23 variables:
$ V1 : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
$ V2 : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
$ V3 : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
$ V4 : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
$ V5 : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
$ V6 : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
$ V7 : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
$ V8 : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
$ V9 : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
$ V10: Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
$ V11: Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
$ V12: Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
$ V13: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
$ V14: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
$ V15: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
$ V16: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
$ V17: Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
$ V18: Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
$ V19: Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
$ V20: Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
$ V21: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
$ V22: Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
$ V23: Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
colnames(mushroom) <- c("edibility", "cap_shape", "cap_surface", "cap_color", "bruises", "odor", "grill_attachment", "grill_spacing", "grill_size", "grill_color", "stalk_shape", "stalk_root", "stalk_surface_above_ring", "stalk_surface_below_ring", "stalk_color_above_ring", "stalk_color_below_ring", "veil_type", "veil_color", "ring_number", "ring_type", "spore_print_color", "population", "habitat")
head(mushroom)
sum(is.na(mushroom))
[1] 0
mushroom <- subset(mushroom, select = -veil_type)
mushroom$cap_shape <- as.numeric(mushroom$cap_shape)
mushroom$cap_surface <- as.numeric(mushroom$cap_surface)
mushroom$cap_color <- as.numeric(mushroom$cap_color)
mushroom$bruises <- as.numeric(mushroom$bruises)
mushroom$odor <- as.numeric(mushroom$odor)
mushroom$grill_attachment <- as.numeric(mushroom$grill_attachment)
mushroom$grill_spacing <- as.numeric(mushroom$grill_spacing)
mushroom$grill_size <- as.numeric(mushroom$grill_size)
mushroom$grill_color <- as.numeric(mushroom$grill_color)
mushroom$stalk_shape <- as.numeric(mushroom$stalk_shape)
mushroom$stalk_root <- as.numeric(mushroom$stalk_root)
mushroom$stalk_surface_above_ring <- as.numeric(mushroom$cap_shape)
mushroom$stalk_surface_below_ring <- as.numeric(mushroom$cap_shape)
mushroom$stalk_color_above_ring <- as.numeric(mushroom$stalk_color_above_ring)
mushroom$stalk_color_below_ring <- as.numeric(mushroom$stalk_color_below_ring)
mushroom$veil_color <- as.numeric(mushroom$veil_color)
mushroom$ring_number <- as.numeric(mushroom$ring_number)
mushroom$ring_type <- as.numeric(mushroom$ring_type)
mushroom$spore_print_color <- as.numeric(mushroom$spore_print_color)
mushroom$population <- as.numeric(mushroom$population)
mushroom$habitat <- as.numeric(mushroom$habitat)
sauce <- as.numeric(mushroom$edibility)
hist(sauce)
table(sauce)
sauce
1 2
4208 3916
4208/8124
[1] 0.5179714426
3916/8124
[1] 0.4820285574
.7 * 8124
[1] 5686.8
.3 * 8124
[1] 2437.2
s <- sample(8124, 5687)
mush_train <- mushroom[s, ]
mush_test <- mushroom[-s, ]
dim(mush_train)
[1] 5687 22
dim(mush_test)
[1] 2437 22
install.packages("e1071")
Error in install.packages : Updating loaded packages
str(mush_train)
'data.frame': 5687 obs. of 22 variables:
$ edibility : Factor w/ 2 levels "e","p": 1 1 2 1 1 2 1 2 1 1 ...
$ cap_shape : num 6 3 4 1 3 4 4 3 3 6 ...
$ cap_surface : num 4 3 4 1 1 4 1 1 1 1 ...
$ cap_color : num 3 4 3 9 4 3 9 10 5 4 ...
$ bruises : num 2 1 1 1 2 1 1 1 1 1 ...
$ odor : num 6 6 9 6 6 9 6 3 6 6 ...
$ grill_attachment : num 2 2 2 2 2 2 2 2 2 2 ...
$ grill_spacing : num 1 2 1 2 1 1 2 1 2 2 ...
$ grill_size : num 1 1 2 1 1 2 1 1 2 1 ...
$ grill_color : num 2 8 1 8 8 1 3 3 11 11 ...
$ stalk_shape : num 1 2 2 1 2 2 1 1 1 1 ...
$ stalk_root : num 1 4 1 1 2 1 1 2 2 1 ...
$ stalk_surface_above_ring: num 6 3 4 1 3 4 4 3 3 6 ...
$ stalk_surface_below_ring: num 6 3 4 1 3 4 4 3 3 6 ...
$ stalk_color_above_ring : num 3 8 7 8 7 7 8 1 8 8 ...
$ stalk_color_below_ring : num 8 8 7 8 4 7 8 5 5 8 ...
$ veil_color : num 3 3 3 3 3 3 3 3 3 3 ...
$ ring_number : num 3 2 2 3 2 2 3 2 2 3 ...
$ ring_type : num 1 1 1 5 5 1 5 3 1 5 ...
$ spore_print_color : num 8 3 8 8 4 8 8 2 8 8 ...
$ population : num 2 1 5 4 6 5 4 5 5 3 ...
$ habitat : num 7 2 5 2 1 3 2 5 3 2 ...
library(e1071)
svm_model <- svm(edibility ~ ., data = mush_train, kernel = 'linear', cost = 1, scale = FALSE)
print(svm_model)
Call:
svm(formula = edibility ~ ., data = mush_train, kernel = "linear",
cost = 1, scale = FALSE)
Parameters:
SVM-Type: C-classification
SVM-Kernel: linear
cost: 1
gamma: 0.04761905
Number of Support Vectors: 994
library(gamlss)
plot(svm_model)
Error in ncol(data) : argument "data" is missing, with no default
svm.pred = predict(svm_model, mush_test[, !names(mush_test) %in% c("edibility")])
svm.table = table(svm.pred, mush_test$edibility)
svm.table
svm.pred e p
e 1193 39
p 52 1153
1193 + 1153
[1] 2346
1193 + 1153 + 52 + 39
[1] 2437
2346/2437
[1] 0.962659
svm2 <- svm(edibility ~ ., data = mush_train, kernel = 'linear', cost = 100, scale = FALSE)
svm.pred2 = predict(svm2, mush_test[, !names(mush_test) %in% c("edibility")])
svm.table2 = table(svm.pred2, mush_test$edibility)
svm.table2
svm.pred2 e p
e 1198 11
p 47 1181
(1198+1181)/2437
[1] 0.9762002
svm3 <- svm(edibility ~ ., data = mush_train, kernel = 'radial', cost = 100, scale = FALSE)
svm.pred3 = predict(svm3, mush_test[, !names(mush_test) %in% c("edibility")])
svm.table3 = table(svm.pred3, mush_test$edibility)
svm.table3
svm.pred3 e p
e 1245 0
p 0 1192
Nueral Network
mushroom <- read.csv(url("http://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"), header = FALSE, sep = ",")
colnames(mushroom) <- c("edibility", "cap_shape", "cap_surface", "cap_color", "bruises", "odor", "grill_attachment", "grill_spacing", "grill_size", "grill_color", "stalk_shape", "stalk_root", "stalk_surface_above_ring", "stalk_surface_below_ring", "stalk_color_above_ring", "stalk_color_below_ring", "veil_type", "veil_color", "ring_number", "ring_type", "spore_print_color", "population", "habitat")
mushroom <- subset(mushroom, select = -veil_type)
mushroom <- subset(mushroom, select = -stalk_root)
library(caret)
swag <- createDataPartition(mushroom$edibility, p = .7, list = FALSE)
dummy <- subset(mushroom, select = -edibility)
shroomDummy <- dummyVars(~., data = dummy, sep = ".")
shroomDummy <- data.frame(predict(shroomDummy, dummy))
ncol(shroomDummy)
[1] 111
shroomDummy$edibility <- mushroom$edibility
ncol(shroomDummy)
[1] 112
train <- shroomDummy[swag,]
test <- shroomDummy[-swag,]
testLabels <- subset(test, select = edibility)
testset <- subset(test, select = -edibility)
library(nnet)
?nnet
net <- nnet(edibility ~ ., data = train, size = 2, rang = 0.1, maxit = 200)
# weights: 227
initial value 3936.828655
iter 10 value 447.540701
iter 20 value 376.617989
iter 30 value 370.964845
iter 40 value 357.164663
iter 50 value 328.752483
iter 60 value 321.439439
iter 70 value 321.430150
iter 80 value 321.429931
final value 321.429921
converged
summary(net)
a 111-2-1 network with 227 weights
options were - entropy fitting
b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1
-3.87 6.20 -0.10 0.24 -4.80 2.35
i6->h1 i7->h1 i8->h1 i9->h1 i10->h1 i11->h1
-7.81 9.92 -0.07 -3.42 -10.50 -0.81
i12->h1 i13->h1 i14->h1 i15->h1 i16->h1 i17->h1
0.11 -5.39 1.99 2.12 -2.06 -0.06
i18->h1 i19->h1 i20->h1 i21->h1 i22->h1 i23->h1
-0.08 0.50 -0.19 -27.44 23.37 7.33
i24->h1 i25->h1 i26->h1 i27->h1 i28->h1 i29->h1
-14.42 -29.08 7.04 -0.52 49.79 -8.08
i30->h1 i31->h1 i32->h1 i33->h1 i34->h1 i35->h1
-7.79 -8.17 1.95 -5.93 -13.44 9.54
i36->h1 i37->h1 i38->h1 i39->h1 i40->h1 i41->h1
27.12 -30.98 -24.24 1.22 0.31 -4.70
i42->h1 i43->h1 i44->h1 i45->h1 i46->h1 i47->h1
3.99 6.66 0.66 3.98 -0.30 -0.32
i48->h1 i49->h1 i50->h1 i51->h1 i52->h1 i53->h1
8.40 0.33 -10.15 6.43 3.08 -24.88
i54->h1 i55->h1 i56->h1 i57->h1 i58->h1 i59->h1
17.56 0.26 2.86 -23.90 15.40 1.73
i60->h1 i61->h1 i62->h1 i63->h1 i64->h1 i65->h1
-5.62 -0.47 1.16 6.48 -5.23 2.22
i66->h1 i67->h1 i68->h1 i69->h1 i70->h1 i71->h1
-11.75 9.54 -0.01 -5.77 -0.52 1.09
i72->h1 i73->h1 i74->h1 i75->h1 i76->h1 i77->h1
6.69 -5.28 2.25 -11.59 9.64 -0.34
i78->h1 i79->h1 i80->h1 i81->h1 i82->h1 i83->h1
1.07 1.07 -6.07 -0.14 -0.43 -12.74
i84->h1 i85->h1 i86->h1 i87->h1 i88->h1 i89->h1
9.33 -12.97 -0.05 -17.11 -0.43 26.77
i90->h1 i91->h1 i92->h1 i93->h1 i94->h1 i95->h1
0.47 -21.04 13.85 14.29 0.50 -0.90
i96->h1 i97->h1 i98->h1 i99->h1 i100->h1 i101->h1
3.34 -15.03 0.47 4.74 2.74 7.08
i102->h1 i103->h1 i104->h1 i105->h1 i106->h1 i107->h1
-6.55 -20.80 8.83 -3.99 7.84 -6.10
i108->h1 i109->h1 i110->h1 i111->h1
2.44 -11.80 5.21 2.32
b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2
-0.35 3.28 -0.04 -2.14 0.90 0.53
i6->h2 i7->h2 i8->h2 i9->h2 i10->h2 i11->h2
-2.76 6.05 -0.04 -6.80 0.47 -3.69
i12->h2 i13->h2 i14->h2 i15->h2 i16->h2 i17->h2
5.95 -3.50 -12.15 4.41 -2.08 2.84
i18->h2 i19->h2 i20->h2 i21->h2 i22->h2 i23->h2
1.86 1.33 4.61 -8.83 8.50 15.83
i24->h2 i25->h2 i26->h2 i27->h2 i28->h2 i29->h2
-19.14 -27.71 7.85 -1.37 52.14 -17.40
i30->h2 i31->h2 i32->h2 i33->h2 i34->h2 i35->h2
-5.09 -5.34 0.00 -0.31 -12.81 12.50
i36->h2 i37->h2 i38->h2 i39->h2 i40->h2 i41->h2
15.66 -15.94 -15.71 0.54 5.93 -1.82
i42->h2 i43->h2 i44->h2 i45->h2 i46->h2 i47->h2
1.38 0.00 0.20 -5.27 -0.21 0.65
i48->h2 i49->h2 i50->h2 i51->h2 i52->h2 i53->h2
13.93 0.20 2.60 -2.93 8.85 -20.15
i54->h2 i55->h2 i56->h2 i57->h2 i58->h2 i59->h2
11.42 -0.38 16.32 -18.45 1.69 0.19
i60->h2 i61->h2 i62->h2 i63->h2 i64->h2 i65->h2
-3.77 -1.23 0.34 4.31 -3.95 1.05
i66->h2 i67->h2 i68->h2 i69->h2 i70->h2 i71->h2
-7.44 10.75 -0.14 -3.92 -1.37 0.31
i72->h2 i73->h2 i74->h2 i75->h2 i76->h2 i77->h2
4.39 10.86 0.87 -7.62 -3.70 -0.42
i78->h2 i79->h2 i80->h2 i81->h2 i82->h2 i83->h2
0.58 0.54 -1.13 -0.05 -1.24 -1.25
i84->h2 i85->h2 i86->h2 i87->h2 i88->h2 i89->h2
2.33 5.09 8.68 -11.53 -1.26 -1.28
i90->h2 i91->h2 i92->h2 i93->h2 i94->h2 i95->h2
0.30 -13.84 3.77 5.36 0.10 -0.69
i96->h2 i97->h2 i98->h2 i99->h2 i100->h2 i101->h2
4.18 0.32 0.10 2.64 0.07 3.65
i102->h2 i103->h2 i104->h2 i105->h2 i106->h2 i107->h2
-9.87 -4.71 7.64 -3.06 0.25 10.51
i108->h2 i109->h2 i110->h2 i111->h2
2.81 -7.44 -4.28 0.94
b->o h1->o h2->o
23.79 -2.96 -24.80
shroom.predict <- predict(net, testset, type = "class")
net.table <- table(test$edibility, shroom.predict)
net.table
shroom.predict
e p
e 1262 0
p 42 1132
confusionMatrix(net.table)
Confusion Matrix and Statistics
shroom.predict
e p
e 1262 0
p 42 1132
Accuracy : 0.9827586
95% CI : (0.9767655, 0.9875464)
No Information Rate : 0.5353038
P-Value [Acc > NIR] : < 0.00000000000000022204
Kappa : 0.9654291
Mcnemar's Test P-Value : 0.0000000002508861
Sensitivity : 0.9677914
Specificity : 1.0000000
Pos Pred Value : 1.0000000
Neg Pred Value : 0.9642249
Prevalence : 0.5353038
Detection Rate : 0.5180624
Detection Prevalence : 0.5180624
Balanced Accuracy : 0.9838957
'Positive' Class : e
install.packages("gamlss.add")
also installing the dependencies ‘gamlss.data’, ‘gamlss.dist’, ‘gamlss’
trying URL 'https://cran.rstudio.com/bin/macosx/mavericks/contrib/3.3/gamlss.data_5.0-0.tgz'
Content type 'application/x-gzip' length 1119780 bytes (1.1 MB)
==================================================
downloaded 1.1 MB
trying URL 'https://cran.rstudio.com/bin/macosx/mavericks/contrib/3.3/gamlss.dist_5.0-2.tgz'
Content type 'application/x-gzip' length 1130249 bytes (1.1 MB)
==================================================
downloaded 1.1 MB
trying URL 'https://cran.rstudio.com/bin/macosx/mavericks/contrib/3.3/gamlss_5.0-2.tgz'
Content type 'application/x-gzip' length 3158521 bytes (3.0 MB)
==================================================
downloaded 3.0 MB
trying URL 'https://cran.rstudio.com/bin/macosx/mavericks/contrib/3.3/gamlss.add_5.0-1.tgz'
Content type 'application/x-gzip' length 105656 bytes (103 KB)
==================================================
downloaded 103 KB
The downloaded binary packages are in
/var/folders/6b/yb20hcv16nz__qyd0fcg7b9h0000gn/T//RtmpfJyCaE/downloaded_packages
library(gamlss.add)
Loading required package: gamlss.dist
package ‘gamlss.dist’ was built under R version 3.3.2Loading required package: MASS
package ‘MASS’ was built under R version 3.3.2Loading required package: gamlss
package ‘gamlss’ was built under R version 3.3.2Loading required package: splines
Loading required package: gamlss.data
Loading required package: nlme
Loading required package: parallel
********** GAMLSS Version 5.0-2 **********
For more on GAMLSS look at http://www.gamlss.org/
Type gamlssNews() to see new features/changes/bug fixes.
Attaching package: ‘gamlss’
The following object is masked from ‘package:caret’:
calibration
Loading required package: mgcv
This is mgcv 1.8-12. For overview type 'help("mgcv-package")'.
Attaching package: ‘mgcv’
The following object is masked from ‘package:nnet’:
multinom
Loading required package: rpart
package ‘rpart’ was built under R version 3.3.2
plot(net, .3)
plot(svm_model)
Error in plot(svm_model) : object 'svm_model' not found