# RCurl is a wrapper for libcurl that provides functions for general HTTP requests
library(RCurl)
## Loading required package: bitops
# Download the UCI mushroom data set (agaricus-lepiota.data) and parse it as CSV.
# The file has no header row, so read.csv names the columns V1, V2, ...
# stringsAsFactors is set explicitly: since R 4.0.0 it defaults to FALSE, in
# which case summary(mushrooms) reports only length/class/mode for each column
# instead of per-level counts. On older R versions this argument matches the
# default, so behavior there is unchanged.
mushrooms <- read.csv(
  text = getURL('https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data'),
  header = FALSE,
  sep = ',',
  stringsAsFactors = TRUE
)
Attribute (column) information is found on the data set's website. Each mushroom is first classified as either edible or poisonous (the "eat at your own risk" category is grouped with poisonous), and then 22 attributes (columns) are recorded for each case. In total there are 23 columns, the first of which indicates whether or not the mushroom is edible.
summary(mushrooms)
## V1 V2 V3 V4 V5 V6
## e:4208 b: 452 f:2320 n :2284 f:4748 n :3528
## p:3916 c: 4 g: 4 g :1840 t:3376 f :2160
## f:3152 s:2556 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3656 b : 168 l : 400
## (Other): 220 (Other): 484
## V7 V8 V9 V10 V11 V12 V13
## a: 210 c:6812 b:5612 b :1728 e:3516 ?:2480 f: 552
## f:7914 w:1312 n:2512 p :1492 t:4608 b:3776 k:2372
## w :1202 c: 556 s:5176
## n :1048 e:1120 y: 24
## g : 752 r: 192
## h : 732
## (Other):1170
## V14 V15 V16 V17 V18 V19
## f: 600 w :4464 w :4384 p:8124 n: 96 n: 36
## k:2304 p :1872 p :1872 o: 96 o:7488
## s:4936 g : 576 g : 576 w:7924 t: 600
## y: 284 n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## V20 V21 V22 V23
## e:2776 w :2388 a: 384 d:3148
## f: 48 n :1968 c: 340 g:2148
## l:1296 k :1872 n: 400 l: 832
## n: 36 h :1632 s:1248 m: 292
## p:3968 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 368
## (Other): 144 w: 192
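We can see that V1 contains only e or p, which I take to mean edible or poisonous. V2 contains b, c, f, k, s and x, and so does attribute 1 on the website. So I'm going to use the attribute list provided there, shifted by one column to account for the leading edible/poisonous column: attribute k on the website corresponds to column V(k+1) here.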
# Give the columns descriptive names: the edible/poisonous label first,
# followed by the 22 attributes from the data set's attribute list.
names(mushrooms) <- c(
  "edible", "capshape", "capsurface", "capcolor", "bruises", "odor",
  "gillattach", "gillspace", "gillsize", "gillcolor", "stalkshape",
  "stalkroot", "stalksurfaceabovering", "stalksurfacebelowring",
  "stalkcolorabovering", "stalkcolorbelowring", "veiltype", "veilcolor",
  "ringnumber", "ringtype", "sporeprintcolor", "population", "habitat"
)

# Build myMushrooms: keep the rows whose population code is "y", restrict them
# to four columns, replace each one-letter code with a readable label, and
# store every column as a factor.
myMushrooms <- local({
  # One-letter code -> label for each kept column. A code that is not listed
  # here is carried through unchanged.
  labels_by_column <- list(
    edible    = c(e = "edible", p = "poisonous"),
    veilcolor = c(n = "brown", o = "orange", w = "white", y = "yellow"),
    veiltype  = c(p = "partial", u = "universal"),
    bruises   = c(t = "true", f = "false")
  )
  wanted <- c("edible", "veilcolor", "veiltype", "bruises")

  # Rows where the comparison is NA are dropped, as subset() does.
  is_y <- mushrooms$population == "y"
  picked <- mushrooms[is_y & !is.na(is_y), wanted]

  for (column in wanted) {
    codes <- as.character(picked[[column]])
    translated <- unname(labels_by_column[[column]][codes])
    # Codes without a label come back as NA from the lookup; restore them.
    untouched <- is.na(translated)
    translated[untouched] <- codes[untouched]
    picked[[column]] <- as.factor(translated)
  }

  picked
})
head(myMushrooms)
## edible veilcolor veiltype bruises
## 16 edible white partial false
## 29 edible white partial false
## 34 edible white partial true
## 42 edible white partial true
## 43 edible white partial false
## 60 edible white partial true
tail(myMushrooms)
## edible veilcolor veiltype bruises
## 7941 edible white partial true
## 7966 edible white partial true
## 7985 edible white partial false
## 7987 edible white partial false
## 8002 edible white partial false
## 8039 edible white partial true
dim(myMushrooms)
## [1] 1712 4