The following code retrieves the UCI Mushroom Data Set and creates a data.frame that pairs each mushroom's edibility with its color variants (cap, veil, and spore color), using descriptive column names and full identifiers in place of the single-letter codes.
#retrieve and load data into a data.frame
#the file has no header row, so read.table() auto-names the columns V1, V2, ...
#stringsAsFactors = TRUE is spelled out because the default changed to FALSE
#in R 4.0.0; factor columns are what make summary() report per-level counts
theUrl <- "http://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
mushrooms <- read.table(
  file = theUrl,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
#show the first six rows to confirm the load worked
head(mushrooms)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
#print a per-column summary of the raw, still-coded data
summary(mushrooms)
## V1 V2 V3 V4 V5 V6
## e:4208 b: 452 f:2320 n :2284 f:4748 n :3528
## p:3916 c: 4 g: 4 g :1840 t:3376 f :2160
## f:3152 s:2556 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3656 b : 168 l : 400
## (Other): 220 (Other): 484
## V7 V8 V9 V10 V11 V12 V13
## a: 210 c:6812 b:5612 b :1728 e:3516 ?:2480 f: 552
## f:7914 w:1312 n:2512 p :1492 t:4608 b:3776 k:2372
## w :1202 c: 556 s:5176
## n :1048 e:1120 y: 24
## g : 752 r: 192
## h : 732
## (Other):1170
## V14 V15 V16 V17 V18 V19
## f: 600 w :4464 w :4384 p:8124 n: 96 n: 36
## k:2304 p :1872 p :1872 o: 96 o:7488
## s:4936 g : 576 g : 576 w:7924 t: 600
## y: 284 n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## V20 V21 V22 V23
## e:2776 w :2388 a: 384 d:3148
## f: 48 n :1968 c: 340 g:2148
## l:1296 k :1872 n: 400 l: 832
## n: 36 h :1632 s:1248 m: 292
## p:3968 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 368
## (Other): 144 w: 192
#Load car package
require(car)
## Loading required package: car
## Warning: package 'car' was built under R version 3.1.3
#create separate vectors and replace codes with full identifiers
#each recode spec is assembled from one "'code' = 'label'" pair per entry and
#joined with "; ", which is the format car::recode() parses
edibility_codes <- paste(
  "'e' = 'edible'",
  "'p' = 'poisonous'",
  sep = "; "
)
cap_color_codes <- paste(
  "'n' = 'brown'",
  "'b' = 'buff'",
  "'c' = 'cinnamon'",
  "'g' = 'gray'",
  "'r' = 'green'",
  "'p' = 'pink'",
  "'u' = 'purple'",
  "'e' = 'red'",
  "'w' = 'white'",
  "'y' = 'yellow'",
  sep = "; "
)
veil_color_codes <- paste(
  "'n' = 'brown'",
  "'o' = 'orange'",
  "'w' = 'white'",
  "'y' = 'yellow'",
  sep = "; "
)
spore_color_codes <- paste(
  "'k' = 'black'",
  "'n' = 'brown'",
  "'b' = 'buff'",
  "'h' = 'chocolate'",
  "'r' = 'green'",
  "'o' = 'orange'",
  "'u' = 'purple'",
  "'w' = 'white'",
  "'y' = 'yellow'",
  sep = "; "
)

#recode() is called through the car namespace so that a same-named function
#from another attached package (e.g. dplyr::recode) cannot mask it
#as.character() gives a plain character vector whether the source column was
#read as a factor or as character
#columns are picked by position: 1 = edibility, 4 = cap color,
#18 = veil color, 21 = spore color
edibility <- as.character(car::recode(mushrooms[[1]], edibility_codes))
cap_color <- as.character(car::recode(mushrooms[[4]], cap_color_codes))
veil_color <- as.character(car::recode(mushrooms[[18]], veil_color_codes))
spore_color <- as.character(car::recode(mushrooms[[21]], spore_color_codes))

#bind vectors into a data.frame
#data.frame() is used directly instead of as.data.frame(cbind(...)), avoiding
#the intermediate character matrix; stringsAsFactors = TRUE keeps the columns
#as factors on every R version so summary() reports per-level counts
mushroom_colors <- data.frame(
  edibility,
  cap_color,
  veil_color,
  spore_color,
  stringsAsFactors = TRUE
)
#check the data.frame: print its class, then a per-column summary
class(mushroom_colors)
## [1] "data.frame"
summary(mushroom_colors)
## edibility cap_color veil_color spore_color
## edible :4208 brown :2284 brown : 96 white :2388
## poisonous:3916 gray :1840 orange: 96 brown :1968
## red :1500 white :7924 black :1872
## yellow :1072 yellow: 8 chocolate:1632
## white :1040 green : 72
## buff : 168 buff : 48
## (Other): 220 (Other) : 144