# 1. Take the data and create a data frame with a subset of the columns in the
#    dataset.
#
# Load the UCI mushroom data set. The file is comma-separated and has no
# header row, so read.table() names the columns V1..V23.
# stringsAsFactors = TRUE is requested explicitly: the per-level counts in the
# summary() output recorded below only appear for factor columns, and
# R >= 4.0.0 reads strings as plain character vectors by default.
MushData <- read.table(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  sep = ",",
  header = FALSE,
  stringsAsFactors = TRUE
)

# First six rows: every attribute is a one-letter code.
head(MushData)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g

# Per-column frequency of each code.
summary(MushData)
## V1 V2 V3 V4 V5 V6
## e:4208 b: 452 f:2320 n :2284 f:4748 n :3528
## p:3916 c: 4 g: 4 g :1840 t:3376 f :2160
## f:3152 s:2556 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3656 b : 168 l : 400
## (Other): 220 (Other): 484
## V7 V8 V9 V10 V11 V12 V13
## a: 210 c:6812 b:5612 b :1728 e:3516 ?:2480 f: 552
## f:7914 w:1312 n:2512 p :1492 t:4608 b:3776 k:2372
## w :1202 c: 556 s:5176
## n :1048 e:1120 y: 24
## g : 752 r: 192
## h : 732
## (Other):1170
## V14 V15 V16 V17 V18 V19
## f: 600 w :4464 w :4384 p:8124 n: 96 n: 36
## k:2304 p :1872 p :1872 o: 96 o:7488
## s:4936 g : 576 g : 576 w:7924 t: 600
## y: 284 n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## V20 V21 V22 V23
## e:2776 w :2388 a: 384 d:3148
## f: 48 n :1968 c: 340 g:2148
## l:1296 k :1872 n: 400 l: 832
## n: 36 h :1632 s:1248 m: 292
## p:3968 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 368
## (Other): 144 w: 192
# Keep five attributes. Column positions follow the UCI attribute list:
# V1 = class, V2 = cap-shape, V10 = gill-color, V19 = ring-number,
# V23 = habitat. V4 is cap-color, so it must not be used for "Gill-Color":
# the gill-color code table below does not cover cap-color's code set.
datasub <- subset(MushData, select = c(V1, V2, V10, V19, V23))
head(datasub)
## V1 V2 V10 V19 V23
## 1 p x k o u
## 2 e x k o g
## 3 e b n o m
## 4 p x n o u
## 5 e x k o g
## 6 e x n o g

# Replace the positional names with descriptive ones (same order as above).
colnames(datasub) <- c(
  "Class", "Cap-Shape", "Gill-Color", "Ring-Number", "Habitat"
)
head(datasub)
## Class Cap-Shape Gill-Color Ring-Number Habitat
## 1 p x k o u
## 2 e x k o g
## 3 e b n o m
## 4 p x n o u
## 5 e x k o g
## 6 e x n o g

# One-letter code -> readable label, one table per column.
code_labels <- list(
  "Class" = c(e = "edible", p = "poisonous"),
  "Cap-Shape" = c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  ),
  "Gill-Color" = c(
    k = "black", n = "brown", b = "buff", h = "chocolate",
    g = "gray", r = "green", o = "orange", p = "pink",
    u = "purple", e = "red", w = "white", y = "yellow"
  ),
  "Ring-Number" = c(n = "none", o = "one", t = "two"),
  "Habitat" = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths",
    u = "urban", w = "waste", d = "woods"
  )
)

# Recode a vector of one-letter codes into a factor of readable labels.
#
# values:  character vector or factor holding the codes.
# mapping: named character vector, names = codes, values = labels.
# column:  column name, used only in the error message.
#
# Returns a factor whose levels are exactly the labels in `mapping` (no
# left-over code levels). Stops if a code is missing from `mapping`, because
# factor() would otherwise turn it into NA without any warning.
recode_codes <- function(values, mapping, column) {
  codes <- as.character(values)
  unknown <- setdiff(unique(codes[!is.na(codes)]), names(mapping))
  if (length(unknown) > 0) {
    stop(
      "Unmapped code(s) in ", column, ": ",
      paste(unknown, collapse = ", "),
      call. = FALSE
    )
  }
  factor(codes, levels = names(mapping), labels = unname(mapping))
}

# Apply each table to its column; works for character and factor columns.
recoded <- names(code_labels)
datasub[recoded] <- Map(
  recode_codes, datasub[recoded], code_labels, recoded
)

# The transformed dataset
head(datasub)
## Class Cap-Shape Gill-Color Ring-Number Habitat
## 1 poisonous convex black one urban
## 2 edible convex black one grasses
## 3 edible bell brown one meadows
## 4 poisonous convex brown one urban
## 5 edible convex black one grasses
## 6 edible convex brown one grasses