Your task is to study the dataset and the associated description of the data (i.e. “data dictionary”). You may need to look around a bit, but it’s there! You should take the data, and create a data frame with a subset of the columns in the dataset. You should include the column that indicates edible or poisonous and three or four other columns. You should also add meaningful column names and replace the abbreviations used in the data—for example, in the appropriate column, “e” might become “edible.” Your deliverable is the R code to perform these transformation tasks.
#Read Mushroom Data
library(RCurl)
## Loading required package: bitops
# Download the raw CSV text, then parse it into a data frame.
x <- getURL("https://raw.githubusercontent.com/DaisyCai2019/Homework/master/agaricus-lepiota.csv")
# header = FALSE: the file has no header row, so columns are named V1, V2, ...
# stringsAsFactors = TRUE: the read.csv default changed to FALSE in R 4.0.0;
# pinning it keeps the columns as factors on every R version, so summary()
# reports per-level counts instead of only "character" for each column.
mushrooms <- read.csv(text = x, header = FALSE, stringsAsFactors = TRUE)
# Preview the first rows of the raw, still-abbreviated data.
head(mushrooms)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Dimensions of the raw table, reported as rows then columns.
dim(mushrooms)
## [1] 8124 23
# Per-column summary of the raw single-letter codes.
summary(mushrooms)
## V1 V2 V3 V4 V5 V6
## e:4208 b: 452 f:2320 n :2284 f:4748 n :3528
## p:3916 c: 4 g: 4 g :1840 t:3376 f :2160
## f:3152 s:2556 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3656 b : 168 l : 400
## (Other): 220 (Other): 484
## V7 V8 V9 V10 V11 V12 V13
## a: 210 c:6812 b:5612 b :1728 e:3516 ?:2480 f: 552
## f:7914 w:1312 n:2512 p :1492 t:4608 b:3776 k:2372
## w :1202 c: 556 s:5176
## n :1048 e:1120 y: 24
## g : 752 r: 192
## h : 732
## (Other):1170
## V14 V15 V16 V17 V18 V19
## f: 600 w :4464 w :4384 p:8124 n: 96 n: 36
## k:2304 p :1872 p :1872 o: 96 o:7488
## s:4936 g : 576 g : 576 w:7924 t: 600
## y: 284 n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## V20 V21 V22 V23
## e:2776 w :2388 a: 384 d:3148
## f: 48 n :1968 c: 340 g:2148
## l:1296 k :1872 n: 400 l: 832
## n: 36 h :1632 s:1248 m: 292
## p:3968 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 368
## (Other): 144 w: 192
# Give every column a descriptive name, in the same order as the columns
# appear in the file (the first column is the edible/poisonous indicator).
mushrooms <- setNames(
  mushrooms,
  c(
    "edible_poisonous", "shape", "surface", "color", "bruises", "odor",
    "gill-attachment", "gill-spacing", "gill-size", "gill-color",
    "stalk-shape", "stalk-root",
    "stalk-surface-above-ring", "stalk-surface-below-ring",
    "stalk-color-above-ring", "stalk-color-below-ring",
    "veil-type", "veil-color", "ring-number", "ring-type",
    "spore-print-color", "population", "habitat"
  )
)
# Confirm the new names took effect.
head(mushrooms)
## edible_poisonous shape surface color bruises odor gill-attachment
## 1 p x s n t p f
## 2 e x s y t a f
## 3 e b s w t l f
## 4 p x y w t p f
## 5 e x s g f n f
## 6 e x y y t a f
## gill-spacing gill-size gill-color stalk-shape stalk-root
## 1 c n k e e
## 2 c b k e c
## 3 c b n e c
## 4 c n n e e
## 5 w b k t e
## 6 c b n e c
## stalk-surface-above-ring stalk-surface-below-ring stalk-color-above-ring
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## stalk-color-below-ring veil-type veil-color ring-number ring-type
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o p
## 5 w p w o e
## 6 w p w o p
## spore-print-color population habitat
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Keep only the edibility indicator plus three descriptive attributes.
# drop = FALSE guarantees the result stays a data frame.
mushrooms <- mushrooms[
  ,
  c("edible_poisonous", "color", "odor", "habitat"),
  drop = FALSE
]
head(mushrooms)
## edible_poisonous color odor habitat
## 1 p n p u
## 2 e y a g
## 3 e w l m
## 4 p w p u
## 5 e g n g
## 6 e y a g
# Preview the last six rows of the reduced data frame.
tail(mushrooms, n = 6L)
## edible_poisonous color odor habitat
## 8119 p n f d
## 8120 e n n l
## 8121 e n n l
## 8122 e n n l
## 8123 p n y l
## 8124 e n n l
# Replace abbreviations

# Translate one-letter codes into readable labels.
#
# codes:  vector of codes (factor or character); converted to character.
# labels: named character vector; names are the codes, values the labels.
#
# Returns a character vector the same length as `codes`. Codes that have no
# entry in `labels` (and NA values) are returned unchanged.
expand_codes <- function(codes, labels) {
  codes <- as.character(codes)
  hit <- match(codes, names(labels))
  known <- !is.na(hit)
  codes[known] <- unname(labels[hit[known]])
  codes
}

mushrooms$edible_poisonous <- expand_codes(
  mushrooms$edible_poisonous,
  c(e = "edible", p = "poisonous")
)

mushrooms$color <- expand_codes(
  mushrooms$color,
  c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)

mushrooms$odor <- expand_codes(
  mushrooms$odor,
  c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  )
)

mushrooms$habitat <- expand_codes(
  mushrooms$habitat,
  c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths",
    u = "urban", w = "waste", d = "woods"
  )
)

# Check that the codes were expanded.
head(mushrooms)
## edible_poisonous color odor habitat
## 1 poisonous brown pungent urban
## 2 edible yellow almond grasses
## 3 edible white anise meadows
## 4 poisonous white pungent urban
## 5 edible gray none grasses
## 6 edible yellow almond grasses
# Subset the data again with some criteria: show only the rows that satisfy
# all four conditions at once.
mushrooms[
  which(
    mushrooms$edible_poisonous == "edible" &
      mushrooms$color == "white" &
      mushrooms$odor == "anise" &
      mushrooms$habitat == "meadows"
  ), ,
  drop = FALSE
]
## edible_poisonous color odor habitat
## 3 edible white anise meadows
## 8 edible white anise meadows
## 25 edible white anise meadows
## 28 edible white anise meadows
## 48 edible white anise meadows
## 53 edible white anise meadows
## 72 edible white anise meadows
## 108 edible white anise meadows
## 130 edible white anise meadows
## 132 edible white anise meadows
## 176 edible white anise meadows
## 187 edible white anise meadows
## 193 edible white anise meadows
## 198 edible white anise meadows
## 199 edible white anise meadows
## 212 edible white anise meadows
## 216 edible white anise meadows
## 225 edible white anise meadows
## 256 edible white anise meadows
## 271 edible white anise meadows
## 285 edible white anise meadows
## 307 edible white anise meadows
## 332 edible white anise meadows
## 397 edible white anise meadows
## 405 edible white anise meadows
## 425 edible white anise meadows
## 438 edible white anise meadows
## 441 edible white anise meadows
## 442 edible white anise meadows
## 444 edible white anise meadows
## 449 edible white anise meadows
## 451 edible white anise meadows
## 455 edible white anise meadows
## 487 edible white anise meadows
## 503 edible white anise meadows
## 521 edible white anise meadows
## 598 edible white anise meadows
## 605 edible white anise meadows
## 609 edible white anise meadows
## 626 edible white anise meadows
## 648 edible white anise meadows
## 662 edible white anise meadows
## 704 edible white anise meadows
## 713 edible white anise meadows
## 717 edible white anise meadows
## 730 edible white anise meadows
## 780 edible white anise meadows
## 784 edible white anise meadows
## 829 edible white anise meadows
## 898 edible white anise meadows
## 899 edible white anise meadows
## 917 edible white anise meadows
## 956 edible white anise meadows
## 999 edible white anise meadows
## 1087 edible white anise meadows
## 1088 edible white anise meadows
## 1417 edible white anise meadows
## 1437 edible white anise meadows
## 1552 edible white anise meadows
## 1565 edible white anise meadows
## 1672 edible white anise meadows
## 1718 edible white anise meadows
## 1983 edible white anise meadows
## 2030 edible white anise meadows