Your task is to study the dataset and the associated description of the data (i.e., the “data dictionary”). You may need to look around a bit, but it’s there! You should take the data and create a data frame with a subset of the columns in the dataset. You should include the column that indicates edible or poisonous and three or four other columns. You should also add meaningful column names and replace the abbreviations used in the data — for example, in the appropriate column, “e” might become “edible.” Your deliverable is the R code to perform these transformation tasks.
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 3.0.0 v purrr 0.2.5
## v tibble 1.4.2 v dplyr 0.7.6
## v tidyr 0.8.1 v stringr 1.3.1
## v readr 1.1.1 v forcats 0.3.0
## -- Conflicts ----------------------------------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
# Load the UCI mushroom data set. The file has no header row, so the columns
# come in as V1..V23.
# stringsAsFactors is set explicitly: the relabelling later in this script
# assigns to levels(), which only recodes factor columns, and since R 4.0.0
# read.csv() returns character columns unless asked otherwise.
mushroom <- read.csv(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  stringsAsFactors = TRUE
)
head(mushroom)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# The 23 attribute names, in the same order as the columns of the data file.
# Names containing "-" are non-syntactic and must be wrapped in backticks
# whenever they are referenced.
mushroom_names <- c(
  "class",
  "cap-shape",
  "cap-surface",
  "cap-color",
  "bruises",
  "odor",
  "gill-attachment",
  "gill-spacing",
  "gill-size",
  "gill-color",
  "stalk-shape",
  "stalk-root",
  "stalk-surface-above-ring",
  "stalk-surface-below-ring",
  "stalk-color-above-ring",
  "stalk-color-below-ring",
  "veil-type",
  "veil-color",
  "ring-number",
  "ring-type",
  "spore-print-color",
  "population",
  "habitat"
)

# Swap the default V1..V23 headers for the descriptive names.
names(mushroom) <- mushroom_names
head(mushroom)
## class cap-shape cap-surface cap-color bruises odor gill-attachment
## 1 p x s n t p f
## 2 e x s y t a f
## 3 e b s w t l f
## 4 p x y w t p f
## 5 e x s g f n f
## 6 e x y y t a f
## gill-spacing gill-size gill-color stalk-shape stalk-root
## 1 c n k e e
## 2 c b k e c
## 3 c b n e c
## 4 c n n e e
## 5 w b k t e
## 6 c b n e c
## stalk-surface-above-ring stalk-surface-below-ring stalk-color-above-ring
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## stalk-color-below-ring veil-type veil-color ring-number ring-type
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o p
## 5 w p w o e
## 6 w p w o p
## spore-print-color population habitat
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Keep the edible/poisonous label plus four descriptive attributes.
mushroom1 <- mushroom %>%
  select(class, `cap-shape`, `cap-surface`, `cap-color`, odor)

# Lookup table for the kept columns: each entry maps a readable label (the
# element name) to the one-letter code used in the raw data (the value).
code_labels <- list(
  class = c(edible = "e", poisonous = "p"),
  `cap-shape` = c(
    bell = "b", conical = "c", convex = "x",
    flat = "f", knobbed = "k", sunken = "s"
  ),
  `cap-surface` = c(fibrous = "f", grooves = "g", scaly = "y", smooth = "s"),
  `cap-color` = c(
    brown = "n", buff = "b", cinnamon = "c", gray = "g", green = "r",
    pink = "p", purple = "u", red = "e", white = "w", yellow = "y"
  ),
  odor = c(
    almond = "a", anise = "l", creosote = "c", fishy = "y", foul = "f",
    musty = "m", none = "n", pungent = "p", spicy = "s"
  )
)

# Recode every kept column into a factor with readable labels. factor() takes
# both character and factor input, so this works whether or not read.csv()
# produced factors. Levels follow the order of the lookup table, and any code
# missing from the table becomes NA.
mushroom1[names(code_labels)] <- Map(
  function(codes, key) {
    factor(codes, levels = unname(key), labels = names(key))
  },
  mushroom1[names(code_labels)],
  code_labels
)
head(mushroom1)
## class cap-shape cap-surface cap-color odor
## 1 poisonous convex smooth brown pungent
## 2 edible convex smooth yellow almond
## 3 edible bell smooth white anise
## 4 poisonous convex scaly white pungent
## 5 edible convex smooth gray none
## 6 edible convex scaly yellow almond
# Per-column counts of each decoded level in the subset.
summary(object = mushroom1)
## class cap-shape cap-surface cap-color
## edible :4208 bell : 452 fibrous:2320 brown :2284
## poisonous:3916 conical: 4 grooves: 4 gray :1840
## convex :3656 scaly :3244 red :1500
## flat :3152 smooth :2556 yellow :1072
## knobbed: 828 white :1040
## sunken : 32 buff : 168
## (Other): 220
## odor
## none :3528
## foul :2160
## fishy : 576
## spicy : 576
## almond : 400
## anise : 400
## (Other): 484