# Read the data
# mushroom <- read.table("agaricus-lepiota.data",header = FALSE, sep = ",", stringsAsFactors = FALSE)
# mushroom <- read.table("C:\\Users\\Yohannes\\Desktop\\MSDS SPRING SESSION 2019\\WEEK 1 ASSIGNMENT SOLUTIONS\\DATA607\\agaricus-lepiota.data",header = FALSE, sep = ",", stringsAsFactors = FALSE)
# Download the comma-separated mushroom data from GitHub. The file has no
# header row, so read.table() assigns the default column names V1..V23.
url <- "https://raw.githubusercontent.com/jonygeta/Data607-week-1/master/agaricus-lepiota.data"
mushroom <- read.table(
  file = url,
  sep = ",",
  header = FALSE,
  stringsAsFactors = FALSE
)
# Preview the first six rows of the raw data.
head(mushroom, n = 6L)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Keep only the first four columns (V1..V4); the remaining columns are dropped.
mushroom <- mushroom[seq_len(4L)]
# Preview the reduced data frame.
head(mushroom, n = 6L)
## V1 V2 V3 V4
## 1 p x s n
## 2 e x s y
## 3 e b s w
## 4 p x y w
## 5 e x s g
## 6 e x y y
# Add column names
# Give the four retained columns descriptive names. Three of the names contain
# a hyphen, so they must be wrapped in backticks when accessed with `$`.
names(mushroom) <- c("classes", "cap-shape", "cap-surface", "cap-color")
# Preview the renamed data frame.
head(mushroom, n = 6L)
## classes cap-shape cap-surface cap-color
## 1 p x s n
## 2 e x s y
## 3 e b s w
## 4 p x y w
## 5 e x s g
## 6 e x y y
# Recode variables
# classes
# Recode the single-letter codes in `classes` to readable labels, printing the
# value counts before and after so the recode can be checked.
paste("classes Values before recode")
## [1] "classes Values before recode"
table(mushroom$classes)
##
## e p
## 4208 3916
# One named lookup vector (code -> label) replaces the per-value assignments.
# Only cells holding a known code are rewritten; anything else (NA, or labels
# left by an earlier run of this step) is kept as is.
class_labels <- c(e = "edible", p = "poisonous")
is_code <- mushroom$classes %in% names(class_labels)
mushroom$classes[is_code] <- unname(class_labels[mushroom$classes[is_code]])
paste("classes Values after recode")
## [1] "classes Values after recode"
table(mushroom$classes)
##
## edible poisonous
## 4208 3916
# cap-shape
# Recode the single-letter codes in `cap-shape` to readable labels, printing
# the value counts before and after so the recode can be checked.
paste("cap-shape Values before recode")
## [1] "cap-shape Values before recode"
table(mushroom$`cap-shape`)
##
## b c f k s x
## 452 4 3152 828 32 3656
# One named lookup vector (code -> label) replaces the per-value assignments.
# Only cells holding a known code are rewritten; anything else (NA, or labels
# left by an earlier run of this step) is kept as is.
cap_shape_labels <- c(
  b = "bell", c = "conical", x = "convex",
  f = "flat", k = "knobbed", s = "sunken"
)
is_code <- mushroom$`cap-shape` %in% names(cap_shape_labels)
mushroom$`cap-shape`[is_code] <- unname(
  cap_shape_labels[mushroom$`cap-shape`[is_code]]
)
paste("cap-shape Values after recode")
## [1] "cap-shape Values after recode"
table(mushroom$`cap-shape`)
##
## bell conical convex flat knobbed sunken
## 452 4 3656 3152 828 32
# cap-surface
# Recode the single-letter codes in `cap-surface` to readable labels, printing
# the value counts before and after so the recode can be checked.
paste("cap-surface Values before recode")
## [1] "cap-surface Values before recode"
table(mushroom$`cap-surface`)
##
## f g s y
## 2320 4 2556 3244
# One named lookup vector (code -> label) replaces the per-value assignments.
# Only cells holding a known code are rewritten; anything else (NA, or labels
# left by an earlier run of this step) is kept as is.
cap_surface_labels <- c(
  f = "fibrous", g = "grooves", y = "scaly", s = "smooth"
)
is_code <- mushroom$`cap-surface` %in% names(cap_surface_labels)
mushroom$`cap-surface`[is_code] <- unname(
  cap_surface_labels[mushroom$`cap-surface`[is_code]]
)
paste("cap-surface Values after recode")
## [1] "cap-surface Values after recode"
table(mushroom$`cap-surface`)
##
## fibrous grooves scaly smooth
## 2320 4 3244 2556
# cap-color
# Recode the single-letter codes in `cap-color` to readable labels, printing
# the value counts before and after so the recode can be checked.
paste("cap-color Values before recode")
## [1] "cap-color Values before recode"
table(mushroom$`cap-color`)
##
## b c e g n p r u w y
## 168 44 1500 1840 2284 144 16 16 1040 1072
# One named lookup vector (code -> label) replaces the per-value assignments.
# Only cells holding a known code are rewritten; anything else (NA, or labels
# left by an earlier run of this step) is kept as is.
cap_color_labels <- c(
  n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
  p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
)
is_code <- mushroom$`cap-color` %in% names(cap_color_labels)
mushroom$`cap-color`[is_code] <- unname(
  cap_color_labels[mushroom$`cap-color`[is_code]]
)
paste("cap-color Values after recode")
## [1] "cap-color Values after recode"
table(mushroom$`cap-color`)
##
## brown buff cinnamon gray green pink purple red
## 2284 168 44 1840 16 144 16 1500
## white yellow
## 1040 1072
# Final data frame
# Show the first six rows of the data frame in its current state.
head(mushroom, n = 6L)
## classes cap-shape cap-surface cap-color
## 1 poisonous convex smooth brown
## 2 edible convex smooth yellow
## 3 edible bell smooth white
## 4 poisonous convex scaly white
## 5 edible convex smooth gray
## 6 edible convex scaly yellow
# Table of final data frame
# Tabulate the value counts of every column of the data frame.
# lapply() always returns a list with one table per column; sapply() would
# instead simplify to a matrix whenever all columns happen to have the same
# number of distinct values, so its return type depends on the data.
lapply(mushroom, table)
## $classes
##
## edible poisonous
## 4208 3916
##
## $`cap-shape`
##
## bell conical convex flat knobbed sunken
## 452 4 3656 3152 828 32
##
## $`cap-surface`
##
## fibrous grooves scaly smooth
## 2320 4 3244 2556
##
## $`cap-color`
##
## brown buff cinnamon gray green pink purple red
## 2284 168 44 1840 16 144 16 1500
## white yellow
## 1040 1072