Create the URL for the mushrooms dataset:
# Assemble the dataset URL from two pieces so the literal stays within the
# line limit without embedding a line break in the address itself.
shrooms_url <- paste0(
  "https://archive.ics.uci.edu/ml/machine-learning-databases/",
  "mushroom/agaricus-lepiota.data"
)
Create a data frame by reading the mushroom data with read.csv, then display the first 6 rows with the head function and a summary of every column:
# Read the comma-separated file. It has no header row, so the columns get the
# default names V1..V23. stringsAsFactors = TRUE is set explicitly because
# R >= 4.0.0 reads text columns as character by default, while the summary
# below and the later levels() recoding need factor columns.
shrooms_df <- read.csv(
  shrooms_url,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
# Show the first six rows.
head(shrooms_df)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Count how often each category code occurs in every column.
summary(shrooms_df)
## V1 V2 V3 V4 V5 V6
## e:4208 b: 452 f:2320 n :2284 f:4748 n :3528
## p:3916 c: 4 g: 4 g :1840 t:3376 f :2160
## f:3152 s:2556 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3656 b : 168 l : 400
## (Other): 220 (Other): 484
## V7 V8 V9 V10 V11 V12 V13
## a: 210 c:6812 b:5612 b :1728 e:3516 ?:2480 f: 552
## f:7914 w:1312 n:2512 p :1492 t:4608 b:3776 k:2372
## w :1202 c: 556 s:5176
## n :1048 e:1120 y: 24
## g : 752 r: 192
## h : 732
## (Other):1170
## V14 V15 V16 V17 V18 V19
## f: 600 w :4464 w :4384 p:8124 n: 96 n: 36
## k:2304 p :1872 p :1872 o: 96 o:7488
## s:4936 g : 576 g : 576 w:7924 t: 600
## y: 284 n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## V20 V21 V22 V23
## e:2776 w :2388 a: 384 d:3148
## f: 48 n :1968 c: 340 g:2148
## l:1296 k :1872 n: 400 l: 832
## n: 36 h :1632 s:1248 m: 292
## p:3968 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 368
## (Other): 144 w: 192
Rename default column names to descriptive names for the columns I will be extracting in my subset command:
# Give the five columns of interest descriptive names in a single assignment:
# positions 1-4 and 19 are replaced, every other column keeps its default name.
names(shrooms_df)[c(1, 2, 3, 4, 19)] <- c(
  "Edible/Poison",
  "cap shape",
  "cap surface",
  "cap color",
  "ring number"
)
# Print all column names to confirm the change.
names(shrooms_df)
## [1] "Edible/Poison" "cap shape" "cap surface" "cap color"
## [5] "V5" "V6" "V7" "V8"
## [9] "V9" "V10" "V11" "V12"
## [13] "V13" "V14" "V15" "V16"
## [17] "V17" "V18" "ring number" "V20"
## [21] "V21" "V22" "V23"
Recode the categorical levels to descriptive names for the columns I previously renamed for extraction (display the values before and after):
# Column 1: replace the single-letter class code with a readable label
# ("e" becomes "edible", anything else "poison"). ifelse() returns a character
# vector, so this column is no longer a factor afterwards.
shrooms_df[, 1] <- ifelse(
  shrooms_df$`Edible/Poison` == "e",
  "edible",
  "poison"
)
# For the factor columns below, the new labels are assigned by position, so
# each vector must follow the order printed by levels() just before it.
levels(shrooms_df$`cap surface`)
## [1] "f" "g" "s" "y"
# In the UCI attribute list "s" is smooth and "y" is scaly.
levels(shrooms_df$`cap surface`) <- c("fibrous", "grooves", "smooth", "scaly")
levels(shrooms_df$`cap surface`)
## [1] "fibrous" "grooves" "smooth" "scaly"
levels(shrooms_df$`cap color`)
## [1] "b" "c" "e" "g" "n" "p" "r" "u" "w" "y"
levels(shrooms_df$`cap color`) <- c(
  "buff", "cinnamon", "red", "gray", "brown",
  "pink", "green", "purple", "white", "yellow"
)
levels(shrooms_df$`cap color`)
## [1] "buff" "cinnamon" "red" "gray" "brown" "pink"
## [7] "green" "purple" "white" "yellow"
levels(shrooms_df$`cap shape`)
## [1] "b" "c" "f" "k" "s" "x"
levels(shrooms_df$`cap shape`) <- c(
  "bell", "conical", "flat", "knobbed", "sunken", "convex"
)
levels(shrooms_df$`cap shape`)
## [1] "bell" "conical" "flat" "knobbed" "sunken" "convex"
levels(shrooms_df$`ring number`)
## [1] "n" "o" "t"
levels(shrooms_df$`ring number`) <- c("none", "one", "two")
levels(shrooms_df$`ring number`)
## [1] "none" "one" "two"
Create 2 subset data frames from the above, 1 for edible mushrooms and 1 for poisonous ones:
# Split the recoded data by class label, keeping only the class column and
# three of the descriptive columns. subset() evaluates the row condition
# inside the data frame, so the column is referenced by its bare name.
edible_df <- subset(
  shrooms_df,
  subset = `Edible/Poison` == "edible",
  select = c("Edible/Poison", "cap shape", "cap color", "ring number")
)
poison_df <- subset(
  shrooms_df,
  subset = `Edible/Poison` == "poison",
  select = c("Edible/Poison", "cap shape", "cap color", "ring number")
)
Edible Data Frame:
# Compact overview of the edible subset: row count and the type of each column.
str(edible_df)
## 'data.frame': 4208 obs. of 4 variables:
## $ Edible/Poison: chr "edible" "edible" "edible" "edible" ...
## $ cap shape : Factor w/ 6 levels "bell","conical",..: 6 1 6 6 1 1 1 6 6 1 ...
## $ cap color : Factor w/ 10 levels "buff","cinnamon",..: 10 9 4 10 9 9 10 10 10 10 ...
## $ ring number : Factor w/ 3 levels "none","one","two": 2 2 2 2 2 2 2 2 2 2 ...
# Per-column counts for the edible subset. Zero-count entries appear because
# the subset still carries every factor level of the full data frame.
summary(edible_df)
## Edible/Poison cap shape cap color ring number
## Length:4208 bell : 404 brown :1264 none: 0
## Class :character conical: 0 gray :1032 one :3680
## Mode :character flat :1596 white : 720 two : 528
## knobbed: 228 red : 624
## sunken : 32 yellow : 400
## convex :1948 pink : 56
## (Other): 112
# First six edible rows; the row names are the row numbers of the full data.
head(edible_df)
## Edible/Poison cap shape cap color ring number
## 2 edible convex yellow one
## 3 edible bell white one
## 5 edible convex gray one
## 6 edible convex yellow one
## 7 edible bell white one
## 8 edible bell white one
Poisonous Data Frame:
# Compact overview of the poison subset: row count and the type of each column.
str(poison_df)
## 'data.frame': 3916 obs. of 4 variables:
## $ Edible/Poison: chr "poison" "poison" "poison" "poison" ...
## $ cap shape : Factor w/ 6 levels "bell","conical",..: 6 6 6 6 6 6 6 6 3 6 ...
## $ cap color : Factor w/ 10 levels "buff","cinnamon",..: 5 9 9 9 5 9 5 5 9 9 ...
## $ ring number : Factor w/ 3 levels "none","one","two": 2 2 2 2 2 2 2 2 2 2 ...
# Per-column counts for the poison subset. Zero-count entries appear because
# the subset still carries every factor level of the full data frame.
summary(poison_df)
## Edible/Poison cap shape cap color ring number
## Length:3916 bell : 48 brown :1020 none: 36
## Class :character conical: 4 red : 876 one :3808
## Mode :character flat :1556 gray : 808 two : 72
## knobbed: 600 yellow : 672
## sunken : 0 white : 320
## convex :1708 buff : 120
## (Other): 100
# First six poison rows; the row names are the row numbers of the full data.
head(poison_df)
## Edible/Poison cap shape cap color ring number
## 1 poison convex brown one
## 4 poison convex white one
## 9 poison convex white one
## 14 poison convex white one
## 18 poison convex brown one
## 19 poison convex white one