Importing the Mushroom Dataset from GitHub:
Load Libraries:
library(RCurl)
## Loading required package: bitops
# Read the raw mushroom table straight from GitHub. The file is read without a
# header row, so the columns are labelled by hand below.
mushroom <- read.csv(
  "https://raw.githubusercontent.com/jcp9010/MSDA/master/agaricus-lepiota.data.csv",
  header = FALSE
)

# One label per column, in file order.
mushroom.names <- c(
  "Deadly",
  "Cap.Shape", "Cap.Surface", "Cap.Color",
  "Bruises?", "Odor",
  "Gill.Attachment", "Gill.Spacing", "Gill.Size", "Gill.Color",
  "Stalk.Shape", "Stalk.Root",
  "Stalk.Surface.Above.Ring", "Stalk.Surface.Below.Ring",
  "Stalk.Color.Above.Ring", "Stalk.Color.Below.Ring",
  "Veil.Type", "Veil.Color",
  "Ring.Number", "Ring.Type",
  "Spore.Print.Color", "Population", "Habitat"
)
names(mushroom) <- mushroom.names
Take a subset of columns in the dataset. This should include the column that indicates edible or poisonous and three or four other columns.
# Keep the edible/poisonous column (Deadly) plus four descriptive attributes.
mushroom.subset <- mushroom[
  ,
  c("Deadly", "Cap.Shape", "Cap.Color", "Odor", "Stalk.Shape"),
  drop = FALSE
]
To start converting the letters in the data.frame into something more readable, I used a for loop to replace all the letters with words.
# Decode the single-letter attribute codes into readable words.
#
# Start from an all-character copy of the subset so the replacement labels can
# be stored whether or not the columns were read in as factors.
mushroom.subset2 <- data.frame(
  lapply(mushroom.subset, as.character),
  stringsAsFactors = FALSE
)

# Per-column decoding rules. `known` maps a code to its label; `other` is the
# label assigned to every remaining non-missing code in that column.
code.labels <- list(
  Deadly = list(
    known = c(p = "Poisonous"),
    other = "Edible"
  ),
  Cap.Shape = list(
    known = c(
      b = "Bell", c = "Conical", x = "Convex", f = "Flat", k = "Knobbed"
    ),
    other = "Sunken"
  ),
  Cap.Color = list(
    known = c(
      n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", r = "Green",
      p = "Pink", u = "Purple", e = "Red", w = "White"
    ),
    other = "Yellow"
  ),
  Odor = list(
    known = c(
      a = "Almond", l = "Anise", c = "Creosote", y = "Fishy", f = "Foul",
      m = "Musty", n = "None", p = "Pungent"
    ),
    other = "Spicy"
  ),
  Stalk.Shape = list(
    known = c(e = "Enlarging"),
    other = "Tapering"
  )
)

# Translate a whole character vector of codes in one vectorised lookup.
#
# codes: character vector of single-letter codes.
# known: named character vector, names are codes and values are labels.
# other: label used for any non-missing code that is not in `known`.
#
# Returns a character vector the same length as `codes`. Missing codes stay
# NA instead of aborting the script, and a zero-length input yields a
# zero-length result.
decode.codes <- function(codes, known, other) {
  labels <- unname(known[match(codes, names(known))])
  labels[is.na(labels) & !is.na(codes)] <- other
  labels
}

# One pass per column (not per row): replace each column with its labels.
for (column in names(code.labels)) {
  rule <- code.labels[[column]]
  mushroom.subset2[[column]] <- decode.codes(
    mushroom.subset2[[column]],
    known = rule$known,
    other = rule$other
  )
}
Below is a printout of mushroom.subset2, using the head() function to limit the output so that it does not overwhelm your screen.
# Show only the first ten decoded rows to keep the printed output short.
print(head(x = mushroom.subset2, n = 10L))
## Deadly Cap.Shape Cap.Color Odor Stalk.Shape
## 1 Poisonous Convex Brown Pungent Enlarging
## 2 Edible Convex Yellow Almond Enlarging
## 3 Edible Bell White Anise Enlarging
## 4 Poisonous Convex White Pungent Enlarging
## 5 Edible Convex Gray None Tapering
## 6 Edible Convex Yellow Almond Enlarging
## 7 Edible Bell White Almond Enlarging
## 8 Edible Bell White Anise Enlarging
## 9 Poisonous Convex White Pungent Enlarging
## 10 Edible Bell Yellow Almond Enlarging
However, we run into a problem. We have essentially converted all of the data.frame factors into characters, making data analysis somewhat more difficult.
# Summarise every column of the decoded, all-character data frame.
print(summary(object = mushroom.subset2))
## Deadly Cap.Shape Cap.Color
## Length:8124 Length:8124 Length:8124
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## Odor Stalk.Shape
## Length:8124 Length:8124
## Class :character Class :character
## Mode :character Mode :character
To convert the characters into factors, I will use the “dplyr” package.
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Convert every character column to a factor so that summary() reports the
# count of each level. across() + where() replaces the superseded mutate_if()
# scoped verb and selects the same columns.
mushroom.subset3 <- mushroom.subset2 %>%
  mutate(across(where(is.character), as.factor))
print(summary(mushroom.subset3))
## Deadly Cap.Shape Cap.Color Odor
## Edible :4208 Bell : 452 Brown :2284 None :3528
## Poisonous:3916 Conical: 4 Gray :1840 Foul :2160
## Convex :3656 Red :1500 Fishy : 576
## Flat :3152 Yellow :1072 Spicy : 576
## Knobbed: 828 White :1040 Almond : 400
## Sunken : 32 Buff : 168 Anise : 400
## (Other): 220 (Other): 484
## Stalk.Shape
## Enlarging:3516
## Tapering :4608
##
##
##
##
##
As you can see, this data is now more usable for creating even further subsets for analysis.
# Count the mushrooms per cap shape and draw the counts as bars.
barplot(
  height = table(mushroom.subset3[["Cap.Shape"]]),
  main = "Mushroom Cap Shapes"
)
Frequency Bar Plot
# Share of each cap shape: per-shape counts divided by the number of rows.
Shape.Table <- table(mushroom.subset3[["Cap.Shape"]])
Shape.Table.Ratios <- Shape.Table / nrow(mushroom.subset3)
print(Shape.Table.Ratios)
##
## Bell Conical Convex Flat Knobbed
## 0.0556376169 0.0004923683 0.4500246184 0.3879862137 0.1019202363
## Sunken
## 0.0039389463
# Same bar plot as for the counts, but with relative frequencies as heights.
barplot(
  height = Shape.Table.Ratios,
  main = "Mushroom Cap Shapes Frequency Bar Plot"
)
Pie Chart
# Draw the cap-shape relative frequencies as slices of a pie chart.
pie(
  x = Shape.Table.Ratios,
  main = "Mushroom Cap Shapes Pie Chart"
)