##Read file and check for dimensions and headers
# Load the mushrooms CSV from GitHub, keeping every column as plain character
# codes (no factor conversion), then report its dimensions.
mushrooms <- read.csv(
  "https://raw.githubusercontent.com/zahirf/learning/master/mushrooms.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
dim(mushrooms)
## [1] 8124 23
# View() opens a GUI data viewer, so only call it in interactive sessions;
# this keeps the script runnable non-interactively (e.g. via Rscript).
if (interactive()) {
  View(mushrooms, "Mushrooms Data")
}
head(mushrooms)
## class cap.shape cap.surface cap.color bruises odor gill.attachment
## 1 p x s n t p f
## 2 e x s y t a f
## 3 e b s w t l f
## 4 p x y w t p f
## 5 e x s g f n f
## 6 e x y y t a f
## gill.spacing gill.size gill.color stalk.shape stalk.root
## 1 c n k e e
## 2 c b k e c
## 3 c b n e c
## 4 c n n e e
## 5 w b k t e
## 6 c b n e c
## stalk.surface.above.ring stalk.surface.below.ring stalk.color.above.ring
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## stalk.color.below.ring veil.type veil.color ring.number ring.type
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o p
## 5 w p w o e
## 6 w p w o p
## spore.print.color population habitat
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Show the type and the first few values of every column.
utils::str(object = mushrooms)
## 'data.frame': 8124 obs. of 23 variables:
## $ class : chr "p" "e" "e" "p" ...
## $ cap.shape : chr "x" "x" "b" "x" ...
## $ cap.surface : chr "s" "s" "s" "y" ...
## $ cap.color : chr "n" "y" "w" "w" ...
## $ bruises : chr "t" "t" "t" "t" ...
## $ odor : chr "p" "a" "l" "p" ...
## $ gill.attachment : chr "f" "f" "f" "f" ...
## $ gill.spacing : chr "c" "c" "c" "c" ...
## $ gill.size : chr "n" "b" "b" "n" ...
## $ gill.color : chr "k" "k" "n" "n" ...
## $ stalk.shape : chr "e" "e" "e" "e" ...
## $ stalk.root : chr "e" "c" "c" "e" ...
## $ stalk.surface.above.ring: chr "s" "s" "s" "s" ...
## $ stalk.surface.below.ring: chr "s" "s" "s" "s" ...
## $ stalk.color.above.ring : chr "w" "w" "w" "w" ...
## $ stalk.color.below.ring : chr "w" "w" "w" "w" ...
## $ veil.type : chr "p" "p" "p" "p" ...
## $ veil.color : chr "w" "w" "w" "w" ...
## $ ring.number : chr "o" "o" "o" "o" ...
## $ ring.type : chr "p" "p" "p" "p" ...
## $ spore.print.color : chr "k" "n" "n" "k" ...
## $ population : chr "s" "n" "n" "s" ...
## $ habitat : chr "u" "g" "m" "u" ...
##Select specific columns from the table to create subset and check
# Keep only the columns analysed below. Selecting by name (instead of by the
# positions 1, 2, 3, 7, 10) keeps the subset correct even if the column order
# of the CSV changes.
mushrooms1 <- mushrooms[
  c("class", "cap.shape", "cap.surface", "gill.attachment", "gill.color")
]
# View() opens a GUI data viewer, so only call it in interactive sessions.
if (interactive()) {
  View(mushrooms1)
}
##Replace the single-letter codes in each selected column with descriptive labels
# Translate mushroom class codes into descriptive labels.
#
# key: one or more class codes ("p" or "e"); coerced with as.character().
# Returns an unnamed character vector the same length as `key`. Codes that are
# not in the table (including NA) map to NA_character_ instead of NULL, so
# element-wise callers always receive exactly one string per input.
#
# NOTE(review): this function is named `class`, which masks base::class() for
# code evaluated in the environment where it is defined. The name is kept so
# existing callers keep working.
class <- function(key) {
  code_labels <- c(
    p = "poisonous",
    e = "edible"
  )
  unname(code_labels[as.character(key)])
}
# Recode the class column in place. vapply() requires every element to map to
# exactly one string and fails loudly otherwise; unlike sapply() it never
# changes its return type, and USE.NAMES = FALSE avoids attaching the original
# codes as names on the column.
mushrooms1$class <- vapply(
  mushrooms1$class, class, character(1), USE.NAMES = FALSE
)
# Translate cap-shape codes into descriptive labels.
#
# key: one or more cap-shape codes; coerced with as.character().
# Returns an unnamed character vector the same length as `key`. Codes that are
# not in the table (including NA) map to NA_character_ instead of NULL, so
# element-wise callers always receive exactly one string per input.
cap.shape <- function(key) {
  code_labels <- c(
    b = "bell",
    c = "conical",
    x = "convex",
    f = "flat",
    k = "knobbed",
    s = "sunken"
  )
  unname(code_labels[as.character(key)])
}
# Recode the cap.shape column in place. vapply() requires every element to map
# to exactly one string and fails loudly otherwise; USE.NAMES = FALSE avoids
# attaching the original codes as names on the column.
mushrooms1$cap.shape <- vapply(
  mushrooms1$cap.shape, cap.shape, character(1), USE.NAMES = FALSE
)
# Translate cap-surface codes into descriptive labels.
#
# key: one or more cap-surface codes; coerced with as.character().
# Returns an unnamed character vector the same length as `key`. Codes that are
# not in the table (including NA) map to NA_character_ instead of NULL, so
# element-wise callers always receive exactly one string per input.
cap.surface <- function(key) {
  code_labels <- c(
    f = "fibrous",
    g = "grooves",
    y = "scaly",
    s = "smooth"
  )
  unname(code_labels[as.character(key)])
}
# Recode the cap.surface column in place. vapply() requires every element to
# map to exactly one string and fails loudly otherwise; USE.NAMES = FALSE
# avoids attaching the original codes as names on the column.
mushrooms1$cap.surface <- vapply(
  mushrooms1$cap.surface, cap.surface, character(1), USE.NAMES = FALSE
)
# Translate gill-attachment codes into descriptive labels.
#
# key: one or more gill-attachment codes; coerced with as.character().
# Returns an unnamed character vector the same length as `key`. Codes that are
# not in the table (including NA) map to NA_character_ instead of NULL, so
# element-wise callers always receive exactly one string per input.
gill.attachment <- function(key) {
  code_labels <- c(
    a = "attached",
    d = "descending",
    f = "free",
    n = "notched"
  )
  unname(code_labels[as.character(key)])
}
# Recode the gill.attachment column in place. vapply() requires every element
# to map to exactly one string and fails loudly otherwise; USE.NAMES = FALSE
# avoids attaching the original codes as names on the column.
mushrooms1$gill.attachment <- vapply(
  mushrooms1$gill.attachment, gill.attachment, character(1), USE.NAMES = FALSE
)
# Translate gill-color codes into descriptive labels.
#
# key: one or more gill-color codes; coerced with as.character().
# Returns an unnamed character vector the same length as `key`. Codes that are
# not in the table (including NA) map to NA_character_ instead of NULL, so
# element-wise callers always receive exactly one string per input.
gill.color <- function(key) {
  code_labels <- c(
    k = "black",
    n = "brown",
    b = "buff",
    h = "chocolate",
    g = "gray",
    r = "green",
    o = "orange",
    p = "pink",
    u = "purple",
    e = "red",
    w = "white",
    y = "yellow"
  )
  unname(code_labels[as.character(key)])
}
# Recode the gill.color column in place. vapply() requires every element to
# map to exactly one string and fails loudly otherwise; USE.NAMES = FALSE
# avoids attaching the original codes as names on the column.
mushrooms1$gill.color <- vapply(
  mushrooms1$gill.color, gill.color, character(1), USE.NAMES = FALSE
)
# Print the first ten recoded rows. head() returns fewer rows when the table is
# shorter than ten, whereas indexing with 1:10 would pad the output with NA
# rows.
head(mushrooms1, 10)
## class cap.shape cap.surface gill.attachment gill.color
## 1 poisonous convex smooth free black
## 2 edible convex smooth free black
## 3 edible bell smooth free brown
## 4 poisonous convex scaly free brown
## 5 edible convex smooth free black
## 6 edible convex scaly free brown
## 7 edible bell smooth free gray
## 8 edible bell scaly free brown
## 9 poisonous convex scaly free pink
## 10 edible bell smooth free gray
#Plot data
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.5.2
# Bar chart of the number of mushrooms per gill color. Built with ggplot()
# and geom_bar() because qplot() is deprecated in current ggplot2 releases;
# for a discrete x with no y, qplot() drew the same bar chart.
ggplot(mushrooms1, aes(x = gill.color)) +
  geom_bar() +
  labs(x = "Color", y = "Count", title = "Classification by gill color")
