knitr::opts_chunk$set(echo = TRUE)
#install.packages("plyr")
#install.packages("rprojroot")
#install.packages("rmarkdown")
#install.packages("rmarkdown", repos = "https://mran.revolutionanalytics.com/snapshot/2016-01-02")
#devtools::install_url("http://cran.r-project.org/src/contrib/rmarkdown0.5.1.tar.gz")
library(data.table)
## Warning: package 'data.table' was built under R version 3.2.5
library(plyr)
## Warning: package 'plyr' was built under R version 3.2.5
# Download the comma-separated UCI mushroom records. The file is read without
# naming any columns here, so they arrive with read.table()'s default names
# (V1, V2, ...).
original_file <- data.frame(
  read.table(
    file = "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
    sep = ","
  )
)
# Preview the first five records.
head(original_file, n = 5)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
# Work on a deep copy of the raw download. A plain `<-` assignment does not
# copy the data frame, and data.table::setnames() renames by reference, so
# renaming an alias would silently rename the columns of `original_file` too.
reformatted_file <- copy(original_file)

# Replace the default V1..V23 headers with descriptive attribute names.
# `old` and `new` are matched position by position.
setnames(
  reformatted_file,
  old = paste0("V", 1:23),
  new = c(
    "type",
    "cap_shape",
    "cap_surface",
    "cap_color",
    "bruises",
    "odor",
    "gill_attachment",
    "gill_spacing",
    "gill_size",
    "gill_color",
    "stalk_shape",
    "stalk_root",
    "stalk_surface_above_ring",
    "stalk_surface_below_ring",
    "stalk_color_above_ring",
    "stalk_color_below_ring",
    "veil_type",
    "veil_color",
    "ring_number",
    "ring_type",
    "spore_print_color",
    "population",
    "habitat"
  )
)
# Preview the first ten records under the new headers.
head(reformatted_file, 10)
## type cap_shape cap_surface cap_color bruises odor gill_attachment
## 1 p x s n t p f
## 2 e x s y t a f
## 3 e b s w t l f
## 4 p x y w t p f
## 5 e x s g f n f
## 6 e x y y t a f
## 7 e b s w t a f
## 8 e b y w t l f
## 9 p x y w t p f
## 10 e b s y t a f
## gill_spacing gill_size gill_color stalk_shape stalk_root
## 1 c n k e e
## 2 c b k e c
## 3 c b n e c
## 4 c n n e e
## 5 w b k t e
## 6 c b n e c
## 7 c b g e c
## 8 c b n e c
## 9 c n p e e
## 10 c b g e c
## stalk_surface_above_ring stalk_surface_below_ring
## 1 s s
## 2 s s
## 3 s s
## 4 s s
## 5 s s
## 6 s s
## 7 s s
## 8 s s
## 9 s s
## 10 s s
## stalk_color_above_ring stalk_color_below_ring veil_type veil_color
## 1 w w p w
## 2 w w p w
## 3 w w p w
## 4 w w p w
## 5 w w p w
## 6 w w p w
## 7 w w p w
## 8 w w p w
## 9 w w p w
## 10 w w p w
## ring_number ring_type spore_print_color population habitat
## 1 o p k s u
## 2 o p n n g
## 3 o p n n m
## 4 o p k s u
## 5 o e n a g
## 6 o p k n g
## 7 o p k n m
## 8 o p n s m
## 9 o p k v g
## 10 o p k s m
# Translate the single-letter class code into a readable label; any code other
# than "p" or "e" is reported as "unknown".
reformatted_file$type <- ifelse(
  reformatted_file$type == "p", "poisonous",
  ifelse(reformatted_file$type == "e", "edible", "unknown")
)

# Lookup table: one entry per attribute column, each a named character vector
# mapping the single-letter code (name) to its readable label (value).
# Labels carry no leading/trailing whitespace, so the same label is spelled
# identically in every column and exact comparisons such as
# `cap_color == "brown"` match as expected.
value_labels <- list(
  cap_shape = c(
    b = "bell", c = "conical", x = "convex", f = "flat", k = "knobbed",
    s = "sunken"
  ),
  cap_surface = c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth"),
  cap_color = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  ),
  bruises = c(t = "bruises", f = "no"),
  odor = c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  ),
  gill_attachment = c(a = "attached", f = "free"),
  gill_spacing = c(c = "close", w = "crowded"),
  gill_size = c(b = "broad", n = "narrow"),
  gill_color = c(
    k = "black", n = "brown", b = "buff", h = "chocolate", g = "gray",
    w = "white", y = "yellow", r = "green", o = "orange", p = "pink",
    u = "purple", e = "red"
  ),
  stalk_shape = c(e = "enlarging", t = "tapering"),
  stalk_root = c(
    b = "bulbous", c = "club", e = "equal", r = "rooted", "?" = "missing"
  ),
  stalk_surface_above_ring = c(
    f = "fibrous", y = "scaly", k = "silky", s = "smooth"
  ),
  stalk_surface_below_ring = c(
    f = "fibrous", y = "scaly", k = "silky", s = "smooth"
  ),
  stalk_color_above_ring = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
    p = "pink", e = "red", w = "white", y = "yellow"
  ),
  stalk_color_below_ring = c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", o = "orange",
    p = "pink", e = "red", w = "white", y = "yellow"
  ),
  veil_type = c(p = "partial", u = "universal"),
  veil_color = c(n = "brown", o = "orange", w = "white", y = "yellow"),
  ring_number = c(n = "none", o = "one", t = "two"),
  ring_type = c(
    c = "cobwebby", e = "evanescent", f = "flaring", l = "large", n = "none",
    p = "pendant", s = "sheathing", z = "zone"
  ),
  spore_print_color = c(
    k = "black", n = "brown", b = "buff", h = "chocolate", r = "green",
    o = "orange", u = "purple", w = "white", y = "yellow"
  ),
  population = c(
    a = "abundant", c = "clustered", n = "numerous", s = "scattered",
    v = "several", y = "solitary"
  ),
  habitat = c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths", u = "urban",
    w = "waste", d = "woods"
  )
)

# Recode every column named in `labels_by_column` with plyr::mapvalues().
#   df               data frame holding the coded columns
#   labels_by_column named list; each element is a named character vector
#                    (names = codes to replace, values = replacement labels)
# Returns `df` with the listed columns recoded; other columns are untouched.
# Stops early if a listed column is absent instead of silently skipping it.
recode_columns <- function(df, labels_by_column) {
  stopifnot(all(names(labels_by_column) %in% names(df)))
  for (column in names(labels_by_column)) {
    code_labels <- labels_by_column[[column]]
    df[[column]] <- mapvalues(
      df[[column]],
      from = names(code_labels),
      to = unname(code_labels)
    )
  }
  df
}

reformatted_file <- recode_columns(reformatted_file, value_labels)
# Preview the first five fully labelled records.
head(reformatted_file, 5)
## type cap_shape cap_surface cap_color bruises odor
## 1 poisonous convex smooth brown bruises pungent
## 2 edible convex smooth yellow bruises almond
## 3 edible bell smooth white bruises anise
## 4 poisonous convex scaly white bruises pungent
## 5 edible convex smooth gray no none
## gill_attachment gill_spacing gill_size gill_color stalk_shape stalk_root
## 1 free close narrow black enlarging equal
## 2 free close broad black enlarging club
## 3 free close broad brown enlarging club
## 4 free close narrow brown enlarging equal
## 5 free crowded broad black tapering equal
## stalk_surface_above_ring stalk_surface_below_ring stalk_color_above_ring
## 1 smooth smooth white
## 2 smooth smooth white
## 3 smooth smooth white
## 4 smooth smooth white
## 5 smooth smooth white
## stalk_color_below_ring veil_type veil_color ring_number ring_type
## 1 white partial white one pendant
## 2 white partial white one pendant
## 3 white partial white one pendant
## 4 white partial white one pendant
## 5 white partial white one evanescent
## spore_print_color population habitat
## 1 black scattered urban
## 2 brown numerous grasses
## 3 brown numerous meadows
## 4 black scattered urban
## 5 brown abundant grasses
# Keep only the records whose class label is "poisonous", then preview the
# first five of them.
Poisionous_df <- reformatted_file[
  reformatted_file[["type"]] == "poisonous",
  ,
  drop = FALSE
]
head(Poisionous_df, n = 5)
## type cap_shape cap_surface cap_color bruises odor
## 1 poisonous convex smooth brown bruises pungent
## 4 poisonous convex scaly white bruises pungent
## 9 poisonous convex scaly white bruises pungent
## 14 poisonous convex scaly white bruises pungent
## 18 poisonous convex smooth brown bruises pungent
## gill_attachment gill_spacing gill_size gill_color stalk_shape
## 1 free close narrow black enlarging
## 4 free close narrow brown enlarging
## 9 free close narrow pink enlarging
## 14 free close narrow black enlarging
## 18 free close narrow brown enlarging
## stalk_root stalk_surface_above_ring stalk_surface_below_ring
## 1 equal smooth smooth
## 4 equal smooth smooth
## 9 equal smooth smooth
## 14 equal smooth smooth
## 18 equal smooth smooth
## stalk_color_above_ring stalk_color_below_ring veil_type veil_color
## 1 white white partial white
## 4 white white partial white
## 9 white white partial white
## 14 white white partial white
## 18 white white partial white
## ring_number ring_type spore_print_color population habitat
## 1 one pendant black scattered urban
## 4 one pendant black scattered urban
## 9 one pendant black several grasses
## 14 one pendant brown several urban
## 18 one pendant black scattered grasses
# Keep only the records whose class label is "edible", then preview the first
# five of them.
Edible_df <- reformatted_file[
  reformatted_file[["type"]] == "edible",
  ,
  drop = FALSE
]
head(Edible_df, n = 5)
## type cap_shape cap_surface cap_color bruises odor gill_attachment
## 2 edible convex smooth yellow bruises almond free
## 3 edible bell smooth white bruises anise free
## 5 edible convex smooth gray no none free
## 6 edible convex scaly yellow bruises almond free
## 7 edible bell smooth white bruises almond free
## gill_spacing gill_size gill_color stalk_shape stalk_root
## 2 close broad black enlarging club
## 3 close broad brown enlarging club
## 5 crowded broad black tapering equal
## 6 close broad brown enlarging club
## 7 close broad gray enlarging club
## stalk_surface_above_ring stalk_surface_below_ring stalk_color_above_ring
## 2 smooth smooth white
## 3 smooth smooth white
## 5 smooth smooth white
## 6 smooth smooth white
## 7 smooth smooth white
## stalk_color_below_ring veil_type veil_color ring_number ring_type
## 2 white partial white one pendant
## 3 white partial white one pendant
## 5 white partial white one evanescent
## 6 white partial white one pendant
## 7 white partial white one pendant
## spore_print_color population habitat
## 2 brown numerous grasses
## 3 brown numerous meadows
## 5 brown abundant grasses
## 6 black numerous grasses
## 7 black numerous meadows