Data 607

Packages used

suppressMessages(library(dplyr))
suppressMessages(library(tidyr))
suppressMessages(library(DT))

Week 1 Assignment - Loading Data into a Data Frame

# Source URL of the raw data file hosted on archive.ics.uci.edu
UCIurl <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
# Download and parse the file: comma-separated, no header row, and text
# columns kept as character vectors instead of being converted to factors
mashroom.FullSet <- read.table(
  UCIurl,
  sep = ",",
  header = FALSE,
  stringsAsFactors = FALSE
)
# Preview the first 10 raw records
head(mashroom.FullSet, n = 10)
##    V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1   p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2   e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3   e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4   p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5   e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6   e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 7   e  b  s  w  t  a  f  c  b   g   e   c   s   s   w   w   p   w   o   p
## 8   e  b  y  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 9   p  x  y  w  t  p  f  c  n   p   e   e   s   s   w   w   p   w   o   p
## 10  e  b  s  y  t  a  f  c  b   g   e   c   s   s   w   w   p   w   o   p
##    V21 V22 V23
## 1    k   s   u
## 2    n   n   g
## 3    n   n   m
## 4    k   s   u
## 5    n   a   g
## 6    k   n   g
## 7    k   n   m
## 8    n   s   m
## 9    k   v   g
## 10   k   s   m
# Replace the default V1..V23 column names with descriptive attribute names.
# Several names are non-syntactic (they contain "-" or "?"), so they must be
# quoted or back-ticked whenever they are referenced later.

names(mashroom.FullSet) <- c(
  "class", "cap-shape", "cap-surface", "cap-color", "bruises?", "odor",
  "gill-attachment", "gill-spacing", "gill-size", "gill-color",
  "stalk-shape", "stalk-root", "stalk-surface-above-ring",
  "stalk-surface-below-ring", "stalk-color-above-ring",
  "stalk-color-below-ring", "veil-type", "veil-color", "ring-number",
  "ring-type", "spore-print-color", "population", "habitat"
)
# Convert to a tibble for compact printing. as_tibble() (re-exported by
# dplyr) is used because tbl_df() is deprecated.
mashroom.FullSet <- as_tibble(mashroom.FullSet)
head(mashroom.FullSet, 10)
## # A tibble: 10 x 23
##    class `cap-shape` `cap-surface` `cap-color` `bruises?` odor 
##    <chr> <chr>       <chr>         <chr>       <chr>      <chr>
##  1 p     x           s             n           t          p    
##  2 e     x           s             y           t          a    
##  3 e     b           s             w           t          l    
##  4 p     x           y             w           t          p    
##  5 e     x           s             g           f          n    
##  6 e     x           y             y           t          a    
##  7 e     b           s             w           t          a    
##  8 e     b           y             w           t          l    
##  9 p     x           y             w           t          p    
## 10 e     b           s             y           t          a    
## # ... with 17 more variables: `gill-attachment` <chr>,
## #   `gill-spacing` <chr>, `gill-size` <chr>, `gill-color` <chr>,
## #   `stalk-shape` <chr>, `stalk-root` <chr>,
## #   `stalk-surface-above-ring` <chr>, `stalk-surface-below-ring` <chr>,
## #   `stalk-color-above-ring` <chr>, `stalk-color-below-ring` <chr>,
## #   `veil-type` <chr>, `veil-color` <chr>, `ring-number` <chr>,
## #   `ring-type` <chr>, `spore-print-color` <chr>, population <chr>,
## #   habitat <chr>
# Change the codes in the attributes to more meaningful names.
#
# Every attribute gets a named lookup vector (names = codes, values = labels).
# The lookup vector is indexed by the column of codes so that each row is
# translated individually. Assigning the lookup vector itself to a column
# would merely recycle its labels down the rows, ignoring the code stored in
# each row.

# Translate a vector of codes into labels.
#   codes  - vector of codes, one element per row
#   labels - named character vector; names are codes, values are labels
# Returns an unnamed character vector of the same length as `codes`. Codes
# without an entry in `labels` are returned unchanged instead of becoming NA,
# which also makes it harmless to run this section twice.
decode_codes <- function(codes, labels) {
  codes <- as.character(codes)
  decoded <- unname(labels[codes])
  unmapped <- is.na(decoded)
  decoded[unmapped] <- codes[unmapped]
  decoded
}

mashroom.FullSet$class <- decode_codes(
  mashroom.FullSet[["class"]],
  c("p" = "Poisonous", "e" = "Edible")
)
mashroom.FullSet$cap_shape <- decode_codes(
  mashroom.FullSet[["cap-shape"]],
  c("b" = "bell", "c" = "Conical", "x" = "Convex", "f" = "flat",
    "k" = "knobbed", "s" = "sunken")
)
mashroom.FullSet$cap_surface <- decode_codes(
  mashroom.FullSet[["cap-surface"]],
  c("f" = "Fibrous", "g" = "grooves", "y" = "scaly", "s" = "smooth")
)
# All colour codes live in one lookup vector; separate assignments to the
# same column would overwrite each other.
mashroom.FullSet$cap_color <- decode_codes(
  mashroom.FullSet[["cap-color"]],
  c("n" = "brown", "b" = "buff", "c" = "cinnamon", "g" = "gray",
    "r" = "green", "p" = "pink", "u" = "purple", "e" = "red",
    "w" = "white", "y" = "yellow")
)
mashroom.FullSet$bruises <- decode_codes(
  mashroom.FullSet[["bruises?"]],
  c("t" = "bruises", "f" = "no")
)
mashroom.FullSet$odor <- decode_codes(
  mashroom.FullSet[["odor"]],
  c("a" = "almond", "l" = "anise", "c" = "creosote", "y" = "fishy",
    "f" = "foul", "m" = "musty", "n" = "none", "p" = "pungent",
    "s" = "spicy")
)
mashroom.FullSet$gill_attachment <- decode_codes(
  mashroom.FullSet[["gill-attachment"]],
  c("a" = "attached", "d" = "descending", "f" = "free", "n" = "notched")
)
mashroom.FullSet$veil_type <- decode_codes(
  mashroom.FullSet[["veil-type"]],
  c("p" = "partial", "u" = "universal")
)
# Ring-number codes are n/o/t (the raw data preview shows "o" in this
# column); labels follow the data set's attribute description.
mashroom.FullSet$ring_number <- decode_codes(
  mashroom.FullSet[["ring-number"]],
  c("n" = "none", "o" = "one", "t" = "two")
)
mashroom.FullSet$population <- decode_codes(
  mashroom.FullSet[["population"]],
  c("a" = "abundant", "c" = "clustered", "n" = "numerous",
    "s" = "scattered", "v" = "several", "y" = "solitary")
)
# Includes u/w/d as well: the raw data preview contains "u" in this column.
mashroom.FullSet$habitat <- decode_codes(
  mashroom.FullSet[["habitat"]],
  c("g" = "grasses", "l" = "leaves", "m" = "meadows", "p" = "paths",
    "u" = "urban", "w" = "waste", "d" = "woods")
)
# Data sample (subsetting): keep the rows whose veil_type is "partial" and a
# selection of columns. Each column is listed exactly once, because repeating
# a name (such as "odor") yields a result with duplicate column names.
# NOTE: `cap-surface` and `cap-color` are the original single-letter code
# columns, not the relabelled cap_surface / cap_color columns.
mashroom.SubSet <- subset(
  mashroom.FullSet,
  veil_type == "partial",
  select = c(
    "class", "cap_shape", "odor", "cap-surface", "cap-color",
    "bruises", "gill_attachment", "veil_type", "ring_number"
  )
)
mashroom.SubSet
## # A tibble: 4,062 x 10
##    class cap_shape odor  `cap-surface` `cap-color` bruises odor 
##    <chr> <chr>     <chr> <chr>         <chr>       <chr>   <chr>
##  1 Poin~ bell      spicy s             n           bruises spicy
##  2 Poin~ Convex    spicy s             w           bruises spicy
##  3 Poin~ knobbed   spicy s             g           bruises spicy
##  4 Poin~ bell      spicy s             w           bruises spicy
##  5 Poin~ Convex    spicy y             w           bruises spicy
##  6 Poin~ knobbed   spicy y             y           bruises spicy
##  7 Poin~ bell      spicy s             y           bruises spicy
##  8 Poin~ Convex    spicy f             n           bruises spicy
##  9 Poin~ knobbed   spicy f             w           bruises spicy
## 10 Poin~ bell      spicy y             w           bruises spicy
## # ... with 4,052 more rows, and 3 more variables: gill_attachment <chr>,
## #   veil_type <chr>, ring_number <chr>