R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button, a document is generated that includes both the written content and the output of any embedded R code chunks. You can embed an R code chunk like this:

Rajesh Kumar CUNY 607 Assignment 1

CUNY 607 Assignment 1 - Mushroom Data Analysis

library(RCurl)
## Loading required package: bitops
# Load the agaricus-lepiota (mushroom) data from GitHub.
#
# The raw file has no header row, so header = FALSE is required. With
# header = TRUE the first observation was consumed as column names; the
# p, x, s, n, ... names and the 8123-row count in the transcript below come
# from that earlier call (re-knit to refresh them). Descriptive column names
# are assigned further down.
#
# stringsAsFactors = TRUE keeps every column a factor on R >= 4.0, where the
# default became FALSE, matching the factor columns shown in the transcript.
fileURL <- "https://raw.githubusercontent.com/rajk11040/CUNY607/master/agaricus-lepiota.data"
mushroomDF <- read.csv(
  text = getURL(fileURL),
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Per-column level counts as a quick check of the import.
summary(mushroomDF)
##  p        x        s              n        t             p.1      
##  e:4208   b: 452   f:2320   n      :2283   f:4748   n      :3528  
##  p:3915   c:   4   g:   4   g      :1840   t:3375   f      :2160  
##           f:3152   s:2555   e      :1500            s      : 576  
##           k: 828   y:3244   y      :1072            y      : 576  
##           s:  32            w      :1040            a      : 400  
##           x:3655            b      : 168            l      : 400  
##                             (Other): 220            (Other): 483  
##  f        c        n.1            k        e        e.1      s.1     
##  a: 210   c:6811   b:5612   b      :1728   e:3515   ?:2480   f: 552  
##  f:7913   w:1312   n:2511   p      :1492   t:4608   b:3776   k:2372  
##                             w      :1202            c: 556   s:5175  
##                             n      :1048            e:1119   y:  24  
##                             g      : 752            r: 192           
##                             h      : 732                             
##                             (Other):1169                             
##  s.2            w             w.1       p.2      w.2      o       
##  f: 600   w      :4463   w      :4383   p:8123   n:  96   n:  36  
##  k:2304   p      :1872   p      :1872            o:  96   o:7487  
##  s:4935   g      : 576   g      : 576            w:7923   t: 600  
##  y: 284   n      : 448   n      : 512            y:   8           
##           b      : 432   b      : 432                             
##           o      : 192   o      : 192                             
##           (Other): 140   (Other): 156                             
##  p.3           k.1       s.3      u       
##  e:2776   w      :2388   a: 384   d:3148  
##  f:  48   n      :1968   c: 340   g:2148  
##  l:1296   k      :1871   n: 400   l: 832  
##  n:  36   h      :1632   s:1247   m: 292  
##  p:3967   r      :  72   v:4040   p:1144  
##           b      :  48   y:1712   u: 367  
##           (Other): 144            w: 192
# Assign descriptive names to the 23 columns, in file order.
# "bruises" was previously misspelled "buises"; the str() transcript below was
# captured before the correction and still shows the old spelling.
names(mushroomDF) <- c(
  "class",
  "capShape",
  "capSurface",
  "capColor",
  "bruises",
  "odor",
  "gillAttachment",
  "gillSpacing",
  "gillSize",
  "gillColor",
  "stalkShape",
  "stalkRoot",
  "stalkSurfaceAboveRing",
  "stalkSurfaceBelowRing",
  "stalkColorAboveRing",
  "stalkColorBelowRing",
  "veilType",
  "veilColor",
  "ringNumber",
  "ringType",
  "sporePrintColor",
  "population",
  "habitat"
)

# Inspect the renamed columns and their types.
str(mushroomDF)
## 'data.frame':    8123 obs. of  23 variables:
##  $ class                : Factor w/ 2 levels "e","p": 1 1 2 1 1 1 1 2 1 1 ...
##  $ capShape             : Factor w/ 6 levels "b","c","f","k",..: 6 1 6 6 6 1 1 6 1 6 ...
##  $ capSurface           : Factor w/ 4 levels "f","g","s","y": 3 3 4 3 4 3 4 4 3 4 ...
##  $ capColor             : Factor w/ 10 levels "b","c","e","g",..: 10 9 9 4 10 9 9 9 10 10 ...
##  $ buises               : Factor w/ 2 levels "f","t": 2 2 2 1 2 2 2 2 2 2 ...
##  $ odor                 : Factor w/ 9 levels "a","c","f","l",..: 1 4 7 6 1 1 4 7 1 4 ...
##  $ gillAttachment       : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
##  $ gillSpacing          : Factor w/ 2 levels "c","w": 1 1 1 2 1 1 1 1 1 1 ...
##  $ gillSize             : Factor w/ 2 levels "b","n": 1 1 2 1 1 1 1 2 1 1 ...
##  $ gillColor            : Factor w/ 12 levels "b","e","g","h",..: 5 6 6 5 6 3 6 8 3 3 ...
##  $ stalkShape           : Factor w/ 2 levels "e","t": 1 1 1 2 1 1 1 1 1 1 ...
##  $ stalkRoot            : Factor w/ 5 levels "?","b","c","e",..: 3 3 4 4 3 3 3 4 3 3 ...
##  $ stalkSurfaceAboveRing: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ stalkSurfaceBelowRing: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ stalkColorAboveRing  : Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ stalkColorBelowRing  : Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ veilType             : Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
##  $ veilColor            : Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ ringNumber           : Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
##  $ ringType             : Factor w/ 5 levels "e","f","l","n",..: 5 5 5 1 5 5 5 5 5 5 ...
##  $ sporePrintColor      : Factor w/ 9 levels "b","h","k","n",..: 4 4 3 4 3 3 4 3 3 4 ...
##  $ population           : Factor w/ 6 levels "a","c","n","s",..: 3 3 4 1 3 3 4 5 4 3 ...
##  $ habitat              : Factor w/ 7 levels "d","g","l","m",..: 2 4 6 2 2 4 4 2 4 2 ...
# Build a subset of the data frame: edible rows, five columns.

# Which class codes occur in the full data set?
unique(mushroomDF$class)
## [1] e p
## Levels: e p

# Keep only the rows coded "e". which() drops rows where the comparison is NA
# instead of returning NA-filled rows.
edibleMushroomDF <- mushroomDF[
  which(mushroomDF$class == "e"),
  c("class", "capShape", "capSurface", "population", "habitat")
]

dim(edibleMushroomDF)
## [1] 4208    5
# Replace the single-letter codes with more readable labels

# Columns carried over into the edible subset
names(edibleMushroomDF)
## [1] "class"      "capShape"   "capSurface" "population" "habitat"
unique(edibleMushroomDF$class)
## [1] e
## Levels: e p
# Recode class: "e" becomes "edible"; any other value is kept unchanged.
# Rebuilding the factor from text also discards levels that no longer occur.
edibleMushroomDF$class <- local({
  classText <- as.character(edibleMushroomDF$class)
  classText[classText %in% "e"] <- "edible"
  as.factor(classText)
})

# Recode capShape from single-letter codes to descriptive labels
unique(edibleMushroomDF$capShape)
## [1] x b s f k
## Levels: b c f k s x
edibleMushroomDF$capShape <- local({
  shapeLabels <- c(
    b = "bell",
    c = "conical",
    f = "flat",
    k = "knobbed",
    s = "sunken",
    x = "convex"
  )
  shapeText <- as.character(edibleMushroomDF$capShape)
  # Only translate codes present in the lookup; anything else stays as-is.
  known <- shapeText %in% names(shapeLabels)
  shapeText[known] <- shapeLabels[shapeText[known]]
  as.factor(shapeText)
})


# Recode capSurface from single-letter codes to descriptive labels
unique(edibleMushroomDF$capSurface)
## [1] s y f
## Levels: f g s y
edibleMushroomDF$capSurface <- local({
  surfaceLabels <- c(
    f = "fibrous",
    g = "grooves",
    s = "smooth",
    y = "scaly"
  )
  surfaceText <- as.character(edibleMushroomDF$capSurface)
  # Only translate codes present in the lookup; anything else stays as-is.
  known <- surfaceText %in% names(surfaceLabels)
  surfaceText[known] <- surfaceLabels[surfaceText[known]]
  as.factor(surfaceText)
})


# Recode population from single-letter codes to descriptive labels
edibleMushroomDF$population <- local({
  populationLabels <- c(
    a = "abundant",
    c = "clustered",
    n = "numerous",
    s = "scattered",
    v = "several",
    y = "solitary"
  )
  populationText <- as.character(edibleMushroomDF$population)
  # Only translate codes present in the lookup; anything else stays as-is.
  known <- populationText %in% names(populationLabels)
  populationText[known] <- populationLabels[populationText[known]]
  as.factor(populationText)
})

# Recode habitat from single-letter codes to descriptive labels
edibleMushroomDF$habitat <- local({
  habitatLabels <- c(
    g = "grasses",
    m = "meadows",
    u = "urban",
    d = "woods",
    p = "paths",
    w = "waste",
    l = "leaves"
  )
  habitatText <- as.character(edibleMushroomDF$habitat)
  # Only translate codes present in the lookup; anything else stays as-is.
  known <- habitatText %in% names(habitatLabels)
  habitatText[known] <- habitatLabels[habitatText[known]]
  as.factor(habitatText)
})




# Final check: print the distinct values of every recoded column
print(unique(edibleMushroomDF[["class"]]))
## [1] edible
## Levels: edible
print(unique(edibleMushroomDF[["capShape"]]))
## [1] convex  bell    sunken  flat    knobbed
## Levels: bell convex flat knobbed sunken
print(unique(edibleMushroomDF[["capSurface"]]))
## [1] smooth  scaly   fibrous
## Levels: fibrous scaly smooth
print(unique(edibleMushroomDF[["habitat"]]))
## [1] grasses meadows urban   woods   paths   waste   leaves 
## Levels: grasses leaves meadows paths urban waste woods
print(unique(edibleMushroomDF[["population"]]))
## [1] numerous  abundant  scattered solitary  several   clustered
## Levels: abundant clustered numerous scattered several solitary

Note that the `echo = FALSE` parameter can be added to a code chunk to prevent printing of the R code that generated its output.