This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button, a document will be generated that includes both the content and the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
library(RCurl)
## Loading required package: bitops
# Download the mushroom data set and load it into a data frame.
#
# The raw file has no header line, so header = FALSE is required. With
# header = TRUE the first record is consumed as the column names (which is
# why names such as p, x, s, n, t, p.1 appear) and one observation is lost.
# stringsAsFactors = TRUE keeps every column a factor on any R version
# (the read.csv default became FALSE in R 4.0.0).
fileURL <- "https://raw.githubusercontent.com/rajk11040/CUNY607/master/agaricus-lepiota.data"
mushroomDF <- read.csv(
  text = getURL(fileURL),
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
# Fail early if the download did not yield the expected 23 attributes,
# which are renamed positionally further down.
stopifnot(ncol(mushroomDF) == 23L)
summary(mushroomDF)
## p x s n t p.1
## e:4208 b: 452 f:2320 n :2283 f:4748 n :3528
## p:3915 c: 4 g: 4 g :1840 t:3375 f :2160
## f:3152 s:2555 e :1500 s : 576
## k: 828 y:3244 y :1072 y : 576
## s: 32 w :1040 a : 400
## x:3655 b : 168 l : 400
## (Other): 220 (Other): 483
## f c n.1 k e e.1 s.1
## a: 210 c:6811 b:5612 b :1728 e:3515 ?:2480 f: 552
## f:7913 w:1312 n:2511 p :1492 t:4608 b:3776 k:2372
## w :1202 c: 556 s:5175
## n :1048 e:1119 y: 24
## g : 752 r: 192
## h : 732
## (Other):1169
## s.2 w w.1 p.2 w.2 o
## f: 600 w :4463 w :4383 p:8123 n: 96 n: 36
## k:2304 p :1872 p :1872 o: 96 o:7487
## s:4935 g : 576 g : 576 w:7923 t: 600
## y: 284 n : 448 n : 512 y: 8
## b : 432 b : 432
## o : 192 o : 192
## (Other): 140 (Other): 156
## p.3 k.1 s.3 u
## e:2776 w :2388 a: 384 d:3148
## f: 48 n :1968 c: 340 g:2148
## l:1296 k :1871 n: 400 l: 832
## n: 36 h :1632 s:1247 m: 292
## p:3967 r : 72 v:4040 p:1144
## b : 48 y:1712 u: 367
## (Other): 144 w: 192
# Assign descriptive names to the 23 columns, in file order.
# The fifth attribute is "bruises" (it was previously misspelled "buises").
names(mushroomDF) <- c(
  "class",
  "capShape",
  "capSurface",
  "capColor",
  "bruises",
  "odor",
  "gillAttachment",
  "gillSpacing",
  "gillSize",
  "gillColor",
  "stalkShape",
  "stalkRoot",
  "stalkSurfaceAboveRing",
  "stalkSurfaceBelowRing",
  "stalkColorAboveRing",
  "stalkColorBelowRing",
  "veilType",
  "veilColor",
  "ringNumber",
  "ringType",
  "sporePrintColor",
  "population",
  "habitat"
)
# Show the structure of the renamed data frame.
str(mushroomDF)
## 'data.frame': 8123 obs. of 23 variables:
## $ class : Factor w/ 2 levels "e","p": 1 1 2 1 1 1 1 2 1 1 ...
## $ capShape : Factor w/ 6 levels "b","c","f","k",..: 6 1 6 6 6 1 1 6 1 6 ...
## $ capSurface : Factor w/ 4 levels "f","g","s","y": 3 3 4 3 4 3 4 4 3 4 ...
## $ capColor : Factor w/ 10 levels "b","c","e","g",..: 10 9 9 4 10 9 9 9 10 10 ...
## $ buises : Factor w/ 2 levels "f","t": 2 2 2 1 2 2 2 2 2 2 ...
## $ odor : Factor w/ 9 levels "a","c","f","l",..: 1 4 7 6 1 1 4 7 1 4 ...
## $ gillAttachment : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
## $ gillSpacing : Factor w/ 2 levels "c","w": 1 1 1 2 1 1 1 1 1 1 ...
## $ gillSize : Factor w/ 2 levels "b","n": 1 1 2 1 1 1 1 2 1 1 ...
## $ gillColor : Factor w/ 12 levels "b","e","g","h",..: 5 6 6 5 6 3 6 8 3 3 ...
## $ stalkShape : Factor w/ 2 levels "e","t": 1 1 1 2 1 1 1 1 1 1 ...
## $ stalkRoot : Factor w/ 5 levels "?","b","c","e",..: 3 3 4 4 3 3 3 4 3 3 ...
## $ stalkSurfaceAboveRing: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ stalkSurfaceBelowRing: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ stalkColorAboveRing : Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ stalkColorBelowRing : Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ veilType : Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
## $ veilColor : Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ ringNumber : Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
## $ ringType : Factor w/ 5 levels "e","f","l","n",..: 5 5 5 1 5 5 5 5 5 5 ...
## $ sporePrintColor : Factor w/ 9 levels "b","h","k","n",..: 4 4 3 4 3 3 4 3 3 4 ...
## $ population : Factor w/ 6 levels "a","c","n","s",..: 3 3 4 1 3 3 4 5 4 3 ...
## $ habitat : Factor w/ 7 levels "d","g","l","m",..: 2 4 6 2 2 4 4 2 4 2 ...
# Build a subset of the data frame: edible rows only, five columns of interest.
unique(mushroomDF$class)
## [1] e p
## Levels: e p
# which() discards NA comparisons, so only rows whose class is exactly "e"
# are kept.
edibleMushroomDF <- mushroomDF[
  which(mushroomDF$class == "e"),
  c("class", "capShape", "capSurface", "population", "habitat")
]
dim(edibleMushroomDF)
## [1] 4208 5
# Replace the single-letter codes with more readable labels.
names(edibleMushroomDF)
## [1] "class" "capShape" "capSurface" "population" "habitat"
unique(edibleMushroomDF$class)
## [1] e
## Levels: e p
# Recode class: the anchored pattern rewrites only values that are exactly
# "e"; the result is converted back to a factor.
edibleMushroomDF$class <- as.factor(
  sub("^e$", "edible", as.character(edibleMushroomDF$class))
)
# Recode capShape from single-letter codes to descriptive labels.
unique(edibleMushroomDF$capShape)
## [1] x b s f k
## Levels: b c f k s x
edibleMushroomDF$capShape <- local({
  shapeLabels <- c(
    b = "bell",
    c = "conical",
    f = "flat",
    k = "knobbed",
    s = "sunken",
    x = "convex"
  )
  codes <- as.character(edibleMushroomDF$capShape)
  # Only values present in the lookup are rewritten; anything else is kept.
  known <- codes %in% names(shapeLabels)
  codes[known] <- unname(shapeLabels[codes[known]])
  as.factor(codes)
})
# Recode capSurface from single-letter codes to descriptive labels.
unique(edibleMushroomDF$capSurface)
## [1] s y f
## Levels: f g s y
edibleMushroomDF$capSurface <- local({
  surfaceLabels <- c(
    f = "fibrous",
    g = "grooves",
    s = "smooth",
    y = "scaly"
  )
  codes <- as.character(edibleMushroomDF$capSurface)
  # Only values present in the lookup are rewritten; anything else is kept.
  known <- codes %in% names(surfaceLabels)
  codes[known] <- unname(surfaceLabels[codes[known]])
  as.factor(codes)
})
# Recode population from single-letter codes to descriptive labels.
edibleMushroomDF$population <- local({
  populationLabels <- c(
    a = "abundant",
    c = "clustered",
    n = "numerous",
    s = "scattered",
    v = "several",
    y = "solitary"
  )
  codes <- as.character(edibleMushroomDF$population)
  # Only values present in the lookup are rewritten; anything else is kept.
  known <- codes %in% names(populationLabels)
  codes[known] <- unname(populationLabels[codes[known]])
  as.factor(codes)
})
# Recode habitat from single-letter codes to descriptive labels.
edibleMushroomDF$habitat <- local({
  habitatLabels <- c(
    g = "grasses",
    m = "meadows",
    u = "urban",
    d = "woods",
    p = "paths",
    w = "waste",
    l = "leaves"
  )
  codes <- as.character(edibleMushroomDF$habitat)
  # Only values present in the lookup are rewritten; anything else is kept.
  known <- codes %in% names(habitatLabels)
  codes[known] <- unname(habitatLabels[codes[known]])
  as.factor(codes)
})
# Print the distinct values of each recoded column to confirm the new labels.
unique(edibleMushroomDF[["class"]])
## [1] edible
## Levels: edible
unique(edibleMushroomDF[["capShape"]])
## [1] convex bell sunken flat knobbed
## Levels: bell convex flat knobbed sunken
unique(edibleMushroomDF[["capSurface"]])
## [1] smooth scaly fibrous
## Levels: fibrous scaly smooth
unique(edibleMushroomDF[["habitat"]])
## [1] grasses meadows urban woods paths waste leaves
## Levels: grasses leaves meadows paths urban waste woods
unique(edibleMushroomDF[["population"]])
## [1] numerous abundant scattered solitary several clustered
## Levels: abundant clustered numerous scattered several solitary
Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.