knitr::opts_chunk$set(echo = TRUE)

# Install RCurl only when it is missing, so re-knitting the document does not
# re-download the package every time. The CRAN mirror is reached over HTTPS.
if (!requireNamespace("RCurl", quietly = TRUE)) {
  install.packages("RCurl", repos = "https://cran.us.r-project.org")
}
## package 'RCurl' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\jenny_000\AppData\Local\Temp\RtmpQPtlrc\downloaded_packages
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

# library() stops with an error when the package is unavailable, whereas
# require() only returns FALSE and lets getURL() fail later on.
library(RCurl)
## Loading required package: RCurl
## Warning: package 'RCurl' was built under R version 3.3.2
## Loading required package: bitops
# Download the raw comma-separated text of the data file.
l_data <- getURL(url)
# getwd()
# The file has no header row, so read.csv() names the columns V1..V23.
# stringsAsFactors is set explicitly: its default became FALSE in R 4.0.0,
# and the str() output recorded below shows factor columns.
x <- read.csv(
  text = l_data,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
head(x)
##   V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1  p  x  s  n  t  p  f  c  n   k   e   e   s   s   w   w   p   w   o   p
## 2  e  x  s  y  t  a  f  c  b   k   e   c   s   s   w   w   p   w   o   p
## 3  e  b  s  w  t  l  f  c  b   n   e   c   s   s   w   w   p   w   o   p
## 4  p  x  y  w  t  p  f  c  n   n   e   e   s   s   w   w   p   w   o   p
## 5  e  x  s  g  f  n  f  w  b   k   t   e   s   s   w   w   p   w   o   e
## 6  e  x  y  y  t  a  f  c  b   n   e   c   s   s   w   w   p   w   o   p
##   V21 V22 V23
## 1   k   s   u
## 2   n   n   g
## 3   n   n   m
## 4   k   s   u
## 5   n   a   g
## 6   k   n   g
str(x)
## 'data.frame':    8124 obs. of  23 variables:
##  $ V1 : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
##  $ V2 : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
##  $ V3 : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
##  $ V4 : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
##  $ V5 : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
##  $ V6 : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
##  $ V7 : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
##  $ V8 : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
##  $ V9 : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
##  $ V10: Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
##  $ V11: Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
##  $ V12: Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
##  $ V13: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ V14: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ V15: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ V16: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ V17: Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
##  $ V18: Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ V19: Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
##  $ V20: Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
##  $ V21: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
##  $ V22: Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
##  $ V23: Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
# View(x)

# Keep four columns of interest: class (V1), odor (V6), population (V22)
# and habitat (V23).
Y <- x[c("V1", "V6", "V22", "V23")]
class(Y)
## [1] "data.frame"
head(Y)
##   V1 V6 V22 V23
## 1  p  p   s   u
## 2  e  a   n   g
## 3  e  l   n   m
## 4  p  p   s   u
## 5  e  n   a   g
## 6  e  a   n   g
# Give the columns descriptive names; the new names are assigned by position,
# in the same order as the columns were selected above.
names(Y) <- c("Class", "Odor", "Population", "Habitat")
head(Y)
##   Class Odor Population Habitat
## 1     p    p          s       u
## 2     e    a          n       g
## 3     e    l          n       m
## 4     p    p          s       u
## 5     e    n          a       g
## 6     e    a          n       g
# reference url: http://rstudio-pubs-static.s3.amazonaws.com/92398_6df0ed45b14b4b599d7fa48fa6039b6b.html

# Translate single-letter codes into descriptive labels.
#
# codes:  factor or character vector of single-letter codes.
# labels: named character vector; names are the codes, values the labels.
#
# Returns a factor whose levels are exactly the labels, in the order given,
# so no single-letter level is left behind as an unused level. Stops with an
# error when a code has no entry in `labels`, rather than silently turning it
# into NA or leaving it untranslated.
recode_levels <- function(codes, labels) {
  codes <- as.character(codes)
  unknown <- setdiff(unique(codes[!is.na(codes)]), names(labels))
  if (length(unknown) > 0) {
    stop("Unmapped code(s): ", paste(unknown, collapse = ", "), call. = FALSE)
  }
  factor(unname(labels[codes]), levels = unname(labels))
}

Y$Class <- recode_levels(Y$Class, c(e = "edible", p = "poison"))

Y$Odor <- recode_levels(
  Y$Odor,
  c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  )
)

# The "c" -> "clustered" mapping was previously skipped because the lookup
# used a misspelled column name; the shared helper removes that risk.
Y$Population <- recode_levels(
  Y$Population,
  c(
    a = "abundant", c = "clustered", n = "numerous", s = "scattered",
    v = "several", y = "solitary"
  )
)

Y$Habitat <- recode_levels(
  Y$Habitat,
  c(
    g = "grasses", l = "leaves", m = "meadows", p = "paths", u = "urban",
    w = "waste", d = "woods"
  )
)

head(Y)
##    Class    Odor Population Habitat
## 1 poison pungent  scattered   urban
## 2 edible  almond   numerous grasses
## 3 edible   anise   numerous meadows
## 4 poison pungent  scattered   urban
## 5 edible    none   abundant grasses
## 6 edible  almond   numerous grasses