# Echo the source code alongside its output in every knitted chunk.
knitr::opts_chunk$set(echo = TRUE)
# Install RCurl only when it is missing, so re-running the script does not
# download the package again each time. The mirror is addressed over HTTPS
# rather than plain HTTP.
if (!requireNamespace("RCurl", quietly = TRUE)) {
  install.packages("RCurl", repos = "https://cloud.r-project.org")
}
## package 'RCurl' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\jenny_000\AppData\Local\Temp\RtmpQPtlrc\downloaded_packages
# Location of the raw mushroom (agaricus-lepiota) data file.
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
# library() stops with an error when RCurl is unavailable, whereas require()
# only returns FALSE with a warning and lets the script fail later on.
library(RCurl)
## Loading required package: RCurl
## Warning: package 'RCurl' was built under R version 3.3.2
## Loading required package: bitops
# Download the raw comma-separated text of the data set.
l_data <- RCurl::getURL(url)
# The file has no header row, so read.csv() assigns the default V* names.
# stringsAsFactors = TRUE is spelled out because the default became FALSE in
# R 4.0.0, while the recoding further down operates on factor levels.
x <- read.csv(
  text = l_data,
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)
head(x)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
# Print the structure of x: its dimensions plus each column's type and
# leading values.
str(x)
## 'data.frame': 8124 obs. of 23 variables:
## $ V1 : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
## $ V2 : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ V3 : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
## $ V4 : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ V5 : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
## $ V6 : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ V7 : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
## $ V8 : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
## $ V9 : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
## $ V10: Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
## $ V11: Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
## $ V12: Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
## $ V13: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V14: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V15: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ V16: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ V17: Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
## $ V18: Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V19: Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
## $ V20: Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
## $ V21: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
## $ V22: Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
## $ V23: Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
# View(x)
# Keep only the columns of interest: class (V1), odor (V6),
# population (V22) and habitat (V23).
Y <- x[, c("V1", "V6", "V22", "V23")]
class(Y)
## [1] "data.frame"
head(Y)
## V1 V6 V22 V23
## 1 p p s u
## 2 e a n g
## 3 e l n m
## 4 p p s u
## 5 e n a g
## 6 e a n g
# Swap the positional V* names for descriptive ones, in the same column order.
names(Y) <- c("Class", "Odor", "Population", "Habitat")
head(Y)
## Class Odor Population Habitat
## 1 p p s u
## 2 e a n g
## 3 e l n m
## 4 p p s u
## 5 e n a g
## 6 e a n g
# reference url: http://rstudio-pubs-static.s3.amazonaws.com/92398_6df0ed45b14b4b599d7fa48fa6039b6b.html
# Recode the single-letter class codes as descriptive labels. Building the
# factor from an explicit code -> label map leaves only the new labels as
# levels (no stale "e"/"p" levels) and works for factor or character input.
# Any code missing from the map would become NA.
class_labels <- c(e = "edible", p = "poison")
Y$Class <- factor(
  Y$Class,
  levels = names(class_labels),
  labels = unname(class_labels)
)
# Recode the single-letter odor codes as descriptive labels. Building the
# factor from an explicit code -> label map leaves only the new labels as
# levels (the one-letter codes do not linger as empty levels) and works for
# factor or character input. Any code missing from the map would become NA.
odor_labels <- c(
  a = "almond",
  l = "anise",
  c = "creosote",
  y = "fishy",
  f = "foul",
  m = "musty",
  n = "none",
  p = "pungent",
  s = "spicy"
)
Y$Odor <- factor(
  Y$Odor,
  levels = names(odor_labels),
  labels = unname(odor_labels)
)
# Recode the single-letter population codes as descriptive labels.
# The previous step-by-step version compared against a misspelled column
# (`Populationr`), which is NULL, so code "c" was never turned into
# "clustered". Recoding through one explicit code -> label map handles every
# code in a single step and leaves only the new labels as factor levels.
# Any code missing from the map would become NA.
population_labels <- c(
  a = "abundant",
  c = "clustered",
  n = "numerous",
  s = "scattered",
  v = "several",
  y = "solitary"
)
Y$Population <- factor(
  Y$Population,
  levels = names(population_labels),
  labels = unname(population_labels)
)
# Recode the single-letter habitat codes as descriptive labels. Building the
# factor from an explicit code -> label map leaves only the new labels as
# levels (the one-letter codes do not linger as empty levels) and works for
# factor or character input. Any code missing from the map would become NA.
habitat_labels <- c(
  g = "grasses",
  l = "leaves",
  m = "meadows",
  p = "paths",
  u = "urban",
  w = "waste",
  d = "woods"
)
Y$Habitat <- factor(
  Y$Habitat,
  levels = names(habitat_labels),
  labels = unname(habitat_labels)
)
head(Y)
## Class Odor Population Habitat
## 1 poison pungent scattered urban
## 2 edible almond numerous grasses
## 3 edible anise numerous meadows
## 4 poison pungent scattered urban
## 5 edible none abundant grasses
## 6 edible almond numerous grasses