## Assignment 1
# Load the mushroom data, keep five columns, replace the one-letter codes with
# readable labels and plot how often each shape occurs.
# Lines starting with "##" are console output. They were re-derived by hand
# after the header fix below (one extra first row, default column names).
# NOTE(review): re-run the script to confirm them.

# Helpers ----

# Rename the levels of a factor through a lookup table.
#   x      : a factor; anything else is converted with as.factor() first.
#   labels : named character vector, names = current codes, values = new labels.
# Returns the relabelled factor. Codes that have no entry in `labels` stay as
# they are, and the order of the levels does not change.
relabel_levels <- function(x, labels) {
  x <- as.factor(x)
  codes <- levels(x)
  known <- codes %in% names(labels)
  codes[known] <- unname(labels[codes[known]])
  levels(x) <- codes
  x
}

# Load the data ----

# The file has no header row. Reading it with `header = TRUE` turned the first
# observation into column names ("p", "x", "s", "n", "t", "p.1", ...) and left
# the table one row short, so read it headerless; the columns are then named
# V1..V23.
# `stringsAsFactors = TRUE` is spelled out because the relabelling below works
# on factor levels and R >= 4.0 no longer converts strings to factors by
# default.
data_mushrooms <- read.table(
  "agaricus-lepiota.data",
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Inspect the data ----

# dimensions of the data set
dim(data_mushrooms)
## [1] 8124 23
# structure of every column
str(data_mushrooms)
## 'data.frame': 8124 obs. of 23 variables:
## $ V1 : Factor w/ 2 levels "e","p": 2 1 1 2 1 1 1 1 2 1 ...
## $ V2 : Factor w/ 6 levels "b","c","f","k",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ V3 : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
## $ V4 : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ V5 : Factor w/ 2 levels "f","t": 2 2 2 2 1 2 2 2 2 2 ...
## $ V6 : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...
## $ V7 : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
## $ V8 : Factor w/ 2 levels "c","w": 1 1 1 1 2 1 1 1 1 1 ...
## $ V9 : Factor w/ 2 levels "b","n": 2 1 1 2 1 1 1 1 2 1 ...
## $ V10: Factor w/ 12 levels "b","e","g","h",..: 5 5 6 6 5 6 3 6 8 3 ...
## $ V11: Factor w/ 2 levels "e","t": 1 1 1 1 2 1 1 1 1 1 ...
## $ V12: Factor w/ 5 levels "?","b","c","e",..: 4 3 3 4 4 3 3 3 4 3 ...
## $ V13: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V14: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V15: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ V16: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
## $ V17: Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
## $ V18: Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
## $ V19: Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
## $ V20: Factor w/ 5 levels "e","f","l","n",..: 5 5 5 5 1 5 5 5 5 5 ...
## $ V21: Factor w/ 9 levels "b","h","k","n",..: 3 4 4 3 4 3 3 4 3 3 ...
## $ V22: Factor w/ 6 levels "a","c","n","s",..: 4 3 3 4 1 3 3 4 5 4 ...
## $ V23: Factor w/ 7 levels "d","g","l","m",..: 6 2 4 6 2 2 4 4 2 4 ...
names(data_mushrooms)
## [1] "V1" "V2" "V3" "V4" "V5" "V6" "V7" "V8" "V9" "V10" "V11"
## [12] "V12" "V13" "V14" "V15" "V16" "V17" "V18" "V19" "V20" "V21" "V22"
## [23] "V23"
# first few rows
head(data_mushrooms)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23
## 1 p x s n t p f c n k e e s s w w p w o p k s u
## 2 e x s y t a f c b k e c s s w w p w o p n n g
## 3 e b s w t l f c b n e c s s w w p w o p n n m
## 4 p x y w t p f c n n e e s s w w p w o p k s u
## 5 e x s g f n f w b k t e s s w w p w o e n a g
## 6 e x y y t a f c b n e c s s w w p w o p k n g
# number of rows
nrow(data_mushrooms)
## [1] 8124

# Look for missing values ----

# No cell is NA, so na.omit() keeps every row ...
NROW(na.omit(data_mushrooms))
## [1] 8124
# ... but na.omit() only sees NA. This file also contains "?" entries (see the
# "?" level of V12 in the str() output above), presumably the marker for
# unknown values -- confirm against the data set description. Count them per
# column and show the columns that have any (output not recorded).
missing_marks <- colSums(data_mushrooms == "?")
missing_marks[missing_marks > 0]

# Build the working subset ----

# Keep columns 1-4 and 6. Indexing the data frame directly (instead of
# cbind()-ing a bare vector) avoids a column literally named
# "data_mushrooms[, 6]".
my_new_mushrooms <- data_mushrooms[, c(1:4, 6)]
head(my_new_mushrooms)
## V1 V2 V3 V4 V6
## 1 p x s n p
## 2 e x s y a
## 3 e b s w l
## 4 p x y w p
## 5 e x s g n
## 6 e x y y a
names(my_new_mushrooms)
## [1] "V1" "V2" "V3" "V4" "V6"
# give the columns descriptive names
colnames(my_new_mushrooms) <- c(
  "edibleORpoisonous", "shape", "surface", "color", "odor"
)
head(my_new_mushrooms)
## edibleORpoisonous shape surface color odor
## 1 p x s n p
## 2 e x s y a
## 3 e b s w l
## 4 p x y w p
## 5 e x s g n
## 6 e x y y a

# Replace the one-letter codes with readable labels ----

# edibleORpoisonous
my_new_mushrooms$edibleORpoisonous <- relabel_levels(
  my_new_mushrooms$edibleORpoisonous,
  c(p = "poisonous", e = "edible")
)
head(my_new_mushrooms)
## edibleORpoisonous shape surface color odor
## 1 poisonous x s n p
## 2 edible x s y a
## 3 edible b s w l
## 4 poisonous x y w p
## 5 edible x s g n
## 6 edible x y y a

# shape
my_new_mushrooms$shape <- relabel_levels(
  my_new_mushrooms$shape,
  c(
    b = "bell", c = "conical", x = "convex",
    f = "flat", k = "knobbed", s = "sunken"
  )
)
head(my_new_mushrooms)
## edibleORpoisonous shape surface color odor
## 1 poisonous convex s n p
## 2 edible convex s y a
## 3 edible bell s w l
## 4 poisonous convex y w p
## 5 edible convex s g n
## 6 edible convex y y a
str(my_new_mushrooms)
## 'data.frame': 8124 obs. of 5 variables:
## $ edibleORpoisonous: Factor w/ 2 levels "edible","poisonous": 2 1 1 2 1 1 1 1 2 1 ...
## $ shape : Factor w/ 6 levels "bell","conical",..: 6 6 1 6 6 6 1 1 6 1 ...
## $ surface : Factor w/ 4 levels "f","g","s","y": 3 3 3 4 3 4 3 4 4 3 ...
## $ color : Factor w/ 10 levels "b","c","e","g",..: 5 10 9 9 4 10 9 9 9 10 ...
## $ odor : Factor w/ 9 levels "a","c","f","l",..: 7 1 4 7 6 1 1 4 7 1 ...

# surface
my_new_mushrooms$surface <- relabel_levels(
  my_new_mushrooms$surface,
  c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth")
)

# color
my_new_mushrooms$color <- relabel_levels(
  my_new_mushrooms$color,
  c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)

# odor
my_new_mushrooms$odor <- relabel_levels(
  my_new_mushrooms$odor,
  c(
    a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
    m = "musty", n = "none", p = "pungent", s = "spicy"
  )
)

# check the relabelled subset
head(my_new_mushrooms)
## edibleORpoisonous shape surface color odor
## 1 poisonous convex smooth brown pungent
## 2 edible convex smooth yellow almond
## 3 edible bell smooth white anise
## 4 poisonous convex scaly white pungent
## 5 edible convex smooth gray none
## 6 edible convex scaly yellow almond

# Plot ----

# shape: plot() on a factor draws a bar chart of the count per level
plot(my_new_mushrooms$shape)