##Assignment1

Loading the data and printing the first few lines

# Load the mushroom dataset into a data frame.
#
# header = FALSE: the raw file has no header row. Reading it with
# header = TRUE consumes the first record as column names, which silently
# drops one observation and names the columns after that record's values.
#
# stringsAsFactors = TRUE: since R 4.0 strings are no longer converted to
# factors by default, and the level-renaming steps further down rely on the
# columns being factors.
#
# NOTE: the recorded output in this document was produced with
# header = TRUE, which is why it reports 8123 rows and column names such as
# "p", "x", "s". After re-running, expect one more row and names V1..V23.
# The later steps select columns by position, so they are unaffected.
data_mushrooms <- read.table(
  "agaricus-lepiota.data",
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)

# Report the dimensions of the dataset (number of rows, number of columns).
dim(data_mushrooms)
## [1] 8123   23
# Show the structure of the dataset: each column's type, its factor levels
# and the first few values.
str(data_mushrooms)
## 'data.frame':    8123 obs. of  23 variables:
##  $ p  : Factor w/ 2 levels "e","p": 1 1 2 1 1 1 1 2 1 1 ...
##  $ x  : Factor w/ 6 levels "b","c","f","k",..: 6 1 6 6 6 1 1 6 1 6 ...
##  $ s  : Factor w/ 4 levels "f","g","s","y": 3 3 4 3 4 3 4 4 3 4 ...
##  $ n  : Factor w/ 10 levels "b","c","e","g",..: 10 9 9 4 10 9 9 9 10 10 ...
##  $ t  : Factor w/ 2 levels "f","t": 2 2 2 1 2 2 2 2 2 2 ...
##  $ p.1: Factor w/ 9 levels "a","c","f","l",..: 1 4 7 6 1 1 4 7 1 4 ...
##  $ f  : Factor w/ 2 levels "a","f": 2 2 2 2 2 2 2 2 2 2 ...
##  $ c  : Factor w/ 2 levels "c","w": 1 1 1 2 1 1 1 1 1 1 ...
##  $ n.1: Factor w/ 2 levels "b","n": 1 1 2 1 1 1 1 2 1 1 ...
##  $ k  : Factor w/ 12 levels "b","e","g","h",..: 5 6 6 5 6 3 6 8 3 3 ...
##  $ e  : Factor w/ 2 levels "e","t": 1 1 1 2 1 1 1 1 1 1 ...
##  $ e.1: Factor w/ 5 levels "?","b","c","e",..: 3 3 4 4 3 3 3 4 3 3 ...
##  $ s.1: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ s.2: Factor w/ 4 levels "f","k","s","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ w  : Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ w.1: Factor w/ 9 levels "b","c","e","g",..: 8 8 8 8 8 8 8 8 8 8 ...
##  $ p.2: Factor w/ 1 level "p": 1 1 1 1 1 1 1 1 1 1 ...
##  $ w.2: Factor w/ 4 levels "n","o","w","y": 3 3 3 3 3 3 3 3 3 3 ...
##  $ o  : Factor w/ 3 levels "n","o","t": 2 2 2 2 2 2 2 2 2 2 ...
##  $ p.3: Factor w/ 5 levels "e","f","l","n",..: 5 5 5 1 5 5 5 5 5 5 ...
##  $ k.1: Factor w/ 9 levels "b","h","k","n",..: 4 4 3 4 3 3 4 3 3 4 ...
##  $ s.3: Factor w/ 6 levels "a","c","n","s",..: 3 3 4 1 3 3 4 5 4 3 ...
##  $ u  : Factor w/ 7 levels "d","g","l","m",..: 2 4 6 2 2 4 4 2 4 2 ...
# List the column names of the dataset.
names(data_mushrooms)
##  [1] "p"   "x"   "s"   "n"   "t"   "p.1" "f"   "c"   "n.1" "k"   "e"  
## [12] "e.1" "s.1" "s.2" "w"   "w.1" "p.2" "w.2" "o"   "p.3" "k.1" "s.3"
## [23] "u"
# Print the first six rows of the dataset.
head(data_mushrooms)
##   p x s n t p.1 f c n.1 k e e.1 s.1 s.2 w w.1 p.2 w.2 o p.3 k.1 s.3 u
## 1 e x s y t   a f c   b k e   c   s   s w   w   p   w o   p   n   n g
## 2 e b s w t   l f c   b n e   c   s   s w   w   p   w o   p   n   n m
## 3 p x y w t   p f c   n n e   e   s   s w   w   p   w o   p   k   s u
## 4 e x s g f   n f w   b k t   e   s   s w   w   p   w o   e   n   a g
## 5 e x y y t   a f c   b n e   c   s   s w   w   p   w o   p   k   n g
## 6 e b s w t   a f c   b g e   c   s   s w   w   p   w o   p   k   n m

Let’s do some counting

# Count the number of rows in the dataset.
nrow(data_mushrooms)
## [1] 8123
# Count the rows that have no missing values.
#
# na.omit() only drops rows containing NA, but this dataset has a "?" level
# (see column 12 in the str() output above) that appears to mark missing
# entries. Such cells are treated as missing here as well; otherwise the
# check can never find anything.
#
# NOTE: the recorded output that follows came from the original NA-only
# check, NROW(na.omit(data_mushrooms)).
mushroom_cells <- as.matrix(data_mushrooms)
sum(rowSums(is.na(mushroom_cells) | mushroom_cells == "?") == 0)
## [1] 8123

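****** No row contains an NA value. Note, however, that the e.1 column has a "?" level, which appears to mark missing entries and is not detected by an NA check. ******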

Selecting a subset of the columns

# Build the working subset: columns 1 through 4 plus column 6.
# The fifth column keeps the auto-generated name "data_mushrooms[, 6]";
# check.names = FALSE stops R from rewriting it into a syntactic name.
my_new_mushrooms <- data.frame(
  data_mushrooms[, 1:4],
  `data_mushrooms[, 6]` = data_mushrooms[, 6],
  check.names = FALSE
)

# Preview the first six rows of the subset.
head(my_new_mushrooms, n = 6L)
##   p x s n data_mushrooms[, 6]
## 1 e x s y                   a
## 2 e b s w                   l
## 3 p x y w                   p
## 4 e x s g                   n
## 5 e x y y                   a
## 6 e b s w                   a
# List the column names of the subset.
names(my_new_mushrooms)
## [1] "p"                   "x"                   "s"                  
## [4] "n"                   "data_mushrooms[, 6]"

Renaming columns

# Assign descriptive names to the five selected columns.
names(my_new_mushrooms) <- c(
  "edibleORpoisonous", "shape", "surface", "color", "odor"
)

# Preview the first six rows under the new column names.
head(my_new_mushrooms, n = 6L)
##   edibleORpoisonous shape surface color odor
## 1                 e     x       s     y    a
## 2                 e     b       s     w    l
## 3                 p     x       y     w    p
## 4                 e     x       s     g    n
## 5                 e     x       y     y    a
## 6                 e     b       s     w    a

Let’s change some values

# Replace the single-letter class codes with readable labels.
# Only levels that appear in the lookup are touched; the level order and the
# underlying factor codes stay as they are.
class_labels <- c(p = "poisonous", e = "edible")
class_codes <- levels(my_new_mushrooms$edibleORpoisonous)
class_known <- class_codes %in% names(class_labels)
levels(my_new_mushrooms$edibleORpoisonous)[class_known] <-
  unname(class_labels[class_codes[class_known]])

# Preview the first six rows with the relabelled class column.
head(my_new_mushrooms, n = 6L)
##   edibleORpoisonous shape surface color odor
## 1            edible     x       s     y    a
## 2            edible     b       s     w    l
## 3         poisonous     x       y     w    p
## 4            edible     x       s     g    n
## 5            edible     x       y     y    a
## 6            edible     b       s     w    a
# Replace the single-letter shape codes with readable labels.
# Only levels that appear in the lookup are touched; the level order and the
# underlying factor codes stay as they are.
shape_labels <- c(
  b = "bell",
  c = "conical",
  x = "convex",
  f = "flat",
  k = "knobbed",
  s = "sunken"
)
shape_codes <- levels(my_new_mushrooms$shape)
shape_known <- shape_codes %in% names(shape_labels)
levels(my_new_mushrooms$shape)[shape_known] <-
  unname(shape_labels[shape_codes[shape_known]])

# Preview the first six rows with the relabelled shape column.
head(my_new_mushrooms, n = 6L)
##   edibleORpoisonous  shape surface color odor
## 1            edible convex       s     y    a
## 2            edible   bell       s     w    l
## 3         poisonous convex       y     w    p
## 4            edible convex       s     g    n
## 5            edible convex       y     y    a
## 6            edible   bell       s     w    a
# Show the structure of the subset to check the factor levels after relabelling.
str(my_new_mushrooms)
## 'data.frame':    8123 obs. of  5 variables:
##  $ edibleORpoisonous: Factor w/ 2 levels "edible","poisonous": 1 1 2 1 1 1 1 2 1 1 ...
##  $ shape            : Factor w/ 6 levels "bell","conical",..: 6 1 6 6 6 1 1 6 1 6 ...
##  $ surface          : Factor w/ 4 levels "f","g","s","y": 3 3 4 3 4 3 4 4 3 4 ...
##  $ color            : Factor w/ 10 levels "b","c","e","g",..: 10 9 9 4 10 9 9 9 10 10 ...
##  $ odor             : Factor w/ 9 levels "a","c","f","l",..: 1 4 7 6 1 1 4 7 1 4 ...

Let’s rename the rest

# Readable labels for the remaining coded columns, keyed by column name and
# then by the single-letter code found in the data.
level_labels <- list(
  surface = c(
    f = "fibrous",
    g = "grooves",
    y = "scaly",
    s = "smooth"
  ),
  color = c(
    n = "brown",
    b = "buff",
    c = "cinnamon",
    g = "gray",
    r = "green",
    p = "pink",
    u = "purple",
    e = "red",
    w = "white",
    y = "yellow"
  ),
  odor = c(
    a = "almond",
    l = "anise",
    c = "creosote",
    y = "fishy",
    f = "foul",
    m = "musty",
    n = "none",
    p = "pungent",
    s = "spicy"
  )
)

# Relabel each column in turn. Only levels that appear in the lookup are
# touched; the level order and the underlying factor codes stay as they are.
for (column in names(level_labels)) {
  column_labels <- level_labels[[column]]
  column_codes <- levels(my_new_mushrooms[[column]])
  column_known <- column_codes %in% names(column_labels)
  levels(my_new_mushrooms[[column]])[column_known] <-
    unname(column_labels[column_codes[column_known]])
}

# Preview the first six rows with every column relabelled.
head(my_new_mushrooms, n = 6L)
##   edibleORpoisonous  shape surface  color    odor
## 1            edible convex  smooth yellow  almond
## 2            edible   bell  smooth  white   anise
## 3         poisonous convex   scaly  white pungent
## 4            edible convex  smooth   gray    none
## 5            edible convex   scaly yellow  almond
## 6            edible   bell  smooth  white  almond

Let’s do the plotting

# Bar chart of how many mushrooms fall into each cap-shape level.
plot(my_new_mushrooms[["shape"]])