##Assignment1

### Loading the data and printing the first few lines

# Load the mushroom data set into a data frame.
# The data file has no header row: with header = TRUE the first observation
# was consumed as column names ("p", "x", "s", ...) and only 8123 rows were
# left. header = FALSE keeps it as data; columns are then named V1..V23.
# stringsAsFactors = TRUE is spelled out because R >= 4.0 reads text columns
# as character by default, and the levels() recoding below needs factors.
# NOTE: console output recorded further down in this file was captured with
# the old header = TRUE call, so it lacks that first observation.
data_mushrooms <- read.table(
  "agaricus-lepiota.data",
  header = FALSE,
  sep = ",",
  stringsAsFactors = TRUE
)

# What dimension is our dataset? (expected: 8124 rows, 23 columns)
dim(data_mushrooms)

# Describe our dataset: every column is a factor of one-letter codes
str(data_mushrooms)

# Column names
names(data_mushrooms)

# Read the first few lines of data
head(data_mushrooms)

### Let's do some counting

# Count the number of rows
nrow(data_mushrooms)

# Looking for missing values: rows left after dropping every row with an NA.
# na.omit() only looks at NA, so the "?" level present in column 12 is not
# counted as missing by this check.
NROW(na.omit(data_mushrooms))

### ****** All rows have data (no NA values). ******

### Subsetting the columns

# Keep columns 1-4 and 6. Indexing the data frame directly (instead of
# cbind() on pieces) avoids the awkward "data_mushrooms[, 6]" column name.
my_new_mushrooms <- data_mushrooms[, c(1:4, 6)]
head(my_new_mushrooms)

# Reading the names
names(my_new_mushrooms)

### Renaming columns

# Let's rename all those column names
colnames(my_new_mushrooms) <- c(
  "edibleORpoisonous", "shape", "surface", "color", "odor"
)

# Let's read a few rows of our subset now
head(my_new_mushrooms)

### Let's change some values

# Change the values of edibleORpoisonous to something readable.
# Each one-letter level code is replaced by its full label; codes that are
# not present among the levels are simply skipped.
class_labels <- c(p = "poisonous", e = "edible")
for (code in names(class_labels)) {
  is_code <- levels(my_new_mushrooms$edibleORpoisonous) == code
  levels(my_new_mushrooms$edibleORpoisonous)[is_code] <- class_labels[[code]]
}

# Let's read now
head(my_new_mushrooms)

# Rename the values of the shape variable the same way
shape_labels <- c(
  b = "bell", c = "conical", x = "convex",
  f = "flat", k = "knobbed", s = "sunken"
)
for (code in names(shape_labels)) {
  is_code <- levels(my_new_mushrooms$shape) == code
  levels(my_new_mushrooms$shape)[is_code] <- shape_labels[[code]]
}

head(my_new_mushrooms)
str(my_new_mushrooms)

### Let’s rename the rest

# Renaming surface: replace each one-letter level code with its full label.
# The lookup is walked in order, one code at a time; a code that is not among
# the current levels leaves the factor untouched.
surface_labels <- c(f = "fibrous", g = "grooves", y = "scaly", s = "smooth")
for (code in names(surface_labels)) {
  is_code <- levels(my_new_mushrooms$surface) == code
  levels(my_new_mushrooms$surface)[is_code] <- surface_labels[[code]]
}

# Renaming color: replace each one-letter level code with its full label.
# The lookup is walked in order, one code at a time; a code that is not among
# the current levels leaves the factor untouched.
color_labels <- c(
  n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
  p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
)
for (code in names(color_labels)) {
  is_code <- levels(my_new_mushrooms$color) == code
  levels(my_new_mushrooms$color)[is_code] <- color_labels[[code]]
}

# Renaming odor: replace each one-letter level code with its full label.
# The lookup is walked in order, one code at a time; a code that is not among
# the current levels leaves the factor untouched.
odor_labels <- c(
  a = "almond", l = "anise", c = "creosote", y = "fishy", f = "foul",
  m = "musty", n = "none", p = "pungent", s = "spicy"
)
for (code in names(odor_labels)) {
  is_code <- levels(my_new_mushrooms$odor) == code
  levels(my_new_mushrooms$odor)[is_code] <- odor_labels[[code]]
}

# Preview the first six rows now that every column carries readable labels.
# The lines starting with "##" are console output recorded from the author's
# original run.
head(my_new_mushrooms, n = 6L)
##   edibleORpoisonous  shape surface  color    odor
## 1            edible convex  smooth yellow  almond
## 2            edible   bell  smooth  white   anise
## 3         poisonous convex   scaly  white pungent
## 4            edible convex  smooth   gray    none
## 5            edible convex   scaly yellow  almond
## 6            edible   bell  smooth  white  almond

### Let’s do the plotting

# shape: plotting a factor draws a bar chart of the count in each level.
# The original "\$" was a markdown escape that is not valid R and stopped the
# file from parsing; the column is extracted with a plain "$".
plot(my_new_mushrooms$shape)