library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the UCI mushroom data set (agaricus-lepiota.data).
#
# The file has no header row, so header = FALSE is required: with the default
# header = TRUE the first record was consumed as column names (p, x, s, n, ...)
# and the data frame came out one row short (8123 rows instead of 8124).
# stringsAsFactors = TRUE keeps every column a factor on R >= 4.0 as well, so
# code that works with factor levels behaves the same on all R versions.
#
# NOTE: rendered output elsewhere in this document that was produced with the
# old call is off by one record until the document is re-knit.
df <- read.csv(
  url("https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"),
  header = FALSE,
  stringsAsFactors = TRUE
)
head(df)
## (expected preview after the fix; exact column wrapping may differ)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20 V21 V22 V23
## 1 p x s n t p f c n k e e s s w w p w o p k s u
## 2 e x s y t a f c b k e c s s w w p w o p n n g
## 3 e b s w t l f c b n e c s s w w p w o p n n m
## 4 p x y w t p f c n n e e s s w w p w o p k s u
## 5 e x s g f n f w b k t e s s w w p w o e n a g
## 6 e x y y t a f c b n e c s s w w p w o p k n g
# Give the 23 columns descriptive attribute names, in file order. Most names
# contain hyphens, so they must be quoted with backticks when used with `$`.
names(df) <- c(
  "class",
  "cap-shape",
  "cap-surface",
  "cap-color",
  "bruise",
  "odor",
  "gill-attachment",
  "gill-spacing",
  "gill-size",
  "gill-color",
  "stalk-shape",
  "stalk-root",
  "stalk-surface-above-ring",
  "stalk-surface-below-ring",
  "stalk-color-above-ring",
  "stalk-color-below-ring",
  "veil-type",
  "veil-color",
  "ring-number",
  "ring-type",
  "spore-print-color",
  "population",
  "habitat"
)
head(df)
## class cap-shape cap-surface cap-color bruise odor gill-attachment
## 1 e x s y t a f
## 2 e b s w t l f
## 3 p x y w t p f
## 4 e x s g f n f
## 5 e x y y t a f
## 6 e b s w t a f
## gill-spacing gill-size gill-color stalk-shape stalk-root
## 1 c b k e c
## 2 c b n e c
## 3 c n n e e
## 4 w b k t e
## 5 c b n e c
## 6 c b g e c
## stalk-surface-above-ring stalk-surface-below-ring stalk-color-above-ring
## 1 s s w
## 2 s s w
## 3 s s w
## 4 s s w
## 5 s s w
## 6 s s w
## stalk-color-below-ring veil-type veil-color ring-number ring-type
## 1 w p w o p
## 2 w p w o p
## 3 w p w o p
## 4 w p w o e
## 5 w p w o p
## 6 w p w o p
## spore-print-color population habitat
## 1 n n g
## 2 n n m
## 3 k s u
## 4 n a g
## 5 k n g
## 6 k n m

# Report the data frame's dimensions (columns first, then rows).
print(paste("The number of columns is equal to ", dim(df)[2L]))
## [1] "The number of columns is equal to 23"
print(paste("The number of rows is equal to ", dim(df)[1L]))
## [1] "The number of rows is equal to 8123"
#' Replace one-letter attribute codes with readable labels.
#'
#' @param codes A factor or character vector of one-letter codes.
#' @param mapping A named character vector: names are the codes, values are
#'   the labels. The order of `mapping` becomes the order of the levels.
#' @return A factor the same length as `codes` whose levels are the labels.
#'   Stops with an error if `codes` contains a value missing from `mapping`
#'   (NA values are passed through).
label_codes <- function(codes, mapping) {
  unknown <- setdiff(unique(as.character(codes)), c(names(mapping), NA))
  if (length(unknown) > 0) {
    stop("Unexpected code(s): ", paste(unknown, collapse = ", "), call. = FALSE)
  }
  factor(codes, levels = names(mapping), labels = unname(mapping))
}

# Keep the class plus four descriptive attributes (columns 1, 2, 4, 22, 23).
df1 <- select(df, c(1, 2, 4, 22, 23))

# Each code is paired with its label explicitly. Assigning a label vector with
# `levels<-` matches labels to the alphabetically sorted codes by position,
# which mislabelled cap-color (sorted codes are b, c, e, g, n, p, r, u, w, y,
# so "b" became "brown", "n" became "green", and so on). An explicit mapping
# also works when the columns are character vectors rather than factors.
df1$class <- label_codes(
  df1$class,
  c(e = "edible", p = "poisonous")
)
df1$`cap-shape` <- label_codes(
  df1$`cap-shape`,
  c(
    b = "bell", c = "conical", f = "flat",
    k = "knobbed", s = "sunken", x = "convex"
  )
)
df1$`cap-color` <- label_codes(
  df1$`cap-color`,
  c(
    n = "brown", b = "buff", c = "cinnamon", g = "gray", r = "green",
    p = "pink", u = "purple", e = "red", w = "white", y = "yellow"
  )
)
df1$population <- label_codes(
  df1$population,
  c(
    a = "abundant", c = "clustered", n = "numerous",
    s = "scattered", v = "several", y = "solitary"
  )
)
df1$habitat <- label_codes(
  df1$habitat,
  c(
    d = "woods", g = "grasses", l = "leaves", m = "meadows",
    p = "paths", u = "urban", w = "waste"
  )
)
head(df1)
## class cap-shape cap-color population habitat
## 1 edible convex yellow numerous grasses
## 2 edible bell white numerous meadows
## 3 poisonous convex white scattered urban
## 4 edible convex gray abundant grasses
## 5 edible convex yellow numerous grasses
## 6 edible bell white numerous meadows