suppressMessages(library(dplyr))
suppressMessages(library(tidyr))
suppressMessages(library(DT))
Week 1 Assignment - Loading Data into a Data Frame
# Location of the UCI mushroom (agaricus-lepiota) data file
UCIurl <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"
# Load the complete data set: comma-separated, no header row, and strings
# are left as character vectors rather than converted to factors
mashroom.FullSet <- read.table(
  UCIurl,
  sep = ",",
  header = FALSE,
  stringsAsFactors = FALSE
)
# Preview the first 10 records of the raw data
head(mashroom.FullSet, n = 10)
## V1 V2 V3 V4 V5 V6 V7 V8 V9 V10 V11 V12 V13 V14 V15 V16 V17 V18 V19 V20
## 1 p x s n t p f c n k e e s s w w p w o p
## 2 e x s y t a f c b k e c s s w w p w o p
## 3 e b s w t l f c b n e c s s w w p w o p
## 4 p x y w t p f c n n e e s s w w p w o p
## 5 e x s g f n f w b k t e s s w w p w o e
## 6 e x y y t a f c b n e c s s w w p w o p
## 7 e b s w t a f c b g e c s s w w p w o p
## 8 e b y w t l f c b n e c s s w w p w o p
## 9 p x y w t p f c n p e e s s w w p w o p
## 10 e b s y t a f c b g e c s s w w p w o p
## V21 V22 V23
## 1 k s u
## 2 n n g
## 3 n n m
## 4 k s u
## 5 n a g
## 6 k n g
## 7 k n m
## 8 n s m
## 9 k v g
## 10 k s m
# Replace the default V1..V23 headers with user-friendly attribute names.
# Most of these names are non-syntactic (they contain "-" or "?"), so later
# code has to quote them with backticks or access them via [[ ]].
attribute_names <- c(
  "class", "cap-shape", "cap-surface", "cap-color", "bruises?", "odor",
  "gill-attachment", "gill-spacing", "gill-size", "gill-color",
  "stalk-shape", "stalk-root", "stalk-surface-above-ring",
  "stalk-surface-below-ring", "stalk-color-above-ring",
  "stalk-color-below-ring", "veil-type", "veil-color", "ring-number",
  "ring-type", "spore-print-color", "population", "habitat"
)
# Fail loudly if the loaded data does not have one column per attribute name,
# instead of silently producing NA or misaligned headers.
stopifnot(length(attribute_names) == ncol(mashroom.FullSet))
names(mashroom.FullSet) <- attribute_names
# tbl_df() is deprecated in dplyr; as_tibble() is the supported conversion.
mashroom.FullSet <- tibble::as_tibble(mashroom.FullSet)
head(mashroom.FullSet, 10)
## # A tibble: 10 x 23
## class `cap-shape` `cap-surface` `cap-color` `bruises?` odor
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 p x s n t p
## 2 e x s y t a
## 3 e b s w t l
## 4 p x y w t p
## 5 e x s g f n
## 6 e x y y t a
## 7 e b s w t a
## 8 e b y w t l
## 9 p x y w t p
## 10 e b s y t a
## # ... with 17 more variables: `gill-attachment` <chr>,
## # `gill-spacing` <chr>, `gill-size` <chr>, `gill-color` <chr>,
## # `stalk-shape` <chr>, `stalk-root` <chr>,
## # `stalk-surface-above-ring` <chr>, `stalk-surface-below-ring` <chr>,
## # `stalk-color-above-ring` <chr>, `stalk-color-below-ring` <chr>,
## # `veil-type` <chr>, `veil-color` <chr>, `ring-number` <chr>,
## # `ring-type` <chr>, `spore-print-color` <chr>, population <chr>,
## # habitat <chr>
# Change the codes in the attributes to more meaningful names.
#
# Assigning a short named vector directly to a column (the previous approach)
# recycles the labels down the rows regardless of the code stored in each row.
# Here every code is instead looked up in its dictionary, so each row gets the
# label that matches its own code. Dictionaries follow the UCI attribute
# description for the mushroom data set.

# Translate a vector of single-letter codes through a named lookup vector.
#   codes  - vector of codes (coerced to character)
#   lookup - named character vector: names are codes, values are labels
# Returns a character vector the same length as `codes`. Codes that are not in
# the lookup are kept unchanged rather than turned into NA, which also makes
# re-running this block on already-decoded columns harmless.
decode_attribute <- function(codes, lookup) {
  codes <- as.character(codes)
  labels <- unname(lookup[codes])
  unmatched <- is.na(labels)
  labels[unmatched] <- codes[unmatched]
  labels
}

# One entry per decoded column: the entry name is the column that receives the
# labels, `source` is the column holding the raw codes. Columns with
# non-syntactic names keep their raw codes and get a decoded underscore twin.
attribute_codes <- list(
  class = list(
    source = "class",
    labels = c(p = "Poisonous", e = "Edible")
  ),
  cap_shape = list(
    source = "cap-shape",
    labels = c(b = "bell", c = "Conical", x = "Convex", f = "flat",
               k = "knobbed", s = "sunken")
  ),
  cap_surface = list(
    source = "cap-surface",
    labels = c(f = "Fibrous", g = "grooves", y = "scaly", s = "smooth")
  ),
  cap_color = list(
    source = "cap-color",
    labels = c(n = "brown", b = "buff", c = "cinnamon", g = "gray",
               r = "green", p = "pink", u = "purple", e = "red",
               w = "white", y = "yellow")
  ),
  bruises = list(
    source = "bruises?",
    labels = c(t = "bruises", f = "no")
  ),
  odor = list(
    source = "odor",
    labels = c(a = "almond", l = "anise", c = "creosote", y = "fishy",
               f = "foul", m = "musty", n = "none", p = "pungent",
               s = "spicy")
  ),
  gill_attachment = list(
    source = "gill-attachment",
    labels = c(a = "attached", d = "descending", f = "free", n = "notched")
  ),
  veil_type = list(
    source = "veil-type",
    labels = c(p = "partial", u = "universal")
  ),
  # ring-number is coded n/o/t in the data set (none/one/two)
  ring_number = list(
    source = "ring-number",
    labels = c(n = "none", o = "one", t = "two")
  ),
  population = list(
    source = "population",
    labels = c(a = "abundant", c = "clustered", n = "numerous",
               s = "scattered", v = "several", y = "solitary")
  ),
  habitat = list(
    source = "habitat",
    labels = c(g = "grasses", l = "leaves", m = "meadows", p = "paths",
               u = "urban", w = "waste", d = "woods")
  )
)

for (target in names(attribute_codes)) {
  spec <- attribute_codes[[target]]
  mashroom.FullSet[[target]] <- decode_attribute(
    mashroom.FullSet[[spec$source]],
    spec$labels
  )
}
# Data sample (subsetting)
# Keep only the rows whose veil type is "partial" and a selection of columns.
# Each column is listed once: repeating "odor" produced a tibble with two
# columns of the same name.
subset_columns <- c(
  "class", "cap_shape", "odor", "cap-surface", "cap-color", "bruises",
  "gill_attachment", "veil_type", "ring_number"
)
# %in% treats NA as "no match", the same way subset() drops NA conditions
partial_veil <- mashroom.FullSet$veil_type %in% "partial"
mashroom.SubSet <- mashroom.FullSet[partial_veil, subset_columns]
mashroom.SubSet
## # A tibble: 4,062 x 10
## class cap_shape odor `cap-surface` `cap-color` bruises odor
## <chr> <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Poin~ bell spicy s n bruises spicy
## 2 Poin~ Convex spicy s w bruises spicy
## 3 Poin~ knobbed spicy s g bruises spicy
## 4 Poin~ bell spicy s w bruises spicy
## 5 Poin~ Convex spicy y w bruises spicy
## 6 Poin~ knobbed spicy y y bruises spicy
## 7 Poin~ bell spicy s y bruises spicy
## 8 Poin~ Convex spicy f n bruises spicy
## 9 Poin~ knobbed spicy f w bruises spicy
## 10 Poin~ bell spicy y w bruises spicy
## # ... with 4,052 more rows, and 3 more variables: gill_attachment <chr>,
## # veil_type <chr>, ring_number <chr>