This assignment for the DATA 607 course in the CUNY School of Professional Studies MS in Data Science program transforms semi-structured data into structured data using R methods and functions:
# Keep the first four mushroom attributes and give them descriptive names.
df <- mushroom
df1 <- df[c("X1", "X2", "X3", "X4")]

# Rename with base R instead of rename(): the original c(old = new) mapping is
# the plyr convention, while dplyr::rename() expects new = old, so the result
# depended on which package was attached last. Assigning names() positionally
# is safe here because df1 was just built with exactly these four columns in
# this order.
df2 <- df1
names(df2) <- c("Classes", "Cap_Shape", "Cap_Surface", "Cap_color")
df2
## # A tibble: 8,124 x 4
## Classes Cap_Shape Cap_Surface Cap_color
## <chr> <chr> <chr> <chr>
## 1 p x s n
## 2 e x s y
## 3 e b s w
## 4 p x y w
## 5 e x s g
## 6 e x y y
## 7 e b s w
## 8 e b y w
## 9 p x y w
## 10 e b s y
## # ... with 8,114 more rows
# Translate the single-letter codes in each column into readable labels.
# Each entry maps a column of df2 to its code -> label dictionary; values with
# no entry in the dictionary (including NA) are left untouched.
code_labels <- list(
  Classes = c(e = "Edible", p = "Poisonous"),
  Cap_Shape = c(
    b = "Bell", c = "Conical", x = "Convex",
    f = "Flat", k = "Knobbed", s = "Sunken"
  ),
  Cap_Surface = c(f = "Fibrous", g = "Grooves", y = "Scaly", s = "Smooth"),
  Cap_color = c(
    n = "Brown", b = "Buff", c = "Cinnamon", g = "Gray", r = "Green",
    p = "Pink", u = "Purple", e = "Red", w = "White", y = "Yellow"
  )
)

for (column in names(code_labels)) {
  labels <- code_labels[[column]]
  # Position of each cell's code in the dictionary; NA when it is not listed.
  position <- match(df2[[column]], names(labels))
  matched <- !is.na(position)
  df2[[column]][matched] <- unname(labels)[position[matched]]
}

FinalMushroomData <- df2
FinalMushroomData
## # A tibble: 8,124 x 4
## Classes Cap_Shape Cap_Surface Cap_color
## <chr> <chr> <chr> <chr>
## 1 Poisonous Convex Smooth Brown
## 2 Edible Convex Smooth Yellow
## 3 Edible Bell Smooth White
## 4 Poisonous Convex Scaly White
## 5 Edible Convex Smooth Gray
## 6 Edible Convex Scaly Yellow
## 7 Edible Bell Smooth White
## 8 Edible Bell Scaly White
## 9 Poisonous Convex Scaly White
## 10 Edible Bell Smooth Yellow
## # ... with 8,114 more rows