# Load the UCI mushroom data set, keep five descriptive columns and translate
# their single-letter codes into readable labels.
#
# The raw file has no header row. fread() assumes a header is present when
# every field on the first line is character, which is the case for this file,
# so without `header = FALSE` the first record is consumed as column names and
# silently lost. `data.table = FALSE` returns a plain data.frame, matching the
# base-style column indexing used later in this script.
# NOTE(review): the outputs recorded in the comments of this file total 8123
# rows (4208 + 3915), presumably one short for this reason -- re-run to refresh.
mydat <- fread(
  "http://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data",
  header = FALSE,
  data.table = FALSE,
  col.names = c(
    "classes", "cap_shape", "cap_surface", "cap_color", "bruises", "odor",
    "gill_attachment", "gill_spacing", "gill_size", "gill_color",
    "stalk_shape", "stalk_root", "Surface_above", "surface_below",
    "color_above", "color_below", "veil_type", "veil_color", "ring_number",
    "ring_type", "spore_color", "population", "habitat"
  )
)
#head(mydat)
#dim(mydat)

my_shrooms <- subset(
  mydat,
  select = c(classes, cap_shape, cap_color, habitat, population)
)

# Keep a copy that still holds the raw single-letter codes; the experimental
# recoding near the end of the script starts from it.
exp_df <- my_shrooms

# case_when() yields NA for any code that is not listed below.
my_shrooms <- mutate(
  my_shrooms,
  classes = case_when(
    classes == "e" ~ "edible",
    classes == "p" ~ "poisonous"
  ),
  cap_shape = case_when(
    cap_shape == "b" ~ "bell",
    cap_shape == "c" ~ "conical",
    cap_shape == "x" ~ "convex",
    cap_shape == "f" ~ "flat",
    cap_shape == "k" ~ "knobbed",
    cap_shape == "s" ~ "sunken"
  ),
  cap_color = case_when(
    cap_color == "n" ~ "brown",
    cap_color == "b" ~ "buff",
    cap_color == "c" ~ "cinnamon",
    cap_color == "g" ~ "gray",
    cap_color == "r" ~ "green",
    cap_color == "p" ~ "pink",
    cap_color == "u" ~ "purple",
    cap_color == "e" ~ "red",
    cap_color == "w" ~ "white",
    cap_color == "y" ~ "yellow"
  ),
  habitat = case_when(
    habitat == "g" ~ "grasses",
    habitat == "l" ~ "leaves",
    habitat == "m" ~ "meadows",
    habitat == "u" ~ "urban",
    habitat == "w" ~ "waste",
    habitat == "d" ~ "woods",
    habitat == "p" ~ "paths"
  ),
  population = case_when(
    population == "a" ~ "abundant",
    population == "c" ~ "clustered",
    population == "n" ~ "numerous",
    population == "s" ~ "scattered",
    population == "v" ~ "several",
    population == "y" ~ "solitary"
  )
)

# Preview the first 50 recoded rows.
head(my_shrooms, 50)
## classes cap_shape cap_color habitat population
## 1 edible convex yellow grasses numerous
## 2 edible bell white meadows numerous
## 3 poisonous convex white urban scattered
## 4 edible convex gray grasses abundant
## 5 edible convex yellow grasses numerous
## 6 edible bell white meadows numerous
## 7 edible bell white meadows scattered
## 8 poisonous convex white grasses several
## 9 edible bell yellow meadows scattered
## 10 edible convex yellow grasses numerous
## 11 edible convex yellow meadows scattered
## 12 edible bell yellow grasses scattered
## 13 poisonous convex white urban several
## 14 edible convex brown grasses abundant
## 15 edible sunken gray urban solitary
## 16 edible flat white grasses abundant
## 17 poisonous convex brown grasses scattered
## 18 poisonous convex white urban scattered
## 19 poisonous convex brown urban scattered
## 20 edible bell yellow meadows scattered
## 21 poisonous convex brown grasses several
## 22 edible bell yellow meadows scattered
## 23 edible bell white meadows numerous
## 24 edible bell white meadows scattered
## 25 poisonous flat white grasses several
## 26 edible convex yellow meadows numerous
## 27 edible convex white meadows numerous
## 28 edible flat brown urban solitary
## 29 edible convex yellow woods several
## 30 edible bell yellow meadows numerous
## 31 poisonous convex white urban scattered
## 32 edible convex yellow meadows numerous
## 33 edible convex brown paths solitary
## 34 edible bell yellow meadows scattered
## 35 edible convex yellow woods several
## 36 edible sunken gray urban several
## 37 poisonous convex brown urban scattered
## 38 edible convex yellow woods several
## 39 edible bell yellow meadows scattered
## 40 edible bell yellow grasses scattered
## 41 edible convex yellow paths solitary
## 42 edible convex brown urban solitary
## 43 poisonous convex white grasses several
## 44 edible convex yellow meadows numerous
## 45 edible convex white grasses numerous
## 46 edible convex yellow meadows scattered
## 47 edible convex white meadows numerous
## 48 edible convex yellow paths scattered
## 49 edible flat yellow paths scattered
## 50 edible convex brown grasses scattered
#summary(my_shrooms)
#head(subset(my_shrooms, select = 'cap_shape'))
#names(my_shrooms)
#count(my_shrooms, "cap_shape")
#names(my_shrooms)
#str(my_shrooms)
#my_shrooms<- as.factor(my_shrooms)
#str((my_shrooms$classes))
# Convert every column to a factor so that summary() reports per-level counts.
# `[[` extracts the column vector itself, so the resulting factors carry no
# element names (unlist() on a one-column data frame would generate them).
for (col_name in names(my_shrooms)) {
  my_shrooms[[col_name]] <- factor(my_shrooms[[col_name]])
}
summary(my_shrooms)
## classes cap_shape cap_color habitat
## edible :4208 bell : 452 brown :2283 grasses:2148
## poisonous:3915 conical: 4 gray :1840 leaves : 832
## convex :3655 red :1500 meadows: 292
## flat :3152 yellow :1072 paths :1144
## knobbed: 828 white :1040 urban : 367
## sunken : 32 buff : 168 waste : 192
## (Other): 220 woods :3148
## population
## abundant : 384
## clustered: 340
## numerous : 400
## scattered:1247
## several :4040
## solitary :1712
##
# Print a cross-tabulation of `classes` against each column of my_shrooms,
# one table per loop iteration.
# flat_table() is not defined in this file -- presumably sjmisc::flat_table;
# confirm against the packages loaded for this script.
# NOTE(review): the first iteration pairs `classes` with itself, which seems to
# be what produces the single-row table at the top of the recorded output.
# The loop variable is passed unquoted to flat_table(); how that argument is
# interpreted cannot be confirmed from here, so the call is left as written.
for( x in names(my_shrooms)){print(flat_table(my_shrooms, classes,x))
}
## x edible poisonous
##
## 4208 3915
## cap_shape bell conical convex flat knobbed sunken
## classes
## edible 404 0 1948 1596 228 32
## poisonous 48 4 1707 1556 600 0
## cap_color brown buff cinnamon gray green pink purple red white yellow
## classes
## edible 1264 48 32 1032 16 56 16 624 720 400
## poisonous 1019 120 12 808 0 88 0 876 320 672
## habitat grasses leaves meadows paths urban waste woods
## classes
## edible 1408 240 256 136 96 192 1880
## poisonous 740 592 36 1008 271 0 1268
## population abundant clustered numerous scattered several solitary
## classes
## edible 384 288 400 880 1192 1064
## poisonous 0 52 0 367 2848 648
# Keep each cross-tabulation in its own variable, named "flat_table" followed
# directly by the column name (for example `flat_tablecap_shape`).
for (i in seq_along(names(my_shrooms))) {
  target_name <- paste0("flat_table", names(my_shrooms)[i])
  assign(
    target_name,
    flat_table(my_shrooms, classes, names(my_shrooms)[i])
  )
}
# Alternative that avoids assign(): collect the cross-tabulations in a list.
# The list length and the element names are derived from the number of columns
# in my_shrooms, so they stay in step with whichever columns were selected
# instead of relying on a hard-coded count.
N <- ncol(my_shrooms)
x <- vector("list", N)
for (i in seq_len(N)) {
  x[[i]] <- flat_table(my_shrooms, classes, names(my_shrooms)[i])
}
# Element names are "my_flat_table_1", "my_flat_table_2", ... in column order.
names(x) <- paste0("my_flat_table_", seq_len(N))
# Grouped bar charts of edible vs. poisonous counts, one chart per attribute.
# Each table has one row per class and one column per attribute level, so with
# `beside = TRUE` every level gets a pair of bars (one per class).
# `counts` is reassigned for every chart; after this section it holds the
# habitat table.

counts <- table(my_shrooms$classes, my_shrooms$cap_shape)
barplot(
  counts,
  main = "Edible VS Poisonous by shape",
  xlab = "Mushroom shapes",
  col = c("darkblue", "red"),
  legend.text = rownames(counts),
  beside = TRUE
)

counts <- table(my_shrooms$classes, my_shrooms$cap_color)
barplot(
  counts,
  main = "Edible VS Poisonous by color",
  # The x axis shows cap colours here, not shapes.
  xlab = "Mushroom colors",
  col = c("darkblue", "red"),
  legend.text = rownames(counts),
  beside = TRUE
)

counts <- table(my_shrooms$classes, my_shrooms$population)
barplot(
  counts,
  main = "Edible VS Poisonous by class_population",
  xlab = "Mushroom class_population",
  col = c("darkblue", "red"),
  legend.text = rownames(counts),
  beside = TRUE
)

counts <- table(my_shrooms$classes, my_shrooms$habitat)
barplot(
  counts,
  main = "Edible VS Poisonous by habitat",
  xlab = "Mushroom habitat",
  col = c("darkblue", "red"),
  legend.text = rownames(counts),
  beside = TRUE
)
# Everything that follows from here is experimental code.
# Experimental table-driven recoding of the raw single-letter codes in exp_df.
#
# The lookup is keyed by column as well as by code, because the same letter
# means different things in different columns (for example "p" is poisonous in
# `classes`, pink in `cap_color` and paths in `habitat`). A lookup keyed by the
# code alone translates such values with whichever entry happens to match.
# NOTE: the output recorded below this block predates the per-column lookup
# (it shows e.g. "paths" in the `classes` column).
table2 <- read.table(text = "
column df_val replacement
classes e edible
classes p poisonous
cap_shape b bell
cap_shape c conical
cap_shape x convex
cap_shape f flat
cap_shape k knobbed
cap_shape s sunken
cap_color n brown
cap_color b buff
cap_color c cinnamon
cap_color g gray
cap_color r green
cap_color p pink
cap_color u purple
cap_color e red
cap_color w white
cap_color y yellow
habitat g grasses
habitat l leaves
habitat m meadows
habitat u urban
habitat w waste
habitat d woods
habitat p paths
population a abundant
population c clustered
population n numerous
population s scattered
population v several
population y solitary
", header = TRUE, stringsAsFactors = FALSE, colClasses = "character")

# Recode each column of exp_df using only the lookup rows for that column.
exp_df_2 <- as.data.frame(Map(
  function(codes, column_name) {
    lookup <- table2[table2$column == column_name, ]
    labels <- lookup$replacement[match(codes, lookup$df_val)]
    # Codes without an entry for this column are left unchanged.
    ifelse(is.na(labels), as.character(codes), labels)
  },
  exp_df,
  names(exp_df)
))
head(exp_df_2, 50)
## classes cap_shape cap_color habitat population
## 1 edible convex solitary grasses numerous
## 2 edible bell waste meadows numerous
## 3 paths convex waste urban scattered
## 4 edible convex grasses grasses abundant
## 5 edible convex solitary grasses numerous
## 6 edible bell waste meadows numerous
## 7 edible bell waste meadows scattered
## 8 paths convex waste grasses several
## 9 edible bell solitary meadows scattered
## 10 edible convex solitary grasses numerous
## 11 edible convex solitary meadows scattered
## 12 edible bell solitary grasses scattered
## 13 paths convex waste urban several
## 14 edible convex numerous grasses abundant
## 15 edible scattered grasses urban solitary
## 16 edible flat waste grasses abundant
## 17 paths convex numerous grasses scattered
## 18 paths convex waste urban scattered
## 19 paths convex numerous urban scattered
## 20 edible bell solitary meadows scattered
## 21 paths convex numerous grasses several
## 22 edible bell solitary meadows scattered
## 23 edible bell waste meadows numerous
## 24 edible bell waste meadows scattered
## 25 paths flat waste grasses several
## 26 edible convex solitary meadows numerous
## 27 edible convex waste meadows numerous
## 28 edible flat numerous urban solitary
## 29 edible convex solitary woods several
## 30 edible bell solitary meadows numerous
## 31 paths convex waste urban scattered
## 32 edible convex solitary meadows numerous
## 33 edible convex numerous paths solitary
## 34 edible bell solitary meadows scattered
## 35 edible convex solitary woods several
## 36 edible scattered grasses urban several
## 37 paths convex numerous urban scattered
## 38 edible convex solitary woods several
## 39 edible bell solitary meadows scattered
## 40 edible bell solitary grasses scattered
## 41 edible convex solitary paths solitary
## 42 edible convex numerous urban solitary
## 43 paths convex waste grasses several
## 44 edible convex solitary meadows numerous
## 45 edible convex waste grasses numerous
## 46 edible convex solitary meadows scattered
## 47 edible convex waste meadows numerous
## 48 edible convex solitary paths scattered
## 49 edible flat solitary paths scattered
## 50 edible convex numerous grasses scattered
#for datatable in list(datatables) for x in names(exp_df){ifelse(grepl(paste(df$x, collapse = '&'), list of tables),
##table that returns true from list[match......]