Retrieve the mushroom dataset from the UCI Machine Learning Repository and load it into a data frame
# Location of the raw, header-less, comma-separated mushroom data on the UCI site.
url <- "https://archive.ics.uci.edu/ml/machine-learning-databases/mushroom/agaricus-lepiota.data"

# Alternative two-step load, kept for reference:
#rawData <- readLines(url)
#mushroomDF <- read.csv(textConnection(rawData), header = FALSE, stringsAsFactors = FALSE)

# Read straight from the URL. The file has no header row, so columns are
# auto-named V1..V23. Every field is a one-letter code, so force character
# columns explicitly rather than relying on read.csv's type guessing, and
# spell out FALSE (`F` is an ordinary variable that can be reassigned).
mushroomDF <- read.csv(
  url,
  header = FALSE,
  stringsAsFactors = FALSE,
  colClasses = "character"
)

# Quick sanity check of the column types and row count.
summary(mushroomDF)
## V1 V2 V3
## Length:8124 Length:8124 Length:8124
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## V4 V5 V6
## Length:8124 Length:8124 Length:8124
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## V7 V8 V9
## Length:8124 Length:8124 Length:8124
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## V10 V11 V12
## Length:8124 Length:8124 Length:8124
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## V13 V14 V15
## Length:8124 Length:8124 Length:8124
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## V16 V17 V18
## Length:8124 Length:8124 Length:8124
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## V19 V20 V21
## Length:8124 Length:8124 Length:8124
## Class :character Class :character Class :character
## Mode :character Mode :character Mode :character
## V22 V23
## Length:8124 Length:8124
## Class :character Class :character
## Mode :character Mode :character
Assign descriptive column names to the dataset
# Replace the default V1..V23 names with the attribute names from the dataset
# description, in file order. Each name must be unique so that `$` and
# name-based selection address exactly one column: the 15th and 16th columns
# are the stalk colour above and below the ring respectively.
colnames(mushroomDF) <- c(
  "class", "cap_shape", "cap_surface", "cap_color",
  "bruises", "odor", "gill_attachment",
  "gill_spacing", "gill_size", "gill_color",
  "stalk_shape", "stalk_root", "stalk_surface_above_ring",
  "stalk_surface_below_ring", "stalk_color_above_ring", "stalk_color_below_ring",
  "veil_type", "veil_color", "ring_number", "ring_type",
  "spore_print_color", "population", "habitat"
)

# Show the assigned names to confirm there are 23 of them.
colnames(mushroomDF)
## [1] "class" "cap_shape"
## [3] "cap_surface" "cap_color"
## [5] "bruises" "odor"
## [7] "gill_attachment" "gill_spacing"
## [9] "gill_size" "gill_color"
## [11] "stalk_shape" "stalk_root"
## [13] "stalk_surface_above_ring" "stalk_surface_below_ring"
## [15] "stalk_color_above_ring" "stalk_color_above_ring"
## [17] "veil_type" "veil_color"
## [19] "ring_number" "ring_type"
## [21] "spore_print_color" "population"
## [23] "habitat"
Create a subset of the data frame, keeping only selected rows and a limited set of columns
# Keep only the buff ("b") and pink ("p") capped mushrooms, restricted to six
# columns of interest.
#
# Membership is tested with `%in%`. Writing `cap_color == c("b", "p")` instead
# recycles the two-element vector down the column, comparing odd rows only to
# "b" and even rows only to "p", which silently drops roughly half of the
# matching rows (and gives no warning because the row count is even).
# NOTE: any output listing captured with the `==` form is therefore incomplete;
# re-run to refresh it.
mushroomSubSetDF <- subset(
  mushroomDF,
  subset = cap_color %in% c("b", "p"),
  select = c("class", "cap_color", "cap_surface", "odor", "veil_type", "veil_color")
)

# Display the filtered rows.
mushroomSubSetDF
## class cap_color cap_surface odor veil_type veil_color
## 2286 p p s c p w
## 2684 p p f c p w
## 2842 p p f c p w
## 3048 p p s c p w
## 3098 p p s c p w
## 3182 p p f c p w
## 3192 p p s c p w
## 3328 p p f c p w
## 3332 p p s c p w
## 3366 p p f c p w
## 3372 p p s c p w
## 3382 p p s c p w
## 3396 p p f c p w
## 3404 p p s c p w
## 3442 p p f c p w
## 3444 p p f c p w
## 3470 p p s c p w
## 3522 p p s c p w
## 3526 p p f c p w
## 3544 p p s c p w
## 3548 p p s c p w
## 3628 p p f c p w
## 3680 p p s c p w
## 3698 p p s c p w
## 3700 p p s c p w
## 3704 p p f c p w
## 3936 p p s c p w
## 3940 p p f c p w
## 3954 p p s c p w
## 3976 p p s c p w
## 3982 p p f c p w
## 3985 e b y n p w
## 4029 p b s f p w
## 4052 p p f c p w
## 4107 p b s n p w
## 4171 p b s f p w
## 4195 p b s f p w
## 4200 p p f c p w
## 4284 e p s n p w
## 4292 e p s n p w
## 4381 e b y n p w
## 4535 e b y n p w
## 4549 p b s f p w
## 4674 e p s n p w
## 4705 p b s f p w
## 4750 p p f c p w
## 4778 e p s n p w
## 4780 p p s c p w
## 4841 p b s n p w
## 4849 p b s f p w
## 4861 e b s n p w
## 4927 e b s n p w
## 4946 e p s n p w
## 4951 p b s f p w
## 4959 p b s f p w
## 4965 e b s n p w
## 4966 e p s n p w
## 4968 p p y n p w
## 5003 p b s n p w
## 5045 p b s f p w
## 5053 e b y n p w
## 5059 p b s f p w
## 5065 p b s f p w
## 5098 p p y n p w
## 5114 e p y n p w
## 5115 p b s f p w
## 5121 p b s f p w
## 5141 p b s n p w
## 5151 e b s n p w
## 5157 p b s f p w
## 5165 p b s f p w
## 5173 p b s f p w
## 5176 e p s n p w
## 5181 p b s f p w
## 5194 e p s n p w
## 5197 p b s f p w
## 5198 e p y n p w
## 5207 e b s n p w
## 5216 p p y n p w
## 5217 e b y n p w
## 5222 e p y n p w
## 5224 e p s n p w
## 5229 p b s f p w
## 5253 e b y n p w
## 5255 p b s f p w
## 5283 p b s f p w
## 5291 p b s f p w
## 5321 p b s f p w
## 5349 p b s f p w
## 5351 p b s f p w
## 5370 p p s n p w
## 5372 e p s n p w
## 5373 p b s f p w
## 5383 e b y n p w
## 5386 e p s n p w
## 5388 e p s n p w
## 5393 e b s n p w
## 5406 p p y n p w
## 5411 p b s f p w
## 5425 p b s f p w
## 5427 p b s f p w
## 5444 e p y n p w
## 5461 p b s f p w
## 5465 p b y n p w
## 5480 e p y n p w
## 5482 e p s n p w
## 5491 p b s f p w
## 5494 p p y n p w
## 5501 p b s f p w
## 5504 e p s n p w
## 5505 e b y n p w
## 5526 e p y n p w
## 5527 p b y n p w
## 5541 e b y n p w
## 5551 p b s f p w
## 5553 p b s f p w
## 5558 e p y n p w
## 5567 p b s n p w
## 5570 e p s n p w
## 5575 p b s f p w
## 5578 p p y n p w
## 5588 p p y n p w
## 5594 p p y n p w
## 5597 e b s n p w
## 5619 p b s f p w
## 5625 e b y n p w
## 5650 e p y n p w
## 5661 p b s f p w
## 5670 e p y n p w
## 5671 p b s f p w
## 5679 p b s f p w
## 5687 e b s n p w
## 5692 e p y n p w
## 5712 e p s n p w
## 5713 p b s f p w
## 5716 e p y n p w
## 5717 p b s f p w
## 5735 e b s n p w
## 5744 e p y n p w
## 5745 p b y n p w
## 5773 p b s f p w
## 5779 p b s f p w
## 5782 e p s n p w
## 5789 p b s n p w
## 5790 p p s n p w
## 5807 p b s f p w
## 5809 p b s f p w
## 5863 p b s n p w
## 5881 p b s f p w
## 5887 e b y n p w
## 5897 p b s f p w
## 5907 e b s n p w
## 5908 e p s n p w
## 5917 e b y n p w
## 5928 p p s n p w
## 5935 p b s f p w
## 5938 e p y n p w
## 5947 p b s f p w
## 5975 p b s f p w
## 5993 p b y n p w
## 5997 e b y n p w
## 5999 p b s f p w
## 7064 e p s n p w
## 7354 e p y n p w
## 7706 e p s n p w
## 7920 e p y n p w
Define the expanded (human-readable) values for the abbreviated codes in the selected columns
# Lookup tables mapping each one-letter code (the element name) to its
# human-readable label (the element value), one table per selected column.

# Edibility class.
class_values <- c(
  e = "edible",
  p = "poisonous"
)

# Cap surface texture.
cap_surface_values <- c(
  f = "fibrous",
  g = "grooves",
  y = "scaly",
  s = "smooth"
)

# Cap colour.
cap_color_values <- c(
  n = "brown",
  b = "buff",
  c = "cinnamon",
  g = "gray",
  r = "green",
  p = "pink",
  u = "purple",
  e = "red",
  w = "white",
  y = "yellow"
)

# Odor.
odor_values <- c(
  a = "almond",
  l = "anise",
  c = "creosote",
  y = "fishy",
  f = "foul",
  m = "musty",
  n = "none",
  p = "pungent",
  s = "spicy"
)

# Veil type.
veil_type_values <- c(
  p = "partial",
  u = "universal"
)

# Veil colour.
veil_color_values <- c(
  n = "brown",
  o = "orange",
  w = "white",
  y = "yellow"
)
Replace the abbreviated codes with their expanded values
# Expand the one-letter codes in each selected column to readable labels.
#
# Each column is paired with its lookup table (code -> label) and translated
# in a single vectorised step instead of one assignment per code. This also
# covers every code in the table (e.g. all cap colours, not just "b" and "p"),
# so the expansion stays correct if the row filter changes.
code_lookups <- list(
  class       = class_values,
  cap_surface = cap_surface_values,
  cap_color   = cap_color_values,
  odor        = odor_values,
  veil_type   = veil_type_values,
  veil_color  = veil_color_values
)

for (col_name in names(code_lookups)) {
  lookup <- code_lookups[[col_name]]
  codes <- mushroomSubSetDF[[col_name]]

  # Only touch values that have an entry in the table; anything else
  # (including NA) is left exactly as it was.
  known <- codes %in% names(lookup)
  codes[known] <- unname(lookup[codes[known]])

  mushroomSubSetDF[[col_name]] <- codes
}
Create two new logical columns that flag pungent-smelling and smooth-capped mushrooms
# Derive two logical flag columns from the expanded labels:
#   isPungent       - TRUE where the odor label is "pungent"
#   isSmoothSurface - TRUE where the cap surface label is "smooth"
mushroomSubSetDF[["isPungent"]] <- mushroomSubSetDF[["odor"]] == "pungent"
mushroomSubSetDF[["isSmoothSurface"]] <- mushroomSubSetDF[["cap_surface"]] == "smooth"

# Display the data frame with the new columns appended.
print(mushroomSubSetDF)
## class cap_color cap_surface odor veil_type veil_color
## 2286 poisonous pink smooth creosote partial white
## 2684 poisonous pink fibrous creosote partial white
## 2842 poisonous pink fibrous creosote partial white
## 3048 poisonous pink smooth creosote partial white
## 3098 poisonous pink smooth creosote partial white
## 3182 poisonous pink fibrous creosote partial white
## 3192 poisonous pink smooth creosote partial white
## 3328 poisonous pink fibrous creosote partial white
## 3332 poisonous pink smooth creosote partial white
## 3366 poisonous pink fibrous creosote partial white
## 3372 poisonous pink smooth creosote partial white
## 3382 poisonous pink smooth creosote partial white
## 3396 poisonous pink fibrous creosote partial white
## 3404 poisonous pink smooth creosote partial white
## 3442 poisonous pink fibrous creosote partial white
## 3444 poisonous pink fibrous creosote partial white
## 3470 poisonous pink smooth creosote partial white
## 3522 poisonous pink smooth creosote partial white
## 3526 poisonous pink fibrous creosote partial white
## 3544 poisonous pink smooth creosote partial white
## 3548 poisonous pink smooth creosote partial white
## 3628 poisonous pink fibrous creosote partial white
## 3680 poisonous pink smooth creosote partial white
## 3698 poisonous pink smooth creosote partial white
## 3700 poisonous pink smooth creosote partial white
## 3704 poisonous pink fibrous creosote partial white
## 3936 poisonous pink smooth creosote partial white
## 3940 poisonous pink fibrous creosote partial white
## 3954 poisonous pink smooth creosote partial white
## 3976 poisonous pink smooth creosote partial white
## 3982 poisonous pink fibrous creosote partial white
## 3985 edible buff scaly none partial white
## 4029 poisonous buff smooth foul partial white
## 4052 poisonous pink fibrous creosote partial white
## 4107 poisonous buff smooth none partial white
## 4171 poisonous buff smooth foul partial white
## 4195 poisonous buff smooth foul partial white
## 4200 poisonous pink fibrous creosote partial white
## 4284 edible pink smooth none partial white
## 4292 edible pink smooth none partial white
## 4381 edible buff scaly none partial white
## 4535 edible buff scaly none partial white
## 4549 poisonous buff smooth foul partial white
## 4674 edible pink smooth none partial white
## 4705 poisonous buff smooth foul partial white
## 4750 poisonous pink fibrous creosote partial white
## 4778 edible pink smooth none partial white
## 4780 poisonous pink smooth creosote partial white
## 4841 poisonous buff smooth none partial white
## 4849 poisonous buff smooth foul partial white
## 4861 edible buff smooth none partial white
## 4927 edible buff smooth none partial white
## 4946 edible pink smooth none partial white
## 4951 poisonous buff smooth foul partial white
## 4959 poisonous buff smooth foul partial white
## 4965 edible buff smooth none partial white
## 4966 edible pink smooth none partial white
## 4968 poisonous pink scaly none partial white
## 5003 poisonous buff smooth none partial white
## 5045 poisonous buff smooth foul partial white
## 5053 edible buff scaly none partial white
## 5059 poisonous buff smooth foul partial white
## 5065 poisonous buff smooth foul partial white
## 5098 poisonous pink scaly none partial white
## 5114 edible pink scaly none partial white
## 5115 poisonous buff smooth foul partial white
## 5121 poisonous buff smooth foul partial white
## 5141 poisonous buff smooth none partial white
## 5151 edible buff smooth none partial white
## 5157 poisonous buff smooth foul partial white
## 5165 poisonous buff smooth foul partial white
## 5173 poisonous buff smooth foul partial white
## 5176 edible pink smooth none partial white
## 5181 poisonous buff smooth foul partial white
## 5194 edible pink smooth none partial white
## 5197 poisonous buff smooth foul partial white
## 5198 edible pink scaly none partial white
## 5207 edible buff smooth none partial white
## 5216 poisonous pink scaly none partial white
## 5217 edible buff scaly none partial white
## 5222 edible pink scaly none partial white
## 5224 edible pink smooth none partial white
## 5229 poisonous buff smooth foul partial white
## 5253 edible buff scaly none partial white
## 5255 poisonous buff smooth foul partial white
## 5283 poisonous buff smooth foul partial white
## 5291 poisonous buff smooth foul partial white
## 5321 poisonous buff smooth foul partial white
## 5349 poisonous buff smooth foul partial white
## 5351 poisonous buff smooth foul partial white
## 5370 poisonous pink smooth none partial white
## 5372 edible pink smooth none partial white
## 5373 poisonous buff smooth foul partial white
## 5383 edible buff scaly none partial white
## 5386 edible pink smooth none partial white
## 5388 edible pink smooth none partial white
## 5393 edible buff smooth none partial white
## 5406 poisonous pink scaly none partial white
## 5411 poisonous buff smooth foul partial white
## 5425 poisonous buff smooth foul partial white
## 5427 poisonous buff smooth foul partial white
## 5444 edible pink scaly none partial white
## 5461 poisonous buff smooth foul partial white
## 5465 poisonous buff scaly none partial white
## 5480 edible pink scaly none partial white
## 5482 edible pink smooth none partial white
## 5491 poisonous buff smooth foul partial white
## 5494 poisonous pink scaly none partial white
## 5501 poisonous buff smooth foul partial white
## 5504 edible pink smooth none partial white
## 5505 edible buff scaly none partial white
## 5526 edible pink scaly none partial white
## 5527 poisonous buff scaly none partial white
## 5541 edible buff scaly none partial white
## 5551 poisonous buff smooth foul partial white
## 5553 poisonous buff smooth foul partial white
## 5558 edible pink scaly none partial white
## 5567 poisonous buff smooth none partial white
## 5570 edible pink smooth none partial white
## 5575 poisonous buff smooth foul partial white
## 5578 poisonous pink scaly none partial white
## 5588 poisonous pink scaly none partial white
## 5594 poisonous pink scaly none partial white
## 5597 edible buff smooth none partial white
## 5619 poisonous buff smooth foul partial white
## 5625 edible buff scaly none partial white
## 5650 edible pink scaly none partial white
## 5661 poisonous buff smooth foul partial white
## 5670 edible pink scaly none partial white
## 5671 poisonous buff smooth foul partial white
## 5679 poisonous buff smooth foul partial white
## 5687 edible buff smooth none partial white
## 5692 edible pink scaly none partial white
## 5712 edible pink smooth none partial white
## 5713 poisonous buff smooth foul partial white
## 5716 edible pink scaly none partial white
## 5717 poisonous buff smooth foul partial white
## 5735 edible buff smooth none partial white
## 5744 edible pink scaly none partial white
## 5745 poisonous buff scaly none partial white
## 5773 poisonous buff smooth foul partial white
## 5779 poisonous buff smooth foul partial white
## 5782 edible pink smooth none partial white
## 5789 poisonous buff smooth none partial white
## 5790 poisonous pink smooth none partial white
## 5807 poisonous buff smooth foul partial white
## 5809 poisonous buff smooth foul partial white
## 5863 poisonous buff smooth none partial white
## 5881 poisonous buff smooth foul partial white
## 5887 edible buff scaly none partial white
## 5897 poisonous buff smooth foul partial white
## 5907 edible buff smooth none partial white
## 5908 edible pink smooth none partial white
## 5917 edible buff scaly none partial white
## 5928 poisonous pink smooth none partial white
## 5935 poisonous buff smooth foul partial white
## 5938 edible pink scaly none partial white
## 5947 poisonous buff smooth foul partial white
## 5975 poisonous buff smooth foul partial white
## 5993 poisonous buff scaly none partial white
## 5997 edible buff scaly none partial white
## 5999 poisonous buff smooth foul partial white
## 7064 edible pink smooth none partial white
## 7354 edible pink scaly none partial white
## 7706 edible pink smooth none partial white
## 7920 edible pink scaly none partial white
## isPungent isSmoothSurface
## 2286 FALSE TRUE
## 2684 FALSE FALSE
## 2842 FALSE FALSE
## 3048 FALSE TRUE
## 3098 FALSE TRUE
## 3182 FALSE FALSE
## 3192 FALSE TRUE
## 3328 FALSE FALSE
## 3332 FALSE TRUE
## 3366 FALSE FALSE
## 3372 FALSE TRUE
## 3382 FALSE TRUE
## 3396 FALSE FALSE
## 3404 FALSE TRUE
## 3442 FALSE FALSE
## 3444 FALSE FALSE
## 3470 FALSE TRUE
## 3522 FALSE TRUE
## 3526 FALSE FALSE
## 3544 FALSE TRUE
## 3548 FALSE TRUE
## 3628 FALSE FALSE
## 3680 FALSE TRUE
## 3698 FALSE TRUE
## 3700 FALSE TRUE
## 3704 FALSE FALSE
## 3936 FALSE TRUE
## 3940 FALSE FALSE
## 3954 FALSE TRUE
## 3976 FALSE TRUE
## 3982 FALSE FALSE
## 3985 FALSE FALSE
## 4029 FALSE TRUE
## 4052 FALSE FALSE
## 4107 FALSE TRUE
## 4171 FALSE TRUE
## 4195 FALSE TRUE
## 4200 FALSE FALSE
## 4284 FALSE TRUE
## 4292 FALSE TRUE
## 4381 FALSE FALSE
## 4535 FALSE FALSE
## 4549 FALSE TRUE
## 4674 FALSE TRUE
## 4705 FALSE TRUE
## 4750 FALSE FALSE
## 4778 FALSE TRUE
## 4780 FALSE TRUE
## 4841 FALSE TRUE
## 4849 FALSE TRUE
## 4861 FALSE TRUE
## 4927 FALSE TRUE
## 4946 FALSE TRUE
## 4951 FALSE TRUE
## 4959 FALSE TRUE
## 4965 FALSE TRUE
## 4966 FALSE TRUE
## 4968 FALSE FALSE
## 5003 FALSE TRUE
## 5045 FALSE TRUE
## 5053 FALSE FALSE
## 5059 FALSE TRUE
## 5065 FALSE TRUE
## 5098 FALSE FALSE
## 5114 FALSE FALSE
## 5115 FALSE TRUE
## 5121 FALSE TRUE
## 5141 FALSE TRUE
## 5151 FALSE TRUE
## 5157 FALSE TRUE
## 5165 FALSE TRUE
## 5173 FALSE TRUE
## 5176 FALSE TRUE
## 5181 FALSE TRUE
## 5194 FALSE TRUE
## 5197 FALSE TRUE
## 5198 FALSE FALSE
## 5207 FALSE TRUE
## 5216 FALSE FALSE
## 5217 FALSE FALSE
## 5222 FALSE FALSE
## 5224 FALSE TRUE
## 5229 FALSE TRUE
## 5253 FALSE FALSE
## 5255 FALSE TRUE
## 5283 FALSE TRUE
## 5291 FALSE TRUE
## 5321 FALSE TRUE
## 5349 FALSE TRUE
## 5351 FALSE TRUE
## 5370 FALSE TRUE
## 5372 FALSE TRUE
## 5373 FALSE TRUE
## 5383 FALSE FALSE
## 5386 FALSE TRUE
## 5388 FALSE TRUE
## 5393 FALSE TRUE
## 5406 FALSE FALSE
## 5411 FALSE TRUE
## 5425 FALSE TRUE
## 5427 FALSE TRUE
## 5444 FALSE FALSE
## 5461 FALSE TRUE
## 5465 FALSE FALSE
## 5480 FALSE FALSE
## 5482 FALSE TRUE
## 5491 FALSE TRUE
## 5494 FALSE FALSE
## 5501 FALSE TRUE
## 5504 FALSE TRUE
## 5505 FALSE FALSE
## 5526 FALSE FALSE
## 5527 FALSE FALSE
## 5541 FALSE FALSE
## 5551 FALSE TRUE
## 5553 FALSE TRUE
## 5558 FALSE FALSE
## 5567 FALSE TRUE
## 5570 FALSE TRUE
## 5575 FALSE TRUE
## 5578 FALSE FALSE
## 5588 FALSE FALSE
## 5594 FALSE FALSE
## 5597 FALSE TRUE
## 5619 FALSE TRUE
## 5625 FALSE FALSE
## 5650 FALSE FALSE
## 5661 FALSE TRUE
## 5670 FALSE FALSE
## 5671 FALSE TRUE
## 5679 FALSE TRUE
## 5687 FALSE TRUE
## 5692 FALSE FALSE
## 5712 FALSE TRUE
## 5713 FALSE TRUE
## 5716 FALSE FALSE
## 5717 FALSE TRUE
## 5735 FALSE TRUE
## 5744 FALSE FALSE
## 5745 FALSE FALSE
## 5773 FALSE TRUE
## 5779 FALSE TRUE
## 5782 FALSE TRUE
## 5789 FALSE TRUE
## 5790 FALSE TRUE
## 5807 FALSE TRUE
## 5809 FALSE TRUE
## 5863 FALSE TRUE
## 5881 FALSE TRUE
## 5887 FALSE FALSE
## 5897 FALSE TRUE
## 5907 FALSE TRUE
## 5908 FALSE TRUE
## 5917 FALSE FALSE
## 5928 FALSE TRUE
## 5935 FALSE TRUE
## 5938 FALSE FALSE
## 5947 FALSE TRUE
## 5975 FALSE TRUE
## 5993 FALSE FALSE
## 5997 FALSE FALSE
## 5999 FALSE TRUE
## 7064 FALSE TRUE
## 7354 FALSE FALSE
## 7706 FALSE TRUE
## 7920 FALSE FALSE