
library(ggplot2) # Data visualization
library(readr) # CSV file I/O, e.g. the read_csv function
library(gridExtra)
library(grid)
library(plyr)
# Load the iris measurements from the project's CSV file.
# NOTE: this binding shadows the built-in `datasets::iris` for the rest of the
# script; the plots below rely on this CSV's column names (e.g. SepalLengthCm).
iris <- read.csv("assets/iris.csv")
# Peek at 10 randomly chosen rows (no seed is set, so the rows differ per run).
iris[sample(nrow(iris), 10), ]
# Frequency analysis: one stacked histogram per measurement, filled by species.
# Every plot carries a dashed grey reference line at the overall mean of its
# measurement. The mean is supplied as a constant `xintercept`, so exactly one
# line is drawn; mapping it through aes() would draw one identical line per
# data row on top of each other.

# Sepal length
HisSl <- ggplot(data = iris, aes(x = SepalLengthCm)) +
  geom_histogram(aes(fill = Species), binwidth = 0.2, color = "black") +
  geom_vline(
    xintercept = mean(iris$SepalLengthCm),
    linetype = "dashed",
    color = "grey"
  ) +
  xlab("Sepal Length (cm)") +
  ylab("Frequency") +
  ggtitle("Histogram of Sepal Length") +
  theme(legend.position = "none")

# Sepal width
HistSw <- ggplot(data = iris, aes(x = SepalWidthCm)) +
  geom_histogram(aes(fill = Species), binwidth = 0.2, color = "black") +
  geom_vline(
    xintercept = mean(iris$SepalWidthCm),
    linetype = "dashed",
    color = "grey"
  ) +
  xlab("Sepal Width (cm)") +
  ylab("Frequency") +
  ggtitle("Histogram of Sepal Width") +
  theme(legend.position = "none")

# Petal length
HistPl <- ggplot(data = iris, aes(x = PetalLengthCm)) +
  geom_histogram(aes(fill = Species), binwidth = 0.2, color = "black") +
  geom_vline(
    xintercept = mean(iris$PetalLengthCm),
    linetype = "dashed",
    color = "grey"
  ) +
  xlab("Petal Length (cm)") +
  ylab("Frequency") +
  ggtitle("Histogram of Petal Length") +
  theme(legend.position = "none")

# Petal width (the only panel that keeps its legend, shown on the right)
HistPw <- ggplot(data = iris, aes(x = PetalWidthCm)) +
  geom_histogram(aes(fill = Species), binwidth = 0.2, color = "black") +
  geom_vline(
    xintercept = mean(iris$PetalWidthCm),
    linetype = "dashed",
    color = "grey"
  ) +
  xlab("Petal Width (cm)") +
  ylab("Frequency") +
  ggtitle("Histogram of Petal Width") +
  theme(legend.position = "right")

# Arrange the four histograms in a 2 x 2 grid under one shared heading.
# The per-plot titles are blanked so only the shared heading is shown.
grid.arrange(
  HisSl + ggtitle(""),
  HistSw + ggtitle(""),
  HistPl + ggtitle(""),
  HistPw + ggtitle(""),
  nrow = 2,
  top = textGrob("Iris Frequency Histogram", gp = gpar(fontsize = 15))
)

# Notice the shape of the data: most attributes exhibit a roughly normal
# distribution. The measurements of very small flowers are visible in the
# petal width and petal length columns.
#
# Next, review the density distribution of each attribute broken down by
# species. Like a scatterplot matrix, a per-class density plot helps to see how
# well the classes separate and where their values overlap for an attribute.
#
# Each plot gets a single dashed grey line at the overall (all-species) mean.
# The mean is passed as a constant `xintercept`: the line colour is fixed to
# grey, so mapping colour to Species on this layer would have no effect, and
# mapping the mean through aes() would draw one identical line per data row.
# NOTE: `size` sets the line thickness here; ggplot2 >= 3.4.0 prefers
# `linewidth` for lines, but `size` is kept so older versions behave the same.

# Petal length
DhistPl <- ggplot(iris, aes(x = PetalLengthCm, colour = Species, fill = Species)) +
  geom_density(alpha = 0.3) +
  geom_vline(
    xintercept = mean(iris$PetalLengthCm),
    linetype = "dashed",
    color = "grey",
    size = 1
  ) +
  xlab("Petal Length (cm)") +
  ylab("Density") +
  theme(legend.position = "none")

# Petal width (the only panel that keeps its legend)
DhistPw <- ggplot(iris, aes(x = PetalWidthCm, colour = Species, fill = Species)) +
  geom_density(alpha = 0.3) +
  geom_vline(
    xintercept = mean(iris$PetalWidthCm),
    linetype = "dashed",
    color = "grey",
    size = 1
  ) +
  xlab("Petal Width (cm)") +
  ylab("Density")

# Sepal width
DhistSw <- ggplot(iris, aes(x = SepalWidthCm, colour = Species, fill = Species)) +
  geom_density(alpha = 0.3) +
  geom_vline(
    xintercept = mean(iris$SepalWidthCm),
    linetype = "dashed",
    color = "grey",
    size = 1
  ) +
  xlab("Sepal Width (cm)") +
  ylab("Density") +
  theme(legend.position = "none")

# Sepal length
DhistSl <- ggplot(iris, aes(x = SepalLengthCm, colour = Species, fill = Species)) +
  geom_density(alpha = 0.3) +
  geom_vline(
    xintercept = mean(iris$SepalLengthCm),
    linetype = "dashed",
    color = "grey",
    size = 1
  ) +
  xlab("Sepal Length (cm)") +
  ylab("Density") +
  theme(legend.position = "none")

# Arrange the four density plots in a 2 x 2 grid under one shared heading.
grid.arrange(
  DhistSl + ggtitle(""),
  DhistSw + ggtitle(""),
  DhistPl + ggtitle(""),
  DhistPw + ggtitle(""),
  nrow = 2,
  top = textGrob("Iris Density Plot", gp = gpar(fontsize = 15))
)

# Box plots: one panel per measurement, grouped and filled by species, all
# combined into a single figure. The y axis of every panel is ticked every
# 0.5 cm.

# Sepal length
BpSl <- ggplot(iris, aes(x = Species, y = SepalLengthCm, fill = Species)) +
  geom_boxplot() +
  scale_y_continuous(name = "Sepal Length (cm)", breaks = seq(0, 30, by = 0.5)) +
  theme(legend.position = "none")

# Sepal width
BpSw <- ggplot(iris, aes(x = Species, y = SepalWidthCm, fill = Species)) +
  geom_boxplot() +
  scale_y_continuous(name = "Sepal Width (cm)", breaks = seq(0, 30, by = 0.5)) +
  theme(legend.position = "none")

# Petal length
BpPl <- ggplot(iris, aes(x = Species, y = PetalLengthCm, fill = Species)) +
  geom_boxplot() +
  scale_y_continuous(name = "Petal Length (cm)", breaks = seq(0, 30, by = 0.5)) +
  theme(legend.position = "none")

# Petal width (legend is not suppressed on this panel)
BpPw <- ggplot(iris, aes(x = Species, y = PetalWidthCm, fill = Species)) +
  geom_boxplot() +
  scale_y_continuous(name = "Petal Width (cm)", breaks = seq(0, 30, by = 0.5)) +
  labs(title = "Iris Box Plot", x = "Species")

# Blank each panel's own title, then lay the panels out in a 2 x 2 grid under
# one shared heading.
grid.arrange(
  grobs = lapply(
    list(BpSl, BpSw, BpPl, BpPw),
    function(panel) panel + ggtitle("")
  ),
  nrow = 2,
  top = textGrob("Sepal and Petal Box Plot", gp = gpar(fontsize = 15))
)

# Source: https://www.kaggle.com/antoniolopez/iris-data-visualization-with-r/data
LS0tCnRpdGxlOiAiSXJpcyBEYXRhc2V0IChTY2h3ZXJ0bGlsaWVuKSIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKIVtdKGFzc2V0cy9zZXBhbF9wZXRhbC5wbmcpCiFbXShhc3NldHMvc3BlY2llcy5wbmcpCgpgYGB7cn0KCmxpYnJhcnkoZ2dwbG90MikgIyBEYXRhIHZpc3VhbGl6YXRpb24KbGlicmFyeShyZWFkcikgIyBDU1YgZmlsZSBJL08sIGUuZy4gdGhlIHJlYWRfY3N2IGZ1bmN0aW9uCmxpYnJhcnkoZ3JpZEV4dHJhKQpsaWJyYXJ5KGdyaWQpCmxpYnJhcnkocGx5cikKCiMgTG9hZCB0aGUgZGF0YXNldAppcmlzPXJlYWQuY3N2KCdhc3NldHMvaXJpcy5jc3YnKQoKCiMgRmlyc3QgbGV0J3MgZ2V0IGEgcmFuZG9tIHNhbXBsaW5nIG9mIHRoZSBkYXRhCmlyaXNbc2FtcGxlKG5yb3coaXJpcyksMTApLF0KCmBgYAoKYGBge3J9CgojIERlbnNpdHkgJiBGcmVxdWVuY3kgYW5hbHlzaXMgd2l0aCB0aGUgSGlzdG9ncmFtLAoKIyBTZXBhbCBsZW5ndGggCkhpc1NsIDwtIGdncGxvdChkYXRhPWlyaXMsIGFlcyh4PVNlcGFsTGVuZ3RoQ20pKSsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aD0wLjIsIGNvbG9yPSJibGFjayIsIGFlcyhmaWxsPVNwZWNpZXMpKSArIAogIHhsYWIoIlNlcGFsIExlbmd0aCAoY20pIikgKyAgCiAgeWxhYigiRnJlcXVlbmN5IikgKyAKICB0aGVtZShsZWdlbmQucG9zaXRpb249Im5vbmUiKSsKICBnZ3RpdGxlKCJIaXN0b2dyYW0gb2YgU2VwYWwgTGVuZ3RoIikrCiAgZ2VvbV92bGluZShkYXRhPWlyaXMsIGFlcyh4aW50ZXJjZXB0ID0gbWVhbihTZXBhbExlbmd0aENtKSksbGluZXR5cGU9ImRhc2hlZCIsY29sb3I9ImdyZXkiKQoKCiMgU2VwYWwgd2lkdGgKSGlzdFN3IDwtIGdncGxvdChkYXRhPWlyaXMsIGFlcyh4PVNlcGFsV2lkdGhDbSkpICsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aD0wLjIsIGNvbG9yPSJibGFjayIsIGFlcyhmaWxsPVNwZWNpZXMpKSArIAogIHhsYWIoIlNlcGFsIFdpZHRoIChjbSkiKSArICAKICB5bGFiKCJGcmVxdWVuY3kiKSArIAogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIpKwogIGdndGl0bGUoIkhpc3RvZ3JhbSBvZiBTZXBhbCBXaWR0aCIpKwogIGdlb21fdmxpbmUoZGF0YT1pcmlzLCBhZXMoeGludGVyY2VwdCA9IG1lYW4oU2VwYWxXaWR0aENtKSksbGluZXR5cGU9ImRhc2hlZCIsY29sb3I9ImdyZXkiKQoKCiMgUGV0YWwgbGVuZ3RoCkhpc3RQbCA8LSBnZ3Bsb3QoZGF0YT1pcmlzLCBhZXMoeD1QZXRhbExlbmd0aENtKSkrCiAgZ2VvbV9oaXN0b2dyYW0oYmlud2lkdGg9MC4yLCBjb2xvcj0iYmxhY2siLCBhZXMoZmlsbD1TcGVjaWVzKSkgKyAKICB4bGFiKCJQZXRhbCBMZW5ndGggKGNtKSIpICsgIAogIHlsYWIoIkZyZXF1ZW5jeSIpICsgCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikrCiAgZ2d0aXRsZSgiSGlzdG9ncmFtIG9mIFBldGFsIExlbmd0aCIpKwogIGdlb21fdmxpbmUoZGF0YT1pcmlzLCBhZXMoeGludGVyY2VwdCA9IG1lYW4oUGV0YWxMZW5n
dGhDbSkpLAogICAgICAgICAgICAgbGluZXR5cGU9ImRhc2hlZCIsY29sb3I9ImdyZXkiKQoKCgojIFBldGFsIHdpZHRoCkhpc3RQdyA8LSBnZ3Bsb3QoZGF0YT1pcmlzLCBhZXMoeD1QZXRhbFdpZHRoQ20pKSsKICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aD0wLjIsIGNvbG9yPSJibGFjayIsIGFlcyhmaWxsPVNwZWNpZXMpKSArIAogIHhsYWIoIlBldGFsIFdpZHRoIChjbSkiKSArICAKICB5bGFiKCJGcmVxdWVuY3kiKSArIAogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbj0icmlnaHQiICkrCiAgZ2d0aXRsZSgiSGlzdG9ncmFtIG9mIFBldGFsIFdpZHRoIikrCiAgZ2VvbV92bGluZShkYXRhPWlyaXMsIGFlcyh4aW50ZXJjZXB0ID0gbWVhbihQZXRhbFdpZHRoQ20pKSxsaW5ldHlwZT0iZGFzaGVkIixjb2xvcj0iZ3JleSIpCgoKIyBQbG90IGFsbCB2aXN1YWxpemF0aW9ucwpncmlkLmFycmFuZ2UoSGlzU2wgKyBnZ3RpdGxlKCIiKSwKICAgICAgICAgICAgIEhpc3RTdyArIGdndGl0bGUoIiIpLAogICAgICAgICAgICAgSGlzdFBsICsgZ2d0aXRsZSgiIiksCiAgICAgICAgICAgICBIaXN0UHcgICsgZ2d0aXRsZSgiIiksCiAgICAgICAgICAgICBucm93ID0gMiwKICAgICAgICAgICAgIHRvcCA9IHRleHRHcm9iKCJJcmlzIEZyZXF1ZW5jeSBIaXN0b2dyYW0iLCAKICAgICAgICAgICAgICAgICAgICAgICAgICAgIGdwPWdwYXIoZm9udHNpemU9MTUpKQopCmBgYAoKCgoKYGBge3J9CiMgTm90aWNlIHRoZSBzaGFwZSBvZiB0aGUgZGF0YSwgbW9zdCBhdHRyaWJ1dGVzIGV4aGliaXQgYSBub3JtYWwgZGlzdHJpYnV0aW9uLiAKIyBZb3UgY2FuIHNlZSB0aGUgbWVhc3VyZW1lbnRzIG9mIHZlcnkgc21hbGwgZmxvd2VycyBpbiB0aGUgUGV0YWwgd2lkdGggYW5kIGxlbmd0aCBjb2x1bW4uCgoKIyBXZSBjYW4gcmV2aWV3IHRoZSBkZW5zaXR5IGRpc3RyaWJ1dGlvbiBvZiBlYWNoIGF0dHJpYnV0ZSBicm9rZW4gZG93biBieSBjbGFzcyB2YWx1ZS4gCiMgTGlrZSB0aGUgc2NhdHRlcnBsb3QgbWF0cml4LCB0aGUgZGVuc2l0eSBwbG90IGJ5IGNsYXNzIGNhbiBoZWxwIHNlZSB0aGUgc2VwYXJhdGlvbiBvZiBjbGFzc2VzLiAKIyBJdCBjYW4gYWxzbyBoZWxwIHRvIHVuZGVyc3RhbmQgdGhlIG92ZXJsYXAgaW4gY2xhc3MgdmFsdWVzIGZvciBhbiBhdHRyaWJ1dGUuCgpEaGlzdFBsIDwtICAgIGdncGxvdChpcmlzLCBhZXMoeD1QZXRhbExlbmd0aENtLCBjb2xvdXI9U3BlY2llcywgZmlsbD1TcGVjaWVzKSkgKwogIGdlb21fZGVuc2l0eShhbHBoYT0uMykgKwogIGdlb21fdmxpbmUoYWVzKHhpbnRlcmNlcHQ9bWVhbihQZXRhbExlbmd0aENtKSwgIGNvbG91cj1TcGVjaWVzKSxsaW5ldHlwZT0iZGFzaGVkIixjb2xvcj0iZ3JleSIsIHNpemU9MSkrCiAgeGxhYigiUGV0YWwgTGVuZ3RoIChjbSkiKSArICAKICB5bGFiKCJEZW5zaXR5IikrCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikKCkRoaXN0UHcgPC0gZ2dwbG90KGlyaXMsIGFlcyh4PVBldGFsV2lk
dGhDbSwgY29sb3VyPVNwZWNpZXMsIGZpbGw9U3BlY2llcykpICsKICBnZW9tX2RlbnNpdHkoYWxwaGE9LjMpICsKICBnZW9tX3ZsaW5lKGFlcyh4aW50ZXJjZXB0PW1lYW4oUGV0YWxXaWR0aENtKSwgIGNvbG91cj1TcGVjaWVzKSxsaW5ldHlwZT0iZGFzaGVkIixjb2xvcj0iZ3JleSIsIHNpemU9MSkrCiAgeGxhYigiUGV0YWwgV2lkdGggKGNtKSIpICsgIAogIHlsYWIoIkRlbnNpdHkiKQogIAoKCkRoaXN0U3cgPC0gZ2dwbG90KGlyaXMsIGFlcyh4PVNlcGFsV2lkdGhDbSwgY29sb3VyPVNwZWNpZXMsIGZpbGw9U3BlY2llcykpICsKICBnZW9tX2RlbnNpdHkoYWxwaGE9LjMpICsKICBnZW9tX3ZsaW5lKGFlcyh4aW50ZXJjZXB0PW1lYW4oU2VwYWxXaWR0aENtKSwgIGNvbG91cj1TcGVjaWVzKSwgbGluZXR5cGU9ImRhc2hlZCIsY29sb3I9ImdyZXkiLCBzaXplPTEpKwogIHhsYWIoIlNlcGFsIFdpZHRoIChjbSkiKSArICAKICB5bGFiKCJEZW5zaXR5IikrCiAgdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikKCgpEaGlzdFNsIDwtIGdncGxvdChpcmlzLCBhZXMoeD1TZXBhbExlbmd0aENtLCBjb2xvdXI9U3BlY2llcywgZmlsbD1TcGVjaWVzKSkgKwogIGdlb21fZGVuc2l0eShhbHBoYT0uMykgKwogIGdlb21fdmxpbmUoYWVzKHhpbnRlcmNlcHQ9bWVhbihTZXBhbExlbmd0aENtKSwgIGNvbG91cj1TcGVjaWVzKSxsaW5ldHlwZT0iZGFzaGVkIiwgY29sb3I9ImdyZXkiLCBzaXplPTEpKwogIHhsYWIoIlNlcGFsIExlbmd0aCAoY20pIikgKyAgCiAgeWxhYigiRGVuc2l0eSIpKwogIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIpCgoKIyBQbG90IGFsbCBkZW5zaXR5IHZpc3VhbGl6YXRpb25zCmdyaWQuYXJyYW5nZShEaGlzdFNsICsgZ2d0aXRsZSgiIiksCiAgICAgICAgICAgICBEaGlzdFN3ICArIGdndGl0bGUoIiIpLAogICAgICAgICAgICAgRGhpc3RQbCArIGdndGl0bGUoIiIpLAogICAgICAgICAgICAgRGhpc3RQdyAgKyBnZ3RpdGxlKCIiKSwKICAgICAgICAgICAgIG5yb3cgPSAyLAogICAgICAgICAgICAgdG9wID0gdGV4dEdyb2IoIklyaXMgRGVuc2l0eSBQbG90IiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBncD1ncGFyKGZvbnRzaXplPTE1KSkKKQpgYGAKCgpgYGB7cn0KIyBMZXQncyBwbG90IGFsbCB0aGUgdmFyaWFibGVzIGluIGEgc2luZ2xlIHZpc3VhbGl6YXRpb24gdGhhdCB3aWxsIGNvbnRhaW4gYWxsIHRoZSBib3hwbG90cwoKCkJwU2wgPC0gZ2dwbG90KGlyaXMsIGFlcyhTcGVjaWVzLCBTZXBhbExlbmd0aENtLCBmaWxsPVNwZWNpZXMpKSArIAogICAgICAgIGdlb21fYm94cGxvdCgpKwogICAgICAgIHNjYWxlX3lfY29udGludW91cygiU2VwYWwgTGVuZ3RoIChjbSkiLCBicmVha3M9IHNlcSgwLDMwLCBieT0uNSkpKwogICAgICAgIHRoZW1lKGxlZ2VuZC5wb3NpdGlvbj0ibm9uZSIpCgoKCkJwU3cgPC0gIGdncGxvdChpcmlzLCBhZXMoU3BlY2llcywgU2VwYWxXaWR0aENtLCBmaWxsPVNwZWNpZXMp
KSArIAogICAgICAgICAgZ2VvbV9ib3hwbG90KCkrCiAgICAgICAgICBzY2FsZV95X2NvbnRpbnVvdXMoIlNlcGFsIFdpZHRoIChjbSkiLCBicmVha3M9IHNlcSgwLDMwLCBieT0uNSkpKwogICAgICAgICAgdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikKCgoKQnBQbCA8LSBnZ3Bsb3QoaXJpcywgYWVzKFNwZWNpZXMsIFBldGFsTGVuZ3RoQ20sIGZpbGw9U3BlY2llcykpICsgCiAgICAgICAgZ2VvbV9ib3hwbG90KCkrCiAgICAgICAgc2NhbGVfeV9jb250aW51b3VzKCJQZXRhbCBMZW5ndGggKGNtKSIsIGJyZWFrcz0gc2VxKDAsMzAsIGJ5PS41KSkrCiAgICAgICAgdGhlbWUobGVnZW5kLnBvc2l0aW9uPSJub25lIikKICAgICAgICAKCgpCcFB3IDwtICBnZ3Bsb3QoaXJpcywgYWVzKFNwZWNpZXMsIFBldGFsV2lkdGhDbSwgZmlsbD1TcGVjaWVzKSkgKyAKICAgICAgICBnZW9tX2JveHBsb3QoKSsKICAgICAgICBzY2FsZV95X2NvbnRpbnVvdXMoIlBldGFsIFdpZHRoIChjbSkiLCBicmVha3M9IHNlcSgwLDMwLCBieT0uNSkpKwogICAgICAgIGxhYnModGl0bGUgPSAiSXJpcyBCb3ggUGxvdCIsIHggPSAiU3BlY2llcyIpCgoKCiMgUGxvdCBhbGwgdmlzdWFsaXphdGlvbnMKZ3JpZC5hcnJhbmdlKEJwU2wgICsgZ2d0aXRsZSgiIiksCiAgICAgICAgICAgICBCcFN3ICArIGdndGl0bGUoIiIpLAogICAgICAgICAgICAgQnBQbCArIGdndGl0bGUoIiIpLAogICAgICAgICAgICAgQnBQdyArIGdndGl0bGUoIiIpLAogICAgICAgICAgICAgbnJvdyA9IDIsCiAgICAgICAgICAgICB0b3AgPSB0ZXh0R3JvYigiU2VwYWwgYW5kIFBldGFsIEJveCBQbG90IiwgCiAgICAgICAgICAgICAgICAgICAgICAgICAgICBncD1ncGFyKGZvbnRzaXplPTE1KSkKKQpgYGAKCgoKYGBge3J9CiMgU291cmNlOiBodHRwczovL3d3dy5rYWdnbGUuY29tL2FudG9uaW9sb3Blei9pcmlzLWRhdGEtdmlzdWFsaXphdGlvbi13aXRoLXIvZGF0YQpgYGAK