IRIS DATASET

The iris dataset is a built-in dataset in R that contains measurements of 4 attributes (in cm) for 150 flowers: 50 from each of 3 different species.

# Take a working copy of the built-in iris data frame and print a
# per-column summary (quartiles for numeric columns, counts for the factor).
data <- iris
summary(object = data)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Compact structure listing: one line per column with its type and first values.
str(object = data)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.1
# Step 1: a ggplot object with data but no aesthetics or layers draws an
# empty, titled canvas.
ggplot(iris) +
  ggtitle("IRIS data plot")

# Step 2: adding x/y aesthetics draws the axes, but still no points because
# no geom layer has been added yet.
ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
  ggtitle("Iris Data Plot")

# Step 3: add a point layer to get a scatter plot of sepal width against
# sepal length, with explicit title and axis labels.
ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
  geom_point() +
  ggtitle(" Sepal.Length vs Sepal.Width") +
  xlab("Sepal.Length") +
  ylab("Sepal.Width")

# Same scatter plot, with point size encoding petal length.
ggplot(
  iris,
  aes(Sepal.Length, Sepal.Width, size = Petal.Length)
) +
  geom_point() +
  ggtitle("Sepal Width vs Sepal Length") +
  xlab("Sepal Length") +
  ylab("Sepal Width")

# Scatter plot of sepal width against sepal length that also encodes the two
# petal measurements.
#
# Petal.Length and Petal.Width are continuous, so they are mapped to
# continuous scales (colour gradient and point size). Wrapping them in
# factor() and mapping Petal.Width to `shape` creates 22 discrete shape
# levels; ggplot2's shape palette only supplies 6, so every point beyond
# the sixth level is dropped with a "Removed ... rows" warning.
ggplot(
  data = iris,
  aes(
    x = Sepal.Length,
    y = Sepal.Width,
    colour = Petal.Length,
    size = Petal.Width
  )
) +
  geom_point() +
  labs(
    title = "Sepal Width vs Sepal Length",
    x = "Sepal Length",
    y = "Sepal Width"
  )

# Bar chart of the number of observations per species.
Species.type <- table(iris$Species)
color <- c("pink", "black", "grey")
# Sepal.Length-by-Species contingency table. The plot below does not need it;
# it is still computed in case code outside this block reads `SLS`.
SLS <- table(iris$Sepal.Length, iris$Species)
barplot(
  Species.type,
  main = "Species frequency",
  xlab = "Species",
  ylab = "Frequency of Species",
  names.arg = names(Species.type),
  col = color,
  # The legend must name what the bars/colours represent (the species).
  # rownames(SLS) are sepal-length values, which would mislabel the legend.
  # `legend.text` is spelled out instead of relying on partial matching.
  legend.text = names(Species.type)
)

# Make sure Species is stored as a factor, then plot every flower's sepal
# length as a point within its species column.
iris[["Species"]] <- factor(iris[["Species"]])
ggplot(
  data = iris,
  mapping = aes(x = factor(Species), y = Sepal.Length)
) +
  geom_point()

# Histogram of sepal length.
#
# Sepal.Length only spans 4.3-7.9 cm, so the bin width has to be a fraction
# of a centimetre; a bin width of 5 is wider than the whole data range and
# collapses the distribution into one or two bars.
ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_histogram(binwidth = 0.25, color = "black", fill = "lightblue") +
  labs(title = "Histogram of Sepal.Length", x = "Sepal.Length", y = "Count")

# Bar chart of observation counts per species, one fill colour per species.
# geom_bar() counts rows per x value by default.
ggplot(
  iris,
  aes(x = as.factor(Species), fill = Species)
) +
  geom_bar()

# Bar chart of the number of observations per species (black/yellow/blue).
Species.type <- table(iris$Species)
# Sepal.Length-by-Species contingency table. The plot below does not need it;
# it is still computed in case code outside this block reads it.
SepalLength.Species <- table(iris$Sepal.Length, iris$Species)
barplot(
  Species.type,
  main = "Species Frequency",
  xlab = "Species",
  ylab = "Frequency of Species",
  names.arg = names(Species.type),
  col = c("black", "yellow", "blue"),
  # Legend entries must be the species the coloured bars stand for.
  # rownames(SepalLength.Species) are sepal-length values, which would
  # mislabel the legend. `legend.text` is the full argument name.
  legend.text = names(Species.type)
)

# Pie chart of each species' share of the observations.
# `Length` holds the per-species observation counts.
Length <- table(iris$Species)
# Whole-number percentage of the total for each species.
share <- round(100 * Length / sum(Length))
# Slice labels of the form "<species> <pct>%".
data.labels <- paste0(names(Length), " ", share, "%")
pie(
  Length,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "Frequency of Species"
)

# Box plot of sepal width for each species.
#
# The plot shows Sepal.Width on the y-axis and Species on the x-axis, so the
# title and axis labels name those variables (measurements are in cm).
bx <- ggplot(data = iris, aes(x = Species, y = Sepal.Width)) +
  geom_boxplot(fill = "pink") +
  ggtitle("Distribution of Sepal Width by Species") +
  ylab("Sepal Width (cm)") +
  xlab("Species")
bx

# Simple linear regression of sepal length on petal length, drawn over a
# scatter plot whose points are coloured by species.
Model <- lm(Sepal.Length ~ Petal.Length, data = iris)

# Colour by the existing Species factor. Species must NOT be overwritten with
# as.factor(Petal.Length): that replaces the three species with one level per
# distinct petal length and corrupts every later use of iris$Species.
species_levels <- levels(iris$Species)
plot(
  iris$Petal.Length,
  iris$Sepal.Length,
  col = iris$Species,
  xlab = "Petal.Length",
  ylab = "Sepal.Length"
)
# Dashed line: fitted regression.
abline(Model, lty = 2)
# A factor passed to `col` is used via its integer codes, so level i is drawn
# in palette colour i; the legend uses the same indices.
legend(
  "topleft",
  legend = species_levels,
  col = seq_along(species_levels),
  pch = 1
)

# Jittered sepal-width points, coloured by Species and split into one
# facet column per Species level.
ggplot(
  data = iris,
  mapping = aes(
    x = as.factor(Species),
    y = Sepal.Width,
    colour = Species
  )
) +
  geom_jitter() +
  facet_grid(. ~ Species)