IRIS DATA-SET

The iris dataset is a built-in dataset in R that contains measurements of 4 different attributes (in centimeters) for 50 flowers from each of 3 different species (150 flowers in total).

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

# Print per-column summary statistics for the built-in `cars` data set.
summary(object = cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots. The chunks below first summarise the iris data and then plot it:

# Keep a working copy of the built-in iris data and print summary
# statistics for every column of that copy.

data <- iris
summary(data)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 
# Compact overview of the working copy: dimensions, column types, first values.
utils::str(object = data)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.3
# Step 1: an empty canvas, with no aesthetics and no layers, only a title.
ggplot(iris) +
  ggtitle("Iris Data Plot")

# Step 2: the petal axes and a colour aesthetic are mapped, but there is
# still no geom layer, so no points are drawn yet.
ggplot(iris, aes(Petal.Length, Petal.Width, colour = Sepal.Length)) +
  ggtitle("Iris Data Plot")

# Petal width against petal length; point colour follows sepal length.
ggplot(iris, aes(Petal.Length, Petal.Width, colour = Sepal.Length)) +
  geom_point() +
  ggtitle("Petal Width vs Petal Length") +
  xlab("Petal Length") +
  ylab("Petal Width")

# Petal width against petal length; point size follows sepal length.
ggplot(iris, aes(Petal.Length, Petal.Width, size = Sepal.Length)) +
  geom_point() +
  ggtitle(" Petal Width vs  Petal Length") +
  xlab(" Petal Length") +
  ylab(" Petal Width")

# Petal width against petal length with two further variables encoded.
# Shape is mapped to Species (3 levels): ggplot2's default shape palette
# supports at most 6 discrete values, so mapping it to factor(Sepal.Width)
# (23 levels) dropped 126 of the 150 points with a warning.
# Sepal length stays on the colour channel, as a continuous scale rather
# than one discrete colour per distinct measurement.
ggplot(
  data = iris,
  aes(x = Petal.Length, y = Petal.Width, colour = Sepal.Length, shape = Species)
) +
  geom_point() +
  labs(title = " Petal Width vs  Petal Length", x = " Petal Length", y = " Petal Width")

# Make sure Species is stored as a factor, then show every petal-length
# measurement grouped by species.
iris[["Species"]] <- factor(iris[["Species"]])
ggplot(iris, aes(factor(Species), Petal.Length)) +
  geom_point()

# Histogram of petal length.
# Petal.Length only spans about 1 to 7 cm, so a bin width of 6 put almost
# every flower into a single bar; 0.5 cm bins show the actual distribution.
ggplot(data = iris, aes(x = Petal.Length)) +
  geom_histogram(binwidth = 0.5, color = "black", fill = "purple") +
  labs(title = "Histogram of Petal.Length", x = " Petal.Length", y = "Count")

# Number of flowers per species, one filled bar per species.
# geom_bar() counts rows by default, so no explicit stat is needed.
ggplot(iris, aes(as.factor(Species), fill = Species)) +
  geom_bar()

# Base-graphics bar chart of how many flowers belong to each species.
Species.type <- table(iris$Species)
# Petal length by species contingency table. It is not needed for this
# chart; it is still created in case other code refers to it.
PetalLength.Species <- table(iris$Petal.Length, iris$Species)
barplot(
  Species.type,
  main = "Species Frequency",
  xlab = "Species",
  ylab = "Frequency of Species",
  names.arg = names(Species.type),
  col = c("skyblue", "lightgreen", "orange"),
  # The legend must name the three bars. The row names of
  # PetalLength.Species are petal-length values, not species, so they
  # produced a legend unrelated to the bars.
  legend.text = names(Species.type)
)

# Pie chart of the species split, each slice labelled "<species> <share>%".
Length <- table(iris$Species)
# Share of each species as a whole-number percentage.
share <- round(100 * Length / sum(Length))
data.labels <- paste0(names(Length), " ", share, "%")
pie(
  Length,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(data.labels)),
  main = "Frequency of Species"
)

# Box plot of petal width for each species.
# The title and axis labels describe what is actually drawn (Petal.Width on
# y, Species on x); they previously mentioned sepal length and "Length".
bx <- ggplot(data = iris, aes(x = factor(Species), y = Petal.Width)) +
  geom_boxplot(fill = "purple") +
  ggtitle("Distribution of Petal Width by Species") +
  ylab("Petal Width") +
  xlab("Species")
bx

# Simple linear regression of petal length on sepal length.
Model <- lm(Petal.Length ~ Sepal.Length, data = iris)
# Scatter of the raw data, coloured by species. The Species column is used
# as it is: overwriting it with a factor of Sepal.Length would destroy the
# species labels for this plot and for any code that runs afterwards.
plot(iris$Sepal.Length, iris$Petal.Length, col = iris$Species)
# Overlay the fitted regression line, dashed.
abline(Model, lty = 2)

# Jittered petal-width points, coloured by Species, with one facet column
# per Species level.
ggplot(iris, aes(as.factor(Species), Petal.Width, colour = Species)) +
  geom_jitter() +
  facet_grid(. ~ Species)

Note that adding the `echo = FALSE` parameter to a code chunk prevents printing of the R code that generated the plot.