The iris dataset is a built-in dataset in R that contains measurements of 4 different attributes (in cm) for 50 flowers from each of 3 different species (150 flowers in total).
# Take a working copy of the built-in iris data frame.
data <- iris
# Per-column summary: quartiles and mean for the numeric columns,
# level counts for the Species factor.
summary(object = data)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Compact structural overview: dimensions, column types and leading values.
utils::str(data)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.2.1
# Step 1: a bare canvas bound to iris. No aesthetics or layers yet, so only
# the title is drawn.
ggplot(iris) +
  ggtitle("IRIS data plot")
# Step 2: declare the x/y aesthetics. The axes and scales appear, but there is
# still no geom layer, so no data points are drawn.
ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
  ggtitle("Iris Data Plot")
# Step 3: add a point layer to get a scatter plot of the two sepal
# measurements, with explicit title and axis labels.
ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
  geom_point() +
  ggtitle(" Sepal.Length vs Sepal.Width") +
  xlab("Sepal.Length") +
  ylab("Sepal.Width")
# Same sepal scatter plot, with point size encoding a third variable
# (Petal.Length).
ggplot(iris, aes(Sepal.Length, Sepal.Width)) +
  geom_point(aes(size = Petal.Length)) +
  ggtitle("Sepal Width vs Sepal Length") +
  xlab("Sepal Length") +
  ylab("Sepal Width")
# Sepal scatter plot with two extra encodings:
#   colour -> Petal.Length, kept numeric so it gets a continuous gradient;
#   shape  -> Species, a 3-level factor.
# Shape is reserved for Species because ggplot2's default shape palette only
# provides 6 symbols. The previous mapping, shape = factor(Petal.Width), had
# 22 levels, so ggplot2 warned and dropped 100 of the 150 points from the plot.
ggplot(
  data = iris,
  aes(x = Sepal.Length, y = Sepal.Width, colour = Petal.Length, shape = Species)
) +
  geom_point() +
  labs(title = "Sepal Width vs Sepal Length", x = "Sepal Length", y = "Sepal Width")
# Base-graphics bar chart of the number of flowers per species.
Species.type <- table(iris$Species)
color <- c("pink", "black", "grey")
# Sepal.Length x Species contingency table. It is not needed for the chart
# below; kept only in case later code refers to it.
SLS <- table(iris$Sepal.Length, iris$Species)
# The legend must name the bars, i.e. the species. rownames(SLS) are the
# distinct Sepal.Length values, which produced a long, meaningless legend.
# `legend.text` is spelled out in full rather than relying on partial matching
# of `legend`.
barplot(
  Species.type,
  main = "Species frequency",
  xlab = "Species",
  ylab = "Frequency of Species",
  names.arg = names(Species.type),
  col = color,
  legend.text = names(Species.type)
)
# Re-apply factor() to the Species column (this creates a modified copy of
# iris in the workspace).
iris[["Species"]] <- factor(iris[["Species"]])
# One column of points per species, showing the spread of Sepal.Length.
ggplot(data = iris, mapping = aes(x = factor(Species), y = Sepal.Length)) +
  geom_point()
# Histogram of Sepal.Length.
# binwidth is in data units (cm). Sepal.Length only spans 4.3-7.9, so the
# previous binwidth of 5 was wider than the whole range and collapsed the
# histogram into one or two bars. 0.25 cm gives roughly 15 bins.
ggplot(data = iris, aes(x = Sepal.Length)) +
  geom_histogram(binwidth = 0.25, color = "black", fill = "lightblue") +
  labs(title = "Histogram of Sepal.Length", x = "Sepal.Length", y = "Count")
# Bar chart of row counts per species, one fill colour per species.
# geom_bar() counts rows by default, so no stat needs to be given.
ggplot(iris, aes(x = as.factor(Species), fill = Species)) +
  geom_bar()
# Base-graphics bar chart of the number of flowers per species (second colour
# scheme).
Species.type <- table(iris$Species)
# Sepal.Length x Species contingency table. It is not needed for the chart
# below; kept only in case later code refers to it.
SepalLength.Species <- table(iris$Sepal.Length, iris$Species)
# The legend labels the bars with the species names. The row names of
# SepalLength.Species are Sepal.Length values, not species, so they are the
# wrong labels. `legend.text` is spelled out in full rather than relying on
# partial matching of `legend`.
barplot(
  Species.type,
  main = "Species Frequency",
  xlab = "Species",
  ylab = "Frequency of Species",
  names.arg = names(Species.type),
  col = c("black", "yellow", "blue"),
  legend.text = names(Species.type)
)
# Pie chart of each species' share of the rows, labelled "<species> <pct>%".
Length <- table(iris$Species)
# Whole-number percentage of all rows that each species accounts for.
share <- round(prop.table(Length) * 100)
# Build each label in one step: name, a space, the percentage, a percent sign.
data.labels <- paste0(names(Length), " ", share, "%")
pie(
  x = Length,
  labels = data.labels,
  clockwise = TRUE,
  col = heat.colors(length(Length)),
  main = "Frequency of Species"
)
# Box plot of sepal width for each species.
# The title and axis labels describe what is actually plotted (Sepal.Width on
# y, Species on x). Previously the title said "Sepal Length" and the species
# axis was labelled "Length".
bx <- ggplot(data = iris, aes(x = factor(Species), y = Sepal.Width)) +
  geom_boxplot(fill = "pink") +
  ggtitle("Distribution of Sepal Width by Species") +
  ylab("Sepal Width") +
  xlab("Species")
# Assigning a ggplot to a name does not draw it; evaluate the object to render.
bx
# Simple linear regression of sepal length on petal length.
Model <- lm(Sepal.Length ~ Petal.Length, data = iris)
# Scatter of the raw observations, coloured by species.
# iris$Species is used as-is. It was previously overwritten here with
# as.factor(iris$Petal.Length), which replaced the species labels with one
# level per distinct petal length for this plot and for all code after it.
plot(
  iris$Petal.Length,
  iris$Sepal.Length,
  col = iris$Species,
  xlab = "Petal.Length",
  ylab = "Sepal.Length"
)
# Overlay the fitted regression line, dashed.
abline(Model, lty = 2)
# Jittered points of Sepal.Width, coloured by Species and split into one
# panel (column) per level of Species.
ggplot(
  data = iris,
  mapping = aes(x = as.factor(Species), y = Sepal.Width, colour = Species)
) +
  geom_jitter() +
  facet_grid(cols = vars(Species))