# Introduction ----
# Make the built-in iris data set available in the workspace.
data(iris)
# Preview the leading rows; six is the default that head() uses.
head(iris, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# Question 1
# Compact overview of the data frame: column names, types and first values.
str(object = iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Per-column summary: quartiles and mean for numeric columns, counts for
# the Species factor.
summary(object = iris)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
# Question 2
# Dimensions of the data set as c(rows, columns).
c(nrow(iris), ncol(iris))
## [1] 150 5
# Number of observations: the first element of the dimension vector.
dim(iris)[1L]
## [1] 150
# Number of variables: the second element of the dimension vector.
dim(iris)[2L]
## [1] 5
# Question 3
# Distinct species present in the data (returned as a factor).
unique(iris[["Species"]])
## [1] setosa versicolor virginica
## Levels: setosa versicolor virginica
# Number of observations recorded for each species.
table(iris[["Species"]])
##
## setosa versicolor virginica
## 50 50 50
# Question 4
# Mean, median and standard deviation of each of the four measurement
# columns, returned as a 3 x 4 numeric matrix (statistics in rows,
# measurements in columns).
# vapply() is used rather than sapply() so the result shape is guaranteed:
# every column must produce exactly three numeric values, otherwise an error
# is raised instead of the result silently degrading to a list.
vapply(
  iris[, 1:4],
  function(x) c(mean = mean(x), median = median(x), sd = sd(x)),
  FUN.VALUE = c(mean = 0, median = 0, sd = 0)
)
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## mean 5.8433333 3.0573333 3.758000 1.1993333
## median 5.8000000 3.0000000 4.350000 1.3000000
## sd 0.8280661 0.4358663 1.765298 0.7622377
# Question 5
# Pearson correlation matrix of the four numeric measurements
# (columns 1 to 4; the Species factor in column 5 is left out).
cor(x = iris[1:4], method = "pearson")
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Sepal.Length 1.0000000 -0.1175698 0.8717538 0.8179411
## Sepal.Width -0.1175698 1.0000000 -0.4284401 -0.3661259
## Petal.Length 0.8717538 -0.4284401 1.0000000 0.9628654
## Petal.Width 0.8179411 -0.3661259 0.9628654 1.0000000
# Question 6
# Scatterplot matrix of the four measurements; points are coloured by
# species (the factor's integer codes index the current palette).
pairs(
  iris[1:4],
  main = "Pairwise Scatterplot of Iris Features",
  col = iris$Species
)
# Question 7
# Distribution of sepal length within each species, one box per species.
boxplot(
  Sepal.Length ~ Species,
  data = iris,
  col = c("lightblue", "lightgreen", "lightpink"),
  main = "Sepal Length by Species"
)
# Question 8
# Total number of missing cells across the whole data frame.
length(which(is.na(iris)))
## [1] 0
# Question 9
# Mean of every measurement within each species. The left-hand side lists
# the measurement columns explicitly instead of using the `.` shorthand.
aggregate(
  cbind(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) ~ Species,
  data = iris,
  FUN = mean
)
## Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 setosa 5.006 3.428 1.462 0.246
## 2 versicolor 5.936 2.770 4.260 1.326
## 3 virginica 6.588 2.974 5.552 2.026
# Standard deviation of every measurement within each species.
aggregate(
  cbind(Sepal.Length, Sepal.Width, Petal.Length, Petal.Width) ~ Species,
  data = iris,
  FUN = sd
)
## Species Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1 setosa 0.3524897 0.3790644 0.1736640 0.1053856
## 2 versicolor 0.5161711 0.3137983 0.4699110 0.1977527
## 3 virginica 0.6358796 0.3224966 0.5518947 0.2746501
# Question 10
# Petal width against petal length, one filled point per flower, coloured
# by species via the factor's integer codes.
plot(
  x = iris$Petal.Length,
  y = iris$Petal.Width,
  main = "Petal Length vs Petal Width by Species",
  xlab = "Petal Length",
  ylab = "Petal Width",
  pch = 19,
  col = iris$Species
)
# Legend colours are the level indices, matching the codes used for `col`.
legend(
  "topleft",
  legend = levels(iris$Species),
  pch = 19,
  col = seq_len(nlevels(iris$Species))
)