Import packages
library(dplyr)
library(ggplot2)
library(corrplot)
part a
# Load the built-in iris data set into the workspace.
data("iris")
# NOTE: attach() is generally discouraged, but it is kept here because later
# sections of this script refer to columns such as Petal.Length and Species
# by bare name and would fail without it.
attach(iris)
# First ten rows.
head(iris, 10)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
# Last ten rows.
tail(iris, 10)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
# Column types and a preview of each column.
str(iris)
## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
# Min / quartiles / mean / max of the four numeric columns (Species excluded).
summary(iris[1:4])
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
# Standard deviation of each numeric column. vapply() pins the result to
# exactly one double per column, whereas sapply() silently changes its return
# type when the input is empty or ragged.
vapply(iris[1:4], sd, numeric(1))
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 0.8280661 0.4358663 1.7652982 0.7622377
part b
# Side-by-side box plots of the four numeric measurements, one rainbow colour
# per column.
boxplot(x = iris[1:4], col = rainbow(n = 4))
part c
# Median petal length per species. Columns are taken from `iris` explicitly so
# the result does not depend on what happens to be attached to the search path.
tapply(X = iris$Petal.Length, INDEX = iris$Species, FUN = median)
## setosa versicolor virginica
## 1.50 4.35 5.55
# Two-column frame for plotting: species label (as character) and petal
# length, one row per flower. Built straight from `iris` so the group sizes
# come from the data instead of being hard-coded.
d <- data.frame(
  type = as.character(iris$Species),
  value = iris$Petal.Length
)
# Overlaid petal-length histograms, one fill colour per species.
# position = "identity" draws the groups on top of each other instead of
# stacking them; alpha < 1 keeps overlapping bars visible. The empty fill label
# removes the legend title.
ggplot(d, aes(x = value, fill = type)) +
  geom_histogram(color = "#e9ecef", alpha = 0.6, position = "identity") +
  labs(fill = "")
part d
# Pairwise correlations between the four numeric measurements. Stored under a
# descriptive name: assigning the matrix to `c` would mask base::c() wherever
# `c` is used as a value (e.g. do.call(c, ...) or Reduce(c, ...)).
cor_matrix <- cor(iris[1:4])
# Mixed-style correlation plot; lower.col = "black" sets the colour used for
# the lower triangle.
corrplot.mixed(corr = cor_matrix, lower.col = "black")
part e
# Correlation between sepal length and petal length. Columns are referenced
# through `iris` explicitly rather than relying on attach(), so a stray global
# with the same name cannot change the result.
cor(iris$Sepal.Length, iris$Petal.Length)
## [1] 0.8717538
# Scatter plot of the same two measurements, coloured by species.
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Petal.Length, color = Species)
) +
  geom_point()
part f
# Labels for the four bands, from the lowest values to the highest.
band_labels <- c("low", "a little low", "a little high", "high")

# Map each value of `x` to one of `band_labels` using three ascending cut
# points.
#   x    : numeric vector to classify.
#   cuts : numeric vector of length 3 (ascending upper bounds of the first
#          three bands).
# Returns a character vector the same length as `x`; NA stays NA.
# Intervals are closed on the right, i.e. x <= cuts[1] is "low",
# cuts[1] < x <= cuts[2] is "a little low", and so on; anything above cuts[3]
# is "high". include.lowest = TRUE keeps -Inf in the "low" band.
label_band <- function(x, cuts) {
  stopifnot(is.numeric(x), is.numeric(cuts), length(cuts) == 3L)
  banded <- cut(
    x,
    breaks = c(-Inf, cuts, Inf),
    labels = band_labels,
    right = TRUE,
    include.lowest = TRUE
  )
  as.character(banded)
}

# Add one character band column per measurement. The cut points are the
# 1st quartile, median and 3rd quartile printed by summary(iris[1:4]); they are
# kept as literals so the banding matches the recorded output exactly.
# The result keeps the name `data` because later code refers to it.
data <- iris
data$Sepal.Length2 <- label_band(iris$Sepal.Length, c(5.1, 5.8, 6.4))
data$Sepal.Width2 <- label_band(iris$Sepal.Width, c(2.8, 3, 3.3))
data$Petal.Length2 <- label_band(iris$Petal.Length, c(1.6, 4.35, 5.1))
data$Petal.Width2 <- label_band(iris$Petal.Width, c(0.3, 1.3, 1.8))
# NOTE: attach() is generally discouraged, but it is kept because the
# cross-tabulations that follow refer to the new *2 columns by bare name.
attach(data)
part g
# Cross-tabulation of the sepal-length band against the petal-length band.
# `data = data` tells xtabs() exactly which frame the formula variables come
# from instead of leaving them to be resolved through the search path.
a1 <- xtabs(~ Sepal.Length2 + Petal.Length2, data = data)
a1
## Petal.Length2
## Sepal.Length2 a little high a little low high low
## a little high 20 5 10 0
## a little low 9 19 0 11
## high 11 0 24 0
## low 1 7 0 33
# Counts per sepal-length band.
a2 <- xtabs(~ Sepal.Length2, data = data)
a2
## Sepal.Length2
## a little high a little low high low
## 35 39 35 41
# Counts per petal-length band.
a3 <- xtabs(~ Petal.Length2, data = data)
a3
## Petal.Length2
## a little high a little low high low
## 41 31 34 44
part h
# Shared look for the three bar charts: one colour per band and a common
# y-axis range so the charts are directly comparable.
band_colours <- c("sky blue", "pink", "gray", "green")
count_range <- c(0, 50)

# Grouped bars for the two-way table: with beside = TRUE each column of a1
# (a Petal.Length2 band) forms a group and its rows (Sepal.Length2 bands) are
# the bars within that group.
barplot(a1, beside = TRUE, col = band_colours, ylim = count_range)

# Frequency of each sepal-length band.
barplot(
  a2,
  beside = TRUE,
  col = band_colours,
  xlab = "sepal length",
  ylab = "frequency",
  ylim = count_range
)

# Frequency of each petal-length band.
barplot(
  a3,
  beside = TRUE,
  col = band_colours,
  xlab = "petal length",
  ylab = "frequency",
  ylim = count_range
)