Use the iris dataset to answer the following: 1) Create two subsets (df1 and df2) of iris so that df1 contains the flowers with above-average Sepal.Length and df2 contains the flowers that are not in df1.
# Split iris at the mean Sepal.Length.
#   df1: flowers whose Sepal.Length is strictly above the mean.
#   df2: every flower that is not in df1.
# The mean is computed once and df2 is selected with the negated mask, so the
# two subsets are complementary by construction (iris contains no NA values).
above_avg <- iris$Sepal.Length > mean(iris$Sepal.Length)
df1 <- iris[above_avg, ]
df2 <- iris[!above_avg, ]
head(df1)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
head(df2)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
2) For each Species, create a plot to show the relationship between Sepal.Length and Petal.Length. What is your conclusion?
# Scatter plot of Petal.Length against Sepal.Length with one plotting symbol
# per species. as.numeric() on the Species factor yields its level codes
# 1, 2, 3, which are passed to plot() as pch values:
#   pch = 1 setosa (circle); pch = 2 versicolor (triangle);
#   pch = 3 virginica (plus)
plot(
  iris$Sepal.Length, iris$Petal.Length,
  pch = as.numeric(iris$Species),
  xlab = "Sepal.Length", ylab = "Petal.Length"
)
# Put the symbol key on the figure itself so the plot can be read without
# this source file. The pch values mirror the factor level codes used above.
legend(
  "topleft",
  legend = levels(iris$Species),
  pch = seq_len(nlevels(iris$Species))
)
Conclusion: Sepal.Length appears to be positively correlated with Petal.Length.
3) For each Species, create a plot to show the relationship between Sepal.Width and Petal.Width. What is your conclusion?
# Scatter plot of Petal.Width against Sepal.Width with one plotting symbol
# per species. as.numeric() on the Species factor yields its level codes
# 1, 2, 3, which are passed to plot() as pch values (1 = circle,
# 2 = triangle, 3 = plus).
plot(
  iris$Sepal.Width, iris$Petal.Width,
  pch = as.numeric(iris$Species),
  xlab = "Sepal.Width", ylab = "Petal.Width"
)
# Put the symbol key on the figure itself so each species can be identified.
# The pch values mirror the factor level codes used above.
legend(
  "right",
  legend = levels(iris$Species),
  pch = seq_len(nlevels(iris$Species))
)
Conclusion: Within each species, Sepal.Width appears to be positively correlated with Petal.Width.
4) Using the iris dataset, explain the use of these functions: cbind, rbind, and merge.
# cbind: combine data frames side by side (column-wise).
# iris is cut into three column groups that all keep the same rows, and
# cbind() puts them back together in the original column order.
df3 <- iris[c("Sepal.Length", "Sepal.Width")]
df4 <- iris[c("Petal.Length", "Petal.Width")]
df5 <- iris["Species"]
cbind_df <- cbind(df3, df4, df5)
head(cbind_df)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
# rbind: stack data frames on top of each other (row-wise).
# df1 and df2 have the same columns, so the rows of df2 are appended below
# the rows of df1; the first rows shown are therefore those of df1.
rbind_df <- rbind(df1, df2)
head(rbind_df, n = 6L)
## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
# merge: join two data frames on a shared key column.
# iris has no key column, so a copy is given an explicit row id first.
iris1 <- iris
# seq_len() gives integer ids and copes with a zero-row frame, whereas
# seq(1, nrow(iris1), 1) yields doubles and errors when nrow() is 0.
iris1$id <- seq_len(nrow(iris1))
# Two pieces that share only the id column.
df6 <- subset(iris1, select = c("Sepal.Length", "Sepal.Width", "id"))
df7 <- subset(
  iris1,
  select = c("Petal.Length", "Petal.Width", "Species", "id")
)
# all = TRUE keeps ids found in either input (full outer join); here every
# id occurs in both pieces, so each id yields exactly one merged row.
merge_df <- merge(x = df6, y = df7, by = "id", all = TRUE)
head(merge_df)
## id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 1 5.1 3.5 1.4 0.2 setosa
## 2 2 4.9 3.0 1.4 0.2 setosa
## 3 3 4.7 3.2 1.3 0.2 setosa
## 4 4 4.6 3.1 1.5 0.2 setosa
## 5 5 5.0 3.6 1.4 0.2 setosa
## 6 6 5.4 3.9 1.7 0.4 setosa
5) Explain the use of the apply, lapply, tapply, and aggregate functions using the iris dataset. Use the help available in RStudio to answer this question.
# apply: run a function over the rows (MARGIN = 1) or the columns
# (MARGIN = 2) of a matrix. The four numeric iris columns are converted to a
# matrix and mean() is evaluated column by column, giving a named vector.
iris_apply <- apply(X = as.matrix(iris[1:4]), MARGIN = 2, FUN = mean)
iris_apply
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 5.843333 3.057333 3.758000 1.199333
# lapply: call a function on every element of a list and return a list.
# A data frame is a list of columns, so this sums each measurement column;
# column 5 (Species) is dropped first because it is a factor, not a number.
iris_lapply <- lapply(X = iris[-5L], FUN = sum)
iris_lapply
## $Sepal.Length
## [1] 876.5
##
## $Sepal.Width
## [1] 458.6
##
## $Petal.Length
## [1] 563.7
##
## $Petal.Width
## [1] 179.9
# tapply: split a vector into groups given by a factor and apply a function
# to each group. Here: the mean Petal.Length of each Species.
iris_tapply <- with(
  iris,
  tapply(X = Petal.Length, INDEX = Species, FUN = mean)
)
iris_tapply
## setosa versicolor virginica
## 1.462 4.260 5.552
# aggregate: summarise one or more columns of a data frame per group and
# return the result as a data frame. Here: the mean Sepal.Length and
# Sepal.Width of each Species, with the grouping column named "Species".
iris_aggregate <- aggregate(
  x = iris[c("Sepal.Length", "Sepal.Width")],
  by = list(Species = iris$Species),
  FUN = mean
)
iris_aggregate
## Species Sepal.Length Sepal.Width
## 1 setosa 5.006 3.428
## 2 versicolor 5.936 2.770
## 3 virginica 6.588 2.974