Homework1

Use the iris dataset to answer the following: 1) Create two subsets (df1 and df2) of iris so that df1 contains the flowers with above-average Sepal.Length and df2 contains the flowers that are not in df1.

# Question 1: split iris around the mean sepal length.
# The mean is computed once and a single logical mask is reused, so df1
# (strictly above average) and df2 (everything else) are exact complements.
mean_sepal_length <- mean(iris$Sepal.Length)
is_above_average <- iris$Sepal.Length > mean_sepal_length
df1 <- iris[is_above_average, ]
df2 <- iris[!is_above_average, ]
head(df1)

##    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 51          7.0         3.2          4.7         1.4 versicolor
## 52          6.4         3.2          4.5         1.5 versicolor
## 53          6.9         3.1          4.9         1.5 versicolor
## 55          6.5         2.8          4.6         1.5 versicolor
## 57          6.3         3.3          4.7         1.6 versicolor
## 59          6.6         2.9          4.6         1.3 versicolor

# Preview the first six rows of the at-or-below-average subset.
head(df2, n = 6L)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

2) For each Species, create a plot to show the relationship between Sepal.Length and Petal.Length. What is your conclusion?

# Scatter plot of petal length against sepal length for all three species.
# Each species gets its own plotting symbol, taken from the integer codes of
# the Species factor: pch = 1 setosa (circle), pch = 2 versicolor (triangle),
# pch = 3 virginica (plus).
species_levels <- levels(iris$Species)
plot(
  iris$Sepal.Length, iris$Petal.Length,
  pch = as.numeric(iris$Species),
  xlab = "Sepal.Length", ylab = "Petal.Length"
)
# Show the symbol-to-species mapping in the figure itself, so the plot can be
# read without consulting the code.
legend(
  "topleft",
  legend = species_levels,
  pch = seq_along(species_levels),
  title = "Species"
)

Conclusion: Sepal.Length appears to be positively correlated with Petal.Length.

3) For each Species, create a plot to show the relationship between Sepal.Width and Petal.Width. What is your conclusion?

# Scatter plot of petal width against sepal width for all three species.
# The plotting symbol is the integer code of the Species factor, so each
# species is drawn with a different symbol.
species_levels <- levels(iris$Species)
plot(
  iris$Sepal.Width, iris$Petal.Width,
  pch = as.numeric(iris$Species),
  xlab = "Sepal.Width", ylab = "Petal.Width"
)
# Show the symbol-to-species mapping in the figure itself, so the plot can be
# read without consulting the code.
legend(
  "right",
  legend = species_levels,
  pch = seq_along(species_levels),
  title = "Species"
)

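Conclusion: Within each species, Sepal.Width appears to be positively correlated with Petal.Width. (Pooled across all species the overall trend is negative, because setosa combines wide sepals with narrow petals, so the species should be considered separately.)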

4) Using the iris dataset, explain the use of these functions: cbind, rbind, and merge.

# cbind: glue data frames together side by side (column-wise).
# Split iris into three column groups, then reassemble them with cbind().
sepal_cols <- c("Sepal.Length", "Sepal.Width")
petal_cols <- c("Petal.Length", "Petal.Width")
df3 <- iris[, sepal_cols]
df4 <- iris[, petal_cols]
df5 <- iris[, "Species", drop = FALSE]  # keep it a one-column data frame
cbind_df <- cbind(df3, df4, df5)
head(cbind_df)

##   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1          5.1         3.5          1.4         0.2  setosa
## 2          4.9         3.0          1.4         0.2  setosa
## 3          4.7         3.2          1.3         0.2  setosa
## 4          4.6         3.1          1.5         0.2  setosa
## 5          5.0         3.6          1.4         0.2  setosa
## 6          5.4         3.9          1.7         0.4  setosa

# rbind: stack data frames that share the same columns on top of each other
# (row-wise). Here the two Sepal.Length subsets are stacked back together.
rbind_df <- rbind(df1, df2)
head(rbind_df)

##    Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 51          7.0         3.2          4.7         1.4 versicolor
## 52          6.4         3.2          4.5         1.5 versicolor
## 53          6.9         3.1          4.9         1.5 versicolor
## 55          6.5         2.8          4.6         1.5 versicolor
## 57          6.3         3.3          4.7         1.6 versicolor
## 59          6.6         2.9          4.6         1.3 versicolor

# merge: join two data frames on a shared key column.
# Add a row identifier to a copy of iris, split the columns into two tables
# that both carry the identifier, then join them back together on it.
iris1 <- iris
iris1$id <- seq_len(nrow(iris1))
df6 <- subset(iris1, select = c("Sepal.Length", "Sepal.Width", "id"))
df7 <- subset(iris1, select = c("Petal.Length", "Petal.Width", "Species", "id"))
# all = TRUE requests a full outer join: rows from either side are kept even
# if the other side has no matching id.
merge_df <- merge(x = df6, y = df7, by = "id", all = TRUE)
head(merge_df)

##   id Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1  1          5.1         3.5          1.4         0.2  setosa
## 2  2          4.9         3.0          1.4         0.2  setosa
## 3  3          4.7         3.2          1.3         0.2  setosa
## 4  4          4.6         3.1          1.5         0.2  setosa
## 5  5          5.0         3.6          1.4         0.2  setosa
## 6  6          5.4         3.9          1.7         0.4  setosa

5) Explain the use of the apply, lapply, tapply, and aggregate functions using the iris dataset. Use the help available in RStudio to answer this question.

# apply: run a function over the rows (MARGIN = 1) or columns (MARGIN = 2)
# of a matrix-like object. Here: the mean of each numeric column.
numeric_measurements <- iris[, 1:4]
iris_apply <- apply(numeric_measurements, MARGIN = 2, FUN = mean)
iris_apply

## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##     5.843333     3.057333     3.758000     1.199333

# lapply: call a function on every element of a list (for a data frame,
# every column) and return the results as a list.
# Here: the sum of each column except the fifth one (Species).
iris_lapply <- lapply(X = iris[-5], FUN = sum)
iris_lapply

## $Sepal.Length
## [1] 876.5
## 
## $Sepal.Width
## [1] 458.6
## 
## $Petal.Length
## [1] 563.7
## 
## $Petal.Width
## [1] 179.9

# tapply: split a vector into groups defined by a factor and apply a
# function to each group. Here: mean petal length per species.
iris_tapply <- tapply(
  X = iris$Petal.Length,
  INDEX = iris$Species,
  FUN = mean
)
iris_tapply

##     setosa versicolor  virginica 
##      1.462      4.260      5.552

# aggregate: compute a summary statistic for several columns at once,
# grouped by one or more variables, and return a data frame.
# Here: mean sepal length and width per species.
sepal_measurements <- iris[c("Sepal.Length", "Sepal.Width")]
iris_aggregate <- aggregate(
  x = sepal_measurements,
  by = list(Species = iris$Species),
  FUN = mean
)
iris_aggregate

##      Species Sepal.Length Sepal.Width
## 1     setosa        5.006       3.428
## 2 versicolor        5.936       2.770
## 3  virginica        6.588       2.974

Homework1

Guang Cao

2023-07-06