Câu 1: Dùng R:

# 1.1
# Load the built-in iris data set and keep only its numeric columns.
# Selecting by type (instead of dropping column 5 by position) keeps the
# code correct even if the column order of the input changes.
mydata <- iris
num_data <- mydata[, vapply(mydata, is.numeric, logical(1)), drop = FALSE]
# Preview the first 25 rows of the numeric data.
head(num_data, 25)
##    Sepal.Length Sepal.Width Petal.Length Petal.Width
## 1           5.1         3.5          1.4         0.2
## 2           4.9         3.0          1.4         0.2
## 3           4.7         3.2          1.3         0.2
## 4           4.6         3.1          1.5         0.2
## 5           5.0         3.6          1.4         0.2
## 6           5.4         3.9          1.7         0.4
## 7           4.6         3.4          1.4         0.3
## 8           5.0         3.4          1.5         0.2
## 9           4.4         2.9          1.4         0.2
## 10          4.9         3.1          1.5         0.1
## 11          5.4         3.7          1.5         0.2
## 12          4.8         3.4          1.6         0.2
## 13          4.8         3.0          1.4         0.1
## 14          4.3         3.0          1.1         0.1
## 15          5.8         4.0          1.2         0.2
## 16          5.7         4.4          1.5         0.4
## 17          5.4         3.9          1.3         0.4
## 18          5.1         3.5          1.4         0.3
## 19          5.7         3.8          1.7         0.3
## 20          5.1         3.8          1.5         0.3
## 21          5.4         3.4          1.7         0.2
## 22          5.1         3.7          1.5         0.4
## 23          4.6         3.6          1.0         0.2
## 24          5.1         3.3          1.7         0.5
## 25          4.8         3.4          1.9         0.2
# Preview the last 10 rows of the numeric data.
tail(num_data, n = 10L)
##     Sepal.Length Sepal.Width Petal.Length Petal.Width
## 141          6.7         3.1          5.6         2.4
## 142          6.9         3.1          5.1         2.3
## 143          5.8         2.7          5.1         1.9
## 144          6.8         3.2          5.9         2.3
## 145          6.7         3.3          5.7         2.5
## 146          6.7         3.0          5.2         2.3
## 147          6.3         2.5          5.0         1.9
## 148          6.5         3.0          5.2         2.0
## 149          6.2         3.4          5.4         2.3
## 150          5.9         3.0          5.1         1.8
# Compute the mean of every column. vapply() enforces exactly one numeric
# value per column, whereas sapply() silently changes its return type
# depending on the input.
mean_val <- vapply(num_data, mean, numeric(1))
mean_val
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##     5.843333     3.057333     3.758000     1.199333
# Compute the sample variance of every column. vapply() enforces exactly
# one numeric value per column, whereas sapply() silently changes its
# return type depending on the input.
var_val <- vapply(num_data, var, numeric(1))
var_val
## Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
##    0.6856935    0.1899794    3.1162779    0.5810063
# 1.2. Draw the chart
library(ggplot2)

# Scatter plot of sepal width against sepal length, one colour per species.
ggplot(
  data = iris,
  mapping = aes(x = Sepal.Length, y = Sepal.Width, color = Species)
) +
  geom_point(size = 2) +
  scale_color_manual(values = c("firebrick", "goldenrod2", "royalblue")) +
  labs(
    title = "SCATTER PLOT Sepal.Length WITH Sepal.Width",
    x = "Sepal Length",
    y = "Sepal Width"
  ) +
  theme_minimal()

Câu 2: Dùng Pandas:

- Đọc file iris.csv

- Lọc các dòng có Petal.Length (cột PetalLengthCm trong file) lớn hơn 4.0

- Tính tổng của cột Sepal.Length (cột SepalLengthCm trong file) cho dữ liệu đã lọc

library(reticulate)
py_require("pandas")
import pandas as pd

# Load the iris table from the CSV file given by a relative path.
data = pd.read_csv(filepath_or_buffer="iris.csv")
# Display the loaded table.
print(data)
##       Id  SepalLengthCm  ...  PetalWidthCm         Species
## 0      1            5.1  ...           0.2     Iris-setosa
## 1      2            4.9  ...           0.2     Iris-setosa
## 2      3            4.7  ...           0.2     Iris-setosa
## 3      4            4.6  ...           0.2     Iris-setosa
## 4      5            5.0  ...           0.2     Iris-setosa
## ..   ...            ...  ...           ...             ...
## 145  146            6.7  ...           2.3  Iris-virginica
## 146  147            6.3  ...           1.9  Iris-virginica
## 147  148            6.5  ...           2.0  Iris-virginica
## 148  149            6.2  ...           2.3  Iris-virginica
## 149  150            5.9  ...           1.8  Iris-virginica
## 
## [150 rows x 6 columns]
# Keep only the rows whose petal length is strictly greater than 4.0.
df_filtered = data.loc[data["PetalLengthCm"].gt(4.0)]
print(df_filtered)
##       Id  SepalLengthCm  ...  PetalWidthCm          Species
## 50    51            7.0  ...           1.4  Iris-versicolor
## 51    52            6.4  ...           1.5  Iris-versicolor
## 52    53            6.9  ...           1.5  Iris-versicolor
## 54    55            6.5  ...           1.5  Iris-versicolor
## 55    56            5.7  ...           1.3  Iris-versicolor
## ..   ...            ...  ...           ...              ...
## 145  146            6.7  ...           2.3   Iris-virginica
## 146  147            6.3  ...           1.9   Iris-virginica
## 147  148            6.5  ...           2.0   Iris-virginica
## 148  149            6.2  ...           2.3   Iris-virginica
## 149  150            5.9  ...           1.8   Iris-virginica
## 
## [84 rows x 6 columns]
# Add up the sepal lengths of the filtered rows and display the total.
total_sepal_length = df_filtered.loc[:, "SepalLengthCm"].sum()
print(total_sepal_length)
## 538.4