library(reticulate)
import pandas as pd
# Load the iris measurements from iris.csv.
df = pd.read_csv("iris.csv")

# Keep only the rows whose sepal.length is strictly greater than 5.
is_long_sepal = df["sepal.length"].gt(5.0)
df_filtered = df[is_long_sepal]

# Write the filtered rows to a new CSV, leaving out the index column.
df_filtered.to_csv("iris_length5.csv", index=False)

# Per-species summary of sepal.length: Tong = sum, TrungBinh = mean,
# SoLuong = count. Computed from the unfiltered df, not df_filtered.
sepal_length_by_species = df.groupby("Species")["sepal.length"]
result = sepal_length_by_species.agg(
    Tong="sum", TrungBinh="mean", SoLuong="count"
)

print(df_filtered)
## sepal.length sepal.width petal.length petal.width Species
## 0 5.1 3.5 1.4 0.2 Setosa
## 5 5.4 3.9 1.7 0.4 Setosa
## 10 5.4 3.7 1.5 0.2 Setosa
## 14 5.8 4.0 1.2 0.2 Setosa
## 15 5.7 4.4 1.5 0.4 Setosa
## .. ... ... ... ... ...
## 145 6.7 3.0 5.2 2.3 Virginica
## 146 6.3 2.5 5.0 1.9 Virginica
## 147 6.5 3.0 5.2 2.0 Virginica
## 148 6.2 3.4 5.4 2.3 Virginica
## 149 5.9 3.0 5.1 1.8 Virginica
##
## [118 rows x 5 columns]
# Display the per-species sepal.length summary held in `result`
# (columns: Tong = sum, TrungBinh = mean, SoLuong = count).
print(result)
## Tong TrungBinh SoLuong
## Species
## Setosa 250.3 5.006 50
## Versicolor 296.8 5.936 50
## Virginica 329.4 6.588 50