# Load the iris dataset that ships with R
data(iris)
# Compute the quartiles (0%, 25%, 50%, 75%, 100%) of Sepal.Length
quartiles <- quantile(iris[["Sepal.Length"]], probs = seq(0, 1, by = 0.25))
# Show the result
quartiles
## 0% 25% 50% 75% 100%
## 4.3 5.1 5.8 6.4 7.9
# Load ggplot2 for plotting
library(ggplot2)
# Box plot of Petal.Length for each Species, with boxes filled by Species
ggplot(
  data = iris,
  mapping = aes(x = Species, y = Petal.Length, fill = Species)
) +
  geom_boxplot() +
  labs(
    y = "Petal.Length",
    x = "Species",
    title = "Boxplot Petal.Length theo tung nhom Species"
  ) +
  theme_minimal()
# Câu 2 (Question 2)
import pandas as pd
# 1. Read iris.csv
# Assumes iris.csv sits in the same directory as the Rmd file.
try:
    df = pd.read_csv("iris.csv")
    print("Da doc file iris.csv thanh cong.")
except FileNotFoundError:
    # The file is missing: fall back to a small hand-made sample so the
    # remaining steps can still be demonstrated.
    print("Khong tim thay file iris.csv. Su dung du lieu mau de demo.")
    # Build the sample data: two Sepal.Width readings per species
    data = {
        "Species": [
            "Setosa", "Setosa",
            "Versicolor", "Versicolor",
            "Virginica", "Virginica"
        ],
        "Sepal.Width": [3.5, 3.0, 2.7, 2.9, 3.1, 3.0]
    }
    # Must stay inside the except branch: `data` only exists here, and the
    # frame read from disk must not be overwritten on the success path.
    df = pd.DataFrame(data)
## Da doc file iris.csv thanh cong.
# 2-3. Mean, max and min of Sepal.Width for each Species, with the statistic
# columns relabelled to descriptive names in the same expression
result = (
    df.groupby("Species")["Sepal.Width"]
    .agg(["mean", "max", "min"])
    .rename(columns={"mean": "Trung_binh", "max": "Lon_nhat", "min": "Nho_nhat"})
)
print("\nKet qua thong ke:")
##
## Ket qua thong ke:
print(result)
## Trung_binh Lon_nhat Nho_nhat
## Species
## setosa 3.428 4.4 2.3
## versicolor 2.770 3.4 2.0
## virginica 2.974 3.8 2.2
# 4. Write the summary table to a CSV file
result.to_csv("species_summary.csv")
print("\nDa xuat file species_summary.csv thanh cong.")
##
## Da xuat file species_summary.csv thanh cong.