bài 1

# Load the built-in iris data set into the workspace.
data("iris")

# Per-species minimum, maximum and standard deviation of Sepal.Width.
# The summary function returns a named vector of length three, so the
# aggregated Sepal.Width column is a matrix with columns Min, Max and SD.
result <- aggregate(
  Sepal.Width ~ Species,
  data = iris,
  FUN = function(values) {
    setNames(c(range(values), sd(values)), c("Min", "Max", "SD"))
  }
)
result

##      Species Sepal.Width.Min Sepal.Width.Max Sepal.Width.SD
## 1     setosa       2.3000000       4.4000000      0.3790644
## 2 versicolor       2.0000000       3.4000000      0.3137983
## 3  virginica       2.2000000       3.8000000      0.3224966

library(ggplot2)
# Box plot of Sepal.Length for each iris species, drawn with pink-filled
# boxes on the minimal theme; title and axis labels are set explicitly.
ggplot(iris, aes(Species, Sepal.Length)) +
  geom_boxplot(fill = "lightpink") +
  theme_minimal() +
  ggtitle("Boxplot Sepal.Length theo Species") +
  xlab("Species") +
  ylab("Sepal.Length")

import pandas as pd

# Read iris.csv straight from GitHub (raw link; the file has a header row).
url = "https://raw.githubusercontent.com/uiuc-cse/data-fa14/gh-pages/data/iris.csv"
df = pd.read_csv(filepath_or_buffer=url)

# Print the column names as a sanity check.
print("Tên các cột:", list(df.columns))

## Tên các cột: ['sepal_length', 'sepal_width', 'petal_length', 'petal_width', 'species']

# Keep only the rows whose petal_length is strictly greater than 4.0.
is_long_petal = df['petal_length'] > 4.0
filtered_df = df.loc[is_long_petal]

# Count the matching records.
count = len(filtered_df)
# Expected result for this iris.csv: 84 records.
print(f"Số lượng bản ghi có petal_length lớn hơn 4.0: {count}")

## Số lượng bản ghi có petal_length lớn hơn 4.0: 84

# Save the filtered rows to a local CSV file (optional); the row index is
# not written.
output_path = 'count_filtered_petal_length.csv'
filtered_df.to_csv(output_path, index=False)
print("Đã xuất file count_filtered_petal_length.csv thành công!")

## Đã xuất file count_filtered_petal_length.csv thành công!

bài 1

2026-01-08