TH1_Ktra

library(ggplot2)
library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

#library(knitr) 

# Load the built-in iris dataset
data(iris)

# --- Part 1: Compute summary statistics and print the table ---
# Per-species mean, standard deviation and maximum of Petal.Length.
thong_ke <- iris %>%
  group_by(Species) %>%
  summarise(
    Trung_Binh = mean(Petal.Length),
    Do_Lech_Chuan = sd(Petal.Length),
    Max = max(Petal.Length)
  )

# Display the table; without this the summary is computed but never shown.
print(thong_ke)

# --- Part 2: Violin plot of Sepal.Width for each species ---
bieu_do <- ggplot(
  data = iris,
  mapping = aes(x = Species, y = Sepal.Width, fill = Species)
) +
  # Untrimmed, slightly transparent violins
  geom_violin(alpha = 0.7, trim = FALSE) +
  # Narrow white boxplot drawn on top so the median stays visible
  geom_boxplot(color = "black", fill = "white", width = 0.1) +
  # ColorBrewer "Set2" fill palette
  scale_fill_brewer(palette = "Set2") +
  # Minimal theme; the legend is hidden because the x axis already names
  # each species
  theme_minimal() +
  theme(legend.position = "none") +
  # Title, subtitle and axis labels
  labs(
    title = "Biểu đồ phân phối độ rộng đài hoa (Sepal.Width)",
    subtitle = "So sánh giữa 3 loài hoa: Setosa, Versicolor, Virginica",
    x = "Loài hoa (Species)",
    y = "Độ rộng đài hoa (cm)"
  )

print(bieu_do)


``` r
# Export the built-in iris data to "iris.csv" without a row-name column;
# the pandas code later in this file reads that file.
data("iris")
utils::write.csv(x = iris, file = "iris.csv", row.names = FALSE)

import pandas as pd

# --- Part 1: Read iris.csv ---
try:
    df = pd.read_csv("iris.csv")
except FileNotFoundError:
    print("Lỗi: Không tìm thấy file iris.csv")
    # Stop here: every later step uses `df`, so carrying on would only fail
    # with a NameError that hides the real cause (the missing file).
    raise SystemExit(1)
else:
    print("1. Đã đọc file dữ liệu thành công.")
    # Report the data dimensions as a sanity check
    print(f"   Kích thước dữ liệu: {df.shape[0]} dòng, {df.shape[1]} cột")

## 1. Đã đọc file dữ liệu thành công.
##    Kích thước dữ liệu: 150 dòng, 5 cột

# --- Part 2: Column means, written to a file ---
print("-" * 30)

## ------------------------------

# Keep only the measurement columns: 'Species' holds text labels and
# cannot be averaged.
df_numeric = df.drop(columns='Species')

# Mean of each remaining column
mean_values = df_numeric.mean(axis=0)

print("2. Giá trị trung bình các cột:")

## 2. Giá trị trung bình các cột:

print(mean_values)

## Sepal.Length    5.843333
## Sepal.Width     3.057333
## Petal.Length    3.758000
## Petal.Width     1.199333
## dtype: float64

# Save the means to mean_values.csv under a single named value column
mean_values.to_csv("mean_values.csv", header=["Gia_tri_trung_binh"])
print("-> Đã xuất kết quả ra file 'mean_values.csv'")

## -> Đã xuất kết quả ra file 'mean_values.csv'

# --- Part 3: Filter rows and write them to a file ---
print("-" * 30)

## ------------------------------

# Mean of the Petal.Width column on its own
mean_pw = df['Petal.Width'].mean()
print(f"3. Trung bình cột Petal.Width là: {mean_pw:.4f}")

## 3. Trung bình cột Petal.Width là: 1.1993

# Keep the rows whose Petal.Width is strictly below that mean
filtered_df = df.loc[df['Petal.Width'] < mean_pw]

print(f"   Số dòng thỏa mãn điều kiện (< {mean_pw:.2f}): {len(filtered_df)} dòng")

##    Số dòng thỏa mãn điều kiện (< 1.20): 60 dòng

# Save the filtered rows to filtered_petal_width.csv, omitting the index
filtered_df.to_csv("filtered_petal_width.csv", index=False)
print("-> Đã xuất kết quả ra file 'filtered_petal_width.csv'")

## -> Đã xuất kết quả ra file 'filtered_petal_width.csv'

TH1_Ktra

nguyen_van_minh

2026-01-08