Kiemtra1

# Câu 1 (R)
# Tính min, max, mean của Petal.Length và vẽ cột (ggplot2)

# Nếu chưa có, cài gói trước: install.packages(c("dplyr", "ggplot2"))

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(ggplot2)

# Load the built-in iris dataset into the workspace
data("iris")

# One-row summary of Petal.Length: minimum, maximum and mean (NAs ignored).
# The list names become the column-name prefixes through `.names`.
summary_petal_length <- summarise(
  iris,
  across(
    Petal.Length,
    list(
      min = function(x) min(x, na.rm = TRUE),
      max = function(x) max(x, na.rm = TRUE),
      mean = function(x) mean(x, na.rm = TRUE)
    ),
    .names = "{.fn}_petal_length"
  )
)

print(summary_petal_length)

##   min_petal_length max_petal_length mean_petal_length
## 1                1              6.9             3.758

# Mean Petal.Width for each Species (NAs ignored); one row per species.
mean_by_species <- summarise(
  group_by(iris, Species),
  mean_petal_width = mean(Petal.Width, na.rm = TRUE)
)

# Bar chart: one column per species, column height = mean Petal.Width.
ggplot(data = mean_by_species, mapping = aes(x = Species, y = mean_petal_width)) +
  geom_col() +
  theme_minimal() +
  ggtitle("Mean Petal.Width theo Species") +
  xlab("Species") +
  ylab("Mean Petal.Width")

# Câu 2 (Python) — chunk Python trong RStudio
import sys

import importlib

def try_import(name):
    """Import the module called ``name``, or return ``None`` on any failure.

    Args:
        name: Module name handed to ``importlib.import_module``.

    Returns:
        The imported module object, or ``None`` when importing raised any
        ``Exception``.
    """
    module = None
    try:
        module = importlib.import_module(name)
    except Exception:
        # Best-effort probe: every import-time failure is reported as "absent".
        pass
    return module

# Probe the three libraries in order; a failed import leaves the name bound to None.
pd, sklearn, seaborn = (
    try_import(module_name) for module_name in ("pandas", "sklearn", "seaborn")
)

# pandas is used by the code that follows, so stop here with install instructions.
if pd is None:
    raise ImportError(
        "Module 'pandas' chưa cài. Trong RStudio, chạy: library(reticulate); "
        "py_install('pandas') hoặc cài Miniconda bằng install_miniconda()."
    )

# Load the iris data into `df`: prefer a local iris.csv; otherwise build the
# frame from scikit-learn or seaborn and save it as iris.csv.
import os

# seaborn-style column names -> the dotted names this script looks up.
_IRIS_COLUMN_NAMES = {
    "sepal_length": "Sepal.Length",
    "sepal_width": "Sepal.Width",
    "petal_length": "Petal.Length",
    "petal_width": "Petal.Width",
    "species": "Species",
}


def _normalize_iris_columns(frame):
    """Return `frame` with seaborn-style column names renamed to dotted names.

    A rename is skipped when its source column is absent or its target column
    already exists, so no duplicate column names are created.
    """
    renames = {
        old: new
        for old, new in _IRIS_COLUMN_NAMES.items()
        if old in frame.columns and new not in frame.columns
    }
    return frame.rename(columns=renames)


if os.path.exists("iris.csv"):
    # An existing file may carry seaborn-style headers; normalize them so the
    # "Sepal.Length" / "Petal.Width" lookups do not raise KeyError.
    df = _normalize_iris_columns(pd.read_csv("iris.csv"))
elif sklearn is not None:
    from sklearn.datasets import load_iris

    iris = load_iris()
    df = pd.DataFrame(
        iris.data,
        columns=["Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width"],
    )
    # Map each integer class label to its species name. A failure here now
    # surfaces instead of silently producing a frame without Species.
    df["Species"] = iris.target_names[iris.target]
    # Save the frame as iris.csv in the working directory.
    df.to_csv("iris.csv", index=False)
    print("Không tìm thấy iris.csv → đã tạo từ scikit-learn và lưu thành iris.csv")
elif seaborn is not None:
    # seaborn uses different column names; align them with the dotted names.
    df = _normalize_iris_columns(seaborn.load_dataset("iris"))
    df.to_csv("iris.csv", index=False)
    print("Không tìm thấy iris.csv → đã tạo từ seaborn và lưu thành iris.csv")
else:
    raise RuntimeError(
        "Không tìm thấy iris.csv và cả scikit-learn lẫn seaborn đều chưa cài. "
        "Hãy cung cấp file iris.csv trong working directory hoặc cài pandas + scikit-learn/seaborn."
    )

# Keep only the rows whose Sepal.Length is strictly greater than 6.0.
df_loc = df.loc[df["Sepal.Length"].gt(6.0)]

# Mean of Petal.Width over the filtered rows.
mean_petal_width = df_loc.loc[:, "Petal.Width"].mean()

print("Số dòng sau khi lọc:", df_loc.shape[0])

## Số dòng sau khi lọc: 61

# Report the mean Petal.Width computed on the filtered rows.
print("Giá trị trung bình của Petal.Width trong dữ liệu đã lọc:", mean_petal_width)

## Giá trị trung bình của Petal.Width trong dữ liệu đã lọc: 1.8475409836065577

Kiemtra1

Le_Anh_Minh

2026-01-08