KiemTraKyNang1

BÀI 1: 1. Tính trung bình, độ lệch chuẩn, giá trị lớn nhất của Petal.Length theo Species

library(dplyr)

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

# Summarise Petal.Length for each iris species: mean, standard deviation
# and maximum, giving one row per species.
summary_petal_length <- summarise(
  group_by(iris, Species),
  mean_Petal_Length = mean(Petal.Length),
  sd_Petal_Length = sd(Petal.Length),
  max_Petal_Length = max(Petal.Length)
)

# Show the resulting summary table.
print(summary_petal_length)

## # A tibble: 3 × 4
##   Species    mean_Petal_Length sd_Petal_Length max_Petal_Length
##   <fct>                  <dbl>           <dbl>            <dbl>
## 1 setosa                  1.46           0.174              1.9
## 2 versicolor              4.26           0.470              5.1
## 3 virginica               5.55           0.552              6.9

2. Vẽ biểu đồ violin của Sepal.Width theo Species (ggplot2)

library(ggplot2)

# Violin plot of Sepal.Width for each species, one filled violin per species.
# trim = FALSE is passed so the violins are not trimmed.
ggplot(
  data = iris,
  mapping = aes(x = Species, y = Sepal.Width, fill = Species)
) +
  geom_violin(trim = FALSE) +
  labs(
    title = "Violin plot của Sepal.Width theo Species",
    x = "Species",
    y = "Sepal.Width"
  ) +
  theme_minimal()

BÀI 2:

Sử dụng Pandas:

Đọc file iris.csv.
Tính trung bình của tất cả các cột (trừ Species) và xuất kết quả ra file mean_values.csv.
Lọc các dòng có Petal.Width nhỏ hơn giá trị trung bình của cột này. Xuất kết quả ra file filtered_petal_width.csv.

# Export the built-in iris data set to iris.csv for the pandas step below;
# row.names = FALSE leaves the row-name column out of the file.
write.csv(x = iris, file = "iris.csv", row.names = FALSE)

library(reticulate)

# Bind reticulate to one specific Python interpreter on this machine.
# required = TRUE asks reticulate to treat this interpreter as mandatory
# instead of falling back to another installation.
use_python(
  python = "C:/Users/andan/AppData/Local/Programs/Python/Python39/python.exe",
  required = TRUE
)

# Optional check: display the Python configuration reticulate is using.
py_config()

## python:         C:/Users/andan/AppData/Local/Programs/Python/Python39/python.exe
## libpython:      C:/Users/andan/AppData/Local/Programs/Python/Python39/python39.dll
## pythonhome:     C:/Users/andan/AppData/Local/Programs/Python/Python39
## version:        3.9.12 (tags/v3.9.12:b28265d, Mar 23 2022, 23:52:46) [MSC v.1929 64 bit (AMD64)]
## Architecture:   64bit
## numpy:          C:/Users/andan/AppData/Local/Programs/Python/Python39/Lib/site-packages/numpy
## numpy_version:  1.26.4
## 
## NOTE: Python version was forced by use_python() function

import pandas as pd

# Load the iris data from the CSV file in the working directory.
df = pd.read_csv("iris.csv")

# Average every numeric column. Selecting by dtype picks the numeric columns
# automatically, so no column has to be dropped by name.
df_num = df.select_dtypes(include="number")
mean_values = df_num.mean()
# The Series index (the column names) is written next to a "mean" column.
mean_values.to_csv("mean_values.csv", header=["mean"])

# Keep only the rows whose Petal.Width is strictly below the column mean.
pw_mean = df["Petal.Width"].mean()
filtered_df = df.loc[df["Petal.Width"].lt(pw_mean)]
filtered_df.to_csv("filtered_petal_width.csv", index=False)