BÀI 1: 1. Tính trung bình, độ lệch chuẩn, giá trị lớn nhất của Petal.Length theo Species
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Per-species summary of Petal.Length: mean, standard deviation and maximum.
summary_petal_length <- summarise(
  group_by(iris, Species),
  mean_Petal_Length = mean(Petal.Length),
  sd_Petal_Length = sd(Petal.Length),
  max_Petal_Length = max(Petal.Length)
)

# Show the summary table (one row per species) on the console.
print(summary_petal_length)
## # A tibble: 3 × 4
## Species mean_Petal_Length sd_Petal_Length max_Petal_Length
## <fct> <dbl> <dbl> <dbl>
## 1 setosa 1.46 0.174 1.9
## 2 versicolor 4.26 0.470 5.1
## 3 virginica 5.55 0.552 6.9
library(ggplot2)
# Violin plot of Sepal.Width for each species, filled by species.
ggplot(data = iris, mapping = aes(Species, Sepal.Width, fill = Species)) +
  # trim = FALSE keeps the violin tails instead of cutting them at the data range.
  geom_violin(trim = FALSE) +
  labs(
    title = "Violin plot của Sepal.Width theo Species",
    x = "Species",
    y = "Sepal.Width"
  ) +
  theme_minimal()
BÀI 2:
Sử dụng Pandas:
# Export the built-in iris data to "iris.csv" without a row-name column.
write.csv(iris, file = "iris.csv", row.names = FALSE)

library(reticulate)

# Bind reticulate to one specific Python installation. With required = TRUE
# reticulate raises an error if this interpreter cannot be used.
use_python(
  python = "C:/Users/andan/AppData/Local/Programs/Python/Python39/python.exe",
  required = TRUE
)

# Optional check: show the Python configuration reticulate is using.
py_config()
## python: C:/Users/andan/AppData/Local/Programs/Python/Python39/python.exe
## libpython: C:/Users/andan/AppData/Local/Programs/Python/Python39/python39.dll
## pythonhome: C:/Users/andan/AppData/Local/Programs/Python/Python39
## version: 3.9.12 (tags/v3.9.12:b28265d, Mar 23 2022, 23:52:46) [MSC v.1929 64 bit (AMD64)]
## Architecture: 64bit
## numpy: C:/Users/andan/AppData/Local/Programs/Python/Python39/Lib/site-packages/numpy
## numpy_version: 1.26.4
##
## NOTE: Python version was forced by use_python() function
import pandas as pd

df = pd.read_csv("iris.csv")

# Let select_dtypes pick the numeric columns automatically, so no column has
# to be dropped by name (avoids errors from drop).
df_num = df.select_dtypes(include=["number"])
mean_values = df_num.mean()
mean_values.to_csv("mean_values.csv", header=["mean"])

# Keep only the rows whose Petal.Width is strictly below the column mean.
pw_mean = df["Petal.Width"].mean()
filtered_df = df.loc[df["Petal.Width"] < pw_mean]
filtered_df.to_csv("filtered_petal_width.csv", index=False)