# Compare a simulated Binomial(n, p) sample with the Poisson(n * p) pmf.
set.seed(123) # fix the RNG seed so the results are reproducible
n <- 15
p <- 0.4
x <- rbinom(300, size = n, prob = p)
lambda <- n * p # Poisson rate chosen to match the binomial mean

# x only takes integer values in 0..n, so use unit-width bins centred on each
# integer: (-0.5, 0.5], (0.5, 1.5], ... hist()'s default breaks are picked on
# round values instead, so the bars would not line up with the pmf points
# drawn at k below.
bin_breaks <- seq(-0.5, n + 0.5, by = 1)
hist(x,
  breaks = bin_breaks,
  probability = TRUE,
  col = "lightblue",
  main = "Histogram Binomial vs Poisson",
  xlab = "Giá trị"
)

# Overlay the Poisson pmf over the whole binomial support 0..n.
k <- 0:n
pois_pmf <- dpois(k, lambda)
points(k, pois_pmf, col = "red", pch = 19)
lines(k, pois_pmf, col = "red", lwd = 2)

import pandas as pd
import matplotlib.pyplot as plt
# NOTE(review): everything from here on is Python, not R — presumably it came
# from a separate Python chunk/script; confirm how this file is executed.

# Load the iris dataset from GitHub (using pandas)
df = pd.read_csv("https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv")
print(df.head())
## sepal_length sepal_width petal_length petal_width species
## 0 5.1 3.5 1.4 0.2 setosa
## 1 4.9 3.0 1.4 0.2 setosa
## 2 4.7 3.2 1.3 0.2 setosa
## 3 4.6 3.1 1.5 0.2 setosa
## 4 5.0 3.6 1.4 0.2 setosa

# Mean of each measurement column, grouped by species
mean_by_species = df.groupby("species").mean()
print(mean_by_species)
## sepal_length sepal_width petal_length petal_width
## species
## setosa 5.006 3.428 1.462 0.246
## versicolor 5.936 2.770 4.260 1.326
## virginica 6.588 2.974 5.552 2.026

# Scatter plot: sepal length vs petal length (all species, one colour)
plt.figure()
plt.scatter(df["sepal_length"], df["petal_length"])
plt.xlabel("Sepal Length")
plt.ylabel("Petal Length")
plt.title("Scatter Plot: Sepal vs Petal Length")
plt.show()

# Scatter plot coloured by species: one scatter() call per species so each
# gets its own colour from the default cycle and its own legend entry.
plt.figure()
for sp in df["species"].unique():
    subset = df[df["species"] == sp]
    plt.scatter(subset["sepal_length"], subset["petal_length"], label=sp)
# Build the legend once, after every species has been plotted.
plt.legend()
plt.xlabel("Sepal Length")
plt.ylabel("Petal Length")
plt.title("Scatter by Species")
plt.show()
