# Compare an empirical Binomial(n, p) sample with the Poisson pmf that has the
# same mean (lambda = n * p), drawn on top of the sample's histogram.
set.seed(123)   # fix the RNG seed so the results are reproducible
n <- 15
p <- 0.4
x <- rbinom(300, size = n, prob = p)
lambda <- n * p   # Poisson rate matching the Binomial mean

# x is integer-valued, so use unit-width bins centred on each integer
# ((-0.5, 0.5], (0.5, 1.5], ...). Each bar then sits directly under the
# overlaid point for the same value. hist()'s default breaks fall on the
# integers themselves, which shifts the bars relative to the points and can
# merge two values into the lowest bin.
hist(x, probability = TRUE,
     breaks = seq(-0.5, n + 0.5, by = 1),
     col = "lightblue",
     main = "Histogram Binomial vs Poisson",
     xlab = "Giá trị")

# Full support of the Binomial sample; derived from n so it tracks any change
# to n above instead of repeating the literal 15.
k <- 0:n
pois_pmf <- dpois(k, lambda)   # evaluate the pmf once, reuse for both layers

points(k, pois_pmf,
       col = "red",
       pch = 19)

lines(k, pois_pmf,
      col = "red",
      lwd = 2)

import pandas as pd
import matplotlib.pyplot as plt

# Load the iris dataset from GitHub using pandas.
iris_url = "https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv"
df = pd.read_csv(iris_url)
print(df.head(5))
##    sepal_length  sepal_width  petal_length  petal_width species
## 0           5.1          3.5           1.4          0.2  setosa
## 1           4.9          3.0           1.4          0.2  setosa
## 2           4.7          3.2           1.3          0.2  setosa
## 3           4.6          3.1           1.5          0.2  setosa
## 4           5.0          3.6           1.4          0.2  setosa
# Mean of every measurement column within each species.
mean_by_species = df.groupby(by="species").mean()
print(mean_by_species)
##             sepal_length  sepal_width  petal_length  petal_width
## species                                                         
## setosa             5.006        3.428         1.462        0.246
## versicolor         5.936        2.770         4.260        1.326
## virginica          6.588        2.974         5.552        2.026
# Scatter plot of sepal length (x) against petal length (y) for all rows,
# drawn through an explicit Axes object on a fresh figure.
fig, ax = plt.subplots()
ax.scatter(df["sepal_length"], df["petal_length"])
ax.set_xlabel("Sepal Length")
ax.set_ylabel("Petal Length")
ax.set_title("Scatter Plot: Sepal vs Petal Length")
plt.show()

# Same scatter plot, with one series per species so that each species gets
# its own colour and its own legend entry.
fig, ax = plt.subplots()
for sp in df["species"].unique():
    # Rows belonging to the current species only.
    subset = df[df["species"] == sp]
    ax.scatter(subset["sepal_length"], subset["petal_length"], label=sp)
ax.legend()
ax.set_xlabel("Sepal Length")
ax.set_ylabel("Petal Length")
ax.set_title("Scatter by Species")
plt.show()