# Load the retail revenue data set from CSV. stringsAsFactors = FALSE keeps
# the text columns (san_pham, khu_vuc) as character vectors instead of
# factors, as confirmed by the str() output below.
data1 <- read.csv("doanh_thu_ban_le.csv", stringsAsFactors = FALSE)
# Number of rows and columns.
dim(data1)
## [1] 108   4
# Column names: month, product, region, revenue.
names(data1)
## [1] "thang"     "san_pham"  "khu_vuc"   "doanh_thu"
# Column types and a preview of the first values in each column.
str(data1)
## 'data.frame':    108 obs. of  4 variables:
##  $ thang    : int  1 1 1 1 1 1 1 1 1 2 ...
##  $ san_pham : chr  "A" "A" "A" "B" ...
##  $ khu_vuc  : chr  "Bac" "Trung" "Nam" "Bac" ...
##  $ doanh_thu: int  170 170 170 220 220 220 270 270 270 190 ...
# Per-column summary: quantiles and mean for the integer columns, length and
# class for the character columns.
summary(data1)
##      thang         san_pham           khu_vuc            doanh_thu  
##  Min.   : 1.00   Length:108         Length:108         Min.   :170  
##  1st Qu.: 3.75   Class :character   Class :character   1st Qu.:270  
##  Median : 6.50   Mode  :character   Mode  :character   Median :330  
##  Mean   : 6.50                                         Mean   :330  
##  3rd Qu.: 9.25                                         3rd Qu.:390  
##  Max.   :12.00                                         Max.   :490
# Descriptive table of doanh_thu (revenue) stratified by khu_vuc (region);
# the rendered table below also includes an "Overall" column.
table1(~ doanh_thu | khu_vuc, data = data1)
Bac
(N=36)
Nam
(N=36)
Trung
(N=36)
Overall
(N=108)
doanh_thu
Mean (SD) 330 (81.3) 330 (81.3) 330 (81.3) 330 (80.6)
Median [Min, Max] 330 [170, 490] 330 [170, 490] 330 [170, 490] 330 [170, 490]
# Descriptive table of doanh_thu (revenue) stratified by san_pham (product);
# the rendered table below also includes an "Overall" column.
table1(~ doanh_thu | san_pham, data = data1)
A
(N=36)
B
(N=36)
C
(N=36)
Overall
(N=108)
doanh_thu
Mean (SD) 280 (70.0) 330 (70.0) 380 (70.0) 330 (80.6)
Median [Min, Max] 280 [170, 390] 330 [220, 440] 380 [270, 490] 330 [170, 490]
# Revenue summary per region (khu_vuc): number of rows, mean revenue and
# total revenue. Missing revenue values are ignored by mean() and sum() but
# still counted by n(). .groups = "drop" returns an ungrouped result.
tong_hop_khu_vuc <-
  data1 %>%
  dplyr::group_by(khu_vuc) %>%
  dplyr::summarise(
    so_quan_sat = dplyr::n(),
    doanh_thu_trung_binh = mean(doanh_thu, na.rm = TRUE),
    tong_doanh_thu = sum(doanh_thu, na.rm = TRUE),
    .groups = "drop"
  )

# Revenue summary per product (san_pham): number of rows, mean revenue and
# total revenue. Missing revenue values are ignored by mean() and sum() but
# still counted by n(). .groups = "drop" returns an ungrouped result.
tong_hop_san_pham <-
  data1 %>%
  dplyr::group_by(san_pham) %>%
  dplyr::summarise(
    so_quan_sat = dplyr::n(),
    doanh_thu_trung_binh = mean(doanh_thu, na.rm = TRUE),
    tong_doanh_thu = sum(doanh_thu, na.rm = TRUE),
    .groups = "drop"
  )

# Total revenue for every region x product combination. .groups = "drop"
# removes both grouping levels so the result is a plain ungrouped table.
tong_hop_kv_sp <-
  data1 %>%
  dplyr::group_by(khu_vuc, san_pham) %>%
  dplyr::summarise(
    tong_doanh_thu = sum(doanh_thu, na.rm = TRUE),
    .groups = "drop"
  )

# Render the per-region summary as a formatted table.
knitr::kable(tong_hop_khu_vuc)
khu_vuc so_quan_sat doanh_thu_trung_binh tong_doanh_thu
Bac 36 330 11880
Nam 36 330 11880
Trung 36 330 11880
# Render the per-product summary as a formatted table.
knitr::kable(tong_hop_san_pham)
san_pham so_quan_sat doanh_thu_trung_binh tong_doanh_thu
A 36 280 10080
B 36 330 11880
C 36 380 13680
# Render the region x product totals as a formatted table.
knitr::kable(tong_hop_kv_sp)
khu_vuc san_pham tong_doanh_thu
Bac A 3360
Bac B 3960
Bac C 4560
Nam A 3360
Nam B 3960
Nam C 4560
Trung A 3360
Trung B 3960
Trung C 4560
# Bar chart of total revenue per region; each bar is labelled with its
# comma-formatted total, nudged just above the bar top.
ggplot(
  data = tong_hop_khu_vuc,
  mapping = aes(x = khu_vuc, y = tong_doanh_thu)
) +
  geom_col(width = 0.6) +
  geom_text(
    mapping = aes(label = scales::comma(tong_doanh_thu)),
    size = 4,
    vjust = -0.3
  ) +
  ggtitle("Tổng doanh thu theo khu vực") +
  xlab("Khu vực") +
  ylab("Tổng doanh thu") +
  theme_minimal()

# Bar chart of total revenue per product; each bar is labelled with its
# comma-formatted total, nudged just above the bar top.
ggplot(
  data = tong_hop_san_pham,
  mapping = aes(x = san_pham, y = tong_doanh_thu)
) +
  geom_col(width = 0.6) +
  geom_text(
    mapping = aes(label = scales::comma(tong_doanh_thu)),
    size = 4,
    vjust = -0.3
  ) +
  ggtitle("Tổng doanh thu theo sản phẩm") +
  xlab("Sản phẩm") +
  ylab("Tổng doanh thu") +
  theme_minimal()

# Grouped (dodged) bar chart of total revenue per region, one bar per product.
# The text layer is dodged by 0.9, matching the bar layer's dodge, so that
# each label sits above its own bar.
ggplot(
  data = tong_hop_kv_sp,
  mapping = aes(x = khu_vuc, y = tong_doanh_thu, fill = san_pham)
) +
  geom_col(position = "dodge") +
  geom_text(
    mapping = aes(label = scales::comma(tong_doanh_thu)),
    position = position_dodge(width = 0.9),
    size = 3.5,
    vjust = -0.3
  ) +
  ggtitle("Tổng doanh thu theo khu vực và sản phẩm") +
  xlab("Khu vực") +
  ylab("Tổng doanh thu") +
  labs(fill = "Sản phẩm") +
  theme_minimal()

# Histogram of individual revenue observations using 40-unit-wide bins whose
# edges are aligned to 0 (i.e. bin edges fall on multiples of 40).
ggplot(data = data1, mapping = aes(x = doanh_thu)) +
  geom_histogram(boundary = 0, binwidth = 40) +
  ggtitle("Phân bố doanh thu") +
  xlab("Doanh thu") +
  ylab("Tần số") +
  theme_minimal()