# L7_2 - Data visualization 2025 - P2

# Load the shared libraries.
pacman::p_load(ggplot2, GGally, gganimate, tidyverse)

# Read the "Data" sheet of the supermarket transactions workbook into X.
# NOTE(review): the path below is specific to the author's machine; adjust it
# before running elsewhere.
# NOTE(review): Purchase.Date prints as numbers such as 40895, presumably Excel
# date serials - confirm whether `detectDates = TRUE` is wanted.
X <- openxlsx::read.xlsx(
  "~/Library/CloudStorage/GoogleDrive-chuonghn90@gmail.com/Other computers/My Laptop/Lectures_Quant using R/Supermarket Transactions.xlsx",
  sheet = "Data"
)

# Preview the first five rows.
head(X, n = 5)
#>  Transaction Purchase.Date Customer.ID Gender Marital.Status Homeowner
#> 1 1 40895 7223 F M Y
#> 2 2 40897 7841 M M Y
#> 3 3 40898 8374 F M N
#> 4 4 40898 9619 M M Y
#> 5 5 40899 1900 F S Y
#> Children Annual.Income City State.or.Province Country Product.Family
#> 1 2 $30K - $50K Los Angeles CA USA Food
#> 2 5 $70K - $90K Los Angeles CA USA Food
#> 3 2 $50K - $70K Bremerton WA USA Food
#> 4 3 $30K - $50K Portland OR USA Food
#> 5 3 $130K - $150K Beverly Hills CA USA Drink
#> Product.Department Product.Category Units.Sold Revenue
#> 1 Snack Foods Snack Foods 5 27.38
#> 2 Produce Vegetables 5 14.90
#> 3 Snack Foods Snack Foods 3 5.52
#> 4 Snacks Candy 4 4.44
#> 5 Beverages Carbonated Beverages 4 14.00

# List the column names.
names(X)
#> [1] "Transaction" "Purchase.Date" "Customer.ID"
#> [4] "Gender" "Marital.Status" "Homeowner"
#> [7] "Children" "Annual.Income" "City"
#> [10] "State.or.Province" "Country" "Product.Family"
#> [13] "Product.Department" "Product.Category" "Units.Sold"
#> [16] "Revenue"
# Statistical summaries of pairs of (categorical) variables, drawn with the
# GGally two-variable helpers.

# Gender against marital status.
X |>
  ggally_colbar(mapping = aes(x = Gender, y = Marital.Status))

# Marital status against gender, coloured by home ownership.
X |>
  ggally_count(
    mapping = aes(x = Marital.Status, y = Gender, colour = Homeowner)
  )

# Married customers only (Marital.Status == "M"): home ownership against gender.
ggally_cross(
  filter(X, Marital.Status == "M"),
  mapping = aes(x = Homeowner, y = Gender, colour = Gender)
)

# Cross table of marital status and gender.
X |>
  ggally_crosstable(
    mapping = aes(x = Marital.Status, y = Gender),
    cells = "col.prop",
    fill = "std.resid"
  )

# Faceted bars: annual income against number of children.
ggally_facetbar(X, mapping = aes(x = Annual.Income, y = Children))

# Faceted bars: marital status against gender, coloured by product family.
X |>
  ggally_facetbar(
    mapping = aes(x = Marital.Status, y = Gender, colour = Product.Family)
  )

# Married customers only: gender against product category.
ggally_rowbar(
  filter(X, Marital.Status == "M"),
  mapping = aes(x = Gender, y = Product.Category)
)
Biểu diễn thông tin doanh thu theo địa phương và giới tính
BD1
# Bar chart of Revenue by City; stat = "identity" uses the Revenue values
# themselves as bar heights instead of counting rows.
X |>
  ggplot(mapping = aes(x = City, y = Revenue)) +
  geom_bar(stat = "identity")
Chuyển dạng hướng biểu đồ
# Rotate the coordinate axes so the cities run along the vertical axis.
X |>
  ggplot(mapping = aes(x = City, y = Revenue)) +
  geom_bar(stat = "identity") +
  coord_flip()
Phân biệt theo giới tính
# Revenue by city with the bars filled by gender (default stacking).
X |>
  ggplot(mapping = aes(x = City, y = Revenue, fill = Gender)) +
  geom_bar(stat = "identity") +
  coord_flip()

# Same mapping with the gender bars placed side by side.
# NOTE(review): X still holds one row per transaction here, so the dodged
# identity bars presumably overlap rather than add up - confirm this is the
# intended illustration.
X |>
  ggplot(mapping = aes(x = City, y = Revenue, fill = Gender)) +
  geom_bar(position = "dodge", stat = "identity") +
  coord_flip()

# Side-by-side version again, split into one panel per gender.
X |>
  ggplot(mapping = aes(x = City, y = Revenue, fill = Gender)) +
  geom_bar(position = "dodge", stat = "identity") +
  coord_flip() +
  facet_wrap(~ Gender)
# Total revenue for every City x Gender combination.
# `.groups = "drop_last"` spells out the regrouping that summarise() applied
# implicitly (the result stays grouped by City, as the printout shows), so the
# informational message about `.groups` is no longer emitted.
X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue), .groups = "drop_last")
#> # A tibble: 46 × 3
#> # Groups: City [23]
#> City Gender Revenue
#> <chr> <chr> <dbl>
#> 1 Acapulco F 2566.
#> 2 Acapulco M 2596.
#> 3 Bellingham F 453.
#> 4 Bellingham M 539.
#> 5 Beverly Hills F 5050.
#> 6 Beverly Hills M 5270.
#> 7 Bremerton F 5270.
#> 8 Bremerton M 5705.
#> 9 Camacho F 3643.
#> 10 Camacho M 2154.
#> # ℹ 36 more rows
# Dot plot of total revenue per city, one point per gender.
# `.groups = "drop"` returns an ungrouped summary directly, replacing the
# separate ungroup() step and silencing the summarise() grouping message.
X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(Revenue, City)) +
  geom_point(aes(color = Gender))

# Horizontal bars of total revenue per city, one panel per gender.
# The data are aggregated first, so each City x Gender pair has exactly one bar.
# `.groups = "drop"` returns an ungrouped summary without the grouping message.
X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(Revenue, City, fill = Gender)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~ Gender)

# Dumbbell chart: one point per gender, joined by a line within each city.
# `.groups = "drop"` returns an ungrouped summary without the grouping message.
X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(Revenue, City)) +
  geom_point(aes(color = Gender)) +
  geom_line(aes(group = City))

# Dumbbell chart with each point labelled by its revenue rounded to a whole
# number.
# `.groups = "drop"` returns an ungrouped summary without the grouping message.
X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(Revenue, City, label = round(Revenue, 0))) +
  geom_point(aes(color = Gender)) +
  geom_line(aes(group = City)) +
  geom_text()

# Store the City x Gender revenue totals as X2 for the charts that follow.
# Uses TRUE rather than the reassignable shorthand T, and `.groups = "drop"`
# so the result is ungrouped without a separate ungroup() call.
X2 <- X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop")
# For every city, show the gender row with the higher revenue: sort by
# descending revenue, then keep the first row of each City group.
X2 |>
  group_by(City) |>
  arrange(desc(Revenue)) |>
  slice(1)
#> # A tibble: 23 × 3
#> # Groups: City [23]
#> City Gender Revenue
#> <chr> <chr> <dbl>
#> 1 Acapulco M 2596.
#> 2 Bellingham M 539.
#> 3 Beverly Hills M 5270.
#> 4 Bremerton M 5705.
#> 5 Camacho F 3643.
#> 6 Guadalajara F 291.
#> 7 Hidalgo F 7361.
#> 8 Los Angeles M 6282.
#> 9 Merida F 4770.
#> 10 Mexico City F 1255.
#> # ℹ 13 more rows
# Label data for the dumbbell chart. Within each city the row with the higher
# revenue is labelled to the right of its point and the second row to the left.
# Both frames are ungrouped once the per-city row has been picked.
right_label <- X2 |>
  group_by(City) |>
  arrange(desc(Revenue)) |>
  slice(1) |>
  ungroup()

# slice(2) takes the second-highest row; a city with a single row gets no
# left-hand label.
left_label <- X2 |>
  group_by(City) |>
  arrange(desc(Revenue)) |>
  slice(2) |>
  ungroup()

# Dumbbell chart with the revenue labels pushed outwards via hjust; the x
# limits are widened so the outer labels have room.
ggplot(X2, aes(Revenue, City)) +
  geom_line(aes(group = City)) +
  geom_point(aes(color = Gender), size = 1.5) +
  geom_text(
    data = right_label,
    aes(color = Gender, label = round(Revenue, 0)),
    size = 3, hjust = -.5
  ) +
  geom_text(
    data = left_label,
    aes(color = Gender, label = round(Revenue, 0)),
    size = 3, hjust = 1.5
  ) +
  scale_x_continuous(limits = c(-500, 10500))
# Rank the cities by total revenue (ascending) and freeze that order as the
# factor levels of City. Each city appears once after summarise(), so the
# arranged City column can serve directly as the level vector.
Thubac <- X2 |>
  group_by(City) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE)) |>
  arrange(Revenue) |>
  mutate(City = factor(City, levels = City))

# Apply the same level order to X2 so the chart's City axis follows the ranking.
X2 <- X2 |>
  mutate(City = factor(City, levels = levels(Thubac$City)))

# Dumbbell chart with the cities ordered by total revenue.
ggplot(X2, aes(Revenue, City)) +
  geom_line(aes(group = City)) +
  geom_point(aes(color = Gender), size = 1.5) +
  geom_text(
    data = right_label,
    aes(color = Gender, label = round(Revenue, 0)),
    size = 3, hjust = -.5
  ) +
  geom_text(
    data = left_label,
    aes(color = Gender, label = round(Revenue, 0)),
    size = 3, hjust = 1.5
  )
# Check X2 after the releveling: City is now a factor (<fct>).
head(X2)
#> # A tibble: 6 × 3
#> City Gender Revenue
#> <fct> <chr> <dbl>
#> 1 Acapulco F 2566.
#> 2 Acapulco M 2596.
#> 3 Bellingham F 453.
#> 4 Bellingham M 539.
#> 5 Beverly Hills F 5050.
#> 6 Beverly Hills M 5270.
# Per city, D is the ratio of the larger to the smaller gender revenue.
# Sort by D (largest gap first) and show the first ten rows.
X2 |>
  group_by(City) |>
  mutate(D = max(Revenue) / min(Revenue)) |>
  arrange(desc(D)) |>
  head(10)
#> # A tibble: 10 × 4
#> # Groups: City [5]
#> City Gender Revenue D
#> <fct> <chr> <dbl> <dbl>
#> 1 Hidalgo F 7361. 1.86
#> 2 Hidalgo M 3952. 1.86
#> 3 Camacho F 3643. 1.69
#> 4 Camacho M 2154. 1.69
#> 5 Walla Walla F 488. 1.38
#> 6 Walla Walla M 676. 1.38
#> 7 San Andres F 3459. 1.37
#> 8 San Andres M 4748. 1.37
#> 9 Yakima F 2159. 1.35
#> 10 Yakima M 2911. 1.35
# Keep the ten rows with the largest revenue ratio D as BigD.
# NOTE(review): ten rows correspond to five cities only because each city has
# two rows (F and M) in X2 - revisit head(10) if that ever changes.
BigD <- X2 |>
  group_by(City) |>
  mutate(D = max(Revenue) / min(Revenue)) |>
  arrange(desc(D)) |>
  head(10)

# Rows of X2 that belong to those cities.
X2 |>
  filter(City %in% BigD$City)
#> # A tibble: 10 × 3
#> City Gender Revenue
#> <fct> <chr> <dbl>
#> 1 Camacho F 3643.
#> 2 Camacho M 2154.
#> 3 Hidalgo F 7361.
#> 4 Hidalgo M 3952.
#> 5 San Andres F 3459.
#> 6 San Andres M 4748.
#> 7 Walla Walla F 488.
#> 8 Walla Walla M 676.
#> 9 Yakima F 2159.
#> 10 Yakima M 2911.
# Subset of X2 for the cities listed in BigD; these get emphasised.
highlight <- X2 |>
  filter(City %in% BigD$City)

# All cities drawn faintly (gray line, small points), with the highlighted
# cities drawn again on top using the default line colour and larger points.
ggplot(X2, aes(Revenue, City)) +
  geom_line(aes(group = City), color = "gray") +
  geom_point(aes(color = Gender), size = 0.5) +
  geom_line(data = highlight, aes(group = City)) +
  geom_point(data = highlight, aes(color = Gender), size = 2)
# Restrict the label frames to the cities listed in BigD.
right_label1 <- filter(right_label, City %in% BigD$City)
left_label1 <- filter(left_label, City %in% BigD$City)

# Highlighted dumbbell chart with revenue labels on the emphasised cities only.
ggplot(X2, aes(Revenue, City)) +
  geom_line(aes(group = City), color = "gray") +
  geom_point(aes(color = Gender), size = 1) +
  geom_line(data = highlight, aes(group = City)) +
  geom_point(data = highlight, aes(color = Gender), size = 5) +
  geom_text(
    data = right_label1,
    aes(color = Gender, label = round(Revenue, 0)),
    size = 3, hjust = -.5
  ) +
  geom_text(
    data = left_label1,
    aes(color = Gender, label = round(Revenue, 0)),
    size = 3, hjust = 1.5
  )
Loại biểu đồ tương tác
Biểu đồ động
library(htmlwidgets)
library(plotly)
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
# Preview the raw transactions again (first six rows).
head(X)
#>  Transaction Purchase.Date Customer.ID Gender Marital.Status Homeowner
#> 1 1 40895 7223 F M Y
#> 2 2 40897 7841 M M Y
#> 3 3 40898 8374 F M N
#> 4 4 40898 9619 M M Y
#> 5 5 40899 1900 F S Y
#> 6 6 40899 6696 F M Y
#> Children Annual.Income City State.or.Province Country Product.Family
#> 1 2 $30K - $50K Los Angeles CA USA Food
#> 2 5 $70K - $90K Los Angeles CA USA Food
#> 3 2 $50K - $70K Bremerton WA USA Food
#> 4 3 $30K - $50K Portland OR USA Food
#> 5 3 $130K - $150K Beverly Hills CA USA Drink
#> 6 3 $10K - $30K Beverly Hills CA USA Food
#> Product.Department Product.Category Units.Sold Revenue
#> 1 Snack Foods Snack Foods 5 27.38
#> 2 Produce Vegetables 5 14.90
#> 3 Snack Foods Snack Foods 3 5.52
#> 4 Snacks Candy 4 4.44
#> 5 Beverages Carbonated Beverages 4 14.00
#> 6 Deli Side Dishes 3 4.37
# Add R, the revenue total of each City x Gender x Product.Family group, to
# every transaction row. mutate() keeps all rows and columns of X; the result
# replaces X2 and is ungrouped so no grouping leaks into later steps.
X2 <- X |>
  group_by(City, Gender, Product.Family) |>
  mutate(R = sum(Revenue)) |>
  ungroup()

# Bubble chart of R by city: point size follows R, colour the product family.
# NOTE(review): all transactions of a group share the same R, so identical
# points are presumably drawn on top of each other - confirm this is intended.
ggplot(X2, aes(R, City, size = R, color = Product.Family)) +
  geom_point()
# Keep the bubble chart as p1 (needed for the interactive conversion), then
# print it.
p1 <- X2 |>
  ggplot(mapping = aes(x = R, y = City, size = R, color = Product.Family)) +
  geom_point()
p1
# Interactive version of the bubble chart.
ggplotly(p1)

# Rebuild the City x Gender revenue summary BEFORE drawing the dumbbell chart.
# At this point X2 holds the transaction-level rows prepared for p1, so
# building P first would plot individual transactions instead of city totals.
X2 <- X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop")

# Highlighted dumbbell chart stored as P.
P <- ggplot(X2, aes(Revenue, City)) +
  geom_line(aes(group = City), color = "gray") +
  geom_point(aes(color = Gender), size = 1) +
  geom_line(data = highlight, aes(group = City)) +
  geom_point(data = highlight, aes(color = Gender), size = 5) +
  geom_text(
    data = right_label1,
    aes(color = Gender, label = round(Revenue, 0)),
    size = 3, hjust = -.5
  ) +
  geom_text(
    data = left_label1,
    aes(color = Gender, label = round(Revenue, 0)),
    size = 3, hjust = 1.5
  )
# Highlighted dumbbell chart with smaller points, stored as P2 and converted
# to an interactive plotly widget.
P2 <- ggplot(X2, aes(Revenue, City)) +
  geom_line(aes(group = City), color = "gray") +
  geom_point(aes(color = Gender), size = 0.5) +
  geom_line(data = highlight, aes(group = City)) +
  geom_point(data = highlight, aes(color = Gender), size = 2) +
  geom_text(
    data = right_label1,
    aes(color = Gender, label = round(Revenue, 0)),
    size = 3, hjust = -.5
  ) +
  geom_text(
    data = left_label1,
    aes(color = Gender, label = round(Revenue, 0)),
    size = 3, hjust = 1.5
  )
ggplotly(P2)

# Faceted bar chart of the raw transactions, stored as X0 and made interactive.
X0 <- ggplot(X, aes(City, Revenue, fill = Gender)) +
  geom_bar(stat = "identity", position = "dodge") +
  coord_flip() +
  facet_wrap(~ Gender)
ggplotly(X0)

# Explore the data through an intermediary app launched from R ----
# Install GWalkR only when it is not available, then attach it.
# requireNamespace() checks availability without attaching; library() then
# fails loudly if the package still cannot be loaded, whereas require() would
# only return FALSE.
if (!requireNamespace("GWalkR", quietly = TRUE)) {
  install.packages("GWalkR")
}
library(GWalkR)
# !formatR
library(GWalkR)
library(shiny)

# Data handed to the explorer.
Dat <- X # Data

# Minimal Shiny app: a title plus one GWalkR output bound to `Dat`.
app <- shinyApp(
  ui = fluidPage(
    titlePanel("Explore the data here: "),
    gwalkrOutput("mygraph")
  ),
  server = function(input, output, session) {
    output$mygraph <- renderGwalkr(
      gwalkr(Dat)
    )
  }
)

# Launch only in interactive sessions. runApp() is called explicitly because a
# bare `app` relies on top-level auto-printing, which does not happen when the
# script is run through source().
if (interactive()) {
  runApp(app)
}