L7_2

Advanced Visualization 1

# Load the ggplot2 package
library(ggplot2)
library(GGally)

Warning: package 'GGally' was built under R version 4.4.1

Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2

library(gganimate)

Warning: package 'gganimate' was built under R version 4.4.1

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.4.1

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.1

── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

# Scatter plot of displ against hwy, with the data, mapping and x/y
# aesthetics all named explicitly.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point()

# Same scatter plot: the first two positional arguments of aes() are x and y.
ggplot(data = mpg, mapping = aes(displ, hwy)) +
  geom_point()

# Line chart of unemploy over date.
ggplot(data = economics, mapping = aes(x = date, y = unemploy)) +
  geom_line()

# Histogram of cty using the default binning.
ggplot(data = mpg, mapping = aes(x = cty)) +
  geom_histogram()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Thêm màu sắc, hình dạng

# Map class to the point colour.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, colour = class)) +
  geom_point()

# Additionally map drv to the point shape.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, colour = class, shape = drv)
) +
  geom_point()

# Additionally map cyl to the point size.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, colour = class, shape = drv, size = cyl)
) +
  geom_point()

# A constant colour passed to the layer (outside aes()) is applied to every
# point, even though the plot-level mapping also sets colour = class.
ggplot(
  data = mpg,
  mapping = aes(x = displ, y = hwy, colour = class, shape = drv, size = cyl)
) +
  geom_point(color = "blue")

# Constant blue points; only size is mapped to a variable.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, size = cyl)) +
  geom_point(color = "blue")

# A string placed inside aes() is treated as a data value (a label shown in
# the legend), not as the name of a colour.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy, size = cyl)) +
  geom_point(mapping = aes(color = "Đây chỉ là tên gán vào"))

Giao diện

# Scatter plot of displ against hwy, split into one panel per class.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  facet_wrap(facets = vars(class))

Các dạng biểu đồ cơ bản

geom_smooth() fits a smoother to the data and displays the smooth and its standard error.

# Scatter plot with a fitted smoother, drawn separately for each class.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth() +
  facet_wrap(facets = vars(class))

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 5.6935

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.5065

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 0

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 0.65044

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : span too small.  fewer
data values than degrees of freedom.

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
5.6935

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
0.5065

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
number 0

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : There are other near
singularities as well. 0.65044

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at 4.008

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.708

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 0

Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 0.25

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
4.008

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
0.708

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
number 0

Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : There are other near
singularities as well. 0.25

geom_boxplot() produces a box-and-whisker plot to summarise the distribution of a set of points.

# One box-and-whisker summary of hwy per class panel.
# displ is continuous, so without an explicit group ggplot2 warns
# "Continuous x aesthetic -- did you forget aes(group = ...)?".
# group = 1 states the intent (a single box per panel) and silences it.
ggplot(mpg, aes(displ, hwy)) +
  geom_boxplot(aes(group = 1)) +
  facet_wrap(~class)

Warning: Continuous x aesthetic
ℹ did you forget `aes(group = ...)`?

geom_histogram() and geom_freqpoly() show the distribution of continuous variables.

# Histogram of hwy in one panel per class (default binning).
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_histogram() +
  facet_wrap(facets = vars(class))

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Frequency polygon of hwy in one panel per class (default binning).
ggplot(data = mpg, mapping = aes(x = hwy)) +
  geom_freqpoly() +
  facet_wrap(facets = vars(class))

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

geom_bar() shows the distribution of categorical variables.

# Bar counts of displ with a density curve layered on top, one panel per class.
# NOTE(review): the bars and the density curve are presumably on different
# y scales (counts vs. density) -- confirm this overlay is intended.
ggplot(data = mpg, mapping = aes(x = displ)) +
  geom_bar() +
  geom_density() +
  facet_wrap(facets = vars(class))

geom_path() and geom_line() draw lines between the data points.

Kết hợp nhiều biểu đồ vào trong 1

# Points plus a smoother fitted with the default settings.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth()

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same plot with a small span (0.2) passed to the smoother.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(span = 0.2)

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same plot with a large span (1) passed to the smoother.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(span = 1)

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

# Same plot with a linear-model fit instead of the default smoother.
ggplot(data = mpg, mapping = aes(x = displ, y = hwy)) +
  geom_point() +
  geom_smooth(method = "lm")

`geom_smooth()` using formula = 'y ~ x'

Boxplots and jittered points

# hwy for each drv value as raw points.
ggplot(data = mpg, mapping = aes(x = drv, y = hwy)) +
  geom_point()

# Same data with random jitter added to the points.
ggplot(data = mpg, mapping = aes(x = drv, y = hwy)) +
  geom_jitter()

# Box-and-whisker summary per drv value.
ggplot(data = mpg, mapping = aes(x = drv, y = hwy)) +
  geom_boxplot()

# Violin plot per drv value.
ggplot(data = mpg, mapping = aes(x = drv, y = hwy)) +
  geom_violin()

# Layers are drawn in the order they are added: here the violins are drawn
# on top of the jittered points ...
ggplot(data = mpg, mapping = aes(x = drv, y = hwy)) +
  geom_jitter() +
  geom_violin()

# ... and here the jittered points are drawn on top of the violins.
ggplot(data = mpg, mapping = aes(x = drv, y = hwy)) +
  geom_violin() +
  geom_jitter()

Bar

# Bars whose heights come straight from the hwy values (stacked per drv).
# geom_col() is the shorthand for geom_bar(stat = "identity").
ggplot(data = mpg, mapping = aes(x = drv, y = hwy)) +
  geom_col()

# The same mapping drawn as individual points, for comparison.
ggplot(data = mpg, mapping = aes(x = drv, y = hwy)) +
  geom_point()

Time series

# unemploy / pop over date.
ggplot(data = economics, mapping = aes(x = date, y = unemploy / pop)) +
  geom_line()

# uempmed over date.
ggplot(data = economics, mapping = aes(x = date, y = uempmed)) +
  geom_line()

# The two series plotted against each other; geom_path() joins the
# observations in the order they appear in the data.
ggplot(data = economics, mapping = aes(x = unemploy / pop, y = uempmed)) +
  geom_path() +
  geom_point()

# Same plot with a grey path and the points coloured by date.
ggplot(data = economics, mapping = aes(x = unemploy / pop, y = uempmed)) +
  geom_path(colour = "grey50") +
  geom_point(mapping = aes(colour = date))

Kết hợp biểu đồ với các thông tin khác

# This example uses the ChickWeight dataset, which ships with base R
# (the datasets package).
# First plot: one line per Chick, coloured by Diet.
p1 <- ggplot(
  data = ChickWeight,
  mapping = aes(x = Time, y = weight, colour = Diet, group = Chick)
) +
  geom_line() +
  ggtitle("Tiêu đề thôi, không gì cả")

# Second plot: semi-transparent points with one smoother per Diet.
# `linewidth` replaces `size` for line geoms; using `size` here has been
# deprecated since ggplot2 3.4.0 and triggers a warning.
p2 <- ggplot(ChickWeight, aes(x = Time, y = weight, colour = Diet)) +
  geom_point(alpha = 0.3) +
  geom_smooth(alpha = 0.2, linewidth = 1) +
  ggtitle("Tiêu đề 2 - biểu đồ smooth ")

Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

# Third plot: density of weight for the rows with Time == 21,
# one curve per Diet.
p3 <- ggplot(
  data = subset(ChickWeight, Time == 21),
  mapping = aes(x = weight, colour = Diet)
) +
  geom_density() +
  ggtitle("Tiêu đề 3- Biểu đồ phân phối")

# Fourth plot: histogram of weight for the rows with Time == 21,
# one facet row per Diet.
p4 <- ggplot(
  data = subset(ChickWeight, Time == 21),
  mapping = aes(x = weight, fill = Diet)
) +
  geom_histogram(colour = "black", binwidth = 50) +
  facet_grid(rows = vars(Diet)) +
  ggtitle("Tiêu đề 4 - biểu đồ phân phối histogram") +
  # The facets already identify each Diet, so the legend is redundant.
  theme(legend.position = "none")

# Lay the four plots out on a single page in a two-column grid.
gridExtra::grid.arrange(p1, p2, p3, p4, ncol = 2)

`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Phần 2: Các biểu đồ khác

# Horizontal bars: cyl values stacked per manufacturer.
# geom_col() is the shorthand for geom_bar(stat = "identity").
ggplot(data = mpg, mapping = aes(x = manufacturer, y = cyl)) +
  geom_col() +
  coord_flip()

# Same bars with the individual cyl values overlaid as red points.
ggplot(data = mpg, mapping = aes(x = manufacturer, y = cyl)) +
  geom_col() +
  coord_flip() +
  geom_point(colour = "red")

# Same bars with each segment filled by model.
ggplot(data = mpg, mapping = aes(x = manufacturer, y = cyl, fill = model)) +
  geom_col() +
  coord_flip()

# Restrict to two manufacturers, then stack cyl per manufacturer, filled by
# model.
mpg |>
  subset(manufacturer %in% c("toyota", "audi")) |>
  ggplot(mapping = aes(x = manufacturer, y = cyl, fill = model)) +
  geom_col() +
  coord_flip()

# Every manufacturer except chevrolet, with side-by-side (dodged) bars and
# one panel per year.
mpg |>
  subset(manufacturer != "chevrolet") |>
  ggplot(mapping = aes(x = manufacturer, y = cyl, fill = model)) +
  geom_col(position = "dodge") +
  coord_flip() +
  facet_wrap(facets = vars(year))

# Read the "Data" sheet of the supermarket transactions workbook.
# NOTE(review): the path is an absolute, machine-specific location.
X <- openxlsx::read.xlsx(
  xlsxFile = "C:\\Users\\Huynh Chuong\\Desktop\\University\\UEL\\Class_QuantMethods\\2024\\Lectures\\Supermarket Transactions.xlsx",
  sheet = "Data"
)

# Preview the first 20 rows.
head(X, n = 20)

   Transaction Purchase.Date Customer.ID Gender Marital.Status Homeowner
1            1         40895        7223      F              M         Y
2            2         40897        7841      M              M         Y
3            3         40898        8374      F              M         N
4            4         40898        9619      M              M         Y
5            5         40899        1900      F              S         Y
6            6         40899        6696      F              M         Y
7            7         40900        9673      M              S         Y
8            8         40902         354      F              M         Y
9            9         40902        1293      M              M         Y
10          10         40902        7938      M              S         N
11          11         40903        9357      F              M         N
12          12         40903        3097      M              M         Y
13          13         40903        2741      M              S         N
14          14         40903        2032      F              M         N
15          15         40904        6651      M              S         N
16          16         40904        5230      M              M         Y
17          17         40904        6161      F              S         N
18          18         40905        5717      F              S         N
19          19         40905         354      F              M         Y
20          20         40905        3458      F              S         N
   Children Annual.Income          City State.or.Province Country
1         2   $30K - $50K   Los Angeles                CA     USA
2         5   $70K - $90K   Los Angeles                CA     USA
3         2   $50K - $70K     Bremerton                WA     USA
4         3   $30K - $50K      Portland                OR     USA
5         3 $130K - $150K Beverly Hills                CA     USA
6         3   $10K - $30K Beverly Hills                CA     USA
7         2   $30K - $50K         Salem                OR     USA
8         2       $150K +        Yakima                WA     USA
9         3   $10K - $30K    Bellingham                WA     USA
10        1   $50K - $70K     San Diego                CA     USA
11        0   $30K - $50K Beverly Hills                CA     USA
12        1   $30K - $50K Beverly Hills                CA     USA
13        3   $70K - $90K    Bellingham                WA     USA
14        3   $10K - $30K        Yakima                WA     USA
15        0   $30K - $50K      Portland                OR     USA
16        3   $30K - $50K         Salem                OR     USA
17        1   $10K - $30K        Tacoma                WA     USA
18        0   $30K - $50K   Los Angeles                CA     USA
19        2       $150K +        Yakima                WA     USA
20        5   $10K - $30K     San Diego                CA     USA
   Product.Family Product.Department     Product.Category Units.Sold Revenue
1            Food        Snack Foods          Snack Foods          5   27.38
2            Food            Produce           Vegetables          5   14.90
3            Food        Snack Foods          Snack Foods          3    5.52
4            Food             Snacks                Candy          4    4.44
5           Drink          Beverages Carbonated Beverages          4   14.00
6            Food               Deli          Side Dishes          3    4.37
7            Food       Frozen Foods      Breakfast Foods          4   13.78
8            Food       Canned Foods          Canned Soup          6    7.34
9  Non-Consumable          Household    Cleaning Supplies          1    2.41
10 Non-Consumable Health and Hygiene       Pain Relievers          2    8.96
11           Food        Snack Foods          Snack Foods          3   11.82
12           Food       Baking Goods         Baking Goods          5   14.45
13           Food       Canned Foods          Canned Tuna          4   19.18
14 Non-Consumable          Household     Plastic Products          4   19.50
15           Food            Produce                Fruit          5   13.06
16 Non-Consumable Health and Hygiene              Hygiene          5   12.42
17           Food       Frozen Foods                 Meat          5   17.65
18           Food        Snack Foods          Snack Foods          3    8.74
19          Drink              Dairy                Dairy          5    7.75
20          Drink          Beverages               Drinks          5    6.66

# List the column names of the transactions data frame.
colnames(X)

 [1] "Transaction"        "Purchase.Date"      "Customer.ID"       
 [4] "Gender"             "Marital.Status"     "Homeowner"         
 [7] "Children"           "Annual.Income"      "City"              
[10] "State.or.Province"  "Country"            "Product.Family"    
[13] "Product.Department" "Product.Category"   "Units.Sold"        
[16] "Revenue"

# GGally helper plots for pairs of categorical columns.

# Gender against Marital.Status as a column bar plot.
ggally_colbar(data = X, mapping = aes(x = Gender, y = Marital.Status))

# Observation counts for Marital.Status against Gender, coloured by Homeowner.
ggally_count(
  data = X,
  mapping = aes(x = Marital.Status, y = Gender, colour = Homeowner)
)

# Homeowner against Gender, restricted to rows with Marital.Status == "M".
X |>
  filter(Marital.Status == "M") |>
  ggally_cross(mapping = aes(x = Homeowner, y = Gender, colour = Gender))

# Cross-tabulation of Marital.Status and Gender; cells hold column
# proportions and the fill is taken from the standardised residuals.
ggally_crosstable(
  data = X,
  mapping = aes(x = Marital.Status, y = Gender),
  cells = "col.prop",
  fill = "std.resid"
)

# Faceted bar plot of Annual.Income against Children.
ggally_facetbar(data = X, mapping = aes(x = Annual.Income, y = Children))

# Faceted bar plot of Marital.Status against Gender, coloured by
# Product.Category.
ggally_facetbar(
  data = X,
  mapping = aes(x = Marital.Status, y = Gender, colour = Product.Category)
)

# Row bar plot of Gender against Product.Category for rows with
# Marital.Status == "M".
X |>
  filter(Marital.Status == "M") |>
  ggally_rowbar(mapping = aes(x = Gender, y = Product.Category))

# Revenue per City as horizontal bars, stacked by Gender.
# geom_col() is the shorthand for geom_bar(stat = "identity").
ggplot(data = X, mapping = aes(x = City, y = Revenue, fill = Gender)) +
  geom_col() +
  coord_flip()

# Same data with the Gender bars placed side by side.
ggplot(data = X, mapping = aes(x = City, y = Revenue, fill = Gender)) +
  geom_col(position = "dodge") +
  coord_flip()

# Same data split into one panel per Gender.
ggplot(data = X, mapping = aes(x = City, y = Revenue, fill = Gender)) +
  geom_col(position = "dodge") +
  coord_flip() +
  facet_wrap(facets = vars(Gender))

# Total Revenue for every City/Gender pair (the result stays grouped by City).
summarise(
  group_by(X, City, Gender),
  Revenue = sum(Revenue)
)

`summarise()` has grouped output by 'City'. You can override using the
`.groups` argument.

# A tibble: 46 × 3
# Groups:   City [23]
   City          Gender Revenue
   <chr>         <chr>    <dbl>
 1 Acapulco      F        2566.
 2 Acapulco      M        2596.
 3 Bellingham    F         453.
 4 Bellingham    M         539.
 5 Beverly Hills F        5050.
 6 Beverly Hills M        5270.
 7 Bremerton     F        5270.
 8 Bremerton     M        5705 
 9 Camacho       F        3643.
10 Camacho       M        2154.
# ℹ 36 more rows

# Total revenue per City/Gender pair, drawn as one point per pair.
# `.groups = "drop"` returns an ungrouped tibble directly (replacing the
# separate ungroup() step) and avoids the summarise() grouping message.
# A single pipe operator is used throughout.
X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(Revenue, City)) +
  geom_point(aes(color = Gender))

`summarise()` has grouped output by 'City'. You can override using the
`.groups` argument.

# Total revenue per City/Gender pair as horizontal bars, one panel per Gender.
# `.groups = "drop"` returns an ungrouped tibble directly (replacing the
# separate ungroup() step) and avoids the summarise() grouping message.
# A single pipe operator is used throughout.
X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(Revenue, City, fill = Gender)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~Gender)

`summarise()` has grouped output by 'City'. You can override using the
`.groups` argument.

# Dumbbell chart: one point per Gender, joined by a line within each City.
# `.groups = "drop"` returns an ungrouped tibble directly (replacing the
# separate ungroup() step) and avoids the summarise() grouping message.
# A single pipe operator is used throughout.
X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(Revenue, City)) +
  geom_point(aes(color = Gender)) +
  geom_line(aes(group = City))

`summarise()` has grouped output by 'City'. You can override using the
`.groups` argument.

# Dumbbell chart with the rounded revenue printed at every point.
# `.groups = "drop"` returns an ungrouped tibble directly (replacing the
# separate ungroup() step) and avoids the summarise() grouping message.
# A single pipe operator is used throughout.
X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop") |>
  ggplot(aes(Revenue, City, label = round(Revenue, 0))) +
  geom_point(aes(color = Gender)) +
  geom_line(aes(group = City)) +
  geom_text()

`summarise()` has grouped output by 'City'. You can override using the
`.groups` argument.

# X2: total revenue per City/Gender pair, as an ungrouped tibble.
# Uses TRUE instead of the reassignable shorthand T, left-to-right assignment
# replaced by `<-`, and `.groups = "drop"` instead of a separate ungroup().
X2 <- X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop")

`summarise()` has grouped output by 'City'. You can override using the
`.groups` argument.

# For every City, show the row with the highest Revenue: sort the whole table
# by descending Revenue, then keep the first row of each City group.
X2 |>
  group_by(City) |>
  arrange(desc(Revenue)) |>
  slice_head(n = 1)

# A tibble: 23 × 3
# Groups:   City [23]
   City          Gender Revenue
   <chr>         <chr>    <dbl>
 1 Acapulco      M        2596.
 2 Bellingham    M         539.
 3 Beverly Hills M        5270.
 4 Bremerton     M        5705 
 5 Camacho       F        3643.
 6 Guadalajara   F         291.
 7 Hidalgo       F        7361.
 8 Los Angeles   M        6282.
 9 Merida        F        4770.
10 Mexico City   F        1255.
# ℹ 13 more rows

# Label data for the dumbbell chart. After sorting by descending Revenue,
# the first row of each City is its larger value (labelled to the right of
# the point) and the second row is the next one (labelled to the left).
right_label <- X2 |>
  group_by(City) |>
  arrange(desc(Revenue)) |>
  slice_head(n = 1)

left_label <- X2 |>
  group_by(City) |>
  arrange(desc(Revenue)) |>
  slice(2)

# Dumbbell chart with value labels placed outside each end point.
ggplot(data = X2, mapping = aes(x = Revenue, y = City)) +
  geom_line(mapping = aes(group = City)) +
  geom_point(mapping = aes(color = Gender), size = 1.5) +
  geom_text(
    data = right_label,
    mapping = aes(color = Gender, label = round(Revenue, 0)),
    size = 3,
    hjust = -0.5
  ) +
  geom_text(
    data = left_label,
    mapping = aes(color = Gender, label = round(Revenue, 0)),
    size = 3,
    hjust = 1.5
  ) +
  # Widen the x axis beyond the data on both sides.
  scale_x_continuous(limits = c(-500, 10500))

# Thubac: total revenue per City, sorted ascending. City is turned into a
# factor whose levels follow that sorted order.
Thubac <- X2 |>
  group_by(City) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE)) |>
  arrange(Revenue) |>
  mutate(City = factor(City, levels = City))

# Apply the same level order to X2 so plots list the cities by total revenue.
X2 <- mutate(X2, City = factor(City, levels = Thubac$City))

# Labelled dumbbell chart again, now with City as an ordered factor.
ggplot(data = X2, mapping = aes(x = Revenue, y = City)) +
  geom_line(mapping = aes(group = City)) +
  geom_point(mapping = aes(color = Gender), size = 1.5) +
  geom_text(
    data = right_label,
    mapping = aes(color = Gender, label = round(Revenue, 0)),
    size = 3,
    hjust = -0.5
  ) +
  geom_text(
    data = left_label,
    mapping = aes(color = Gender, label = round(Revenue, 0)),
    size = 3,
    hjust = 1.5
  )

# Preview the first rows of X2.
head(X2)

# A tibble: 6 × 3
  City          Gender Revenue
  <fct>         <chr>    <dbl>
1 Acapulco      F        2566.
2 Acapulco      M        2596.
3 Bellingham    F         453.
4 Bellingham    M         539.
5 Beverly Hills F        5050.
6 Beverly Hills M        5270.

# D is the ratio of the largest to the smallest Revenue within each City.
# Show the ten rows with the highest ratio.
X2 |>
  group_by(City) |>
  mutate(D = max(Revenue) / min(Revenue)) |>
  arrange(desc(D)) |>
  head(n = 10)

# A tibble: 10 × 4
# Groups:   City [5]
   City        Gender Revenue     D
   <fct>       <chr>    <dbl> <dbl>
 1 Hidalgo     F        7361.  1.86
 2 Hidalgo     M        3952.  1.86
 3 Camacho     F        3643.  1.69
 4 Camacho     M        2154.  1.69
 5 Walla Walla F         488.  1.38
 6 Walla Walla M         676.  1.38
 7 San Andres  F        3459.  1.37
 8 San Andres  M        4748.  1.37
 9 Yakima      F        2159.  1.35
10 Yakima      M        2911.  1.35

# BigD: the ten rows with the largest within-city revenue ratio D
# (largest Revenue divided by smallest Revenue in that City).
# Written with `<-` and a single pipe operator, and ungrouped once D has been
# computed so the stored tibble does not carry the City grouping around.
BigD <- X2 |>
  group_by(City) |>
  mutate(D = max(Revenue) / min(Revenue)) |>
  ungroup() |>
  arrange(desc(D)) |>
  head(n = 10)

# All X2 rows belonging to the cities that appear in BigD.
X2 |>
  filter(City %in% BigD$City)

# A tibble: 10 × 3
   City        Gender Revenue
   <fct>       <chr>    <dbl>
 1 Camacho     F        3643.
 2 Camacho     M        2154.
 3 Hidalgo     F        7361.
 4 Hidalgo     M        3952.
 5 San Andres  F        3459.
 6 San Andres  M        4748.
 7 Walla Walla F         488.
 8 Walla Walla M         676.
 9 Yakima      F        2159.
10 Yakima      M        2911.

# Rows of X2 for the cities listed in BigD; these get emphasised in the plot.
highlight <- filter(X2, City %in% BigD$City)

# Every city in grey with small points; the highlighted cities are redrawn
# on top with a default-coloured line and larger points.
ggplot(data = X2, mapping = aes(x = Revenue, y = City)) +
  geom_line(mapping = aes(group = City), color = "gray") +
  geom_point(mapping = aes(color = Gender), size = 0.5) +
  geom_line(data = highlight, mapping = aes(group = City)) +
  geom_point(data = highlight, mapping = aes(color = Gender), size = 2)

# Keep only the labels that belong to the highlighted cities.
right_label1 <- right_label |>
  filter(City %in% BigD$City)
left_label1 <- left_label |>
  filter(City %in% BigD$City)

# Highlighted dumbbell chart with value labels on the highlighted cities only.
ggplot(data = X2, mapping = aes(x = Revenue, y = City)) +
  geom_line(mapping = aes(group = City), color = "gray") +
  geom_point(mapping = aes(color = Gender), size = 1) +
  geom_line(data = highlight, mapping = aes(group = City)) +
  geom_point(data = highlight, mapping = aes(color = Gender), size = 5) +
  geom_text(
    data = right_label1,
    mapping = aes(color = Gender, label = round(Revenue, 0)),
    size = 3,
    hjust = -0.5
  ) +
  geom_text(
    data = left_label1,
    mapping = aes(color = Gender, label = round(Revenue, 0)),
    size = 3,
    hjust = 1.5
  )

Interactive 1

library(htmlwidgets)
library(plotly)

Warning: package 'plotly' was built under R version 4.4.1


Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout

# Preview the first six rows of the transactions data.
head(X, n = 6L)

  Transaction Purchase.Date Customer.ID Gender Marital.Status Homeowner
1           1         40895        7223      F              M         Y
2           2         40897        7841      M              M         Y
3           3         40898        8374      F              M         N
4           4         40898        9619      M              M         Y
5           5         40899        1900      F              S         Y
6           6         40899        6696      F              M         Y
  Children Annual.Income          City State.or.Province Country Product.Family
1        2   $30K - $50K   Los Angeles                CA     USA           Food
2        5   $70K - $90K   Los Angeles                CA     USA           Food
3        2   $50K - $70K     Bremerton                WA     USA           Food
4        3   $30K - $50K      Portland                OR     USA           Food
5        3 $130K - $150K Beverly Hills                CA     USA          Drink
6        3   $10K - $30K Beverly Hills                CA     USA           Food
  Product.Department     Product.Category Units.Sold Revenue
1        Snack Foods          Snack Foods          5   27.38
2            Produce           Vegetables          5   14.90
3        Snack Foods          Snack Foods          3    5.52
4             Snacks                Candy          4    4.44
5          Beverages Carbonated Beverages          4   14.00
6               Deli          Side Dishes          3    4.37

  # Total revenue per City / Gender / Product.Family combination.
# The result is stored under its own name so that X2 (the City/Gender totals
# used by the dumbbell plots) is not overwritten with transaction-level rows.
# summarise() collapses the data to one row per combination, so each bubble
# is drawn once instead of once per transaction.
family_revenue <- X |>
  group_by(City, Gender, Product.Family) |>
  summarise(R = sum(Revenue, na.rm = TRUE), .groups = "drop")

# Bubble chart: position and size both encode the total R, colour encodes
# Product.Family.
ggplot(family_revenue, aes(R, City, size = R, color = Product.Family)) +
  geom_point()

# The same chart stored as p1 so it can be handed to ggplotly().
p1 <- ggplot(family_revenue, aes(R, City, size = R, color = Product.Family)) +
  geom_point()

# Highlighted, labelled dumbbell chart stored as P.
P <- ggplot(data = X2, mapping = aes(x = Revenue, y = City)) +
  geom_line(mapping = aes(group = City), color = "gray") +
  geom_point(mapping = aes(color = Gender), size = 1) +
  geom_line(data = highlight, mapping = aes(group = City)) +
  geom_point(data = highlight, mapping = aes(color = Gender), size = 5) +
  geom_text(
    data = right_label1,
    mapping = aes(color = Gender, label = round(Revenue, 0)),
    size = 3,
    hjust = -0.5
  ) +
  geom_text(
    data = left_label1,
    mapping = aes(color = Gender, label = round(Revenue, 0)),
    size = 3,
    hjust = 1.5
  )

# Convert the stored ggplot object p1 into an interactive plotly widget.
ggplotly(p = p1)

# Rebuild X2 as the total revenue per City/Gender pair.
# City is converted back to the revenue-ordered factor (levels taken from
# Thubac) so the interactive plot lists the cities in the same order as the
# static version instead of alphabetically.
# `.groups = "drop"` replaces the separate ungroup() step and avoids the
# summarise() grouping message.
X2 <- X |>
  group_by(City, Gender) |>
  summarise(Revenue = sum(Revenue, na.rm = TRUE), .groups = "drop") |>
  mutate(City = factor(City, levels = Thubac$City))

`summarise()` has grouped output by 'City'. You can override using the
`.groups` argument.

# Highlighted, labelled dumbbell chart stored as P2.
P2 <- ggplot(data = X2, mapping = aes(x = Revenue, y = City)) +
  geom_line(mapping = aes(group = City), color = "gray") +
  geom_point(mapping = aes(color = Gender), size = 0.5) +
  geom_line(data = highlight, mapping = aes(group = City)) +
  geom_point(data = highlight, mapping = aes(color = Gender), size = 2) +
  geom_text(
    data = right_label1,
    mapping = aes(color = Gender, label = round(Revenue, 0)),
    size = 3,
    hjust = -0.5
  ) +
  geom_text(
    data = left_label1,
    mapping = aes(color = Gender, label = round(Revenue, 0)),
    size = 3,
    hjust = 1.5
  )

# Render P2 as an interactive plotly widget.
ggplotly(p = P2)