Clearing R environment

rm(list=ls())

Installing packages

library("tidyverse")
library("reshape2")

Importing data

read.csv("C:\\Users\\admin\\Downloads\\Audience.csv") -> data1
knitr::kable(data1[1:6, 1:3], caption = "Original data")
Original data
Age Women Men
18-24 11.80% 2.60%
25-34 30.30% 8.20%
35-44 16.20% 2.60%
45-54 10% 2.60%
55-64 11.50% 3.40%
65+ 0.40% 0.40%

Converting into long data

As the figures for women and men are of the same type, and I would like to compare between different genders, I convert the table into a longer data like below, in which column “Age” is kept, while columns “Women” and “Men” are merged

melt(data = data1, id.vars=c("Age")) -> data2
knitr::kable(data2, caption = "Longer data")
Longer data
Age variable value
18-24 Women 11.80%
25-34 Women 30.30%
35-44 Women 16.20%
45-54 Women 10%
55-64 Women 11.50%
65+ Women 0.40%
18-24 Men 2.60%
25-34 Men 8.20%
35-44 Men 2.60%
45-54 Men 2.60%
55-64 Men 3.40%
65+ Men 0.40%

Checking the type of data in data2

data2 %>% str()
## 'data.frame':    12 obs. of  3 variables:
##  $ Age     : chr  "18-24" "25-34" "35-44" "45-54" ...
##  $ variable: Factor w/ 2 levels "Women","Men": 1 1 1 1 1 1 2 2 2 2 ...
##  $ value   : chr  "11.80%" "30.30%" "16.20%" "10%" ...

As the value is set as character, I assume that it should be converted into numeric for better graphing

data2 %>% mutate(value=str_replace_all(value, pattern = "%", replacement="") %>% as.numeric())-> data2

Setting hjust and color for the graph label, which would be used later

data2 %>% pull(value)-> value1 #create a vector from a column
hjust_label <- case_when(value1>=1 ~ 1.5, TRUE ~ 0)
color_label <- case_when(value1>=1 ~ "white", TRUE ~ "red")

Plotting

ggplot(data = data2, aes(y = data2$Age, x = data2$value, fill = data2$variable))+
  geom_col(position = position_dodge(), width = 0.8)+ #seperate columns by sex
  geom_text(aes(label = data2$value), position = position_dodge(0.7), #divide the label
            hjust = hjust_label, color = color_label, size = 4)+
  theme(axis.ticks = element_blank())+
  labs(title = "Audience Insight",
       subtitle = "2023",
       caption = "Channel: Facebook",
       x = "percent (%)", y = "Age", fill = "Gender")+
  scale_fill_manual(values = c("#056fa1", "grey"))+
  theme(title = element_text(color = "#056fa1", family = "bold"))+
  theme(panel.background = element_rect(fill = "#1f2847"))+
  scale_x_continuous(breaks = seq(0, 40, 10), limits = c(0,40), position = "bottom")+
  theme(panel.grid.minor = element_blank(), panel.grid.major.y = element_blank())

The plot is not yet nice.

Another solution is to come back to data1, we will need to convert the variables Women and Men to numeric and put them in data3

data1 %>% mutate(Women=str_replace_all(Women, pattern = "%", replacement="") %>% as.numeric()) -> data3
data3 %>% mutate(Men=str_replace_all(Men, pattern = "%", replacement="") %>% as.numeric()) -> data3

Now we sort the value of women from smallest to largest, then convert to long data

data3 %>% arrange(Women) %>% mutate(Age = factor(Age, levels = Age)) -> data3
melt(data = data3, id.vars=c("Age")) -> data3

You can see the data structure changes as in the below table:

knitr::kable(data2, caption = "Longer data")
Longer data
Age variable value
18-24 Women 11.8
25-34 Women 30.3
35-44 Women 16.2
45-54 Women 10.0
55-64 Women 11.5
65+ Women 0.4
18-24 Men 2.6
25-34 Men 8.2
35-44 Men 2.6
45-54 Men 2.6
55-64 Men 3.4
65+ Men 0.4

We also set the hjust and color for label as below:

data3 %>% pull(value)-> value3 #create a vector from a column
hjust_label3 <- case_when(value3>=4 ~ 1.5, TRUE ~ -0.5)
color_label3 <- case_when(value3>=1 ~ "white", TRUE ~ "red")

Now we graph

ggplot(data = data3, aes(y = data3$Age, x = data3$value, fill = data3$variable))+
  geom_col(position = position_dodge(), width = 0.8)+ #seperate columns by sex
  geom_text(aes(label = data3$value), position = position_dodge(0.7), #divide the label
            hjust = hjust_label3, color = color_label3, size = 4)+
  theme(axis.ticks = element_blank())+
  labs(title = "Audience Insight",
       subtitle = "2023",
       caption = "Channel: Facebook",
       x = "percent (%)", y = "Age", fill = "Gender")+
  scale_fill_manual(values = c("#056fa1", "grey"))+
  theme(title = element_text(color = "#056fa1", family = "bold"))+
  theme(panel.background = element_rect(fill = "#1f2847"))+
  scale_x_continuous(breaks = seq(0, 40, 10), limits = c(0,40), position = "bottom")+
  theme(panel.grid.minor = element_blank(), panel.grid.major.y = element_blank())+
  facet_wrap(data3$variable) -> p2

I try to add a pie chart for more insight.

data3 %>% ggplot(aes(x="", y = data3$value, fill = data3$variable))+
  geom_bar(width = 1, stat = "identity")+
  coord_polar("y", start=0)+
  labs(title = "Gender contribution",
       x = "percent (%)", y = "", fill = "")+
  scale_fill_manual(values = c("#056fa1", "grey"))+
  theme(axis.ticks = element_blank(), axis.text = element_blank())+
  theme(title = element_text(color = "#056fa1", family = "bold")) -> p3
library(gridExtra)
grid.arrange(p2, p3, nrow = 2)