rm(list=ls())
library("tidyverse")
library("reshape2")
read.csv("C:\\Users\\admin\\Downloads\\Audience.csv") -> data1
knitr::kable(data1[1:6, 1:3], caption = "Original data")
Age | Women | Men |
---|---|---|
18-24 | 11.80% | 2.60% |
25-34 | 30.30% | 8.20% |
35-44 | 16.20% | 2.60% |
45-54 | 10% | 2.60% |
55-64 | 11.50% | 3.40% |
65+ | 0.40% | 0.40% |
As the figures for women and men are of the same type, and I would like to compare between different genders, I convert the table into a longer data like below, in which column “Age” is kept, while columns “Women” and “Men” are merged
melt(data = data1, id.vars=c("Age")) -> data2
knitr::kable(data2, caption = "Longer data")
Age | variable | value |
---|---|---|
18-24 | Women | 11.80% |
25-34 | Women | 30.30% |
35-44 | Women | 16.20% |
45-54 | Women | 10% |
55-64 | Women | 11.50% |
65+ | Women | 0.40% |
18-24 | Men | 2.60% |
25-34 | Men | 8.20% |
35-44 | Men | 2.60% |
45-54 | Men | 2.60% |
55-64 | Men | 3.40% |
65+ | Men | 0.40% |
data2 %>% str()
## 'data.frame': 12 obs. of 3 variables:
## $ Age : chr "18-24" "25-34" "35-44" "45-54" ...
## $ variable: Factor w/ 2 levels "Women","Men": 1 1 1 1 1 1 2 2 2 2 ...
## $ value : chr "11.80%" "30.30%" "16.20%" "10%" ...
As the value is set as character, I assume that it should be converted into numeric for better graphing
data2 %>% mutate(value=str_replace_all(value, pattern = "%", replacement="") %>% as.numeric())-> data2
data2 %>% pull(value)-> value1 #create a vector from a column
hjust_label <- case_when(value1>=1 ~ 1.5, TRUE ~ 0)
color_label <- case_when(value1>=1 ~ "white", TRUE ~ "red")
ggplot(data = data2, aes(y = data2$Age, x = data2$value, fill = data2$variable))+
geom_col(position = position_dodge(), width = 0.8)+ #seperate columns by sex
geom_text(aes(label = data2$value), position = position_dodge(0.7), #divide the label
hjust = hjust_label, color = color_label, size = 4)+
theme(axis.ticks = element_blank())+
labs(title = "Audience Insight",
subtitle = "2023",
caption = "Channel: Facebook",
x = "percent (%)", y = "Age", fill = "Gender")+
scale_fill_manual(values = c("#056fa1", "grey"))+
theme(title = element_text(color = "#056fa1", family = "bold"))+
theme(panel.background = element_rect(fill = "#1f2847"))+
scale_x_continuous(breaks = seq(0, 40, 10), limits = c(0,40), position = "bottom")+
theme(panel.grid.minor = element_blank(), panel.grid.major.y = element_blank())
The plot is not yet nice.
Another solution is to come back to data1, we will need to convert the variables Women and Men to numeric and put them in data3
data1 %>% mutate(Women=str_replace_all(Women, pattern = "%", replacement="") %>% as.numeric()) -> data3
data3 %>% mutate(Men=str_replace_all(Men, pattern = "%", replacement="") %>% as.numeric()) -> data3
Now we sort the value of women from smallest to largest, then convert to long data
data3 %>% arrange(Women) %>% mutate(Age = factor(Age, levels = Age)) -> data3
melt(data = data3, id.vars=c("Age")) -> data3
You can see the data structure changes as in the below table:
knitr::kable(data2, caption = "Longer data")
Age | variable | value |
---|---|---|
18-24 | Women | 11.8 |
25-34 | Women | 30.3 |
35-44 | Women | 16.2 |
45-54 | Women | 10.0 |
55-64 | Women | 11.5 |
65+ | Women | 0.4 |
18-24 | Men | 2.6 |
25-34 | Men | 8.2 |
35-44 | Men | 2.6 |
45-54 | Men | 2.6 |
55-64 | Men | 3.4 |
65+ | Men | 0.4 |
We also set the hjust and color for label as below:
data3 %>% pull(value)-> value3 #create a vector from a column
hjust_label3 <- case_when(value3>=4 ~ 1.5, TRUE ~ -0.5)
color_label3 <- case_when(value3>=1 ~ "white", TRUE ~ "red")
Now we graph
ggplot(data = data3, aes(y = data3$Age, x = data3$value, fill = data3$variable))+
geom_col(position = position_dodge(), width = 0.8)+ #seperate columns by sex
geom_text(aes(label = data3$value), position = position_dodge(0.7), #divide the label
hjust = hjust_label3, color = color_label3, size = 4)+
theme(axis.ticks = element_blank())+
labs(title = "Audience Insight",
subtitle = "2023",
caption = "Channel: Facebook",
x = "percent (%)", y = "Age", fill = "Gender")+
scale_fill_manual(values = c("#056fa1", "grey"))+
theme(title = element_text(color = "#056fa1", family = "bold"))+
theme(panel.background = element_rect(fill = "#1f2847"))+
scale_x_continuous(breaks = seq(0, 40, 10), limits = c(0,40), position = "bottom")+
theme(panel.grid.minor = element_blank(), panel.grid.major.y = element_blank())+
facet_wrap(data3$variable) -> p2
I try to add a pie chart for more insight.
data3 %>% ggplot(aes(x="", y = data3$value, fill = data3$variable))+
geom_bar(width = 1, stat = "identity")+
coord_polar("y", start=0)+
labs(title = "Gender contribution",
x = "percent (%)", y = "", fill = "")+
scale_fill_manual(values = c("#056fa1", "grey"))+
theme(axis.ticks = element_blank(), axis.text = element_blank())+
theme(title = element_text(color = "#056fa1", family = "bold")) -> p3
library(gridExtra)
grid.arrange(p2, p3, nrow = 2)