## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tidyr' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.3
## Warning: package 'purrr' was built under R version 4.3.3
## Warning: package 'dplyr' was built under R version 4.3.3
## Warning: package 'stringr' was built under R version 4.3.3
## Warning: package 'forcats' was built under R version 4.3.3
## Warning: package 'lubridate' was built under R version 4.3.3

In September 2019, a YouGov survey asked 1,639 adults in Great Britain the following question:

In hindsight, do you think Britain was right/wrong to vote to leave EU?

The data from the survey are in data/brexit.csv.

# Read the survey responses from the CSV file named in the text above.
brexit <- readr::read_csv(file = "data/brexit.csv")

In the course video we made the following visualisation.

# Put the regions in the display order used by the plots, then swap the raw
# codes for human-readable labels.
brexit <- mutate(
  brexit,
  region = region %>%
    fct_relevel(
      "london",
      "rest_of_south",
      "midlands_wales",
      "north",
      "scot"
    ) %>%
    fct_recode(
      London = "london",
      `Rest of South` = "rest_of_south",
      `Midlands / Wales` = "midlands_wales",
      North = "north",
      Scotland = "scot"
    )
)

# Horizontal bar chart of opinion counts, one panel per region. The panels
# share one count axis (facet_wrap's default fixed scales).
ggplot(brexit, aes(y = opinion, fill = opinion)) +
  geom_bar() +
  facet_wrap(~region, nrow = 1, labeller = label_wrap_gen(width = 12)) +
  # The axis already names each bar, so the fill legend is dropped.
  # `guides(fill = FALSE)` is deprecated since ggplot2 3.3.4; use "none".
  guides(fill = "none") +
  labs(
    title = "Was Britain right/wrong to vote to leave EU?",
    subtitle = "YouGov Survey Results, 2-3 September 2019",
    caption = "Source: bit.ly/2lCJZVg",
    x = NULL, y = NULL
  ) +
  scale_fill_manual(values = c(
    "Wrong" = "#ef8a62",
    "Right" = "#67a9cf",
    "Don't know" = "gray"
  )) +
  theme_minimal()

In this application exercise we tell different stories with the same data.

Exercise 1 - Free scales

Add scales = "free_x" as an argument to the facet_wrap() function. How does the visualisation change? How does the story this visualisation tells differ from the story the original plot tells?

Tambahkan scales = "free_x" sebagai argumen dalam fungsi facet_wrap(). Bagaimana visualisasi berubah? Bagaimana cerita yang disampaikan oleh visualisasi ini berbeda dari cerita yang disampaikan oleh plot aslinya?

# `brexit$region` was already reordered and relabelled when the first plot was
# built, so that step is not repeated here: the raw level names no longer
# exist, and re-running it only produced "unknown levels" warnings.

# Same counts plot as before, but each region panel gets its own count axis.
ggplot(brexit, aes(y = opinion, fill = opinion)) +
  geom_bar() +
  facet_wrap(~region,
    nrow = 1, labeller = label_wrap_gen(width = 12),
    scales = "free_x" # the only change from the original plot
  ) +
  # `guides(fill = FALSE)` is deprecated since ggplot2 3.3.4; use "none".
  guides(fill = "none") +
  labs(
    title = "Was Britain right/wrong to vote to leave EU?",
    subtitle = "YouGov Survey Results, 2-3 September 2019",
    caption = "Source: bit.ly/2lCJZVg",
    x = NULL, y = NULL
  ) +
  scale_fill_manual(values = c(
    "Wrong" = "#ef8a62",
    "Right" = "#67a9cf",
    "Don't know" = "gray"
  )) +
  theme_minimal()

Interpretasi: Berdasarkan hal tersebut terdapat perbedaan, yaitu dengan penambahan scales = “free_x”, setiap facet memiliki skala X yang berbeda sesuai dengan datanya, sehingga variasi dalam masing-masing kategori lebih jelas. Penambahan ini membuat setiap wilayah memiliki skala X sendiri, sehingga ukuran grafik lebih proporsional dengan jumlah responden pada wilayah tersebut. Sedangkan tanpa scales = “free_x”, semua facet memiliki skala X yang sama, sehingga perbandingan antar kategori lebih langsung tetapi bisa membuat beberapa data terlihat terlalu padat atau tersebar.

Exercise 2 - Comparing proportions across facets

First, calculate the proportion of wrong, right, and don’t know answers in each category, then plot these proportions (rather than the counts) and improve the axis labeling. How does the story this visualisation tells differ from the story the original plot tells? Hint: You’ll need the scales package to improve the axis labeling, which means you’ll also need to load it at the top of the document.

Pertama, hitung proporsi jawaban salah (wrong), benar (right), dan tidak tahu (don’t know) dalam setiap kategori. Kemudian, buat plot berdasarkan proporsi tersebut (bukan jumlah total). Setelah itu, perbaiki pelabelan sumbu. Bagaimana cerita yang disampaikan oleh visualisasi ini berbeda dari cerita yang disampaikan oleh plot aslinya?

library(ggplot2)
library(dplyr)
library(forcats)
library(scales)
## Warning: package 'scales' was built under R version 4.3.3
## 
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
## 
##     discard
## The following object is masked from 'package:readr':
## 
##     col_factor
# `brexit$region` was already reordered and relabelled when the data were first
# prepared, so the relabelling is not repeated here: the raw level names no
# longer exist, and repeating it only emitted "unknown levels" warnings.

# Fail early with a clear message if the column the summaries depend on is
# absent. R prefixes the message with "Error:" itself, so it is not repeated
# in the text.
if (!"opinion" %in% colnames(brexit)) {
  stop("The 'opinion' column is missing from the dataset.", call. = FALSE)
}

# Share of each opinion within every region: count the region/opinion pairs,
# then divide each count by its region's total.
brexit_prop <- brexit %>%
  count(region, opinion, name = "count") %>%
  group_by(region) %>%
  mutate(proportion = count / sum(count)) %>%
  # Drop the grouping so later steps do not silently operate per region.
  ungroup()

# Opinion shares per region, one panel per region. Mapping `opinion` to y
# draws horizontal bars directly, so no `coord_flip()` is needed.
ggplot(brexit_prop, aes(x = proportion, y = opinion, fill = opinion)) +
  geom_col() +
  facet_wrap(~region, ncol = 2) +
  # Show the proportion axis as percentages.
  scale_x_continuous(labels = label_percent()) +
  labs(
    title = "Was Britain right/wrong to vote to leave EU?",
    subtitle = "YouGov Survey Results, 2-3 September 2019",
    caption = "Source: bit.ly/2lCJZVg",
    x = "Proportion", y = NULL,
    fill = "Opinion"
  ) +
  scale_fill_manual(values = c(
    "Wrong" = "#ef8a62",
    "Right" = "#67a9cf",
    "Don't know" = "gray"
  )) +
  theme_minimal() +
  # Applied after theme_minimal() so these settings are not overwritten.
  theme(
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    strip.text = element_text(face = "bold", size = 10)
  )

Interpretasi: Chart tersebut menunjukkan bahwa opini tentang Brexit bervariasi di setiap wilayah Inggris. Scotland dan London cenderung menilai Brexit sebagai kesalahan, dengan proporsi “Wrong” yang jauh lebih tinggi dibanding “Right”. Sementara itu, wilayah Rest of South, Midlands/Wales, dan North lebih seimbang. Proporsi responden yang tidak tahu (Don’t know) relatif kecil di semua wilayah Inggris; hal ini menandakan bahwa masyarakat umumnya memiliki pendapat yang tegas tentang Brexit.

Exercise 3 - Comparing proportions across bars

Recreate the same visualisation from the previous exercise, this time dodging the bars for the opinion proportions within each region rather than faceting by region, and then improve the legend. How does the story this visualisation tells differ from the story the previous plot tells?

Buat kembali visualisasi yang sama dari latihan sebelumnya, kali ini dengan menggeser (dodging) batang untuk proporsi opini di setiap wilayah, bukan dengan memisahkan (faceting) berdasarkan wilayah. Kemudian, perbaiki legenda. Bagaimana cerita yang disampaikan oleh visualisasi ini berbeda dari cerita yang disampaikan oleh grafik sebelumnya?

library(dplyr)
library(ggplot2)

# Share of each opinion within every region, stored in `prop` for the dodged
# bar chart that follows.
brexit_prop <- brexit %>%
  count(region, opinion, name = "count") %>%
  group_by(region) %>%
  mutate(prop = count / sum(count)) %>%
  # Drop the grouping so later steps do not silently operate per region.
  ungroup()

# One cluster of bars per region, with the three opinions side by side inside
# each cluster.
ggplot(data = brexit_prop) +
  geom_col(
    mapping = aes(x = region, y = prop, fill = opinion),
    position = position_dodge()
  ) +
  scale_fill_manual(
    values = c(
      "Wrong" = "#ef8a62",
      "Right" = "#67a9cf",
      "Don't know" = "gray"
    )
  ) +
  scale_y_continuous(labels = scales::percent_format()) +
  labs(
    title = "Comparison of Brexit Opinions Across Regions",
    subtitle = "YouGov Survey Results, 2-3 September 2019",
    x = "Region",
    y = "Proportion",
    fill = "Opinion"
  ) +
  theme_minimal()

Interpretasi: Visualisasi ini menunjukkan distribusi opini tentang Brexit di berbagai wilayah dengan pendekatan “dodged bars”, yang memungkinkan perbandingan langsung antara kategori opini dalam setiap wilayah. Dari grafik, terlihat bahwa opini “Wrong” (oranye) lebih besar pada wilayah Skotlandia dan London, sementara wilayah lainnya memiliki distribusi yang lebih seimbang antara “Right” (biru) dan “Wrong” (oranye). Sedangkan opini “Don’t know” (abu-abu) tetap rendah di semua wilayah; hal ini menunjukkan bahwa sebagian besar responden memiliki sikap yang jelas terhadap Brexit.