Week 3 Assignment

Author

yushuang yang

1 - Understand ‘Core Content’ on Canvas

Prepare toolkits and import datafiles

library(tidyverse)
library(haven)
# Read the crime survey teaching data file (SPSS .sav format) into `csew`.
csew <- read_sav(
  file = "D:\\MA-health culture and societies\\SOCI523-R\\523-week3 assignment\\csew1314teachingopen.sav"
)

Convert the ‘sex’ and ‘bcsvictim’ columns into factors, so that the numeric codes are replaced by the labels they represent

# Add factor versions of the sex and victim-status columns in one step.
csew <- csew |>
  mutate(
    sexf = as_factor(sex),
    bcsvictimf = as_factor(bcsvictim)
  )

Draw a plot to show how a person’s susceptibility to crime (becoming a victim) is related to their gender and their family’s wealth status.

# 100% stacked bars of victim status by sex, one panel per value of edeprivex.
# Rows with a missing edeprivex are removed before plotting.
a <- ggplot(
  data = drop_na(csew, edeprivex),
  mapping = aes(x = sexf, fill = bcsvictimf)
) +
  geom_bar(position = "fill") +
  facet_wrap(vars(edeprivex), nrow = 1)  # nrow = 1 keeps the panels in one row
a

2 - Add transparency to the bars in the most recent plot, as suggested in the final slide this week.

# Same chart as before, with semi-transparent bars (alpha = 0.5).
# Rows with a missing edeprivex are removed before plotting.
a <- ggplot(
  data = drop_na(csew, edeprivex),
  mapping = aes(x = sexf, fill = bcsvictimf)
) +
  geom_bar(position = "fill", alpha = 0.5) +
  facet_wrap(vars(edeprivex), nrow = 1)  # nrow = 1 keeps the panels in one row
a

3 - Change the number in the code for the ‘facet_wrap’ function we used this week. What happens? What do you think is an appropriate number to use here?

A: Changing the number to 3 rearranges the charts into a grid with 3 rows. The appropriate number is 1, because keeping the charts in a single row makes it easy to compare the trends side by side.

# Faceted 100% stacked bars; the number passed to facet_wrap() is the row count.
# Rows with a missing edeprivex are removed before plotting.
a <- ggplot(
  data = drop_na(csew, edeprivex),
  mapping = aes(x = sexf, fill = bcsvictimf)
) +
  geom_bar(position = "fill", alpha = 0.5) +
  facet_wrap(vars(edeprivex), nrow = 1)
a

4 - What is still ‘wrong’ with the chart you have, based on the principles we have considered so far?

A: I think a graph should be self-explanatory, allowing the audience to understand the story quickly, so I added axis labels and a note explaining what the digits 1 to 5 mean.

# Factor version of the deprivation index, used as the faceting variable.
csew$edeprivexf <- as_factor(csew$edeprivex)

# 100% stacked bars of victim status by gender, one panel per deprivation
# level, with readable axis, legend and title text.
a <- csew |>
  drop_na(edeprivexf) |>
  ggplot(aes(x = sexf, fill = bcsvictimf)) +
  geom_bar(position = "fill") +
  facet_wrap(~edeprivexf, nrow = 1) +
  # Show the proportion axis as percentages.
  scale_y_continuous(labels = scales::percent) +
  labs(y = 'Proportion',
       x = 'Gender',
       title = 'Crime Victimization by Deprivation Index',
       # Give the legend a readable title instead of the raw column name,
       # matching the other charts in this file.
       fill = 'Victim Status',
       caption = 'Note: 1 = 20% most deprived wards') +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))
a

5 - Create a new bar chart using at least one different variable from the crime survey data file. Come next time with your code, the chart it makes, and an explanation of what you think we can learn from it (this need not be particularly complicated, or even especially interesting).

5.1 - plot1

This chart shows the relationship between crime victimization and age group.

Convert the coded columns into labelled factors

# Add factor versions of the age-group and victim-status columns.
csew <- csew |>
  mutate(
    agegrp7f = as_factor(agegrp7),
    bcsvictimf = as_factor(bcsvictim)
  )

Draw a bar graph

# Horizontal 100% stacked bars: share of each victim status per age group.
# Rows missing either variable are removed before plotting.
b <- ggplot(
  data = drop_na(csew, agegrp7f, bcsvictimf),
  mapping = aes(x = agegrp7f, fill = bcsvictimf)
) +
  geom_bar(position = "fill") +
  # Swap the axes so the long age-group labels stay readable.
  coord_flip() +
  # Format the proportion axis as percentages.
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Crime Victimization by Age Group",
    x = "Age Group",
    y = "Proportion",
    fill = "Victim Status"  # legend title
  ) +
  # Centre the main title.
  theme(plot.title = element_text(hjust = 0.5))
b

5.1 - plot2

# Lollipop chart of the victimization rate within each age group.
plot_lollipop <- csew |>
  drop_na(agegrp7f, bcsvictimf) |>
  count(agegrp7f, bcsvictimf) |>
  group_by(agegrp7f) |>
  # Share of each victim status within its age group.
  mutate(prop = n / sum(n)) |>
  ungroup() |>
  # Keep only the victim rows. Without this, every age group gets two
  # overlapping lollipops (victim and non-victim share) on an axis that is
  # labelled as the victimization rate.
  filter(bcsvictimf == "Victim of crime") |>
  ggplot(aes(x = agegrp7f, y = prop)) +
  # Stem from zero up to the rate; `linewidth` replaces the `size` argument
  # that ggplot2 3.4.0 deprecated for lines.
  geom_segment(aes(x = agegrp7f, xend = agegrp7f, y = 0, yend = prop),
               color = "gray70", linewidth = 1) +
  geom_point(size = 5, color = "#00BFC4") +
  # Print the rate above each point, to one decimal place.
  geom_text(aes(label = scales::percent(prop, accuracy = 0.1)),
            vjust = -1.5, size = 3.5) +
  labs(title = "Crime Victimization Risk by Age Group",
       x = "Age Group",
       y = "Victimization Rate") +
  theme_minimal()
plot_lollipop

# Victimization rate per age group: the share of "Victim of crime" rows among
# all rows with a known age group and victim status.
plot_bubble_line <- csew |>
  drop_na(agegrp7f, bcsvictimf) |>
  count(agegrp7f, bcsvictimf) |>
  group_by(agegrp7f) |>
  mutate(prop = n / sum(n)) |>
  ungroup() |>
  filter(bcsvictimf == "Victim of crime")

# Dashed trend line plus bubbles whose area scales with the rate.
plot_bubble_line |>
  ggplot(aes(x = agegrp7f, y = prop)) +
  # group = 1 joins the discrete age groups into a single line; `linewidth`
  # replaces the `size` argument that ggplot2 3.4.0 deprecated for lines.
  geom_line(aes(group = 1), color = "gray60", linewidth = 1,
            linetype = "dashed") +
  geom_point(aes(size = prop), color = "#00BFC4", alpha = 0.8) +
  # Print the rounded rate above each bubble.
  geom_text(aes(label = scales::percent(prop, accuracy = 1)),
            vjust = -2,
            color = "black", size = 3.5) +
  # Bubble size range; the size legend is hidden.
  scale_size(range = c(6, 18), guide = "none") +
  scale_y_continuous(labels = scales::percent, limits = c(0, 0.35)) +
  labs(
    title = "Victimization Risk: Trend & Magnitude",
    subtitle = "Bubble size represents the proportion of victims in each age group",
    x = "Age Group",
    y = "Victimization Rate"
  ) +
  theme_minimal()

5.2 - plot3

This chart compares the levels of worry about burglary between rural and urban residents, revealing that people in urban areas tend to be more worried about burglary than those in rural areas.

# Add factor versions of the area-type and burglary-worry columns.
csew <- csew |>
  mutate(
    rural2f = as_factor(rural2),
    wburglf = as_factor(wburgl)
  )

# 100% stacked bars: distribution of burglary worry within each area type.
# Rows missing either variable are removed before plotting.
c <- ggplot(
  data = drop_na(csew, rural2f, wburglf),
  mapping = aes(x = rural2f, fill = wburglf)
) +
  geom_bar(position = "fill") +
  # Format the proportion axis as percentages.
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Worry about Burglary by Area Type",
    x = "Area Type - Rural vs Urban",
    y = "Proportion",
    fill = "Level of Worry"
  ) +
  # Centre the plot title.
  theme(plot.title = element_text(hjust = 0.5))
c