library(tidyverse)
library(haven)
# Import the crime survey teaching data file (SPSS .sav format) as `csew`.
# NOTE(review): this is an absolute, machine-specific Windows path, so the
# script only runs where this exact folder exists -- consider a
# project-relative path.
csew <- read_sav(
  "D:\\MA-health culture and societies\\SOCI523-R\\523-week3 assignment\\csew1314teachingopen.sav"
)

# Week 3 Assignment
# 1 - Understand ‘Core Content’ on Canvas
# Prepare the toolkits (packages) and import the data file.
# Convert the ‘sex’ and ‘bcsvictim’ columns into factors, so that each numeric code is replaced by the label it represents.
# Add factor versions of the `sex` and `bcsvictim` columns so that plots show
# the value labels instead of the numeric codes.
csew$sexf <- as_factor(csew$sex)
csew$bcsvictimf <- as_factor(csew$bcsvictim)

# Draw a plot to display how a person's susceptibility to crime (becoming a
# victim) is related to their gender and their family's wealth status.
# 100% stacked bars: share of each victim status within each sex, with one
# panel per `edeprivex` category. Rows with a missing `edeprivex` are dropped
# first so that no NA panel is drawn.
a <- csew |>
  drop_na(edeprivex) |>
  ggplot(aes(x = sexf, fill = bcsvictimf)) +
  geom_bar(position = "fill") +
  facet_wrap(~edeprivex, nrow = 1) # name the argument: all panels in one row
a

# 2 - Add transparency to the bars in the most recent plot, as suggested in
# the final slide this week.
# Same faceted 100% stacked bar chart, now with semi-transparent bars
# (alpha = 0.5).
a <- csew |>
  drop_na(edeprivex) |>
  ggplot(aes(x = sexf, fill = bcsvictimf)) +
  geom_bar(alpha = 0.5, position = "fill") +
  facet_wrap(~edeprivex, nrow = 1) # name the argument: all panels in one row
a

# 3 - Change the number in the code for the ‘facet_wrap’ function we used this
# week. What happens? What do you think is an appropriate number to use here?
# A: Changing the number to 3 rearranges the charts into a grid with 3 rows. The appropriate number is 1, because keeping the charts in a single row makes it easy to compare the trends side by side.
# Faceted 100% stacked bar chart with semi-transparent bars; the number given
# to facet_wrap() is the number of rows the panels are arranged in.
a <- csew |>
  drop_na(edeprivex) |>
  ggplot(aes(x = sexf, fill = bcsvictimf)) +
  geom_bar(alpha = 0.5, position = "fill") +
  facet_wrap(~edeprivex, nrow = 1) # a single row keeps panels side by side
a

# 4 - What is still ‘wrong’ with the chart you have, based on the principles
# we have considered so far?
# A: I think a graph should be self-explanatory, allowing the audience to understand the story quickly, so I added labels for the axes and a note explaining what the digits 1 to 5 mean.
# Factor version of `edeprivex`, so the panel strips use the value labels.
csew$edeprivexf <- as_factor(csew$edeprivex)

# Labelled version of the chart: percentage axis, axis titles, a main title,
# a caption explaining the deprivation coding, and a readable legend title.
a <- csew |>
  drop_na(edeprivexf) |>
  ggplot(aes(x = sexf, fill = bcsvictimf)) +
  geom_bar(position = "fill") +
  facet_wrap(~edeprivexf, nrow = 1) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    y = "Proportion",
    x = "Gender",
    title = "Crime Victimization by Deprivation Index",
    caption = "Note: 1 = 20% most deprived wards",
    fill = "Victim Status" # replaces the raw column name in the legend
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centre the main title
a

# 5 - Create a new bar chart using at least one different variable from the
# crime survey data file. Come next time with your code, the chart it makes,
# and an explanation of what you think we can learn from it (this need not be
# particularly complicated, or even especially interesting).
# 5.1 - plot1
# This chart shows the relationship between crime victimization and age group.
# Translate the meaning of the columns (convert the numeric codes into labelled factors).
# Factor versions of the age-group and victim-status columns.
# `bcsvictimf` was already created earlier in the script; it is recomputed
# here so this section can be run on its own.
csew$agegrp7f <- as_factor(csew$agegrp7)
csew$bcsvictimf <- as_factor(csew$bcsvictim)

# Draw a bar graph
# Horizontal 100% stacked bar chart: share of each victim status within each
# age group. Rows missing either variable are dropped first.
b <- csew |>
  drop_na(agegrp7f, bcsvictimf) |>
  ggplot(aes(x = agegrp7f, fill = bcsvictimf)) +
  geom_bar(position = "fill") + # 100% stacked bar chart
  coord_flip() + # swap the X and Y axes so the long labels are readable
  scale_y_continuous(labels = scales::percent) + # axis numbers as percentages
  labs(
    y = "Proportion", # clear titles and labels
    x = "Age Group",
    title = "Crime Victimization by Age Group",
    fill = "Victim Status" # rename the legend title
  ) +
  theme(plot.title = element_text(hjust = 0.5)) # centre the main title
b

# 5.1 - plot2
# Lollipop chart of the victimization rate in each age group: a grey stem from
# 0 up to the rate, a point at the rate, and a percentage label above it.
plot_lollipop <- csew |>
  drop_na(agegrp7f, bcsvictimf) |>
  count(agegrp7f, bcsvictimf) |>
  group_by(agegrp7f) |>
  mutate(prop = n / sum(n)) |> # share of each status within its age group
  ungroup() |>
  # Keep only the victims' share. Without this filter every age group gets a
  # second, overlapping lollipop for the non-victim share, which contradicts
  # the "Victimization Rate" axis title.
  filter(bcsvictimf == "Victim of crime") |>
  ggplot(aes(x = agegrp7f, y = prop)) +
  geom_segment(
    aes(x = agegrp7f, xend = agegrp7f, y = 0, yend = prop),
    # `linewidth` replaces `size` for lines; `size` was deprecated for lines
    # in ggplot2 3.4.0 and triggered a warning here.
    color = "gray70", linewidth = 1
  ) +
  geom_point(size = 5, color = "#00BFC4") +
  geom_text(
    aes(label = scales::percent(prop, accuracy = 0.1)),
    vjust = -1.5, size = 3.5
  ) +
  labs(
    title = "Crime Victimization Risk by Age Group",
    x = "Age Group",
    y = "Victimization Rate"
  ) +
  theme_minimal()
# Display the lollipop chart.
plot_lollipop

# Victimization rate per age group: within each age group, the share of
# respondents whose status is "Victim of crime". Note that this object is the
# summary table, not the plot itself.
plot_bubble_line <- csew |>
  drop_na(agegrp7f, bcsvictimf) |>
  count(agegrp7f, bcsvictimf) |>
  group_by(agegrp7f) |>
  mutate(prop = n / sum(n)) |>
  ungroup() |>
  filter(bcsvictimf == "Victim of crime")

# Bubble-and-line chart: a dashed trend line across the age groups, with a
# bubble at each rate whose size is also mapped to the rate.
plot_bubble_line |>
  ggplot(aes(x = agegrp7f, y = prop)) +
  # group = 1 joins all age groups into a single line; `linewidth` replaces
  # `size`, which is deprecated for lines since ggplot2 3.4.0.
  geom_line(
    aes(group = 1),
    color = "gray60", linewidth = 1, linetype = "dashed"
  ) +
  geom_point(aes(size = prop), color = "#00BFC4", alpha = 0.8) +
  geom_text(
    aes(label = scales::percent(prop, accuracy = 1)),
    vjust = -2,
    color = "black", size = 3.5
  ) +
  scale_size(range = c(6, 18), guide = "none") + # bubble sizes, no legend
  # NOTE(review): the y axis is capped at 35%; confirm that no age group
  # exceeds this, otherwise its point falls outside the scale limits.
  scale_y_continuous(labels = scales::percent, limits = c(0, 0.35)) +
  labs(
    title = "Victimization Risk: Trend & Magnitude",
    subtitle = "Bubble size represents the proportion of victims in each age group",
    x = "Age Group",
    y = "Victimization Rate"
  ) +
  theme_minimal()

# 5.2 - plot3
# This chart compares the levels of worry about burglary between rural and urban residents, revealing that people in urban areas tend to be more worried about burglary than those in rural areas.
# Factor copies of the area-type (`rural2`) and burglary-worry (`wburgl`)
# columns, so the chart shows value labels rather than numeric codes.
csew <- csew |>
  mutate(
    rural2f = as_factor(rural2),
    wburglf = as_factor(wburgl)
  )

# 100% stacked bar chart: share of each worry level within each area type,
# using only the rows where both variables are present.
c <- ggplot(
  data = drop_na(csew, rural2f, wburglf),
  mapping = aes(x = rural2f, fill = wburglf)
) +
  geom_bar(position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  theme(plot.title = element_text(hjust = 0.5)) + # centre the main title
  labs(
    title = "Worry about Burglary by Area Type",
    x = "Area Type - Rural vs Urban",
    y = "Proportion",
    fill = "Level of Worry"
  )
c