1095393
# Load all the libraries you need for your analysis here:
library(haven)
## Warning: package 'haven' was built under R version 4.4.2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(RColorBrewer)
library(labeling)
library(readr)
library(tibble)
# Read the Spring Survey 2023 SPSS (.sav) file into `df`:
df <- haven::read_sav(file = "pew_spring.sav")
Intro Insert introduction
Literature Review Insert literature review
Research Question How do attitudes to abortion vary by age in the UK?
Hypothesis H₀: There is no significant difference in attitudes to abortion across different age groups in the UK. H₁: Attitudes to abortion vary significantly across different age groups in the UK.
# Select UK respondents, then drop non-substantive age / abortion answers.
# `country` is converted to a factor with haven's as_factor() so that it can be
# compared against its label text ("UK").
df$country <- as_factor(df$country)
UK_unfiltered <- df %>%
  filter(country == "UK")

# Codes are matched as numbers for BOTH variables, so the two exclusions are
# written consistently and do not rely on implicit number-to-character
# coercion inside `%in%`.
# NOTE(review): 98/99 (age) and 8/9 (abortion_legal) are assumed to be the
# "don't know" / "refused" codes -- confirm against the survey codebook.
# Rows where either variable is NA are kept by `%in%`-based conditions.
UK <- UK_unfiltered %>%
  filter(
    !age %in% c(98, 99),
    !abortion_legal %in% c(8, 9)
  )
# Verify the filter: tabulate the abortion_legal codes that remain
table(UK[["abortion_legal"]])
##
## 1 2 3 4
## 400 443 96 39
# Verify the filter: tabulate the ages that remain
table(UK[["age"]])
##
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43
## 12 11 16 12 7 10 14 11 19 12 9 13 11 10 18 8 15 13 7 11 9 13 17 7 9 13
## 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69
## 14 18 16 7 20 13 19 6 17 16 11 9 17 15 19 14 25 8 20 30 21 17 11 20 18 18
## 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
## 14 17 14 15 15 24 25 21 11 9 13 15 9 8 6 10 3 4 7 5 3 1 2 1
# Summary statistics for each variable
n_distinct(UK$id) # total n (distinct ids) in the filtered dataset
## [1] 978
summary(UK[["age"]]) # min, quartiles, median, mean and max of age
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 18.00 38.00 56.00 54.09 70.00 93.00
summary(UK[["abortion_legal"]]) # summary of the numeric abortion response codes
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 1.769 2.000 4.000
VISUALISATION 1 - SUMMARY STATISTICS
# Summary statistics for 'age' within each 'abortion_legal' response.
# `na.rm = TRUE` is passed to every statistic so that a single missing age
# cannot turn a whole group's summary into NA (the `%in%`-based filter used to
# build `UK` does not drop NA ages).
summary_table <- UK %>%
  group_by(abortion_legal) %>%
  summarise(
    Count = n(),                             # Rows in the group (NA ages included)
    Mean_Age = mean(age, na.rm = TRUE),      # Mean of age, excluding NAs
    Median_Age = median(age, na.rm = TRUE),  # Median of age, excluding NAs
    SD_Age = sd(age, na.rm = TRUE),          # Standard deviation of age
    Min_Age = min(age, na.rm = TRUE),        # Minimum age
    Max_Age = max(age, na.rm = TRUE),        # Maximum age
    IQR_Age = IQR(age, na.rm = TRUE)         # Interquartile range of age
  )

# Print the summary table
print(summary_table)
## # A tibble: 4 × 8
## abortion_legal Count Mean_Age Median_Age SD_Age Min_Age Max_Age IQR_Age
## <dbl+lbl> <int> <dbl> <dbl> <dbl> <dbl+l> <dbl+l> <dbl>
## 1 1 [Legal in all case… 400 50.7 52 18.8 18 93 30
## 2 2 [Legal in most cas… 443 57.8 62 19.4 18 92 30
## 3 3 [Illegal in most c… 96 52.0 48 19.9 18 90 33.2
## 4 4 [Illegal in all ca… 39 51.2 53 17.8 19 86 28
Insert comments
VISUALISATION 2 - AGE DISTRIBUTION
# Histogram of respondent age across the whole filtered UK sample
hist(
  x = UK$age,
  xlab = "Age",
  main = "Age distribution of UK participants"
)
Insert comments - In the summary table, the mean age is lower than the median age for answers ‘1’, ‘2’ and ‘4’, suggesting left-skewed age distributions for these answers; for answer ‘3’, the mean is greater than the median, suggesting a right-skewed age distribution.
VISUALISATION 3 - BOXPLOT
# Boxplot of respondent age for each abortion_legal response.
# With the formula `age ~ abortion_legal`, the response groups are drawn along
# the x-axis and age along the y-axis, so the axis labels follow that order.
boxplot(
  age ~ abortion_legal,
  data = UK,
  main = "Age distribution by attitudes towards abortion",
  xlab = "Attitudes towards abortion",
  ylab = "Age"
)
Insert comments -
VISUALISATION 4 - HISTOGRAM
# Age histograms, one facet row per abortion_legal response.
# The y-axis of geom_histogram() is the number of respondents in each one-year
# age bin, so it is labelled as a count; the attitude is shown by the facets.
ggplot(data = UK, aes(x = age)) +
  geom_histogram(binwidth = 1, fill = "white", colour = "black") +
  facet_grid(abortion_legal ~ .) +
  ggtitle("Attitudes towards abortion by age") +
  xlab("Age") +
  ylab("Number of respondents")
Insert comments - VISUALISATION 5 - HYPOTHESIS TESTING & CONFIDENCE INTERVALS
# Split respondents into two age bands
UK <- mutate(
  UK,
  age_group = case_when(
    age > 50 ~ "Old",       # Over 50
    age >= 18 ~ "Young",    # 18 to 50 (anything over 50 already matched above)
    TRUE ~ NA_character_    # Missing age, or age below 18
  )
)
table(UK[["age_group"]])
##
## Old Young
## 564 414
# Per-age-group summary of abortion_legal, with a t-based 95% confidence
# interval for the group mean
summary_table_with_ci <- UK %>%
  filter(!(is.na(abortion_legal) | is.na(age_group))) %>%  # complete rows only
  group_by(age_group) %>%
  summarise(
    Count = n(),
    Mean_Abortion_Legal = mean(abortion_legal),
    Median_Abortion_Legal = median(abortion_legal),
    SD_Abortion_Legal = sd(abortion_legal),
    Min_Abortion_Legal = min(abortion_legal),
    Max_Abortion_Legal = max(abortion_legal),
    IQR_Abortion_Legal = IQR(abortion_legal),
    # The bounds reuse the summaries computed above:
    # mean -/+ t(0.975, n - 1) * sd / sqrt(n)
    CI_Lower = Mean_Abortion_Legal -
      qt(0.975, df = Count - 1) * SD_Abortion_Legal / sqrt(Count),
    CI_Upper = Mean_Abortion_Legal +
      qt(0.975, df = Count - 1) * SD_Abortion_Legal / sqrt(Count)
  )

# Display the summary table with its confidence intervals
print(summary_table_with_ci)
## # A tibble: 2 × 10
## age_group Count Mean_Abortion_Legal Median_Abortion_Legal SD_Abortion_Legal
## <chr> <int> <dbl> <dbl> <dbl>
## 1 Old 564 1.79 2 0.736
## 2 Young 414 1.74 2 0.843
## # ℹ 5 more variables: Min_Abortion_Legal <dbl+lbl>,
## # Max_Abortion_Legal <dbl+lbl>, IQR_Abortion_Legal <dbl>, CI_Lower <dbl>,
## # CI_Upper <dbl>
Insert comments - The confidence intervals show that the group means differ, but only slightly, so attitudes are broadly consistent across ages. There are minor differences, but nothing substantial.
NOT VISUALISATION - EXTRA BIT FOR CORRELATION AND COVARIANCE
# Covariance of age with the abortion_legal response code (complete pairs only):
with(UK, cov(age, abortion_legal, use = "complete.obs"))
## [1] 0.945386
# Covariance depends on the scale of both variables; correlation does not.
# Correlation of age with the abortion_legal response code (complete pairs only):
with(UK, cor(age, abortion_legal, use = "complete.obs"))
## [1] 0.06217122
Insert comments - Covariance of 0.945386: The positive sign indicates a positive relationship between age and attitudes towards abortion (i.e., as one increases, the other tends to increase). Because covariance depends on the scale of the variables, its size alone does not show how strong the relationship is; the correlation below shows that it is weak.
Correlation of 0.06217122: This is a very weak positive linear relationship, suggesting that age is not strongly associated with attitudes towards abortion. A value this close to zero indicates that there is no meaningful linear pattern between age and people’s views on abortion.
Conclusion
Insert conclusion