1095393

# Load all the libraries you need for your analysis here: 
library(haven)
## Warning: package 'haven' was built under R version 4.4.2
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(RColorBrewer)
library(labeling)
library(readr)
library(tibble)

# Read the Spring Survey 2023 SPSS file into `df`:
df <- haven::read_sav(file = "pew_spring.sav")

Intro Insert introduction

Literature Review Insert literature review

Research Question How do attitudes to abortion vary by age in the UK?

Hypothesis H₀: There is no significant difference in attitudes to abortion across different age groups in the UK. H₁: Attitudes to abortion vary significantly across different age groups in the UK.

# Selection criteria: keep UK respondents and drop non-substantive answers.
# as_factor() swaps the country codes for their value labels so the filter
# below can match on the label "UK".
df$country <- as_factor(df$country)
UK_unfiltered <- df %>%
  filter(country == "UK")

# Exclusion codes are given as numbers for both variables, so neither filter
# depends on implicit number-to-string coercion.
# NOTE(review): 98/99 (age) and 8/9 (abortion_legal) are presumably
# "don't know"/"refused" codes - confirm against the survey codebook.
UK <- UK_unfiltered %>%
  filter(
    !age %in% c(98, 99),
    !abortion_legal %in% c(8, 9)
  )
# Check the filter: tabulate the values that remain in each cleaned variable
table(UK[["abortion_legal"]])
## 
##   1   2   3   4 
## 400 443  96  39
table(UK[["age"]])
## 
## 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 
## 12 11 16 12  7 10 14 11 19 12  9 13 11 10 18  8 15 13  7 11  9 13 17  7  9 13 
## 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 
## 14 18 16  7 20 13 19  6 17 16 11  9 17 15 19 14 25  8 20 30 21 17 11 20 18 18 
## 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 
## 14 17 14 15 15 24 25 21 11  9 13 15  9  8  6 10  3  4  7  5  3  1  2  1
# Summary statistics for each variable in the filtered UK data
n_distinct(UK[["id"]]) # number of distinct respondent ids (total n)
## [1] 978
summary(UK[["age"]]) # min, quartiles, mean and max of age
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   18.00   38.00   56.00   54.09   70.00   93.00
summary(UK[["abortion_legal"]]) # same summary for the abortion responses
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   1.769   2.000   4.000

VISUALISATION 1 - SUMMARY STATISTICS

# Summary statistics for 'age' within each 'abortion_legal' response.
# na.rm = TRUE makes every statistic skip missing ages; without it a single
# NA age would turn that group's statistics into NA.
# Count is the number of rows in the group, including any with a missing age.
summary_table <- UK %>%
  group_by(abortion_legal) %>%
  summarise(
    Count = n(),                             # Number of rows in the group
    Mean_Age = mean(age, na.rm = TRUE),      # Mean of age, excluding NAs
    Median_Age = median(age, na.rm = TRUE),  # Median of age, excluding NAs
    SD_Age = sd(age, na.rm = TRUE),          # Standard deviation of age
    Min_Age = min(age, na.rm = TRUE),        # Minimum age
    Max_Age = max(age, na.rm = TRUE),        # Maximum age
    IQR_Age = IQR(age, na.rm = TRUE)         # Interquartile range of age
  )

# Print the summary table
print(summary_table)
## # A tibble: 4 × 8
##   abortion_legal        Count Mean_Age Median_Age SD_Age Min_Age Max_Age IQR_Age
##   <dbl+lbl>             <int>    <dbl>      <dbl>  <dbl> <dbl+l> <dbl+l>   <dbl>
## 1 1 [Legal in all case…   400     50.7         52   18.8 18      93         30  
## 2 2 [Legal in most cas…   443     57.8         62   19.4 18      92         30  
## 3 3 [Illegal in most c…    96     52.0         48   19.9 18      90         33.2
## 4 4 [Illegal in all ca…    39     51.2         53   17.8 19      86         28

Insert comments

VISUALISATION 2 - AGE DISTRIBUTION

# Histogram of respondent ages in the filtered UK sample
hist(
  UK$age,
  main = "Age distribution of UK participants",
  xlab = "Age"
)

Insert comments - In the summary table, the mean age is lower than the median for responses ‘1’, ‘2’ and ‘4’, which suggests left-skewed age distributions for those responses; for response ‘3’ the mean is higher than the median, which suggests a right-skewed distribution.

VISUALISATION 3 - BOXPLOT

# Boxplot of age for each abortion_legal response.
# With the formula age ~ abortion_legal the response categories are drawn
# along the x-axis and age along the y-axis, so the labels follow that order.
boxplot(
  UK$age ~ UK$abortion_legal,
  main = "Age distribution by attitudes towards abortion",
  xlab = "Attitudes towards abortion",
  ylab = "Age"
)

Insert comments -

VISUALISATION 4 - HISTOGRAM

# Histogram of age, with one facet row per abortion_legal response.
# The y-axis of each panel is the number of respondents in a 1-year age bin,
# so it is labelled as a count; the attitude categories are the facet rows.
ggplot(data = UK, aes(x = age)) +
  geom_histogram(binwidth = 1, fill = "white", colour = "black") +
  facet_grid(abortion_legal ~ .) +
  ggtitle("Attitudes towards abortion by age") +
  xlab("Age") +
  ylab("Number of respondents")

Insert comments -

VISUALISATION 5 - HYPOTHESIS TESTING & CONFIDENCE INTERVALS

# Split respondents into two age bands and store the band in `age_group`
UK <- mutate(
  UK,
  age_group = case_when(
    age > 50 ~ "Old",       # Over 50
    age >= 18 ~ "Young",    # 18 to 50 (anything over 50 was matched above)
    TRUE ~ NA_character_    # Under 18 or missing age
  )
)
table(UK[["age_group"]])
## 
##   Old Young 
##   564   414
# Summary statistics for abortion_legal within each age group, plus a 95%
# confidence interval for the group mean based on the t distribution.
summary_table_with_ci <- UK %>%
  filter(!is.na(abortion_legal), !is.na(age_group)) %>%  # Exclude NAs from both variables
  group_by(age_group) %>%
  summarise(
    Count = n(),                                     # Count of observations
    Mean_Abortion_Legal = mean(abortion_legal),      # Mean of abortion_legal
    Median_Abortion_Legal = median(abortion_legal),  # Median of abortion_legal
    SD_Abortion_Legal = sd(abortion_legal),          # Standard deviation of abortion_legal
    Min_Abortion_Legal = min(abortion_legal),        # Minimum of abortion_legal
    Max_Abortion_Legal = max(abortion_legal),        # Maximum of abortion_legal
    IQR_Abortion_Legal = IQR(abortion_legal),        # Interquartile range of abortion_legal
    # 95% CI = mean -/+ t(0.975, n - 1) * sd / sqrt(n). The Count, mean and SD
    # columns computed above are reused rather than recalculated.
    CI_Lower = Mean_Abortion_Legal -
      qt(0.975, df = Count - 1) * SD_Abortion_Legal / sqrt(Count),
    CI_Upper = Mean_Abortion_Legal +
      qt(0.975, df = Count - 1) * SD_Abortion_Legal / sqrt(Count)
  )

# Print the summary table with confidence intervals. width = Inf prints every
# column, so CI_Lower and CI_Upper are not cut off by the default print width.
print(summary_table_with_ci, width = Inf)
## # A tibble: 2 × 10
##   age_group Count Mean_Abortion_Legal Median_Abortion_Legal SD_Abortion_Legal
##   <chr>     <int>               <dbl>                 <dbl>             <dbl>
## 1 Old         564                1.79                     2             0.736
## 2 Young       414                1.74                     2             0.843
## # ℹ 5 more variables: Min_Abortion_Legal <dbl+lbl>,
## #   Max_Abortion_Legal <dbl+lbl>, IQR_Abortion_Legal <dbl>, CI_Lower <dbl>,
## #   CI_Upper <dbl>

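Insert comments - The mean responses of the two age groups are close (Old 1.79, Young 1.74), and the 95% confidence intervals implied by the table's means, standard deviations and counts overlap (approximately 1.73–1.85 for Old and 1.66–1.82 for Young). Attitudes therefore appear broadly consistent across the two age groups, with only minor differences.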

NOT VISUALISATION - EXTRA BIT FOR CORRELATION AND COVARIANCE

# Covariance of age with the abortion_legal score (complete cases only):
with(UK, cov(age, abortion_legal, use = "complete.obs"))
## [1] 0.945386
# Covariance, unlike correlation, changes with the scale of the variables.

# Correlation of age with the abortion_legal score (complete cases only):
with(UK, cor(age, abortion_legal, use = "complete.obs"))
## [1] 0.06217122

Insert comments - Covariance of 0.945386: The positive sign indicates that age and the abortion_legal score tend to increase together. Because covariance depends on the scales of the two variables, its magnitude alone does not show how strong the relationship is; the correlation below is needed for that.

Correlation of 0.06217122: This is a very weak positive linear relationship. Older respondents give only marginally higher abortion_legal scores (higher codes being the "illegal" responses), so age accounts for almost none of the variation in people’s views on abortion.

Conclusion

Insert conclusion