# Subset the ESS data to German respondents born 1990-2005 (inclusive),
# keeping only the three analysis variables.
germany_data_subset <- ess %>%
  # Comma-separated conditions in filter() are combined with AND.
  filter(cntry == "DE", yrbrn > 1989, yrbrn < 2006) %>%
  select(stfgov, eisced, gndr) %>%
  mutate(
    # Special codes 77/88/99 become NA for stfgov
    stfgov = ifelse(stfgov %in% c(77, 88, 99), NA, stfgov),
    # Special codes 77/88/99 become NA for eisced
    eisced = ifelse(eisced %in% c(77, 88, 99), NA, eisced),
    # Special code 9 becomes NA for gndr
    gndr = ifelse(gndr %in% 9, NA, gndr)
  ) %>%
  # Drop every row that has an NA in any of the three columns
  na.omit()

# Print a skim-style summary table of the cleaned subset
datasummary_skim(germany_data_subset)
# Descriptives of government satisfaction (stfgov) per education level:
# group mean, group standard deviation, and group size.
summary_statistics <- summarise(
  group_by(germany_data, educ_level),
  # Mean of 'stfgov' within the group; NAs are ignored
  Satisfied_Mean = mean(stfgov, na.rm = TRUE),
  # Standard deviation of 'stfgov' within the group; NAs are ignored
  Satisfied_SD = sd(stfgov, na.rm = TRUE),
  # Number of rows in the group (rows with NA in 'stfgov' are counted too)
  N = n()
)
# Model 1: government satisfaction regressed on education level only.
model <- lm(formula = stfgov ~ educ_level, data = germany_data)

# Model 2: same outcome, with the gender category added as a second predictor.
model_2 <- lm(formula = stfgov ~ educ_level + gender_cat, data = germany_data)

# Store the summary of model 2 and show it.
summary_2 <- summary(model_2)
print(summary_2)
# Coefficient plot for `model`: one point per row of tidy(model), with error
# bars spanning the estimate plus/minus one standard error.
coefficients_plot <- tidy(model) %>%
  ggplot(aes(x = term, y = estimate)) +
  geom_point() +
  geom_errorbar(
    aes(ymin = estimate - std.error, ymax = estimate + std.error),
    width = 0.2
  ) +
  labs(
    title = "Effect of Educational Level on How satisfied with the national government",
    x = "Educational Level", # x-axis label
    y = "Estimate"           # y-axis label
  ) +
  theme_minimal()
# Permutation test of independence between 'stfgov' and 'educ_level',
# using the F statistic.

# Null distribution: permute 1000 times under the independence null and
# compute the F statistic for each permuted data set.
hypothesis_test <- germany_data %>%
  specify(stfgov ~ educ_level) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "F")
# Display the permuted F statistics
hypothesis_test
# Observed F statistic, computed on the unpermuted data
test_stat <- germany_data %>%
  specify(explanatory = educ_level,
          response = stfgov) %>%
  hypothesize(null = "independence") %>%
  calculate(stat = "F")
print(test_stat$stat)
## [1] 17.80072
# Reuse the permutation distribution generated above instead of running an
# identical second set of 1000 permutations.
null_distribution <- hypothesis_test
# Calculate the p-value. The F statistic only takes large values when the
# group means differ, so the test is right-tailed: direction = "greater".
p_val <- null_distribution %>%
  get_p_value(obs_stat = test_stat, direction = "greater")
## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.
library(effects)
# Linear model with an education-by-gender interaction.
# NOTE(review): model_2 uses `gender_cat` as the gender predictor while this
# model uses `gndr`; confirm which coding is intended here.
model_with_interaction <- lm(stfgov ~ educ_level * gndr, data = germany_data)

# Interaction plot of the fitted values by education level and gender.
# The factors are taken from the model frame rather than from germany_data so
# that they always have the same length as the fitted values, even if lm()
# dropped rows with missing values.
# interaction.plot() draws as a side effect and returns NULL, so its result is
# neither stored nor printed.
with(
  model.frame(model_with_interaction),
  interaction.plot(
    x.factor = educ_level,
    trace.factor = gndr,
    response = fitted(model_with_interaction),
    trace.label = "Gender",
    xlab = "Educational Level",
    ylab = " How satisfied with the national government",
    main = "Interaction Effect of Educational Level and Gender on How satisfied with the national government",
    legend = TRUE
  )
)
# Show the per-education-level descriptives.
summary_statistics %>% print()
## # A tibble: 3 × 4
## educ_level Satisfied_Mean Satisfied_SD N
## <chr> <dbl> <dbl> <int>
## 1 High 5.91 10.9 7881
## 2 Low 7.00 16.1 18500
## 3 Medium 6.31 13.2 7664
# NOTE(review): standard deviations of 10.9-16.1 around means of about 6-7,
# together with the maximum residual of 93.090 below, point to a few very
# large 'stfgov' values. Possibly the special codes 77/88/99 (which are
# recoded to NA when germany_data_subset is built) are still present in
# germany_data -- confirm how germany_data is cleaned.
# Show the coefficient table and fit statistics of the education-only model.
model %>% summary() %>% print()
##
## Call:
## lm(formula = stfgov ~ educ_level, data = germany_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -7.002 -4.002 -2.002 -0.312 93.090
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.9099 0.1621 36.45 < 2e-16 ***
## educ_levelLow 1.0919 0.1936 5.64 1.72e-08 ***
## educ_levelMedium 0.4018 0.2309 1.74 0.0819 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 14.39 on 34042 degrees of freedom
## Multiple R-squared: 0.001045, Adjusted R-squared: 0.000986
## F-statistic: 17.8 on 2 and 34042 DF, p-value: 1.876e-08
# One-way ANOVA of government satisfaction on education level.
anova_test <- aov(stfgov ~ educ_level, data = germany_data)
anova_summary <- summary(anova_test)
# Pull the mean square and F value for each row of the ANOVA table
anova_stats <- anova_summary[[1]][, c("Mean Sq", "F value")]
# Transform into a plain data frame for ggplot
anova_stats_df <- as.data.frame(anova_stats)
# Add the term names; summary.aov() pads row names to a common width, so trim
# the whitespace to keep the axis labels clean.
anova_stats_df$Term <- trimws(rownames(anova_stats_df))
# Bar chart of the mean square per ANOVA table row (model term vs. residuals).
# No error bars are drawn: a mean square is in squared units of the response,
# so `Mean Sq` +/- sqrt(`Mean Sq`) mixes units and is not an uncertainty
# interval.
ggplot(anova_stats_df, aes(x = Term, y = `Mean Sq`)) +
  geom_col(fill = "skyblue") +
  theme_minimal() +
  labs(
    title = "ANOVA Test for Educational Level on How satisfied with the national government",
    x = "Model term",
    y = "Mean square"
  )