This is an R Markdown document. Markdown is a simple formatting
syntax for authoring HTML, PDF, and MS Word documents. For more details
on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be
generated that includes both content as well as the output of any
embedded R code chunks within the document. You can embed an R code
chunk like this:
summary(cars)
## speed dist
## Min. : 4.0 Min. : 2.00
## 1st Qu.:12.0 1st Qu.: 26.00
## Median :15.0 Median : 36.00
## Mean :15.4 Mean : 42.98
## 3rd Qu.:19.0 3rd Qu.: 56.00
## Max. :25.0 Max. :120.00
Description of Data
Results
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ purrr::compose() masks flextable::compose()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Loading required package: carData
##
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
##
## Loading required package: viridisLite
##
##
## Attaching package: 'kableExtra'
##
##
## The following object is masked from 'package:dplyr':
##
## group_rows
##
##
## The following objects are masked from 'package:flextable':
##
## as_image, footnote
france_data <- france_data %>% filter(cntry == "FR") %>%
mutate(
# Recoding age variable, setting 999 to NA
age = ifelse(agea == 999, NA, agea),
# Recoding cohort variable, setting years before 1900 and after 2000 to NA
cohort = ifelse(yrbrn < 1930 | yrbrn > 2000, NA, yrbrn),
# Recoding generational cohorts based on year of birth
gen = case_when(
yrbrn %in% 1900:1964 ~ "1",
yrbrn %in% 1965:1996 ~ "2",
TRUE ~ as.character(cohort) # Keeping other values as character if they do not fit the ranges
),
# Converting cohort to a factor with labels
gen = factor(gen,
levels = c("1", "2"),
labels = c("youth", "adult"))
)
france_data <- france_data %>% filter(cntry == "FR") %>%
# Recoding question of belonging to ethnic minority
mutate(minority = case_when(
blgetmg %in% c(7, 8, 9) ~ NA_character_,
blgetmg == 1 ~ "Ethnic Minority",
blgetmg == 2 ~ "Not Ethnic Minority",
TRUE ~ as.character(blgetmg) # Convert other values to character if necessary
))
# here's how to make sure it's really cleaned
france_data <- france_data %>% filter(!is.na(trstplc))
france_data <- france_data %>% filter(!is.na(minority))
france_data <- france_data %>% filter(!is.na(gen))
# double-checking + quick distribution
#unique(france_data$trstplc)
#table(france_data$trstplc)
#table(france_data$gen)
#Plotting outcome and explanatory.
ggplot(france_data, mapping = aes(x = gen, fill = trstplc)) + geom_bar()
## Warning: The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
## Warning: The following aesthetics were dropped during statistical transformation: fill
## ℹ This can happen when ggplot fails to infer the correct grouping structure in
## the data.
## ℹ Did you forget to specify a `group` aesthetic or to convert a numerical
## variable into a factor?
# data summary skim
datasummary_skim(france_data %>% select(trstplc, yrbrn, blgetmg))
Unique (#)
Missing (%)
Mean
SD
Min
Median
Max
trstplc
11
0
6.0
2.2
0.0
6.0
10.0
yrbrn
89
0
1960.1
18.3
1905.0
1960.0
1996.0
blgetmg
2
0
2.0
0.2
1.0
2.0
2.0
#Cleaning average trust in police by summarizing averages, grouping by al countries, filtering answers with refusal, don't know, and No answer.
country_averages <- ess %>%
filter(!(trstplc %in% c(77, 88, 99))) %>%
group_by(cntry) %>%
summarize(avg_trstplc = mean(trstplc, na.rm = TRUE))
# Reorder the factor levels of 'cntry' based on 'avg_trstplc'
country_averages$cntry <- reorder(country_averages$cntry, country_averages$avg_trstplc)
# Plotting the factor levels of 'cntry' based on 'avg_trstplc.'
Main_Trend_of_Country_Feeling_Close_to_Trust_in_Police <- ggplot(country_averages, aes(x = cntry, y = avg_trstplc, label = cntry)) +
geom_point(aes(color = avg_trstplc), size = 3) +
geom_text(nudge_y = 0.2, size = 3) +
labs(
x = "Country",
y = "Average Trust in Police",
title = "Main Trend of Average Trust in Police by Country"
) +
scale_color_gradient(low = "green", high = "darkgreen") +
geom_line(aes(group = 1), color = "darkgreen", linetype = "dashed")
theme_minimal() +
theme(
axis.text.x = element_blank(),
axis.title.x = element_text(face="bold", size=14))
## `summarise()` has grouped output by 'gen'. You can override using the `.groups`
## argument.
## `summarise()` has grouped output by 'wrkorg_recode'. You can override using the
#Check#unique(filtered_data_cleaned$gen)
## `.groups` argument.
Percentage_of_Attitudes_Towards_Police_by_Generations <- ggplot(percentages, aes(x = as.factor(gen), y = percentage, fill = factor(trstplc))) +
geom_bar(stat = "identity", position = position_dodge(width = 0.9), width = 0.7) +
geom_text(aes(label = paste0(round(percentage, 1), "%"), group = trstplc),
position = position_dodge(width = 0.9), vjust = -0.5, size = 3) + # Add rounded percentage labels
labs(title = "Percentage of Attitudes Towards Police by Generations",
x = "Attitude towards Police",
y = "Percentage (%)") +
scale_x_discrete(breaks = c("youth", "adult"), labels = c("youth", "adult")) +
scale_fill_manual(values = c("10" = "lightgreen", "9" = "lightgreen", "8" = "lightgreen", "7" = "lightgreen", "6" = "lightgreen", "5" = "lightgreen", "4" = "lightgreen", "3" = "lightgreen", "2" = "lightgreen", "1" = "lightgreen", "0" = "lightgreen"),
name = "Trust in Police Levels") +
theme_minimal()
print(Percentage_of_Attitudes_Towards_Police_by_Generations)
# Step 1: Calculate the test statistic of your sample
test_stat <- france_data %>%
specify(explanatory = gen, # change variable name for explanatory variable
response = trstplc) %>% # change variable name for outcome of interest
hypothesize(null = "independence") %>%
calculate(stat = "t") # replace in between quotation marks appropriate test statistic
## Warning: The statistic is based on a difference or ratio; by default, for
## difference-based statistics, the explanatory variable is subtracted in the
## order "youth" - "adult", or divided in the order "youth" / "adult" for
## ratio-based statistics. To specify this order yourself, supply `order =
## c("youth", "adult")` to the calculate() function.
print(test_stat$stat)
## t
## 5.407069
# Step 2: Simulate the null distribution
null_distribution_gen<- france_data %>%
specify(explanatory = gen,
response = trstplc) %>%
hypothesize(null = "independence") %>%
generate(reps = 1000, type = "permute") %>% # only line we are adding, use reps = 1000 as standard
calculate( stat = "t", order = c("youth", "adult"))
null_distribution_gen
# Step 3: Calculate the p-value of your sample
p_val_gen <- null_distribution_gen %>% # replace name here if you assigned something other than null_distribution above
get_pvalue(obs_stat = test_stat, direction = "two-sided") # would only replace test_stat if assigned another name in Step 1
## Warning: Please be cautious in reporting a p-value of 0. This result is an
## approximation based on the number of `reps` chosen in the `generate()` step.
## See `?get_p_value()` for more information.
## Three models
# List of packages
packages <- c("tidyverse", "infer", "fst", "modelsummary", "effects", "survey", "MASS") # add any you need here
# Install packages if they aren't installed already
new_packages <- packages[!(packages %in% installed.packages()[,"Package"])]
if(length(new_packages)) install.packages(new_packages)
# Load the packages
lapply(packages, library, character.only = TRUE)
## Loading required package: grid
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
## Loading required package: survival
##
## Attaching package: 'survey'
## The following object is masked from 'package:graphics':
##
## dotchart
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
### Seperate here
# We use the "effect" function to call the interaction term from model 1. We assign the results to "effect_plot"
interaction_plot_1 <- effect("gen", model1, na.rm=TRUE)
#We plot here the interaction term
plot(interaction_plot_1,
main="Interaction effect the youth and adult x minority status",
xlab="the youth and adult",
ylab="Trust in Police")
# We use the "effect" function to call the interaction term from model 2. We assign the results to "effect_plot"
interaction_plot_2 <- effect("gen*minority", model2, na.rm=TRUE)
## NOTE: gen:minority does not appear in the model
#We plot here the interaction term
plot(interaction_plot_2,
main="Interaction effect the youth and adult x minority status",
xlab="the youth and adult",
ylab="Trust in Police")
# We use the "effect" function to call the interaction term from model 3. We assign the results to "effect_plot"
interaction_plot_3 <- effect("gen*minority", model3, na.rm=TRUE)
#We plot here the interaction term
plot(interaction_plot_3,
main="Interaction effect the youth and adult x minority status",
xlab="the youth and adult",
ylab="Trust in Police")
# Now, let's extract just the coefficients
coefficients1 <- coef(model1)
print(coefficients1)
#For example, for my own interpretation
#(intercept of education level)
#if a person has a BA= 8.257593
#If a person has no BA= 8.257593
#if a person is a male= 8.1828554
#If a person is a female= 8.1828554 -0.1502479
#If a person has paid work= 8.0712298
#If a person has no paid work= 8.0712298 + 0.0721098
# A first crack at a table output
tidy(model1)
##
## Call:
## lm(formula = trstplc ~ gen, data = france_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.055 -1.055 0.137 1.945 4.137
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.05462 0.02303 262.950 < 2e-16 ***
## genadult -0.19158 0.03525 -5.435 5.55e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.24 on 16509 degrees of freedom
## Multiple R-squared: 0.001786, Adjusted R-squared: 0.001726
## F-statistic: 29.54 on 1 and 16509 DF, p-value: 5.55e-08
summary(model2)
##
## Call:
## lm(formula = trstplc ~ gen + minority, data = france_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.0659 -1.0659 0.1126 1.9341 4.5261
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.65242 0.08900 63.507 < 2e-16 ***
## genadult -0.17850 0.03534 -5.051 4.43e-07 ***
## minorityNot Ethnic Minority 0.41348 0.08839 4.678 2.92e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.239 on 16508 degrees of freedom
## Multiple R-squared: 0.003108, Adjusted R-squared: 0.002987
## F-statistic: 25.73 on 2 and 16508 DF, p-value: 6.956e-12
summary(model3)
##
## Call:
## lm(formula = trstplc ~ gen + minority + gen * minority, data = france_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6.0682 -1.0682 0.1158 1.9318 4.4747
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.56977 0.13938 39.961 < 2e-16 ***
## genadult -0.04447 0.17749 -0.251 0.802184
## minorityNot Ethnic Minority 0.49844 0.14132 3.527 0.000421 ***
## genadult:minorityNot Ethnic Minority -0.13956 0.18112 -0.771 0.440978
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 2.239 on 16507 degrees of freedom
## Multiple R-squared: 0.003144, Adjusted R-squared: 0.002962
## F-statistic: 17.35 on 3 and 16507 DF, p-value: 3.032e-11
# Using modelsummary for prettier table, focusing here on just R squared and adj.
modelsummary::modelsummary(
list(model1),
gof_map = c("nobs", "r.squared", "adj.r.squared"))