# The main objective of this project is to analyze the European Social Survey (ESS) dataset and gain insights into the relationships between various socio-demographic factors and individuals' perceptions of trust, happiness, and social activities.
# Load necessary libraries
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.2 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.3 ✔ tibble 3.2.1
## ✔ lubridate 1.9.2 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)
library(lubridate)
library(ggplot2)
# Import the data
# Read the raw ESS CSV export into `ESS8e02_2`.
# NOTE(review): the path points at one analyst's Desktop folder, so the script
# only runs on a machine where this exact file exists.
ESS8e02_2 <- readr::read_csv(file = "~/Desktop/ESS8e02_2/ESS8e02_2.csv")
## Rows: 44387 Columns: 535
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): name, proddate, cntry, ctzshipc, cntbrthc, lnghom1, lnghom2, fbrn...
## dbl (525): essround, edition, idno, dweight, pspwght, pweight, anweight, nws...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Select relevant columns
# Names of the ESS variables kept for this analysis, in the order they are
# selected. Every name appears exactly once: an earlier version repeated
# "pdwrkp", "isco08p", "emprm14" and "occm14b", which made the subset carry
# duplicate columns (the recorded missing-value and summary output further
# down still shows them twice until the document is re-knit).
relevant_columns <- c(
  "idno", "cntry", "ppltrst", "pplfair", "pplhlp", "happy",
  "sclmeet", "sclact", "gndr", "agea", "edulvlb", "marsts", "chldhm",
  "domicil", "hinctnta", "wkhtot", "isco08", "emplrel", "eiscedp", "pdwrkp",
  "isco08p", "edulvlpb", "edctnp", "uemplap", "uemplip", "dsbldp",
  "rtrdp", "cmsrvp", "hswrkp", "mnactp", "icppdwk", "crpdwkp",
  "wkhtotp", "eiscedf", "edulvlfb", "emprm14", "occm14b", "eiscedm", "edulvlmb",
  "ipcrtiv", "imprich", "ipeqopt", "ipshabt", "impsafe",
  "impdiff", "ipfrule", "ipudrst", "ipmodst", "ipgdtim", "impfree", "iphlppl",
  "ipsuces", "ipstrgv", "ipadvnt", "ipbhprp", "iprspot", "iplylfr", "impenv",
  "imptrad", "impfun"
)
# Subset the loaded data
# Keep only the columns named in `relevant_columns`.
relevant_data <- ESS8e02_2[, relevant_columns]
# Check for missing values
# Count the NA entries in each retained column and print the named vector.
# NOTE(review): this only detects literal NA. The summary output recorded in
# this file shows maxima such as 99, 999 and 9999, which look like numeric
# "no answer" codes rather than real values, so an all-zero result here does
# not mean the data are complete -- confirm against the ESS codebook.
missing_values <- vapply(
  relevant_data,
  function(column) sum(is.na(column)),
  numeric(1)
)
print(missing_values)
## idno cntry ppltrst pplfair pplhlp happy sclmeet sclact
## 0 0 0 0 0 0 0 0
## gndr agea edulvlb marsts chldhm domicil hinctnta wkhtot
## 0 0 0 0 0 0 0 0
## isco08 emplrel eiscedp pdwrkp isco08p edulvlpb pdwrkp edctnp
## 0 0 0 0 0 0 0 0
## uemplap uemplip dsbldp rtrdp cmsrvp hswrkp mnactp icppdwk
## 0 0 0 0 0 0 0 0
## crpdwkp isco08p wkhtotp eiscedf edulvlfb emprm14 occm14b eiscedm
## 0 0 0 0 0 0 0 0
## edulvlmb emprm14 occm14b ipcrtiv imprich ipeqopt ipshabt impsafe
## 0 0 0 0 0 0 0 0
## impdiff ipfrule ipudrst ipmodst ipgdtim impfree iphlppl ipsuces
## 0 0 0 0 0 0 0 0
## ipstrgv ipadvnt ipbhprp iprspot iplylfr impenv imptrad impfun
## 0 0 0 0 0 0 0 0
# Calculate summary statistics
# Print the per-column summary of the selected variables (quartiles and mean
# for numeric columns; length, class and mode for character columns).
print(summary(relevant_data))
## idno cntry ppltrst pplfair
## Min. : 1 Length:44387 Min. : 0.000 Min. : 0.000
## 1st Qu.: 1208 Class :character 1st Qu.: 4.000 1st Qu.: 5.000
## Median : 2589 Mode :character Median : 5.000 Median : 6.000
## Mean : 31545782 Mean : 5.483 Mean : 6.388
## 3rd Qu.: 11058 3rd Qu.: 7.000 3rd Qu.: 7.000
## Max. :551603139 Max. :99.000 Max. :99.000
## pplhlp happy sclmeet sclact
## Min. : 0.000 Min. : 0.00 Min. : 1.000 Min. :1.000
## 1st Qu.: 4.000 1st Qu.: 7.00 1st Qu.: 4.000 1st Qu.:2.000
## Median : 5.000 Median : 8.00 Median : 5.000 Median :3.000
## Mean : 5.542 Mean : 7.83 Mean : 5.199 Mean :2.809
## 3rd Qu.: 7.000 3rd Qu.: 9.00 3rd Qu.: 6.000 3rd Qu.:3.000
## Max. :99.000 Max. :99.00 Max. :99.000 Max. :9.000
## gndr agea edulvlb marsts
## Min. :1.000 Min. : 15.00 Min. : 0.0 Min. : 1.00
## 1st Qu.:1.000 1st Qu.: 34.00 1st Qu.: 229.0 1st Qu.: 6.00
## Median :2.000 Median : 49.00 Median : 322.0 Median : 6.00
## Mean :1.528 Mean : 52.46 Mean : 425.1 Mean :35.96
## 3rd Qu.:2.000 3rd Qu.: 64.00 3rd Qu.: 520.0 3rd Qu.:66.00
## Max. :9.000 Max. :999.00 Max. :9999.0 Max. :99.00
## chldhm domicil hinctnta wkhtot isco08
## Min. :1.000 Min. :1.00 Min. : 1.00 Min. : 0.0 Min. : 0
## 1st Qu.:1.000 1st Qu.:2.00 1st Qu.: 3.00 1st Qu.: 38.0 1st Qu.: 3115
## Median :2.000 Median :3.00 Median : 6.00 Median : 40.0 Median : 5164
## Mean :1.651 Mean :2.88 Mean :18.78 Mean :143.5 Mean :11203
## 3rd Qu.:2.000 3rd Qu.:4.00 3rd Qu.: 9.00 3rd Qu.: 50.0 3rd Qu.: 8111
## Max. :9.000 Max. :9.00 Max. :99.00 Max. :999.0 Max. :99999
## emplrel eiscedp pdwrkp isco08p edulvlpb
## Min. :1.000 Min. : 1.00 Min. :0.000 Min. : 0 Min. : 0
## 1st Qu.:1.000 1st Qu.: 4.00 1st Qu.:0.000 1st Qu.: 6111 1st Qu.: 322
## Median :1.000 Median : 7.00 Median :0.000 Median :66666 Median : 710
## Mean :1.569 Mean :30.78 Mean :0.357 Mean :45020 Mean :3102
## 3rd Qu.:1.000 3rd Qu.:66.00 3rd Qu.:1.000 3rd Qu.:66666 3rd Qu.:6666
## Max. :9.000 Max. :99.00 Max. :1.000 Max. :99999 Max. :9999
## pdwrkp edctnp uemplap uemplip
## Min. :0.000 Min. :0.00000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.000 Median :0.00000 Median :0.00000 Median :0.000000
## Mean :0.357 Mean :0.01169 Mean :0.01579 Mean :0.006939
## 3rd Qu.:1.000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.000 Max. :1.00000 Max. :1.00000 Max. :1.000000
## dsbldp rtrdp cmsrvp hswrkp
## Min. :0.00000 Min. :0.0000 Min. :0.0000000 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.0000000 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.0000000 Median :0.00000
## Mean :0.01518 Mean :0.1421 Mean :0.0002703 Mean :0.09221
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.0000000 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.0000000 Max. :1.00000
## mnactp icppdwk crpdwkp isco08p
## Min. : 1.00 Min. :1.000 Min. :1.000 Min. : 0
## 1st Qu.:66.00 1st Qu.:1.000 1st Qu.:6.000 1st Qu.: 6111
## Median :66.00 Median :2.000 Median :6.000 Median :66666
## Mean :62.29 Mean :3.322 Mean :5.109 Mean :45020
## 3rd Qu.:66.00 3rd Qu.:6.000 3rd Qu.:6.000 3rd Qu.:66666
## Max. :99.00 Max. :9.000 Max. :9.000 Max. :99999
## wkhtotp eiscedf edulvlfb emprm14 occm14b
## Min. : 0.0 Min. : 1.0 Min. : 0 Min. :1.000 Min. : 1.00
## 1st Qu.: 45.0 1st Qu.: 1.0 1st Qu.: 113 1st Qu.:1.000 1st Qu.: 5.00
## Median :666.0 Median : 3.0 Median : 321 Median :2.000 Median : 9.00
## Mean :471.9 Mean :11.6 Mean :1163 Mean :2.043 Mean :31.68
## 3rd Qu.:666.0 3rd Qu.: 5.0 3rd Qu.: 520 3rd Qu.:3.000 3rd Qu.:66.00
## Max. :999.0 Max. :99.0 Max. :9999 Max. :9.000 Max. :99.00
## eiscedm edulvlmb emprm14 occm14b
## Min. : 1.000 Min. : 0 Min. :1.000 Min. : 1.00
## 1st Qu.: 1.000 1st Qu.: 113 1st Qu.:1.000 1st Qu.: 5.00
## Median : 2.000 Median : 229 Median :2.000 Median : 9.00
## Mean : 8.483 Mean : 845 Mean :2.043 Mean :31.68
## 3rd Qu.: 4.000 3rd Qu.: 323 3rd Qu.:3.000 3rd Qu.:66.00
## Max. :99.000 Max. :9999 Max. :9.000 Max. :99.00
## ipcrtiv imprich ipeqopt ipshabt
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:1.000 1st Qu.:2.000
## Median :2.000 Median :4.000 Median :2.000 Median :3.000
## Mean :2.694 Mean :4.174 Mean :2.289 Mean :3.323
## 3rd Qu.:3.000 3rd Qu.:5.000 3rd Qu.:3.000 3rd Qu.:4.000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.000
## impsafe impdiff ipfrule ipudrst
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :3.000 Median :3.000 Median :2.000
## Mean :2.462 Mean :3.102 Mean :3.341 Mean :2.495
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:4.000 3rd Qu.:3.000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.000
## ipmodst ipgdtim impfree iphlppl
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:2.000 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000
## Median :2.000 Median :3.000 Median :2.000 Median :2.000
## Mean :2.749 Mean :3.008 Mean :2.286 Mean :2.306
## 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:3.000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.000
## ipsuces ipstrgv ipadvnt ipbhprp iprspot
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.00
## 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:3.000 1st Qu.:2.000 1st Qu.:2.00
## Median :3.000 Median :2.000 Median :4.000 Median :3.000 Median :3.00
## Mean :3.285 Mean :2.468 Mean :3.928 Mean :2.811 Mean :3.28
## 3rd Qu.:4.000 3rd Qu.:3.000 3rd Qu.:5.000 3rd Qu.:4.000 3rd Qu.:4.00
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.00
## iplylfr impenv imptrad impfun
## Min. :1.000 Min. :1.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.:1.000 1st Qu.:2.000 1st Qu.:2.000
## Median :2.000 Median :2.000 Median :3.000 Median :3.000
## Mean :2.068 Mean :2.274 Mean :2.849 Mean :3.092
## 3rd Qu.:2.000 3rd Qu.:3.000 3rd Qu.:4.000 3rd Qu.:4.000
## Max. :9.000 Max. :9.000 Max. :9.000 Max. :9.000
## Data Visualization
# Age Distribution
# Create a histogram of ages in 5-year bins.
# `agea` stores unavailable ages as the numeric code 999 (the recorded summary
# shows a maximum of 999). Those rows are dropped before plotting; otherwise
# they form a spurious bar far to the right and stretch the x-axis.
# NOTE(review): 999 = "Not available" per the ESS codebook -- confirm for this
# edition of the data.
ggplot(
  dplyr::filter(ESS8e02_2, agea != 999),
  aes(x = agea)
) +
  geom_histogram(binwidth = 5, fill = "blue", color = "black") +
  labs(title = "Distribution of Ages", x = "Age", y = "Frequency")
# Education Level Distribution
# Bar chart with one bar per distinct `edulvlb` code; factor() makes ggplot
# treat the numeric codes as categories instead of a continuous axis.
# NOTE(review): the recorded summary shows an `edulvlb` maximum of 9999, so
# some bars presumably represent "other"/non-response codes rather than
# education levels -- confirm against the ESS codebook.
education_plot <- ggplot(ESS8e02_2, aes(x = factor(edulvlb))) +
  geom_bar() +
  ggtitle("Distribution of Education Levels") +
  xlab("Education Level") +
  ylab("Frequency")
# Display the plot
print(education_plot)
# Education Level vs Trust in People
# Scatter plot: Education Level vs Trust in People.
# Rows holding numeric non-response codes are removed first. The recorded
# summary shows `ppltrst` reaching 99 and `edulvlb` reaching 9999; plotting
# those as real values compresses all genuine observations into one corner.
# NOTE(review): valid ranges assumed from the ESS codebook -- `ppltrst` 0-10,
# `edulvlb` 0-800 (5555 and above are "other"/refusal/don't know/no answer);
# confirm for this edition.
ggplot(
  dplyr::filter(ESS8e02_2, edulvlb <= 800, ppltrst <= 10),
  aes(x = edulvlb, y = ppltrst)
) +
  geom_point() +
  labs(x = "Education Level", y = "Trust in People") +
  ggtitle("Education Level vs Trust in People")
# Education Level vs Happiness
# Scatter plot: Education Level vs Happiness.
# Rows holding numeric non-response codes are removed first. The recorded
# summary shows `happy` reaching 99 and `edulvlb` reaching 9999; plotting
# those as real values compresses all genuine observations into one corner.
# NOTE(review): valid ranges assumed from the ESS codebook -- `happy` 0-10,
# `edulvlb` 0-800 (5555 and above are "other"/refusal/don't know/no answer);
# confirm for this edition.
ggplot(
  dplyr::filter(ESS8e02_2, edulvlb <= 800, happy <= 10),
  aes(x = edulvlb, y = happy)
) +
  geom_point() +
  labs(x = "Education Level", y = "Happiness") +
  ggtitle("Education Level vs Happiness")
# Education Level vs Social Activities
# Scatter plot: Education Level vs Social Activities.
# Rows holding numeric non-response codes are removed first. The recorded
# summary shows `sclact` reaching 9 and `edulvlb` reaching 9999; plotting
# those as real values distorts both axes.
# NOTE(review): valid ranges assumed from the ESS codebook -- `sclact` 1-5
# (7/8/9 are refusal/don't know/no answer), `edulvlb` 0-800 (5555 and above
# are "other"/non-response); confirm for this edition.
ggplot(
  dplyr::filter(ESS8e02_2, edulvlb <= 800, sclact <= 5),
  aes(x = edulvlb, y = sclact)
) +
  geom_point() +
  labs(x = "Education Level", y = "Social Activities") +
  ggtitle("Education Level vs Social Activities")
# Linear Regression
# Fit happiness on education level by ordinary least squares.
# Rows containing numeric non-response codes are excluded through `subset`.
# Without this, values such as happy = 99 and edulvlb = 9999 enter the fit as
# if they were real measurements (the previously recorded fit had a maximum
# residual of about 91 on a 0-10 outcome).
# NOTE(review): valid ranges assumed from the ESS codebook -- `happy` 0-10,
# `edulvlb` 0-800; confirm for this edition. `edulvlb` is a category code, so
# treating it as a continuous predictor is itself a simplification.
# NOTE: the console output recorded after this chunk was produced before the
# filter was added and must be regenerated by re-running the script.
reg_model <- lm(
  happy ~ edulvlb,
  data = relevant_data,
  subset = happy <= 10 & edulvlb <= 800
)
# Print the summary of the regression
summary(reg_model)
##
## Call:
## lm(formula = happy ~ edulvlb, data = relevant_data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -10.140 -0.923 0.112 1.109 91.237
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.696e+00 3.600e-02 213.783 < 2e-16 ***
## edulvlb 3.143e-04 5.368e-05 5.855 4.81e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.866 on 44385 degrees of freedom
## Multiple R-squared: 0.0007717, Adjusted R-squared: 0.0007492
## F-statistic: 34.28 on 1 and 44385 DF, p-value: 4.812e-09
## Project Conclusion
# Summary of Insights and Findings:
# Throughout the project, we analyzed the European Social Survey (ESS) dataset to understand the relationships between socio-demographic factors and perceptions of trust, happiness, and social activities. Here are the key insights:
# Age Distribution:
# We observed a diverse age distribution, with a median age of about 49.
# Education Level and Trust:
# Education level showed a slight positive correlation with trust in people.
# Education Level and Happiness:
# No clear relationship between education level and happiness was visible in the scatter plot; the linear regression below quantifies the association.
# Education Level and Social Activities:
# Weak positive correlation between education level and engagement in social activities.
# Linear Regression:
# Education level had a statistically significant but small positive effect on happiness.
# Limitations and Next Steps:
# It's important to acknowledge the limitations and consider further exploration:
# Causality vs. Correlation.
# Other factors that were not explored.
# Data quality and potential errors.
# Next steps include adding more explanatory factors, conducting qualitative analysis, and performing longitudinal analysis.
## Final Thoughts
# In conclusion, this project offered insights into the complex relationships between socio-demographic factors and perceptions of trust, happiness, and social activities. As we move forward, a holistic approach is crucial to understanding the intricate dynamics of human behaviors and emotions.