############################################################
# Session Setup
############################################################
# The workspace is intentionally not cleared with rm(list = ls()) and the
# working directory is not changed with setwd(): both silently alter the
# session of whoever runs this script, and nothing in the visible part of
# the script depends on either.
############################################################
# Data File Information
############################################################
# Data file used: gss2018.rda
# Folder that holds the data file; edit this one line on another machine.
data_dir <- "D:/D Drive/Ph.D. Course Work/PhD_2025/DataFile"
data_file <- file.path(data_dir, "gss2018.rda")
############################################################
# Load Data into R
############################################################
# Fail early with a readable message if the file is not where it is expected.
if (!file.exists(data_file)) {
  stop("Data file not found: ", data_file, call. = FALSE)
}
# load() restores the objects stored in the file; the script uses the GSS
# object from it below.
load(data_file)
############################################################
# Load Required Libraries
############################################################
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
############################################################
# Create Working Copy of the Data
############################################################
# Work on a copy so the loaded GSS object itself is never modified
gssdf <- GSS
# Five-number summary and mean of every column in the raw data
summary(object = gssdf)
## YEAR BALLOT USETECH HAPPY
## Min. :2018 Min. :1.000 Min. : -1.00 Min. :1.000
## 1st Qu.:2018 1st Qu.:1.000 1st Qu.: -1.00 1st Qu.:1.000
## Median :2018 Median :2.000 Median : 10.00 Median :2.000
## Mean :2018 Mean :2.002 Mean : 48.09 Mean :1.855
## 3rd Qu.:2018 3rd Qu.:3.000 3rd Qu.: 80.00 3rd Qu.:2.000
## Max. :2018 Max. :3.000 Max. :999.00 Max. :8.000
## PARTYID RINCOME RACE SEX
## Min. :0.000 Min. : 0.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.: 0.000 1st Qu.:1.000 1st Qu.:1.000
## Median :3.000 Median : 9.000 Median :1.000 Median :2.000
## Mean :2.968 Mean : 7.509 Mean :1.394 Mean :1.552
## 3rd Qu.:5.000 3rd Qu.:12.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :9.000 Max. :98.000 Max. :3.000 Max. :2.000
## DEGREE EDUC AGE MARITAL
## Min. :0.000 Min. : 0.00 Min. :18.00 Min. :1.00
## 1st Qu.:1.000 1st Qu.:12.00 1st Qu.:34.00 1st Qu.:1.00
## Median :1.000 Median :14.00 Median :48.00 Median :2.00
## Mean :1.684 Mean :13.84 Mean :49.13 Mean :2.67
## 3rd Qu.:3.000 3rd Qu.:16.00 3rd Qu.:63.00 3rd Qu.:5.00
## Max. :4.000 Max. :99.00 Max. :99.00 Max. :9.00
## HRS2 HRS1 WRKSTAT ID_
## Min. :-1.00000 Min. :-1.00 Min. :1.000 Min. : 1
## 1st Qu.:-1.00000 1st Qu.:-1.00 1st Qu.:1.000 1st Qu.: 588
## Median :-1.00000 Median :30.00 Median :2.000 Median :1176
## Mean : 0.08017 Mean :24.47 Mean :2.963 Mean :1175
## 3rd Qu.:-1.00000 3rd Qu.:40.00 3rd Qu.:5.000 3rd Qu.:1762
## Max. :99.00000 Max. :99.00 Max. :9.000 Max. :2348
## UNHAPPY
## Min. :0.000
## 1st Qu.:0.000
## Median :0.000
## Mean :1.039
## 3rd Qu.:2.000
## Max. :9.000
############################################################
# Data Pre-processing and Cleaning
############################################################
# Recode missing-value codes to NA and convert categorical codes to factors.
# Any code not listed in a case_when() falls through to NA instead of being
# given a label.
gssdf <- gssdf %>%
  mutate(
    HAPPYC = as.factor(case_when(
      HAPPY == 1 ~ "Very Happy",
      HAPPY == 2 ~ "Pretty Happy",
      HAPPY == 3 ~ "Not too happy",
      TRUE ~ NA_character_
    )),
    # SEX == 2 is matched explicitly: a catch-all "Female" branch would also
    # label missing or unexpected SEX codes as Female.
    GENDERC = as.factor(case_when(
      SEX == 1 ~ "Male",
      SEX == 2 ~ "Female",
      TRUE ~ NA_character_
    )),
    # NOTE(review): code 2 is labelled "HS.", which reads like a second
    # "high school" level; presumably it is the junior-college category in
    # the GSS codebook - confirm before renaming, because this label appears
    # in every table and plot below.
    DEGC = as.factor(case_when(
      DEGREE == 0 ~ "<High Sc.",
      DEGREE == 1 ~ "High Sc.",
      DEGREE == 2 ~ "HS.",
      DEGREE == 3 ~ "Bachelor",
      DEGREE == 4 ~ "Graduate",
      TRUE ~ NA_character_
    )),
    # -1, 998 and 999 are treated as missing-value codes for USETECH
    USGC = case_when(
      USETECH %in% c(-1, 998, 999) ~ NA,
      TRUE ~ USETECH
    ),
    # 98 and 99 are treated as missing-value codes for AGE
    AGEC = case_when(
      AGE %in% c(98, 99) ~ NA,
      TRUE ~ AGE
    )
  )
# Keep only the three analysis variables and drop rows with NA in any of them.
# NOTE(review): HAPPYC and AGEC created above are discarded by this select();
# add them here if a later analysis needs them.
gssdf <- gssdf %>%
  select(USGC, DEGC, GENDERC) %>%
  drop_na()
############################################################
# Exploratory Data Analysis (EDA)
############################################################
# Mean and standard deviation of USGC within each education level
usedeg <- summarise(
  group_by(gssdf, DEGC),
  aveT = mean(USGC),
  sdT = sd(USGC)
)
print(usedeg)
## # A tibble: 5 × 3
## DEGC aveT sdT
## <fct> <dbl> <dbl>
## 1 <High Sc. 24.8 36.2
## 2 Bachelor 67.9 32.1
## 3 Graduate 68.7 30.2
## 4 High Sc. 49.6 38.6
## 5 HS. 62.4 35.2
############################################################
# Boxplots for USETECH across DEGREE
############################################################
ggplot(gssdf, aes(x = DEGC, y = USGC, fill = DEGC)) +
  # Outliers are hidden in the box layer because the jitter layer below
  # already draws every observation; otherwise they would be plotted twice.
  geom_boxplot(outlier.shape = NA) +
  # height = 0 keeps each point at its observed USGC value; only the
  # horizontal position is jittered to reduce overplotting.
  geom_jitter(width = 0.2, height = 0, alpha = 0.4) +
  labs(
    title = "USETECH by Education Level",
    x = "Education Level",
    y = "Use of Technology"
  )

############################################################
# Density Plots for USETECH by DEGREE
############################################################
# One semi-transparent density curve of USGC per education level, each drawn
# in its own panel
ggplot(data = gssdf) +
  geom_density(mapping = aes(x = USGC, fill = DEGC), alpha = 0.4) +
  facet_wrap(vars(DEGC)) +
  ggtitle("Density of USETECH by Education Level")

############################################################
# One-Way ANOVA (Assuming Equal Variance)
############################################################
# Fit USGC on education level; `model` is reused by the Tukey HSD test below
model <- aov(formula = USGC ~ DEGC, data = gssdf)
# ANOVA table for the fitted model
summary(object = model)
## Df Sum Sq Mean Sq F value Pr(>F)
## DEGC 4 221301 55325 43.3 <2e-16 ***
## Residuals 1404 1793757 1278
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############################################################
# Post Hoc Tests for One-Way ANOVA
############################################################
# Pairwise t-tests between education levels with Bonferroni-adjusted p-values.
# The argument name and the method name are written out in full so the call
# does not rely on partial matching.
pairwise.t.test(
  x = gssdf$USGC,
  g = gssdf$DEGC,
  p.adjust.method = "bonferroni"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: gssdf$USGC and gssdf$DEGC
##
## <High Sc. Bachelor Graduate High Sc.
## Bachelor < 2e-16 - - -
## Graduate < 2e-16 1.0000 - -
## High Sc. 3.8e-11 8.0e-13 7.3e-09 -
## HS. 2.8e-15 1.0000 1.0000 0.0022
##
## P value adjustment method: bonferroni
# Tukey honest significant differences for every pair of education levels,
# at the default 95% family-wise confidence level
TukeyHSD(x = model, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = USGC ~ DEGC, data = gssdf)
##
## $DEGC
## diff lwr upr p adj
## Bachelor-<High Sc. 43.0859568 32.653180 53.518734 0.0000000
## Graduate-<High Sc. 43.9107249 32.256416 55.565034 0.0000000
## High Sc.-<High Sc. 24.8247754 15.145211 34.504340 0.0000000
## HS.-<High Sc. 37.6070313 25.201887 50.012175 0.0000000
## Graduate-Bachelor 0.8247681 -8.438819 10.088355 0.9992282
## High Sc.-Bachelor -18.2611813 -24.870652 -11.651711 0.0000000
## HS.-Bachelor -5.4789255 -15.671017 4.713166 0.5833665
## High Sc.-Graduate -19.0859494 -27.492207 -10.679691 0.0000000
## HS.-Graduate -6.3036936 -17.743047 5.135659 0.5592907
## HS.-High Sc. 12.7822558 3.362603 22.201908 0.0020352
############################################################
# Checking Normality Assumption (Q-Q Plot)
############################################################
# Normal Q-Q points and reference line for USGC, one panel per education level
ggplot(data = gssdf, mapping = aes(sample = USGC, colour = DEGC)) +
  geom_qq() +
  geom_qq_line() +
  facet_wrap(vars(DEGC)) +
  ggtitle("Q-Q Plot for USETECH by Degree")

############################################################
# Brown-Forsythe Test for Equal Variances (Robust to Non-Normality)
############################################################
# Absolute deviation of each observation from the median of its own
# education level. The grouping is needed only for this step, so it is
# removed again; otherwise gssdf would stay grouped for all later code.
gssdf <- gssdf %>%
  group_by(DEGC) %>%
  mutate(usetrn = abs(USGC - median(USGC, na.rm = TRUE))) %>%
  ungroup()
# Equal-variance one-way test on those deviations: differences between
# groups here indicate unequal spread of USGC across education levels.
oneway.test(
  usetrn ~ DEGC,
  data = gssdf,
  var.equal = TRUE
)
##
## One-way analysis of means
##
## data: usetrn and DEGC
## F = 18.44, num df = 4, denom df = 1404, p-value = 8.845e-15
############################################################
# Levene's Test for Homogeneity of Variance
############################################################
# Levene's test of equal USGC variance across education levels, using the
# function's default centering
car::leveneTest(USGC ~ DEGC, data = gssdf)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 18.44 8.845e-15 ***
## 1404
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############################################################
# Two-Way ANOVA: Boxplots of USETECH by DEGREE and SEX
############################################################
# Side-by-side boxes of USGC for each gender within every education level
ggplot(data = gssdf, mapping = aes(x = DEGC, y = USGC, fill = GENDERC)) +
  geom_boxplot() +
  ggtitle("USETECH by Degree and Sex")

############################################################
# Interaction Plot (Mean USETECH)
############################################################
# Cell means of USGC: one point per education level and gender, with the
# means of each gender joined by a line
ggplot(data = gssdf, mapping = aes(x = DEGC, y = USGC, colour = GENDERC)) +
  geom_point(stat = "summary", fun = mean) +
  geom_line(mapping = aes(group = GENDERC), stat = "summary", fun = mean) +
  ggtitle("Interaction Plot: DEGREE × SEX")

############################################################
# Two-Way ANOVA Model
############################################################
# Education level, gender and their interaction as predictors of USGC;
# `x` is reused by the post hoc test below
x <- aov(formula = USGC ~ DEGC * GENDERC, data = gssdf)
# ANOVA table: terms are listed in the order they appear in the formula
summary(object = x)
## Df Sum Sq Mean Sq F value Pr(>F)
## DEGC 4 221301 55325 44.209 < 2e-16 ***
## GENDERC 1 16473 16473 13.163 0.000296 ***
## DEGC:GENDERC 4 26510 6627 5.296 0.000311 ***
## Residuals 1399 1750775 1251
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
############################################################
# Post Hoc Test for Two-Way ANOVA
############################################################
# Tukey HSD comparisons for both main effects and for every pair of
# education-by-gender cells, at the default 95% family-wise confidence level
TukeyHSD(x = x, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = USGC ~ DEGC * GENDERC, data = gssdf)
##
## $DEGC
## diff lwr upr p adj
## Bachelor-<High Sc. 43.0859568 32.760484 53.411429 0.0000000
## Graduate-<High Sc. 43.9107249 32.376284 55.445165 0.0000000
## High Sc.-<High Sc. 24.8247754 15.244768 34.404783 0.0000000
## HS.-<High Sc. 37.6070313 25.329478 49.884584 0.0000000
## Graduate-Bachelor 0.8247681 -8.343540 9.993076 0.9991960
## High Sc.-Bachelor -18.2611813 -24.802671 -11.719691 0.0000000
## HS.-Bachelor -5.4789255 -15.566188 4.608337 0.5733923
## High Sc.-Graduate -19.0859494 -27.405746 -10.766152 0.0000000
## HS.-Graduate -6.3036936 -17.625389 5.018002 0.5490670
## HS.-High Sc. 12.7822558 3.459487 22.105024 0.0017563
##
## $GENDERC
## diff lwr upr p adj
## Male-Female -6.80899 -10.50928 -3.108699 0.0003174
##
## $`DEGC:GENDERC`
## diff lwr upr p adj
## Bachelor:Female-<High Sc.:Female 44.0775194 26.5912218 61.563817 0.0000000
## Graduate:Female-<High Sc.:Female 42.2192029 23.0019908 61.436415 0.0000000
## High Sc.:Female-<High Sc.:Female 32.1878788 15.7321731 48.643584 0.0000000
## HS.:Female-<High Sc.:Female 46.7797619 27.2270154 66.332508 0.0000000
## <High Sc.:Male-<High Sc.:Female 2.0378788 -18.5317533 22.607511 0.9999995
## Bachelor:Male-<High Sc.:Female 44.3530702 26.5940220 62.112118 0.0000000
## Graduate:Male-<High Sc.:Female 48.3917749 28.4942914 68.289258 0.0000000
## High Sc.:Male-<High Sc.:Female 19.8510848 3.4227876 36.279382 0.0052315
## HS.:Male-<High Sc.:Female 23.3560606 0.5896498 46.122471 0.0389231
## Graduate:Female-Bachelor:Female -1.8583165 -16.3376501 12.621017 0.9999951
## High Sc.:Female-Bachelor:Female -11.8896406 -22.4319416 -1.347340 0.0133486
## HS.:Female-Bachelor:Female 2.7022425 -12.2195454 17.624030 0.9999069
## <High Sc.:Male-Bachelor:Female -42.0396406 -58.2710800 -25.808201 0.0000000
## Bachelor:Male-Bachelor:Female 0.2755508 -12.2037783 12.754880 1.0000000
## Graduate:Male-Bachelor:Female 4.3142555 -11.0564866 19.684998 0.9967894
## High Sc.:Male-Bachelor:Female -24.2264346 -34.7259018 -13.726967 0.0000000
## HS.:Male-Bachelor:Female -20.7214588 -39.6597618 -1.783156 0.0192858
## High Sc.:Female-Graduate:Female -10.0313241 -23.2476303 3.184982 0.3233313
## HS.:Female-Graduate:Female 4.5605590 -12.3566037 21.477722 0.9976459
## <High Sc.:Male-Graduate:Female -40.1813241 -58.2641962 -22.098452 0.0000000
## Bachelor:Male-Graduate:Female 2.1338673 -12.6737082 16.941443 0.9999867
## Graduate:Male-Graduate:Female 6.1725720 -11.1418829 23.487027 0.9816675
## High Sc.:Male-Graduate:Female -22.3681181 -35.5502821 -9.185954 0.0000039
## HS.:Male-Graduate:Female -18.8631423 -39.4104039 1.684119 0.1039186
## HS.:Female-High Sc.:Female 14.5918831 0.8922699 28.291496 0.0261888
## <High Sc.:Male-High Sc.:Female -30.1500000 -45.2655308 -15.034469 0.0000000
## Bachelor:Male-High Sc.:Female 12.1651914 1.1764108 23.153972 0.0167764
## Graduate:Male-High Sc.:Female 16.2038961 2.0166004 30.391192 0.0113631
## High Sc.:Male-High Sc.:Female -12.3367940 -21.0119573 -3.661631 0.0003049
## HS.:Male-High Sc.:Female -8.8318182 -26.8228985 9.159262 0.8690307
## <High Sc.:Male-HS.:Female -44.7418831 -63.1809427 -26.302824 0.0000000
## Bachelor:Male-HS.:Female -2.4266917 -17.6671952 12.813812 0.9999688
## Graduate:Male-HS.:Female 1.6120130 -16.0741116 19.298138 0.9999998
## High Sc.:Male-HS.:Female -26.9286771 -40.5953557 -13.261999 0.0000000
## HS.:Male-HS.:Female -23.4237013 -44.2851158 -2.562287 0.0141081
## Bachelor:Male-<High Sc.:Male 42.3151914 25.7902764 58.840106 0.0000000
## Graduate:Male-<High Sc.:Male 46.3538961 27.5496712 65.158121 0.0000000
## High Sc.:Male-<High Sc.:Male 17.8132060 2.7275183 32.898894 0.0072699
## HS.:Male-<High Sc.:Male 21.3181818 -0.4992077 43.135571 0.0619111
## Graduate:Male-Bachelor:Male 4.0387047 -11.6416301 19.719040 0.9983501
## High Sc.:Male-Bachelor:Male -24.5019854 -35.4496792 -13.554292 0.0000000
## HS.:Male-Bachelor:Male -20.9970096 -40.1874372 -1.806582 0.0192892
## High Sc.:Male-Graduate:Male -28.5406901 -42.6961858 -14.385194 0.0000000
## HS.:Male-Graduate:Male -25.0357143 -46.2205808 -3.850848 0.0071871
## HS.:Male-High Sc.:Male 3.5049758 -14.4610385 21.470990 0.9998264