# Start from an empty workspace: remove every object that ls() reports in the
# current environment. One call is sufficient; repeating it has no effect.
rm(list = ls())
# Data file used gss2018.rda
# Switch the working directory to the data folder, printing the directory
# before and after the change.
getwd()
## [1] "D:/D Drive/Ph.D. Course Work/Ph.D. 2024"
setwd(dir = "D:\\D Drive\\Ph.D. Course Work\\Ph.D. 2024\\Data")
getwd()
## [1] "D:/D Drive/Ph.D. Course Work/Ph.D. 2024/Data"
# Restore the objects stored in the .rda file (path is relative to the
# working directory set above).
load(file = "gss2018.rda")
#Necessary Library
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(tidyr)
# Bind the GSS object to the name used in the rest of the script and print a
# per-column summary. GSS is not created in the visible code; presumably it is
# restored by the load() call above.
gss2018 <- GSS
summary(object = gss2018)
## YEAR BALLOT USETECH HAPPY
## Min. :2018 Min. :1.000 Min. : -1.00 Min. :1.000
## 1st Qu.:2018 1st Qu.:1.000 1st Qu.: -1.00 1st Qu.:1.000
## Median :2018 Median :2.000 Median : 10.00 Median :2.000
## Mean :2018 Mean :2.002 Mean : 48.09 Mean :1.855
## 3rd Qu.:2018 3rd Qu.:3.000 3rd Qu.: 80.00 3rd Qu.:2.000
## Max. :2018 Max. :3.000 Max. :999.00 Max. :8.000
## PARTYID RINCOME RACE SEX
## Min. :0.000 Min. : 0.000 Min. :1.000 Min. :1.000
## 1st Qu.:1.000 1st Qu.: 0.000 1st Qu.:1.000 1st Qu.:1.000
## Median :3.000 Median : 9.000 Median :1.000 Median :2.000
## Mean :2.968 Mean : 7.509 Mean :1.394 Mean :1.552
## 3rd Qu.:5.000 3rd Qu.:12.000 3rd Qu.:2.000 3rd Qu.:2.000
## Max. :9.000 Max. :98.000 Max. :3.000 Max. :2.000
## DEGREE EDUC AGE MARITAL
## Min. :0.000 Min. : 0.00 Min. :18.00 Min. :1.00
## 1st Qu.:1.000 1st Qu.:12.00 1st Qu.:34.00 1st Qu.:1.00
## Median :1.000 Median :14.00 Median :48.00 Median :2.00
## Mean :1.684 Mean :13.84 Mean :49.13 Mean :2.67
## 3rd Qu.:3.000 3rd Qu.:16.00 3rd Qu.:63.00 3rd Qu.:5.00
## Max. :4.000 Max. :99.00 Max. :99.00 Max. :9.00
## HRS2 HRS1 WRKSTAT ID_
## Min. :-1.00000 Min. :-1.00 Min. :1.000 Min. : 1
## 1st Qu.:-1.00000 1st Qu.:-1.00 1st Qu.:1.000 1st Qu.: 588
## Median :-1.00000 Median :30.00 Median :2.000 Median :1176
## Mean : 0.08017 Mean :24.47 Mean :2.963 Mean :1175
## 3rd Qu.:-1.00000 3rd Qu.:40.00 3rd Qu.:5.000 3rd Qu.:1762
## Max. :99.00000 Max. :99.00 Max. :9.000 Max. :2348
## UNHAPPY
## Min. :0.000
## 1st Qu.:0.000
## Median :0.000
## Mean :1.039
## 3rd Qu.:2.000
## Max. :9.000
# Data pre-processing as per the rule given: the listed codes of each variable
# are recoded to NA, then the categorical variables are converted to labelled
# factors.
gsscln <- gss2018 %>%
  # USETECH: codes -1, 999 and 998 are treated as missing.
  mutate(USETECH = na_if(USETECH, -1)) %>%
  mutate(USETECH = na_if(USETECH, 999)) %>%
  mutate(USETECH = na_if(USETECH, 998)) %>%
  # AGE: codes 98 and 99 are treated as missing.
  mutate(AGE = na_if(AGE, 98)) %>%
  mutate(AGE = na_if(AGE, 99)) %>%
  # HAPPY: codes 8, 9 and 0 are treated as missing.
  mutate(HAPPY = na_if(HAPPY, 8)) %>%
  mutate(HAPPY = na_if(HAPPY, 9)) %>%
  mutate(HAPPY = na_if(HAPPY, 0)) %>%
  # DEGREE: codes 8 and 9 are treated as missing. These two recodes must be
  # assigned to DEGREE itself; assigning them to HAPPY would replace the
  # happiness responses with education codes.
  mutate(DEGREE = na_if(DEGREE, 8)) %>%
  mutate(DEGREE = na_if(DEGREE, 9)) %>%
  # NOTE(review): "Not to Happy" is probably meant to read "Not too Happy";
  # the label is left unchanged because it is a runtime value.
  mutate(HAPPY = factor(
    x = HAPPY,
    levels = c(1, 2, 3),
    labels = c("Very Happy", "Pretty Happy", "Not to Happy")
  )) %>%
  mutate(SEX = factor(
    x = SEX,
    levels = c(1, 2),
    labels = c("Male", "Female")
  )) %>%
  mutate(DEGREE = factor(
    x = DEGREE,
    levels = c(0, 1, 2, 3, 4),
    labels = c("<High", "High", "Junior College", "Bachelor", "Graduate")
  ))
# Exploratory data analysis for comparing the means of multiple groups:
# mean and standard deviation of USETECH within each DEGREE level, computed
# only on rows where USETECH is not missing.
usedeg <- gsscln %>%
  drop_na(USETECH) %>%
  group_by(DEGREE) %>%
  summarise(
    aveT = mean(USETECH),
    sdT = sd(USETECH)
  )
usedeg
## # A tibble: 5 × 3
## DEGREE aveT sdT
## <fct> <dbl> <dbl>
## 1 <High 24.8 36.2
## 2 High 49.6 38.6
## 3 Junior College 62.4 35.2
## 4 Bachelor 67.9 32.1
## 5 Graduate 68.7 30.2
# Box plot of USETECH for every DEGREE level, with the individual
# observations overlaid as jittered points.
ggplot(
  data = gsscln,
  mapping = aes(x = DEGREE, y = USETECH, fill = DEGREE)
) +
  geom_boxplot() +
  geom_jitter()
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 936 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Density of USETECH, drawn in a separate panel for each DEGREE level.
ggplot(
  data = gsscln,
  mapping = aes(x = USETECH, fill = DEGREE)
) +
  geom_density(alpha = .4) +
  facet_wrap(~DEGREE)
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_density()`).

# One-way ANOVA of USETECH across DEGREE levels, assuming equal variances.
oneway.test(
  formula = gsscln$USETECH ~ gsscln$DEGREE,
  var.equal = TRUE
)
##
## One-way analysis of means
##
## data: gsscln$USETECH and gsscln$DEGREE
## F = 43.304, num df = 4, denom df = 1404, p-value < 2.2e-16
# Post hoc analysis for the one-way ANOVA: pairwise t tests between DEGREE
# levels with Bonferroni-adjusted p-values. The argument name and the method
# name are written out in full instead of relying on partial matching.
pairwise.t.test(
  gsscln$USETECH,
  gsscln$DEGREE,
  p.adjust.method = "bonferroni"
)
##
## Pairwise comparisons using t tests with pooled SD
##
## data: gsscln$USETECH and gsscln$DEGREE
##
## <High High Junior College Bachelor
## High 3.8e-11 - - -
## Junior College 2.8e-15 0.0022 - -
## Bachelor < 2e-16 8.0e-13 1.0000 -
## Graduate < 2e-16 7.3e-09 1.0000 1.0000
##
## P value adjustment method: bonferroni
# Tukey honest significant differences between DEGREE levels, based on a
# one-way aov fit of USETECH on DEGREE.
TukeyHSD(
  x = aov(USETECH ~ DEGREE, data = gsscln)
)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = USETECH ~ DEGREE, data = gsscln)
##
## $DEGREE
## diff lwr upr p adj
## High-<High 24.8247754 15.145211 34.50434 0.0000000
## Junior College-<High 37.6070313 25.201887 50.01218 0.0000000
## Bachelor-<High 43.0859568 32.653180 53.51873 0.0000000
## Graduate-<High 43.9107249 32.256416 55.56503 0.0000000
## Junior College-High 12.7822558 3.362603 22.20191 0.0020352
## Bachelor-High 18.2611813 11.651711 24.87065 0.0000000
## Graduate-High 19.0859494 10.679691 27.49221 0.0000000
## Bachelor-Junior College 5.4789255 -4.713166 15.67102 0.5833665
## Graduate-Junior College 6.3036936 -5.135659 17.74305 0.5592907
## Graduate-Bachelor 0.8247681 -8.438819 10.08835 0.9992282
# Checking the normality assumption for the one-way ANOVA: Q-Q plot of
# USETECH with a reference line, one panel per DEGREE level.
ggplot(data = gsscln, mapping = aes(sample = USETECH)) +
  stat_qq(mapping = aes(colour = DEGREE)) +
  stat_qq_line() +
  facet_wrap(~DEGREE)
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_qq()`).
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_qq_line()`).

# Brown-Forsythe test: one-way ANOVA on the absolute deviations of USETECH
# from the median of its DEGREE group.
# usetrn holds those absolute deviations; the grouping is removed afterwards
# so that later steps receive an ungrouped data frame.
gsscln <- gsscln %>%
  group_by(DEGREE) %>%
  mutate(usetrn = abs(USETECH - median(x = USETECH, na.rm = TRUE))) %>%
  ungroup()
# The test has to be run on the transformed variable usetrn; running it on
# USETECH only repeats the ordinary one-way ANOVA.
# NOTE(review): var.equal = TRUE is kept from the original call; drop it to
# get the Welch-corrected variant if that was the intent.
oneway.test(usetrn ~ DEGREE, data = gsscln, var.equal = TRUE)
## (Output not shown; re-run to regenerate. The output previously pasted here,
## F = 43.304, came from the untransformed USETECH model.)
# Levene's test for equality of USETECH variance across DEGREE groups.
leveneTest(
  y = USETECH ~ DEGREE,
  data = gsscln
)
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 4 18.44 8.845e-15 ***
## 1404
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two-way ANOVA: box plots of USETECH for each DEGREE level, split by SEX.
ggplot(
  data = gsscln,
  mapping = aes(x = DEGREE, y = USETECH)
) +
  geom_boxplot(mapping = aes(fill = SEX))
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Checking the interaction effect for multiple factors: mean USETECH at each
# DEGREE level, drawn as points joined by one line per SEX.
# `fun` replaces the `fun.y` argument, which is deprecated as of ggplot2 3.3.0.
ggplot(gsscln, mapping = aes(x = DEGREE, y = USETECH, colour = SEX)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line", aes(group = SEX))
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 936 rows containing non-finite outside the scale range
## (`stat_summary()`).

# Two-way ANOVA: USETECH modelled on DEGREE, SEX and their interaction.
x <- aov(
  formula = USETECH ~ DEGREE * SEX,
  data = gsscln
)
x
## Call:
## aov(formula = USETECH ~ DEGREE * SEX, data = gsscln)
##
## Terms:
## DEGREE SEX DEGREE:SEX Residuals
## Sum of Squares 221300.6 16472.7 26509.9 1750774.6
## Deg. of Freedom 4 1 4 1399
##
## Residual standard error: 35.3758
## Estimated effects may be unbalanced
## 936 observations deleted due to missingness
# Two-way ANOVA results: ANOVA table for the fitted model x.
summary(object = x)
## Df Sum Sq Mean Sq F value Pr(>F)
## DEGREE 4 221301 55325 44.209 < 2e-16 ***
## SEX 1 16473 16473 13.163 0.000296 ***
## DEGREE:SEX 4 26510 6627 5.296 0.000311 ***
## Residuals 1399 1750775 1251
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 936 observations deleted due to missingness
# Post hoc analysis for the two-way ANOVA: Tukey honest significant
# differences for every term of the fitted model x, at the default 95%
# family-wise confidence level.
TukeyHSD(x, conf.level = 0.95)
## Tukey multiple comparisons of means
## 95% family-wise confidence level
##
## Fit: aov(formula = USETECH ~ DEGREE * SEX, data = gsscln)
##
## $DEGREE
## diff lwr upr p adj
## High-<High 24.8247754 15.244768 34.404783 0.0000000
## Junior College-<High 37.6070313 25.329478 49.884584 0.0000000
## Bachelor-<High 43.0859568 32.760484 53.411429 0.0000000
## Graduate-<High 43.9107249 32.376284 55.445165 0.0000000
## Junior College-High 12.7822558 3.459487 22.105024 0.0017563
## Bachelor-High 18.2611813 11.719691 24.802671 0.0000000
## Graduate-High 19.0859494 10.766152 27.405746 0.0000000
## Bachelor-Junior College 5.4789255 -4.608337 15.566188 0.5733923
## Graduate-Junior College 6.3036936 -5.018002 17.625389 0.5490670
## Graduate-Bachelor 0.8247681 -8.343540 9.993076 0.9991960
##
## $SEX
## diff lwr upr p adj
## Female-Male 6.80899 3.108699 10.50928 0.0003174
##
## $`DEGREE:SEX`
## diff lwr upr
## High:Male-<High:Male 17.8132060 2.7275183 32.8988937
## Junior College:Male-<High:Male 21.3181818 -0.4992077 43.1355713
## Bachelor:Male-<High:Male 42.3151914 25.7902764 58.8401064
## Graduate:Male-<High:Male 46.3538961 27.5496712 65.1581210
## <High:Female-<High:Male -2.0378788 -22.6075109 18.5317533
## High:Female-<High:Male 30.1500000 15.0344692 45.2655308
## Junior College:Female-<High:Male 44.7418831 26.3028236 63.1809427
## Bachelor:Female-<High:Male 42.0396406 25.8082011 58.2710800
## Graduate:Female-<High:Male 40.1813241 22.0984520 58.2641962
## Junior College:Male-High:Male 3.5049758 -14.4610385 21.4709901
## Bachelor:Male-High:Male 24.5019854 13.5542915 35.4496792
## Graduate:Male-High:Male 28.5406901 14.3851943 42.6961858
## <High:Female-High:Male -19.8510848 -36.2793820 -3.4227876
## High:Female-High:Male 12.3367940 3.6616307 21.0119573
## Junior College:Female-High:Male 26.9286771 13.2619985 40.5953557
## Bachelor:Female-High:Male 24.2264346 13.7269673 34.7259018
## Graduate:Female-High:Male 22.3681181 9.1859540 35.5502821
## Bachelor:Male-Junior College:Male 20.9970096 1.8065820 40.1874372
## Graduate:Male-Junior College:Male 25.0357143 3.8508477 46.2205808
## <High:Female-Junior College:Male -23.3560606 -46.1224714 -0.5896498
## High:Female-Junior College:Male 8.8318182 -9.1592621 26.8228985
## Junior College:Female-Junior College:Male 23.4237013 2.5622868 44.2851158
## Bachelor:Female-Junior College:Male 20.7214588 1.7831557 39.6597618
## Graduate:Female-Junior College:Male 18.8631423 -1.6841193 39.4104039
## Graduate:Male-Bachelor:Male 4.0387047 -11.6416301 19.7190396
## <High:Female-Bachelor:Male -44.3530702 -62.1121183 -26.5940220
## High:Female-Bachelor:Male -12.1651914 -23.1539720 -1.1764108
## Junior College:Female-Bachelor:Male 2.4266917 -12.8138117 17.6671952
## Bachelor:Female-Bachelor:Male -0.2755508 -12.7548798 12.2037783
## Graduate:Female-Bachelor:Male -2.1338673 -16.9414427 12.6737082
## <High:Female-Graduate:Male -48.3917749 -68.2892584 -28.4942914
## High:Female-Graduate:Male -16.2038961 -30.3911918 -2.0166004
## Junior College:Female-Graduate:Male -1.6120130 -19.2981376 16.0741116
## Bachelor:Female-Graduate:Male -4.3142555 -19.6849976 11.0564866
## Graduate:Female-Graduate:Male -6.1725720 -23.4870269 11.1418829
## High:Female-<High:Female 32.1878788 15.7321731 48.6435845
## Junior College:Female-<High:Female 46.7797619 27.2270154 66.3325084
## Bachelor:Female-<High:Female 44.0775194 26.5912218 61.5638170
## Graduate:Female-<High:Female 42.2192029 23.0019908 61.4364150
## Junior College:Female-High:Female 14.5918831 0.8922699 28.2914963
## Bachelor:Female-High:Female 11.8896406 1.3473395 22.4319416
## Graduate:Female-High:Female 10.0313241 -3.1849820 23.2476303
## Bachelor:Female-Junior College:Female -2.7022425 -17.6240305 12.2195454
## Graduate:Female-Junior College:Female -4.5605590 -21.4777217 12.3566037
## Graduate:Female-Bachelor:Female -1.8583165 -16.3376501 12.6210171
## p adj
## High:Male-<High:Male 0.0072699
## Junior College:Male-<High:Male 0.0619111
## Bachelor:Male-<High:Male 0.0000000
## Graduate:Male-<High:Male 0.0000000
## <High:Female-<High:Male 0.9999995
## High:Female-<High:Male 0.0000000
## Junior College:Female-<High:Male 0.0000000
## Bachelor:Female-<High:Male 0.0000000
## Graduate:Female-<High:Male 0.0000000
## Junior College:Male-High:Male 0.9998264
## Bachelor:Male-High:Male 0.0000000
## Graduate:Male-High:Male 0.0000000
## <High:Female-High:Male 0.0052315
## High:Female-High:Male 0.0003049
## Junior College:Female-High:Male 0.0000000
## Bachelor:Female-High:Male 0.0000000
## Graduate:Female-High:Male 0.0000039
## Bachelor:Male-Junior College:Male 0.0192892
## Graduate:Male-Junior College:Male 0.0071871
## <High:Female-Junior College:Male 0.0389231
## High:Female-Junior College:Male 0.8690307
## Junior College:Female-Junior College:Male 0.0141081
## Bachelor:Female-Junior College:Male 0.0192858
## Graduate:Female-Junior College:Male 0.1039186
## Graduate:Male-Bachelor:Male 0.9983501
## <High:Female-Bachelor:Male 0.0000000
## High:Female-Bachelor:Male 0.0167764
## Junior College:Female-Bachelor:Male 0.9999688
## Bachelor:Female-Bachelor:Male 1.0000000
## Graduate:Female-Bachelor:Male 0.9999867
## <High:Female-Graduate:Male 0.0000000
## High:Female-Graduate:Male 0.0113631
## Junior College:Female-Graduate:Male 0.9999998
## Bachelor:Female-Graduate:Male 0.9967894
## Graduate:Female-Graduate:Male 0.9816675
## High:Female-<High:Female 0.0000000
## Junior College:Female-<High:Female 0.0000000
## Bachelor:Female-<High:Female 0.0000000
## Graduate:Female-<High:Female 0.0000000
## Junior College:Female-High:Female 0.0261888
## Bachelor:Female-High:Female 0.0133486
## Graduate:Female-High:Female 0.3233313
## Bachelor:Female-Junior College:Female 0.9999069
## Graduate:Female-Junior College:Female 0.9976459
## Graduate:Female-Bachelor:Female 0.9999951