Data Analysis using R

One-way and two-way ANOVA: a case study

Day11

———————————————————————–

# Clear every object from the current workspace before starting.
rm(list = ls())
# Data file used: gss2018.rda
# Report the working directory, switch to the data folder, and confirm it.
getwd()
## [1] "D:/D Drive/Ph.D. Course Work/Ph.D. 2024"
setwd("D:\\D Drive\\Ph.D. Course Work\\Ph.D. 2024\\Data")
getwd()
## [1] "D:/D Drive/Ph.D. Course Work/Ph.D. 2024/Data"
# Load the saved objects from the data file into the workspace.
load("gss2018.rda")

#Necessary Library
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyr)
library(ggplot2)
library(car)
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
## 
##     recode
library(tidyr)
# Bind the GSS object (presumably created by the load() call earlier -- confirm)
# to the name used in the rest of the script, then print column summaries.
gss2018 <- GSS
summary(gss2018)
##       YEAR          BALLOT         USETECH           HAPPY      
##  Min.   :2018   Min.   :1.000   Min.   : -1.00   Min.   :1.000  
##  1st Qu.:2018   1st Qu.:1.000   1st Qu.: -1.00   1st Qu.:1.000  
##  Median :2018   Median :2.000   Median : 10.00   Median :2.000  
##  Mean   :2018   Mean   :2.002   Mean   : 48.09   Mean   :1.855  
##  3rd Qu.:2018   3rd Qu.:3.000   3rd Qu.: 80.00   3rd Qu.:2.000  
##  Max.   :2018   Max.   :3.000   Max.   :999.00   Max.   :8.000  
##     PARTYID         RINCOME            RACE            SEX       
##  Min.   :0.000   Min.   : 0.000   Min.   :1.000   Min.   :1.000  
##  1st Qu.:1.000   1st Qu.: 0.000   1st Qu.:1.000   1st Qu.:1.000  
##  Median :3.000   Median : 9.000   Median :1.000   Median :2.000  
##  Mean   :2.968   Mean   : 7.509   Mean   :1.394   Mean   :1.552  
##  3rd Qu.:5.000   3rd Qu.:12.000   3rd Qu.:2.000   3rd Qu.:2.000  
##  Max.   :9.000   Max.   :98.000   Max.   :3.000   Max.   :2.000  
##      DEGREE           EDUC            AGE           MARITAL    
##  Min.   :0.000   Min.   : 0.00   Min.   :18.00   Min.   :1.00  
##  1st Qu.:1.000   1st Qu.:12.00   1st Qu.:34.00   1st Qu.:1.00  
##  Median :1.000   Median :14.00   Median :48.00   Median :2.00  
##  Mean   :1.684   Mean   :13.84   Mean   :49.13   Mean   :2.67  
##  3rd Qu.:3.000   3rd Qu.:16.00   3rd Qu.:63.00   3rd Qu.:5.00  
##  Max.   :4.000   Max.   :99.00   Max.   :99.00   Max.   :9.00  
##       HRS2               HRS1          WRKSTAT           ID_      
##  Min.   :-1.00000   Min.   :-1.00   Min.   :1.000   Min.   :   1  
##  1st Qu.:-1.00000   1st Qu.:-1.00   1st Qu.:1.000   1st Qu.: 588  
##  Median :-1.00000   Median :30.00   Median :2.000   Median :1176  
##  Mean   : 0.08017   Mean   :24.47   Mean   :2.963   Mean   :1175  
##  3rd Qu.:-1.00000   3rd Qu.:40.00   3rd Qu.:5.000   3rd Qu.:1762  
##  Max.   :99.00000   Max.   :99.00   Max.   :9.000   Max.   :2348  
##     UNHAPPY     
##  Min.   :0.000  
##  1st Qu.:0.000  
##  Median :0.000  
##  Mean   :1.039  
##  3rd Qu.:2.000  
##  Max.   :9.000
# Data pre-processing: turn the numeric codes that this script treats as
# missing into NA, then convert HAPPY, SEX and DEGREE to labelled factors.
gsscln <- gss2018 %>%
  # USETECH: -1, 998 and 999 are treated as missing.
  mutate(
    USETECH = na_if(USETECH, -1),
    USETECH = na_if(USETECH, 999),
    USETECH = na_if(USETECH, 998)
  ) %>%
  # AGE: 98 and 99 are treated as missing.
  mutate(
    AGE = na_if(AGE, 98),
    AGE = na_if(AGE, 99)
  ) %>%
  # HAPPY: 0, 8 and 9 are treated as missing.
  mutate(
    HAPPY = na_if(HAPPY, 8),
    HAPPY = na_if(HAPPY, 9),
    HAPPY = na_if(HAPPY, 0)
  ) %>%
  # DEGREE: 8 and 9 are treated as missing. The result must be stored back
  # in DEGREE; storing it in HAPPY would replace the happiness codes with
  # the degree codes.
  mutate(
    DEGREE = na_if(DEGREE, 8),
    DEGREE = na_if(DEGREE, 9)
  ) %>%
  # Any code not listed in `levels` becomes NA in the resulting factor.
  mutate(
    HAPPY = factor(
      x = HAPPY,
      levels = c(1, 2, 3),
      labels = c("Very Happy", "Pretty Happy", "Not to Happy")
    ),
    SEX = factor(x = SEX, levels = c(1, 2), labels = c("Male", "Female")),
    DEGREE = factor(
      x = DEGREE,
      levels = c(0, 1, 2, 3, 4),
      labels = c("<High", "High", "Junior College", "Bachelor", "Graduate")
    )
  )
  
  # Exploratory data analysis for comparing the means of multiple groups
  # Mean and standard deviation of USETECH within each DEGREE level,
  # computed only from rows where USETECH is not missing.
  usedeg <- gsscln %>%
    filter(!is.na(USETECH)) %>%
    group_by(DEGREE) %>%
    summarise(
      aveT = mean(USETECH),
      sdT = sd(USETECH)
    )

usedeg
## # A tibble: 5 × 3
##   DEGREE          aveT   sdT
##   <fct>          <dbl> <dbl>
## 1 <High           24.8  36.2
## 2 High            49.6  38.6
## 3 Junior College  62.4  35.2
## 4 Bachelor        67.9  32.1
## 5 Graduate        68.7  30.2
# Box plots of USETECH for every DEGREE group, with the individual
# observations drawn on top as jittered points.

ggplot(
  gsscln,
  mapping = aes(x = DEGREE, y = USETECH, fill = DEGREE)
) +
  geom_boxplot() +
  geom_jitter()
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_boxplot()`).
## Warning: Removed 936 rows containing missing values or values outside the scale range
## (`geom_point()`).

# Density curves of USETECH, one panel per DEGREE group.
ggplot(
  gsscln,
  mapping = aes(x = USETECH, fill = DEGREE)
) +
  geom_density(alpha = .4) +
  facet_wrap(~DEGREE)
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_density()`).

# One-way ANOVA of USETECH across DEGREE groups, assuming equal variances.
oneway.test(
  gsscln$USETECH ~ gsscln$DEGREE,
  var.equal = TRUE
)
## 
##  One-way analysis of means
## 
## data:  gsscln$USETECH and gsscln$DEGREE
## F = 43.304, num df = 4, denom df = 1404, p-value < 2.2e-16
# Post hoc analysis for the one-way ANOVA: pairwise t tests between DEGREE
# groups with Bonferroni-adjusted p-values.
pairwise.t.test(
  gsscln$USETECH,
  gsscln$DEGREE,
  p.adjust.method = "bonferroni"
)
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  gsscln$USETECH and gsscln$DEGREE 
## 
##                <High   High    Junior College Bachelor
## High           3.8e-11 -       -              -       
## Junior College 2.8e-15 0.0022  -              -       
## Bachelor       < 2e-16 8.0e-13 1.0000         -       
## Graduate       < 2e-16 7.3e-09 1.0000         1.0000  
## 
## P value adjustment method: bonferroni
# Tukey honest significant differences for the one-way model of USETECH
# on DEGREE.
TukeyHSD(
  aov(formula = USETECH ~ DEGREE, data = gsscln)
)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = USETECH ~ DEGREE, data = gsscln)
## 
## $DEGREE
##                               diff       lwr      upr     p adj
## High-<High              24.8247754 15.145211 34.50434 0.0000000
## Junior College-<High    37.6070313 25.201887 50.01218 0.0000000
## Bachelor-<High          43.0859568 32.653180 53.51873 0.0000000
## Graduate-<High          43.9107249 32.256416 55.56503 0.0000000
## Junior College-High     12.7822558  3.362603 22.20191 0.0020352
## Bachelor-High           18.2611813 11.651711 24.87065 0.0000000
## Graduate-High           19.0859494 10.679691 27.49221 0.0000000
## Bachelor-Junior College  5.4789255 -4.713166 15.67102 0.5833665
## Graduate-Junior College  6.3036936 -5.135659 17.74305 0.5592907
## Graduate-Bachelor        0.8247681 -8.438819 10.08835 0.9992282
# Normality check for the one-way ANOVA: a Q-Q plot of USETECH with a
# reference line, drawn in a separate panel for each DEGREE group.
ggplot(gsscln, mapping = aes(sample = USETECH)) +
  stat_qq(mapping = aes(colour = DEGREE)) +
  facet_wrap(~DEGREE) +
  stat_qq_line()
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_qq()`).
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_qq_line()`).

# Brown-Forsythe test: a one-way ANOVA run on each observation's absolute
# deviation from the median USETECH of its own DEGREE group.
gsscln <- gsscln %>%
  group_by(DEGREE) %>%
  mutate(usetrn = abs(USETECH - median(x = USETECH, na.rm = TRUE))) %>%
  ungroup()
# The grouping is only needed for the per-group median, so it is dropped
# above and gsscln is handed on as an ungrouped data frame.
# The test must use the transformed variable `usetrn`; running it on USETECH
# would only repeat the one-way ANOVA performed earlier in the script.
oneway.test(usetrn ~ DEGREE, data = gsscln, var.equal = TRUE)
## (Output not shown: the transcript previously recorded here, F = 43.304,
## was produced by the call on the untransformed USETECH. Re-run this chunk
## to record the result for usetrn.)
# Levene's test: checks whether the variance of USETECH is the same in
# every DEGREE group.
leveneTest(
  USETECH ~ DEGREE,
  data = gsscln
)
## Levene's Test for Homogeneity of Variance (center = median)
##         Df F value    Pr(>F)    
## group    4   18.44 8.845e-15 ***
##       1404                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Two-way ANOVA, exploratory plot: box plots of USETECH for each DEGREE
# group, split by SEX through the fill colour.
ggplot(
  gsscln,
  mapping = aes(x = DEGREE, y = USETECH)
) +
  geom_boxplot(mapping = aes(fill = SEX))
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_boxplot()`).

# Checking the interaction effect between the two factors: mean USETECH for
# each DEGREE level, drawn as points joined by one line per SEX.
# `fun` replaces the `fun.y` argument, which is deprecated as of ggplot2 3.3.0.
ggplot(gsscln, mapping = aes(x = DEGREE, y = USETECH, colour = SEX)) +
  stat_summary(fun = mean, geom = "point") +
  stat_summary(fun = mean, geom = "line", aes(group = SEX))
## Warning: Removed 936 rows containing non-finite outside the scale range
## (`stat_summary()`).
## Removed 936 rows containing non-finite outside the scale range
## (`stat_summary()`).

# Two-way ANOVA: USETECH modelled on DEGREE, SEX and their interaction.
x <- aov(formula = USETECH ~ DEGREE * SEX, data = gsscln)
print(x)
## Call:
##    aov(formula = USETECH ~ DEGREE * SEX, data = gsscln)
## 
## Terms:
##                    DEGREE       SEX DEGREE:SEX Residuals
## Sum of Squares   221300.6   16472.7    26509.9 1750774.6
## Deg. of Freedom         4         1          4      1399
## 
## Residual standard error: 35.3758
## Estimated effects may be unbalanced
## 936 observations deleted due to missingness
# Two-way ANOVA results: the table of F tests for the fitted model `x`.
print(summary(x))
##               Df  Sum Sq Mean Sq F value   Pr(>F)    
## DEGREE         4  221301   55325  44.209  < 2e-16 ***
## SEX            1   16473   16473  13.163 0.000296 ***
## DEGREE:SEX     4   26510    6627   5.296 0.000311 ***
## Residuals   1399 1750775    1251                     
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 936 observations deleted due to missingness
# Post hoc analysis for the two-way ANOVA: Tukey honest significant
# differences for every term of the fitted model `x`.
print(TukeyHSD(x))
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = USETECH ~ DEGREE * SEX, data = gsscln)
## 
## $DEGREE
##                               diff       lwr       upr     p adj
## High-<High              24.8247754 15.244768 34.404783 0.0000000
## Junior College-<High    37.6070313 25.329478 49.884584 0.0000000
## Bachelor-<High          43.0859568 32.760484 53.411429 0.0000000
## Graduate-<High          43.9107249 32.376284 55.445165 0.0000000
## Junior College-High     12.7822558  3.459487 22.105024 0.0017563
## Bachelor-High           18.2611813 11.719691 24.802671 0.0000000
## Graduate-High           19.0859494 10.766152 27.405746 0.0000000
## Bachelor-Junior College  5.4789255 -4.608337 15.566188 0.5733923
## Graduate-Junior College  6.3036936 -5.018002 17.625389 0.5490670
## Graduate-Bachelor        0.8247681 -8.343540  9.993076 0.9991960
## 
## $SEX
##                diff      lwr      upr     p adj
## Female-Male 6.80899 3.108699 10.50928 0.0003174
## 
## $`DEGREE:SEX`
##                                                  diff         lwr         upr
## High:Male-<High:Male                       17.8132060   2.7275183  32.8988937
## Junior College:Male-<High:Male             21.3181818  -0.4992077  43.1355713
## Bachelor:Male-<High:Male                   42.3151914  25.7902764  58.8401064
## Graduate:Male-<High:Male                   46.3538961  27.5496712  65.1581210
## <High:Female-<High:Male                    -2.0378788 -22.6075109  18.5317533
## High:Female-<High:Male                     30.1500000  15.0344692  45.2655308
## Junior College:Female-<High:Male           44.7418831  26.3028236  63.1809427
## Bachelor:Female-<High:Male                 42.0396406  25.8082011  58.2710800
## Graduate:Female-<High:Male                 40.1813241  22.0984520  58.2641962
## Junior College:Male-High:Male               3.5049758 -14.4610385  21.4709901
## Bachelor:Male-High:Male                    24.5019854  13.5542915  35.4496792
## Graduate:Male-High:Male                    28.5406901  14.3851943  42.6961858
## <High:Female-High:Male                    -19.8510848 -36.2793820  -3.4227876
## High:Female-High:Male                      12.3367940   3.6616307  21.0119573
## Junior College:Female-High:Male            26.9286771  13.2619985  40.5953557
## Bachelor:Female-High:Male                  24.2264346  13.7269673  34.7259018
## Graduate:Female-High:Male                  22.3681181   9.1859540  35.5502821
## Bachelor:Male-Junior College:Male          20.9970096   1.8065820  40.1874372
## Graduate:Male-Junior College:Male          25.0357143   3.8508477  46.2205808
## <High:Female-Junior College:Male          -23.3560606 -46.1224714  -0.5896498
## High:Female-Junior College:Male             8.8318182  -9.1592621  26.8228985
## Junior College:Female-Junior College:Male  23.4237013   2.5622868  44.2851158
## Bachelor:Female-Junior College:Male        20.7214588   1.7831557  39.6597618
## Graduate:Female-Junior College:Male        18.8631423  -1.6841193  39.4104039
## Graduate:Male-Bachelor:Male                 4.0387047 -11.6416301  19.7190396
## <High:Female-Bachelor:Male                -44.3530702 -62.1121183 -26.5940220
## High:Female-Bachelor:Male                 -12.1651914 -23.1539720  -1.1764108
## Junior College:Female-Bachelor:Male         2.4266917 -12.8138117  17.6671952
## Bachelor:Female-Bachelor:Male              -0.2755508 -12.7548798  12.2037783
## Graduate:Female-Bachelor:Male              -2.1338673 -16.9414427  12.6737082
## <High:Female-Graduate:Male                -48.3917749 -68.2892584 -28.4942914
## High:Female-Graduate:Male                 -16.2038961 -30.3911918  -2.0166004
## Junior College:Female-Graduate:Male        -1.6120130 -19.2981376  16.0741116
## Bachelor:Female-Graduate:Male              -4.3142555 -19.6849976  11.0564866
## Graduate:Female-Graduate:Male              -6.1725720 -23.4870269  11.1418829
## High:Female-<High:Female                   32.1878788  15.7321731  48.6435845
## Junior College:Female-<High:Female         46.7797619  27.2270154  66.3325084
## Bachelor:Female-<High:Female               44.0775194  26.5912218  61.5638170
## Graduate:Female-<High:Female               42.2192029  23.0019908  61.4364150
## Junior College:Female-High:Female          14.5918831   0.8922699  28.2914963
## Bachelor:Female-High:Female                11.8896406   1.3473395  22.4319416
## Graduate:Female-High:Female                10.0313241  -3.1849820  23.2476303
## Bachelor:Female-Junior College:Female      -2.7022425 -17.6240305  12.2195454
## Graduate:Female-Junior College:Female      -4.5605590 -21.4777217  12.3566037
## Graduate:Female-Bachelor:Female            -1.8583165 -16.3376501  12.6210171
##                                               p adj
## High:Male-<High:Male                      0.0072699
## Junior College:Male-<High:Male            0.0619111
## Bachelor:Male-<High:Male                  0.0000000
## Graduate:Male-<High:Male                  0.0000000
## <High:Female-<High:Male                   0.9999995
## High:Female-<High:Male                    0.0000000
## Junior College:Female-<High:Male          0.0000000
## Bachelor:Female-<High:Male                0.0000000
## Graduate:Female-<High:Male                0.0000000
## Junior College:Male-High:Male             0.9998264
## Bachelor:Male-High:Male                   0.0000000
## Graduate:Male-High:Male                   0.0000000
## <High:Female-High:Male                    0.0052315
## High:Female-High:Male                     0.0003049
## Junior College:Female-High:Male           0.0000000
## Bachelor:Female-High:Male                 0.0000000
## Graduate:Female-High:Male                 0.0000039
## Bachelor:Male-Junior College:Male         0.0192892
## Graduate:Male-Junior College:Male         0.0071871
## <High:Female-Junior College:Male          0.0389231
## High:Female-Junior College:Male           0.8690307
## Junior College:Female-Junior College:Male 0.0141081
## Bachelor:Female-Junior College:Male       0.0192858
## Graduate:Female-Junior College:Male       0.1039186
## Graduate:Male-Bachelor:Male               0.9983501
## <High:Female-Bachelor:Male                0.0000000
## High:Female-Bachelor:Male                 0.0167764
## Junior College:Female-Bachelor:Male       0.9999688
## Bachelor:Female-Bachelor:Male             1.0000000
## Graduate:Female-Bachelor:Male             0.9999867
## <High:Female-Graduate:Male                0.0000000
## High:Female-Graduate:Male                 0.0113631
## Junior College:Female-Graduate:Male       0.9999998
## Bachelor:Female-Graduate:Male             0.9967894
## Graduate:Female-Graduate:Male             0.9816675
## High:Female-<High:Female                  0.0000000
## Junior College:Female-<High:Female        0.0000000
## Bachelor:Female-<High:Female              0.0000000
## Graduate:Female-<High:Female              0.0000000
## Junior College:Female-High:Female         0.0261888
## Bachelor:Female-High:Female               0.0133486
## Graduate:Female-High:Female               0.3233313
## Bachelor:Female-Junior College:Female     0.9999069
## Graduate:Female-Junior College:Female     0.9976459
## Graduate:Female-Bachelor:Female           0.9999951