Exploratory Data Analysis

Load Libraries/Data

#####################Read and Pre-Clean the Data#######################
require(AICcmodavg)
## Loading required package: AICcmodavg
## Warning: package 'AICcmodavg' was built under R version 4.0.5
require(Amelia)
## Loading required package: Amelia
## Warning: package 'Amelia' was built under R version 4.0.5
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.0.5
## ## 
## ## Amelia II: Multiple Imputation
## ## (Version 1.8.0, built: 2021-05-26)
## ## Copyright (C) 2005-2021 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
require(bestglm)
## Loading required package: bestglm
## Warning: package 'bestglm' was built under R version 4.0.5
## Loading required package: leaps
## Warning: package 'leaps' was built under R version 4.0.5
require(car)
## Loading required package: car
## Warning: package 'car' was built under R version 4.0.5
## Loading required package: carData
require(carData)
require(corrplot)
## Loading required package: corrplot
## corrplot 0.90 loaded
require(dplyr)
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 4.0.5
## 
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
## 
##     recode
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
require(ggcorrplot)
## Loading required package: ggcorrplot
## Warning: package 'ggcorrplot' was built under R version 4.0.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.5
require(ggpubr)
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.0.5
require(gridExtra)
## Loading required package: gridExtra
## Warning: package 'gridExtra' was built under R version 4.0.5
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
require(gtable)
## Loading required package: gtable
## Warning: package 'gtable' was built under R version 4.0.5
require(heplots)
## Loading required package: heplots
## Warning: package 'heplots' was built under R version 4.0.5
require(kableExtra)
## Loading required package: kableExtra
## Warning: package 'kableExtra' was built under R version 4.0.5
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
require(leaps)
require(magrittr)
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 4.0.5
require(MANOVA.RM)
## Loading required package: MANOVA.RM
## Warning: package 'MANOVA.RM' was built under R version 4.0.5
require(MASS)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
require(MVN)
## Loading required package: MVN
## Warning: package 'MVN' was built under R version 4.0.5
require(mvtnorm)
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 4.0.5
require(nnet)
## Loading required package: nnet
## Warning: package 'nnet' was built under R version 4.0.5
require(psych) #to describe
## Loading required package: psych
## Warning: package 'psych' was built under R version 4.0.5
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
## The following object is masked from 'package:car':
## 
##     logit
require(purrr)
## Loading required package: purrr
## Warning: package 'purrr' was built under R version 4.0.5
## 
## Attaching package: 'purrr'
## The following object is masked from 'package:magrittr':
## 
##     set_names
## The following object is masked from 'package:car':
## 
##     some
require(ggplot2)
require(ggcorrplot)
require(qcc)
## Loading required package: qcc
## Warning: package 'qcc' was built under R version 4.0.5
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
require(rcompanion)
## Loading required package: rcompanion
## Warning: package 'rcompanion' was built under R version 4.0.5
## 
## Attaching package: 'rcompanion'
## The following object is masked from 'package:psych':
## 
##     phi
require(reticulate) #to use Python in R as well
## Loading required package: reticulate
require(reshape2)
## Loading required package: reshape2
## Warning: package 'reshape2' was built under R version 4.0.5
require(ResourceSelection)
## Loading required package: ResourceSelection
## Warning: package 'ResourceSelection' was built under R version 4.0.5
## ResourceSelection 0.3-5   2019-07-22
require(rstatix)
## Loading required package: rstatix
## Warning: package 'rstatix' was built under R version 4.0.5
## 
## Attaching package: 'rstatix'
## The following object is masked from 'package:MASS':
## 
##     select
## The following object is masked from 'package:ggcorrplot':
## 
##     cor_pmat
## The following object is masked from 'package:stats':
## 
##     filter
require(tidyverse)
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble  3.1.2     v stringr 1.4.0
## v tidyr   1.1.3     v forcats 0.5.1
## v readr   2.0.1
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'stringr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x psych::%+%()             masks ggplot2::%+%()
## x psych::alpha()           masks ggplot2::alpha()
## x gridExtra::combine()     masks dplyr::combine()
## x tidyr::extract()         masks magrittr::extract()
## x rstatix::filter()        masks dplyr::filter(), stats::filter()
## x kableExtra::group_rows() masks dplyr::group_rows()
## x dplyr::lag()             masks stats::lag()
## x dplyr::recode()          masks car::recode()
## x rstatix::select()        masks MASS::select(), dplyr::select()
## x purrr::set_names()       masks magrittr::set_names()
## x purrr::some()            masks car::some()

Base Functions

#########################################################################

#Correlation Plot Function
corfunction=function(d){
  mycorr=cor(d[, 1:ncol(d)]); p.mat=ggcorrplot::cor_pmat(d[,1:ncol(d)])
  myplot=ggcorrplot(mycorr, hc.order=TRUE,type="lower",
                    colors=c("red", "white","green"),tl.cex = 8, 
                    tl.col = "black", lab=TRUE, lab_size=2, p.mat=p.mat,
                    insig="pch", pch=4)
  print(myplot)}


#Barplot Function, Horizontal
mybar=function(df,y,fill,perc,title){
  a=ggplot(df, aes(x='',y, fill=fill, label=paste0(n," (", scales::percent(perc), ")")))+
  geom_bar(position="stack", stat="identity", width=.5)+
  geom_text(size = 3, position = position_stack(vjust = 0.5))+
  xlab("")+
  ylab("")+
  theme(legend.title = element_blank())+
  ggtitle(title)+
  theme(legend.text = element_text(size = 8))+
  theme(legend.position="right")+
  coord_flip()
  return(a)
}

#Barplot Function, Vertical

mybar2=function(df,y,fill,perc,title){
  a=ggplot(df, aes(x='',y, fill=fill, label=paste0(n," (", scales::percent(perc), ")")))+
  geom_bar(position="stack", stat="identity", width=.5)+
  geom_text(size = 3, position = position_stack(vjust = 0.5))+
  xlab("")+
  ylab("")+
  theme(legend.title = element_blank())+
  ggtitle(title)+
  theme(legend.text = element_text(size = 8))+
  theme(legend.position="right")
  return(a)
}

#Barplot Function, 2 Component

mybar3=function(y,varname,rotate=90){
  myt=table(mydata$Willingness_4Cat, y)
  myt=apply(myt,2,function(z) z/sum(z))
  myt=as.data.frame(myt)
  myt$Willingness=rownames(myt)
  myt=melt(myt, id.vars="Willingness")
  colnames(myt)=c("Willingness", varname,"Percent")
  a=ggplot(myt, aes(myt[,2], Percent, fill=Willingness, label=scales::percent(Percent)))+ 
  geom_bar( stat = "identity", position = "stack" ) +
    geom_text(size = 3, angle=rotate,position = position_stack(vjust = 0.5))+
  coord_flip() +
    xlab("")+
    ylab("")+
   theme_minimal() + theme(legend.title=element_blank(),legend.position = "bottom" )+
    ggtitle(varname)
  return(a)
}



#Boxplot Function, Y Variable

box=function(df,x,y,title){
  g=ggplot(df, aes(x, y , fill=x))+
            geom_boxplot(notch=TRUE)+
            ggtitle(title)+
            ylab("")+ 
            xlab("")+
            coord_flip()+
            theme(legend.position="none")
 return(g)
}

#Boxplot without Y variable

box2=function(df,x,title){
  g=ggplot(df, aes(x, fill="red"))+
            geom_boxplot()+
            ggtitle(title)+
            ylab("")+ 
            xlab("")+
            theme(legend.position="none")
  return(g)
}

#Pretty Print Function
myprint=function(x,y, z=""){
  x%>%kbl(col.names=y, caption=z)%>%kable_classic(html_font = "Cambria",full_width=F)
}

#Citation Function
cite=function(x){citation(x)}

#P Function

pfunction=function(p,x){

for (i in 1: length(x)){if(p[i]>.1){x[i]=paste0(x[i],"")} else{
  if(p[i]<.001){x[i]=paste0(x[i],"***")} else{
    if(p[i]<.01){x[i]=paste0(x[i], "**")} else{
      if(p[i]<.05){x[i]=paste0(x[i], "*")} else
      {x[i]=paste0(x[i], "+")}
      }
    }
  }
}
  return(noquote(x))

}


feedback=c("Boxplots", "Bargraphs", "Correlations", "Printing", "Citations", "P-Values")
myprint(feedback, "Functions")
Functions
Boxplots
Bargraphs
Correlations
Printing
Citations
P-Values
#########################################################################

Load / Impute

#########################################################################

mydata=read.csv("D:/Emily/data2.csv", stringsAsFactors = TRUE)
myprint(colnames(mydata),"Columns")
Columns
ID
Willingness_4Cat
Willingness_2Cat
Behavior_Reverse
Knowledge_Reverse
Risk_Knowledge_Reverse
Hesitancy
Risk_Score
News_Frequency
Trust
Greater_Risk_COVID
Had_COVID
Died_COVID
Wash_Hands
Wear_Mask
Social_Distance
Behavior_Score
Air_Spread
Mask_Effective
Asymptomatic_Pose_Risk
College_Deaths
Mostly_Mild
Knowledge_Score
Concern_Self
Likely_to_Get
Likely_Get_Sick
Live_Others
Class_in_Person
Inperson_Job
Medical_Condition
Super_Spreader
No_Distancing
No_Mask
Travel
Safe_Live_Others
Safe_Attend_Class
Safe_F2F_Job
Safe_Underlying_Condition
Safe_Gatherings..50
Safe_Socials_No_Distancing
Safe_Socials_No_Mask
Safe_Travel
Risk_Knowledge_Score
Concern_Others
Change_Others
Definition_Vaccine
Safety_Vaccine
Better_to_be_Sick
Over_Vaccination_Harmful
Herd_Immunity
Hesitancy_Score
Child_Vax
Annual_Flu_Shot
Flu_Shot_Reason
Meningitis_Required
COVID_Required
Family_Accept
Friends_Accept
US_Accept
CDC_Freq
Local_News_Freq
National_News_Freq
International_News_Freq
Social_Media_Freq
Texas_State_Freq
College_Friends_Freq
Parents_Freq
Professors_Freq
Adults_Freq
Doctor_Freq
Frequency_Score
Trust_CDC
Trust_Local
Trust_National
Trust_International
Trust_Social_Media
Trust_Texas_State
Trust_Friends
Trust_Parents
Trust_Professors
Trust_Adults
Trust_Doctor
Trust_Score
Politics_5Cat
Politics_3Cat
Race
White
Hispanic
Gender
Gender_2Cat
Age
Age_Cat
Year
Health_Maj_Min
Income
Insurance
#########################################################################

Check Missing

#########################################################################

missmap(mydata)

#########################################################################

Descriptives

Gender Imputation of Other

tempdata=mydata[mydata$Gender!='Other',]  #Set up temporary data
tempdata$Gender=factor(tempdata$Gender)   #Reset gender on 2 levels
preddata=mydata[mydata$Gender=='Other',]  #Keep a prediction group
###Variables Significant to the Prediction###
mylr=glm(Gender~Race+Age+Year+Willingness_4Cat,
         data=tempdata,family='binomial')
###Generate Predictions###
mypred=mylr%>%predict(preddata, type='response')
mypred=round(mypred,0)
mypred[mypred==0]='Female'
mypred[mypred==1]='Male'
###Impute###
mydata$Gender_2Cat=mydata$Gender
myseq=c(2,54,62,94,111,177,186,250,309,316,383,492,522,553)

j=1
for (i in myseq){
  mydata$Gender_2Cat[i]=mypred[j]
  j=j+1}

mydata$Gender_2Cat=factor(mydata$Gender_2Cat) ##reset 2 levels
table(mydata$Gender_2Cat)%>%kbl()%>%kable_classic(html_font='Cambria')
Var1 Freq
Female 307
Male 307
write.csv(mydata, "D:/Emily/data2.csv", row.names=FALSE)

Overall Descriptives

round(describe(mydata),3)%>%kbl(caption="Descriptives")%>%kable_classic()
Descriptives
vars n mean sd median trimmed mad min max range skew kurtosis se
ID 1 614 307.500 177.391 307.500 307.500 227.579 1.00 614.000 613.000 0.000 -1.206 7.159
Willingness_4Cat* 2 614 3.083 0.970 3.000 3.228 1.483 1.00 4.000 3.000 -0.862 -0.245 0.039
Willingness_2Cat* 3 614 1.780 0.414 2.000 1.850 0.000 1.00 2.000 1.000 -1.349 -0.179 0.017
Behavior_Reverse 4 614 0.157 0.160 0.091 0.132 0.135 0.00 0.818 0.818 1.467 2.436 0.006
Knowledge_Reverse 5 614 0.113 0.153 0.000 0.084 0.000 0.00 0.800 0.800 1.507 2.675 0.006
Risk_Knowledge_Reverse 6 614 0.224 0.159 0.194 0.202 0.143 0.00 0.774 0.774 1.309 1.636 0.006
Hesitancy 7 614 0.201 0.212 0.200 0.170 0.297 0.00 1.000 1.000 1.071 0.941 0.009
Risk_Score 8 614 0.568 0.201 0.625 0.578 0.185 0.00 1.000 1.000 -0.491 -0.395 0.008
News_Frequency 9 614 0.551 0.137 0.545 0.545 0.135 0.25 1.000 0.750 0.448 0.319 0.006
Trust 10 614 0.598 0.116 0.591 0.602 0.101 0.25 1.000 0.750 -0.274 0.808 0.005
Greater_Risk_COVID* 11 614 1.713 0.453 2.000 1.766 0.000 1.00 2.000 1.000 -0.941 -1.116 0.018
Had_COVID* 12 614 1.790 0.408 2.000 1.862 0.000 1.00 2.000 1.000 -1.420 0.016 0.016
Died_COVID* 13 614 1.168 0.374 1.000 1.085 0.000 1.00 2.000 1.000 1.774 1.149 0.015
Wash_Hands* 14 614 3.205 0.878 3.000 3.317 1.483 1.00 4.000 3.000 -0.843 -0.186 0.035
Wear_Mask* 15 614 3.774 0.604 4.000 3.937 0.000 1.00 4.000 3.000 -2.915 8.266 0.024
Social_Distance* 16 614 3.296 0.804 3.000 3.415 1.483 1.00 4.000 3.000 -0.998 0.425 0.032
Behavior_Score 17 614 10.275 1.763 11.000 10.551 1.483 3.00 12.000 9.000 -1.467 2.436 0.071
Air_Spread* 18 614 1.839 0.368 2.000 1.923 0.000 1.00 2.000 1.000 -1.838 1.380 0.015
Mask_Effective* 19 614 1.935 0.247 2.000 2.000 0.000 1.00 2.000 1.000 -3.516 10.376 0.010
Asymptomatic_Pose_Risk* 20 614 1.945 0.229 2.000 2.000 0.000 1.00 2.000 1.000 -3.879 13.065 0.009
College_Deaths* 21 614 1.940 0.238 2.000 2.000 0.000 1.00 2.000 1.000 -3.687 11.611 0.010
Mostly_Mild* 22 614 1.777 0.417 2.000 1.846 0.000 1.00 2.000 1.000 -1.327 -0.240 0.017
Knowledge_Score* 23 614 4.435 0.763 5.000 4.581 0.000 1.00 5.000 4.000 -1.507 2.675 0.031
Concern_Self* 24 614 2.446 0.912 2.000 2.433 1.483 1.00 4.000 3.000 -0.041 -0.832 0.037
Likely_to_Get* 25 614 2.549 0.828 3.000 2.561 1.483 1.00 4.000 3.000 -0.190 -0.519 0.033
Likely_Get_Sick* 26 614 3.112 0.995 3.000 3.142 1.483 1.00 5.000 4.000 -0.236 0.022 0.040
Live_Others* 27 614 1.917 0.276 2.000 2.000 0.000 1.00 2.000 1.000 -3.014 7.097 0.011
Class_in_Person* 28 614 1.547 0.498 2.000 1.559 0.000 1.00 2.000 1.000 -0.189 -1.967 0.020
Inperson_Job* 29 614 1.518 0.500 2.000 1.522 0.000 1.00 2.000 1.000 -0.072 -1.998 0.020
Medical_Condition* 30 614 1.145 0.352 1.000 1.057 0.000 1.00 2.000 1.000 2.012 2.052 0.014
Super_Spreader* 31 614 1.223 0.417 1.000 1.154 0.000 1.00 2.000 1.000 1.327 -0.240 0.017
No_Distancing* 32 614 1.337 0.473 1.000 1.297 0.000 1.00 2.000 1.000 0.687 -1.530 0.019
No_Mask* 33 614 1.257 0.438 1.000 1.197 0.000 1.00 2.000 1.000 1.107 -0.775 0.018
Travel* 34 614 1.513 0.500 2.000 1.516 0.000 1.00 2.000 1.000 -0.052 -2.001 0.020
Safe_Live_Others* 35 614 2.218 0.800 2.000 2.215 1.483 1.00 4.000 3.000 0.122 -0.577 0.032
Safe_Attend_Class* 36 614 2.762 0.924 3.000 2.827 1.483 1.00 4.000 3.000 -0.381 -0.669 0.037
Safe_F2F_Job* 37 614 2.992 0.877 3.000 3.077 1.483 1.00 4.000 3.000 -0.608 -0.300 0.035
Safe_Underlying_Condition* 38 614 3.526 0.784 4.000 3.707 0.000 1.00 4.000 3.000 -1.727 2.381 0.032
Safe_Gatherings..50* 39 614 3.603 0.721 4.000 3.770 0.000 1.00 4.000 3.000 -1.896 3.082 0.029
Safe_Socials_No_Distancing* 40 614 3.515 0.746 4.000 3.671 0.000 1.00 4.000 3.000 -1.602 2.188 0.030
Safe_Socials_No_Mask* 41 614 3.562 0.748 4.000 3.730 0.000 1.00 4.000 3.000 -1.821 2.875 0.030
Safe_Travel* 42 614 2.871 0.817 3.000 2.907 1.483 1.00 4.000 3.000 -0.352 -0.387 0.033
Risk_Knowledge_Score 43 614 25.049 4.934 26.000 25.732 4.448 8.00 32.000 24.000 -1.309 1.636 0.199
Concern_Others* 44 614 3.015 0.846 3.000 3.077 1.483 1.00 4.000 3.000 -0.496 -0.467 0.034
Change_Others* 45 614 3.590 0.754 4.000 3.764 0.000 1.00 4.000 3.000 -2.014 3.606 0.030
Definition_Vaccine* 46 614 1.954 0.209 2.000 2.000 0.000 1.00 2.000 1.000 -4.346 16.911 0.008
Safety_Vaccine* 47 614 1.853 0.354 2.000 1.941 0.000 1.00 2.000 1.000 -1.994 1.978 0.014
Better_to_be_Sick* 48 614 1.818 0.386 2.000 1.896 0.000 1.00 2.000 1.000 -1.641 0.693 0.016
Over_Vaccination_Harmful* 49 614 1.557 0.497 2.000 1.571 0.000 1.00 2.000 1.000 -0.229 -1.951 0.020
Herd_Immunity* 50 614 1.811 0.392 2.000 1.888 0.000 1.00 2.000 1.000 -1.585 0.515 0.016
Hesitancy_Score* 51 614 4.993 1.062 5.000 5.148 1.483 1.00 6.000 5.000 -1.071 0.941 0.043
Child_Vax* 52 614 3.871 0.421 4.000 3.994 0.000 1.00 4.000 3.000 -4.042 19.344 0.017
Annual_Flu_Shot* 53 614 1.441 0.497 1.000 1.427 0.000 1.00 2.000 1.000 0.236 -1.948 0.020
Flu_Shot_Reason* 54 614 5.265 2.733 5.000 5.376 4.448 1.00 10.000 9.000 -0.228 -1.283 0.110
Meningitis_Required* 55 614 3.865 0.464 4.000 4.000 0.000 1.00 4.000 3.000 -3.988 17.172 0.019
COVID_Required* 56 614 2.743 1.156 3.000 2.803 1.483 1.00 4.000 3.000 -0.184 -1.476 0.047
Family_Accept* 57 614 2.417 1.051 3.000 2.396 1.483 1.00 4.000 3.000 -0.201 -1.289 0.042
Friends_Accept* 58 614 2.143 1.059 2.000 2.057 1.483 1.00 4.000 3.000 0.207 -1.383 0.043
US_Accept* 59 614 2.651 0.685 3.000 2.661 0.000 1.00 4.000 3.000 -0.342 0.033 0.028
CDC_Freq* 60 614 2.244 0.871 2.000 2.189 1.483 1.00 4.000 3.000 0.350 -0.518 0.035
Local_News_Freq* 61 614 2.132 0.878 2.000 2.075 1.483 1.00 4.000 3.000 0.377 -0.589 0.035
National_News_Freq* 62 614 2.391 0.919 2.000 2.364 1.483 1.00 4.000 3.000 0.108 -0.826 0.037
International_News_Freq* 63 614 1.976 0.871 2.000 1.896 1.483 1.00 4.000 3.000 0.580 -0.397 0.035
Social_Media_Freq* 64 614 2.609 1.050 3.000 2.636 1.483 1.00 4.000 3.000 -0.107 -1.195 0.042
Texas_State_Freq* 65 614 2.441 0.881 2.000 2.427 1.483 1.00 4.000 3.000 0.107 -0.701 0.036
College_Friends_Freq* 66 614 2.160 0.891 2.000 2.083 1.483 1.00 4.000 3.000 0.472 -0.463 0.036
Parents_Freq* 67 614 2.386 0.929 2.000 2.358 1.483 1.00 4.000 3.000 0.236 -0.795 0.038
Professors_Freq* 68 614 1.923 0.850 2.000 1.839 1.483 1.00 4.000 3.000 0.654 -0.226 0.034
Adults_Freq* 69 614 1.839 0.809 2.000 1.744 1.483 1.00 4.000 3.000 0.799 0.217 0.033
Doctor_Freq* 70 614 2.125 0.974 2.000 2.033 1.483 1.00 4.000 3.000 0.520 -0.711 0.039
Frequency_Score 71 614 24.226 6.049 24.000 23.974 5.930 11.00 44.000 33.000 0.448 0.319 0.244
Trust_CDC* 72 614 3.163 0.892 3.000 3.256 1.483 1.00 4.000 3.000 -0.681 -0.604 0.036
Trust_Local* 73 614 2.246 0.709 2.000 2.264 0.000 1.00 4.000 3.000 0.186 -0.140 0.029
Trust_National* 74 614 2.275 0.780 2.000 2.278 1.483 1.00 4.000 3.000 0.140 -0.418 0.031
Trust_International* 75 614 2.322 0.794 2.000 2.321 1.483 1.00 4.000 3.000 0.143 -0.431 0.032
Trust_Social_Media* 76 614 1.702 0.621 2.000 1.650 0.000 1.00 4.000 3.000 0.467 0.174 0.025
Trust_Texas_State* 77 614 2.673 0.782 3.000 2.675 1.483 1.00 4.000 3.000 -0.194 -0.348 0.032
Trust_Friends* 78 614 1.932 0.590 2.000 1.904 0.000 1.00 4.000 3.000 0.254 0.756 0.024
Trust_Parents* 79 614 2.436 0.772 2.000 2.431 1.483 1.00 4.000 3.000 0.159 -0.355 0.031
Trust_Professors* 80 614 2.337 0.703 2.000 2.356 0.000 1.00 4.000 3.000 0.182 -0.138 0.028
Trust_Adults* 81 614 2.010 0.604 2.000 1.998 0.000 1.00 4.000 3.000 0.306 0.737 0.024
Trust_Doctor* 82 614 3.199 0.793 3.000 3.278 1.483 1.00 4.000 3.000 -0.662 -0.291 0.032
Trust_Score 83 614 26.295 5.110 26.000 26.488 4.448 11.00 44.000 33.000 -0.274 0.807 0.206
Politics_5Cat* 84 614 2.003 1.122 2.000 1.880 1.483 1.00 4.000 3.000 0.623 -1.093 0.045
Politics_3Cat* 85 614 1.707 0.761 2.000 1.634 1.483 1.00 3.000 2.000 0.546 -1.091 0.031
Race* 86 614 1.769 0.997 1.000 1.575 0.000 1.00 5.000 4.000 1.450 1.672 0.040
White* 87 614 1.497 0.500 1.000 1.496 0.000 1.00 2.000 1.000 0.013 -2.003 0.020
Hispanic* 88 614 1.360 0.480 1.000 1.325 0.000 1.00 2.000 1.000 0.582 -1.664 0.019
Gender* 89 614 1.534 0.543 2.000 1.514 1.483 1.00 3.000 2.000 0.292 -1.068 0.022
Gender_2Cat* 90 614 1.500 0.500 1.500 1.500 0.741 1.00 2.000 1.000 0.000 -2.003 0.020
Age 91 614 20.300 1.499 20.000 20.270 1.483 18.00 24.000 6.000 0.112 -0.912 0.060
Age_Cat* 92 614 3.296 1.492 3.000 3.270 1.483 1.00 6.000 5.000 0.090 -0.960 0.060
Year* 93 614 2.764 1.105 3.000 2.829 1.483 1.00 4.000 3.000 -0.321 -1.255 0.045
Health_Maj_Min* 94 614 1.384 0.487 1.000 1.356 0.000 1.00 2.000 1.000 0.474 -1.778 0.020
Income* 95 614 2.919 1.497 3.000 2.898 1.483 1.00 5.000 4.000 0.069 -1.428 0.060
Insurance* 96 614 1.816 0.388 2.000 1.894 0.000 1.00 2.000 1.000 -1.627 0.647 0.016
par(mfrow=c(2,2))
boxplot(mydata$Behavior_Reverse, horizontal=TRUE, notch=FALSE, col='red', main="Misbehavior", ylim=c(0,1))
boxplot(mydata$Knowledge_Reverse, horizontal=TRUE, notch=FALSE, col='red', main="(Lack of) Knowledge",ylim=c(0,1))
boxplot(mydata$Risk_Knowledge_Reverse, horizontal=TRUE, notch=FALSE, col='red', main="(Lack of) Risk Knowledge",ylim=c(0,1))
boxplot(mydata$Hesitancy, horizontal=TRUE, notch=FALSE, col='red', main="Hesitancy",ylim=c(0,1))

Gender, Q72

#########################################################################}
options(digits=3)

mydata%>%count(Gender)%>%mutate(perc = n / nrow(mydata)) -> mydf1
mydata%>%count(Gender_2Cat)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf1,colnames(mydf1), "Gender")
Gender
Gender n perc
Female 300 0.489
Male 300 0.489
Other 14 0.023
myprint(mydf2,colnames(mydf2), "Gender")
Gender
Gender_2Cat n perc
Female 307 0.5
Male 307 0.5
colnames(mydf1)=c("Gender", "n", "perc")
colnames(mydf2)=c("Gender", "n", "perc")

a=mybar(mydf1,mydf1$n,mydf1$Gender,mydf1$perc, "Gender")
b=mybar(mydf2,mydf2$n,mydf2$Gender,mydf2$perc,"Gender")

print(ggarrange(a,b,nrow=2))

mybar3(mydata$Gender_2Cat,"Gender")

#########################################################################

Ethnicity / Race, Q71

#########################################################################

mydata%>%count(Race)%>%mutate(perc = n / nrow(mydata)) -> mydf1
mydata%>%count(White)%>%mutate(perc = n / nrow(mydata)) -> mydf2
mydata%>%count(Hispanic)%>%mutate(perc = n / nrow(mydata)) -> mydf3

myprint(mydf1,colnames(mydf1))
Race n perc
  1. White
309 0.503
  1. Hisp. White
205 0.334
  1. Other
49 0.080
  1. Black
35 0.057
  1. Hisp. Black
16 0.026
myprint(mydf2,colnames(mydf2))
White n perc
  1. White
309 0.503
  1. Non-White
305 0.497
myprint(mydf3,colnames(mydf3))
Hispanic n perc
  1. Not Hispanic
393 0.64
  1. Hispanic
221 0.36
a=mybar2(mydf1,mydf1$n,mydf1$Race, mydf1$perc,"Race")
b=mybar(mydf2,mydf2$n,mydf2$White,mydf2$perc,"Race")
d=mybar(mydf3, mydf3$n, mydf3$Hispanic, mydf3$perc, "Hispanic")
print(ggarrange(b,d,nrow=2))

print(a)

mybar3(mydata$Race,"Race", 45)

#########################################################################

Year, Q74

#########################################################################
mydata%>%count(Year)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Year n perc
  1. Freshman
110 0.179
  1. Sophomore
134 0.218
  1. Junior
161 0.262
  1. Senior
209 0.340
mybar(mydf1,mydf1$n,mydf1$Year,mydf1$perc,"Year")

mybar3(mydata$Year,"Year")

#########################################################################

Income, Q76

#########################################################################
mydata%>%count(Income)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Income n perc
  1. <$25K
157 0.256
  1. <$50K
110 0.179
  1. <$75K
106 0.173
  1. <$100K
108 0.176
  1. >$100K
133 0.217
mybar(mydf1,mydf1$n,mydf1$Income,mydf1$perc,"Household Income")

mybar3(mydata$Income,"Income")

#########################################################################

Politics, Q70

#########################################################################

mydata%>%count(Politics_5Cat)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Politics_5Cat n perc
  1. Democrat
294 0.479
  1. Independent
115 0.187
  1. Republican
114 0.186
  1. Other
91 0.148
mybar(mydf1,mydf1$n,mydf1$Politics_5Cat,mydf1$perc,"Politics")

mydata%>%count(Politics_3Cat)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Politics_3Cat n perc
  1. Democrat
294 0.479
  1. Other
206 0.336
  1. Republican
114 0.186
mybar(mydf1,mydf1$n,mydf1$Politics_3Cat,mydf1$perc,"Politics")

mybar3(mydata$Politics_3Cat,"Politics")

#########################################################################

Health Science Field, Q75

#########################################################################

mydata%>%count(Health_Maj_Min)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Health_Maj_Min n perc
  1. No
378 0.616
  1. Yes
236 0.384
mybar(mydf1,mydf1$n,mydf1$Health_Maj_Min,mydf1$perc,"Health Science Major / Minor")

mybar3(mydata$Health_Maj_Min,"Health Major or Minor")

#########################################################################

Take Vaccine, Q1

#########################################################################

mydata%>%count(Willingness_4Cat)%>%mutate(perc = n / nrow(mydata)) -> mydf1
mydata%>%count(Willingness_2Cat)%>%mutate(perc = n / nrow(mydata)) -> mydf2
colnames(mydf1)=c("Willingness", "n", "perc")
colnames(mydf2)=c("Willingness", "n", "perc")
myprint(mydf1,colnames(mydf1))
Willingness n perc
  1. Never
65 0.106
  1. If Required
70 0.114
  1. Eventually
228 0.371
  1. ASAP
251 0.409
myprint(mydf2,colnames(mydf2))
Willingness n perc
Unwilling 135 0.22
Willing 479 0.78
a=mybar(mydf1,mydf1$n,mydf1$Willingness,mydf1$perc,"Willingness")
b=mybar(mydf2,mydf2$n,mydf2$Willingness,mydf2$perc,"Willingness")
grid.arrange(a,b,nrow=2)

#########################################################################

Age, Q73

#########################################################################

mydata%>%count(Age)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Age n perc
18 87 0.142
19 114 0.186
20 137 0.223
21 131 0.213
22 96 0.156
23 47 0.077
24 2 0.003
boxplot(mydata$Age, col="red", main="Age", horizontal=TRUE)

mybar3(mydata$Age_Cat,"Age")

#########################################################################

Insurance, Q77

#########################################################################

mydata%>%count(Insurance)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Insurance n perc
  1. No
113 0.184
  1. Yes
501 0.816
mybar(mydf1,mydf1$n,mydf1$Insurance,mydf1$perc,"Insurance")

mybar3(mydata$Insurance,"Insurance")

#########################################################################

Vaccine Views, Q40-Q44

#########################################################################

mydata%>%count(Child_Vax)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Child_Vax n perc
  1. Not Sure
5 0.008
  1. None
5 0.008
  1. Partially
54 0.088
  1. Completely
550 0.896
a=mybar2(mydf1,mydf1$n,mydf1$Child_Vax,mydf1$perc,"Child Vaccinations")

mydata%>%count(Annual_Flu_Shot)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf2, colnames(mydf2))
Annual_Flu_Shot n perc
  1. No
343 0.559
  1. Yes
271 0.441
b=mybar2(mydf2,mydf2$n,mydf2$Annual_Flu_Shot,mydf2$perc,"Annual Flu Shot")


mydata%>%count(Meningitis_Required)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf3, colnames(mydf3))
Meningitis_Required n perc
  1. Illegal & Unethical
6 0.010
  1. Illegal but Ethical
12 0.020
  1. Legal but Unethical
41 0.067
  1. Legal & Ethical
555 0.904
d=mybar2(mydf3,mydf3$n,mydf3$Meningitis_Required,mydf3$perc,"Meningitis Required")

mydata%>%count(COVID_Required)%>%mutate(perc = n / nrow(mydata)) -> mydf4
myprint(mydf4, colnames(mydf4))
COVID_Required n perc
  1. Absolutely Not
110 0.179
  1. Probably Not
179 0.292
  1. Probably Yes
84 0.137
  1. Absolutely Yes
241 0.393
e=mybar2(mydf4,mydf4$n,mydf4$COVID_Required,mydf4$perc,"COVID-19 Required")

print(ggarrange(d,e,ncol=2))

print(ggarrange(a,b,ncol=2))

mybar3(mydata$Child_Vax,"Vaccination as Child", 90)

#########################################################################

Personal Familiarity, Q3-5

#########################################################################
mydata%>%count(Greater_Risk_COVID)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Greater_Risk_COVID n perc
  1. No
176 0.287
  1. Yes
438 0.713
a=mybar(mydf1,mydf1$n,mydf1$Greater_Risk_COVID,mydf1$perc,"Greater Risk COVID")

mydata%>%count(Had_COVID)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf2, colnames(mydf2))
Had_COVID n perc
  1. No
129 0.21
  1. Yes
485 0.79
b=mybar(mydf2,mydf2$n,mydf2$Had_COVID,mydf2$perc,"Had COVID")

mydata%>%count(Died_COVID)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf3, colnames(mydf3))
Died_COVID n perc
  1. No
511 0.832
  1. Yes
103 0.168
d=mybar(mydf3,mydf3$n,mydf3$Died_COVID,mydf3$perc,"Died COVID")

print(ggarrange(a,b,d, nrow=3))

mybar3(mydata$Greater_Risk_COVID,"Greater Risk COVID")

mybar3(mydata$Had_COVID,"Had COVID")

mybar3(mydata$Died_COVID,"Knew Somebody who Died of COVID")

#########################################################################

Behavior, Q6-8

#########################################################################

mydata%>%count(Wash_Hands)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Wash_Hands n perc
  1. Almost Never
30 0.049
  1. Sometimes
96 0.156
  1. Often
206 0.336
  1. Always
282 0.459
a=mybar2(mydf1,mydf1$n,mydf1$Wash_Hands,mydf1$perc,"Wash Hands")

mydata%>%count(Wear_Mask)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf2, colnames(mydf2))
Wear_Mask n perc
  1. Almost Never
11 0.018
  1. Sometimes
25 0.041
  1. Often
56 0.091
  1. Always
522 0.850
b=mybar2(mydf2,mydf2$n,mydf2$Wear_Mask,mydf2$perc,"Wear Mask")

mydata%>%count(Social_Distance)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf3, colnames(mydf3))
Social_Distance n perc
  1. Almost Never
22 0.036
  1. Sometimes
68 0.111
  1. Often
230 0.375
  1. Always
294 0.479
d=mybar2(mydf3,mydf3$n,mydf3$Social_Distance,mydf3$perc,"Social Distance")

print(ggarrange(a,b,d, ncol=2))
## $`1`

## 
## $`2`

## 
## attr(,"class")
## [1] "list"      "ggarrange"
mybar3(mydata$Wash_Hands,"Wash Hands", 90)

mybar3(mydata$Wear_Mask, "Wear Mask", 90)

mybar3(mydata$Social_Distance, "Social Distance",90)

#######################################################################

Knowledge, Q9-13

#########################################################################

mydata%>%count(Air_Spread)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Air_Spread n perc
  1. Incorrect
99 0.161
  1. Correct
515 0.839
a=mybar2(mydf1,mydf1$n,mydf1$Air_Spread,mydf1$perc,"Air Spread")

mydata%>%count(Mask_Effective)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf2, colnames(mydf2))
Mask_Effective n perc
  1. Incorrect
40 0.065
  1. Correct
574 0.935
b=mybar2(mydf2,mydf2$n,mydf2$Mask_Effective,mydf2$perc,"Mask Effective")

mydata%>%count(Asymptomatic_Pose_Risk)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf3, colnames(mydf3))
Asymptomatic_Pose_Risk n perc
  1. Incorrect
34 0.055
  1. Correct
580 0.945
d=mybar2(mydf3,mydf3$n,mydf3$Asymptomatic_Pose_Risk,mydf3$perc,"Asymptomatic Risk")

mydata%>%count(College_Deaths)%>%mutate(perc = n / nrow(mydata)) -> mydf4
myprint(mydf4, colnames(mydf4))
College_Deaths n perc
  1. Incorrect
37 0.06
  1. Correct
577 0.94
e=mybar2(mydf4,mydf4$n,mydf4$College_Deaths,mydf4$perc,"College Deaths")

mydata%>%count(Mostly_Mild)%>%mutate(perc = n / nrow(mydata)) -> mydf5
myprint(mydf5, colnames(mydf5))
Mostly_Mild n perc
  1. Incorrect
137 0.223
  1. Correct
477 0.777
f=mybar2(mydf5,mydf5$n,mydf5$Mostly_Mild,mydf5$perc,"Mostly Mild")

print(ggarrange(a,b,d,e,f))

p1=table(mydata$Knowledge_Score)
p1=t(p1)
myprint(round(100*p1/416,2), c("1 Correct", "2 Correct", "3 Correct", "4 Correct", "5 Correct"))
1 Correct 2 Correct 3 Correct 4 Correct 5 Correct
0.96 2.64 11.1 49.5 83.4
mybar3(mydata$Knowledge_Score,"Knowledge Score", 90)

#########################################################################

Likelihood Assessment, Q15-16

#########################################################################

p2=table(mydata$Likely_to_Get)
p3=table(mydata$Likely_Get_Sick)
p1=t(round(100*p1/614,2))


myt=cbind(p2,p3)/614
## Warning in cbind(p2, p3): number of rows of result is not a multiple of vector
## length (arg 1)
myt=round(100*myt,2)
colnames(myt)=c("Likely to Get COVID", "Likely to Get Sick")
myprint(myt, colnames(myt))
Likely to Get COVID Likely to Get Sick
  1. Already Had
11.2 8.14
  1. Extremely Unlikely
33.2 12.05
  1. Unlikely
45.0 48.21
  1. Likely
10.6 23.62
  1. Extremely Likely
11.2 7.98
mybar3(mydata$Likely_Get_Sick,"Likely to Get Sick")

mybar3(mydata$Likely_to_Get, "Likely to Get COVID")

#########################################################################

Risk Factors, Q17-24

#########################################################################
myd=t(round(describe(mydata$Risk_Score),3))
myprint(myd, "Risk Score")
Risk Score
vars 1.000
n 614.000
mean 0.568
sd 0.201
median 0.625
trimmed 0.578
mad 0.185
min 0.000
max 1.000
range 1.000
skew -0.491
kurtosis -0.395
se 0.008
myt=matrix(rep(0, 16), nrow=2)

for (i in 27:34){
  t=table(mydata[,i])
  myt[,i-26]=t
  print(mybar3(mydata[,i], colnames(mydata)[i]))
}

colnames(myt)=colnames(mydata[, 27:34])
rownames(myt)=c("No", "Yes")
myt=round(myt*100/614, 2)
myt=t(myt)
myprint(myt, colnames(myt))
No Yes
Live_Others 8.31 91.7
Class_in_Person 45.28 54.7
Inperson_Job 48.21 51.8
Medical_Condition 85.50 14.5
Super_Spreader 77.69 22.3
No_Distancing 66.29 33.7
No_Mask 74.27 25.7
Travel 48.70 51.3
#########################################################################

Risk Assessment, Q25-32

#########################################################################
myd=t(round(describe(mydata$Risk_Knowledge_Score),3))
myprint(myd, "Risk Knowledge Score")
Risk Knowledge Score
vars 1.000
n 614.000
mean 25.049
sd 4.934
median 26.000
trimmed 25.732
mad 4.448
min 8.000
max 32.000
range 24.000
skew -1.309
kurtosis 1.636
se 0.199
myt=matrix(rep(0, 4*8), nrow=4)

for (i in 35:42){
  t=table(mydata[,i])
  myt[,i-34]=t
  print(mybar3(mydata[,i], colnames(mydata)[i]), 90)
}

colnames(myt)=colnames(mydata[, 35:42])
rownames(myt)=c("Extremely Safe", "Somewhat Safe", "Somewhat Risky", "Extremely Risky")
myt=round(100*myt/614, 2)
myt=t(myt)
myprint(myt, colnames(myt))
Extremely Safe Somewhat Safe Somewhat Risky Extremely Risky
Safe_Live_Others 18.89 44.95 31.6 4.56
Safe_Attend_Class 11.40 23.13 43.3 22.15
Safe_F2F_Job 7.00 17.75 44.3 30.94
Safe_Underlying_Condition 4.07 6.03 23.1 66.78
Safe_Gatherings..50 2.61 6.19 19.5 71.66
Safe_Socials_No_Distancing 3.09 6.03 27.2 63.68
Safe_Socials_No_Mask 3.42 5.37 22.8 68.40
Safe_Travel 5.37 24.43 47.9 22.31
#########################################################################

Concern Self & Others, Q14, Q33-34

#########################################################################
mydata%>%count(Concern_Self)%>%mutate(perc = n / nrow(mydata)) -> mydf1
mydata%>%count(Concern_Others)%>%mutate(perc = n / nrow(mydata)) -> mydf2
mydata%>%count(Change_Others)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf1,colnames(mydf1))
Concern_Self n perc
  1. Not at all
105 0.171
  1. Slightly
204 0.332
  1. Moderately
231 0.376
  1. Extremely
74 0.121
myprint(mydf2,colnames(mydf2))
Concern_Others n perc
  1. Not at All
29 0.047
  1. Slightly
128 0.208
  1. Moderately
262 0.427
  1. Extremely
195 0.318
myprint(mydf3,colnames(mydf3))
Change_Others n perc
  1. Very Unwilling
25 0.041
  1. Somewhat Unwilling
25 0.041
  1. Somewhat Willing
127 0.207
  1. Willing
437 0.712
a=mybar2(mydf1,mydf1$n,mydf1$Concern_Self,mydf1$perc,"Concern for Self")
b=mybar2(mydf2,mydf2$n,mydf2$Concern_Others,mydf2$perc,"Concern for Others")
d=mybar2(mydf3,mydf3$n,mydf3$Change_Others,mydf3$perc,"Change for Others")
a

b

d

mybar3(mydata$Concern_Self,"Concern for Self")

mybar3(mydata$Concern_Others, "Concern for Others")

#########################################################################

Hesitancy, Q35-39

#########################################################################
myt=matrix(rep(0, 2*5), nrow=2)

for (i in 46:50){
  t=table(mydata[,i])
  myt[,i-45]=t
  print(mybar3(mydata[,i], colnames(mydata)[i]))
}

colnames(myt)=colnames(mydata[, 46:50])
rownames(myt)=c("Incorrect", "Correct")
myt=round(100*myt/614, 2)
myt=t(myt)
myprint(myt, colnames(myt))
Incorrect Correct
Definition_Vaccine 4.56 95.4
Safety_Vaccine 14.66 85.3
Better_to_be_Sick 18.24 81.8
Over_Vaccination_Harmful 44.30 55.7
Herd_Immunity 18.89 81.1
p1=table(mydata$Hesitancy_Score)
p1=t(p1)
myprint(round(100*p1/416,2), c("0 Correct","1 Correct", "2 Correct", "3 Correct", "4 Correct", "5 Correct"))
0 Correct 1 Correct 2 Correct 3 Correct 4 Correct 5 Correct
0.96 3.37 8.89 27.2 49.3 57.9
#########################################################################

Scores ~ Vaccine Willingness

describe(mydata)
##                             vars   n   mean     sd median trimmed    mad   min
## ID                             1 614 307.50 177.39 307.50  307.50 227.58  1.00
## Willingness_4Cat*              2 614   3.08   0.97   3.00    3.23   1.48  1.00
## Willingness_2Cat*              3 614   1.78   0.41   2.00    1.85   0.00  1.00
## Behavior_Reverse               4 614   0.16   0.16   0.09    0.13   0.13  0.00
## Knowledge_Reverse              5 614   0.11   0.15   0.00    0.08   0.00  0.00
## Risk_Knowledge_Reverse         6 614   0.22   0.16   0.19    0.20   0.14  0.00
## Hesitancy                      7 614   0.20   0.21   0.20    0.17   0.30  0.00
## Risk_Score                     8 614   0.57   0.20   0.62    0.58   0.19  0.00
## News_Frequency                 9 614   0.55   0.14   0.55    0.54   0.13  0.25
## Trust                         10 614   0.60   0.12   0.59    0.60   0.10  0.25
## Greater_Risk_COVID*           11 614   1.71   0.45   2.00    1.77   0.00  1.00
## Had_COVID*                    12 614   1.79   0.41   2.00    1.86   0.00  1.00
## Died_COVID*                   13 614   1.17   0.37   1.00    1.09   0.00  1.00
## Wash_Hands*                   14 614   3.21   0.88   3.00    3.32   1.48  1.00
## Wear_Mask*                    15 614   3.77   0.60   4.00    3.94   0.00  1.00
## Social_Distance*              16 614   3.30   0.80   3.00    3.41   1.48  1.00
## Behavior_Score                17 614  10.28   1.76  11.00   10.55   1.48  3.00
## Air_Spread*                   18 614   1.84   0.37   2.00    1.92   0.00  1.00
## Mask_Effective*               19 614   1.93   0.25   2.00    2.00   0.00  1.00
## Asymptomatic_Pose_Risk*       20 614   1.94   0.23   2.00    2.00   0.00  1.00
## College_Deaths*               21 614   1.94   0.24   2.00    2.00   0.00  1.00
## Mostly_Mild*                  22 614   1.78   0.42   2.00    1.85   0.00  1.00
## Knowledge_Score*              23 614   4.43   0.76   5.00    4.58   0.00  1.00
## Concern_Self*                 24 614   2.45   0.91   2.00    2.43   1.48  1.00
## Likely_to_Get*                25 614   2.55   0.83   3.00    2.56   1.48  1.00
## Likely_Get_Sick*              26 614   3.11   1.00   3.00    3.14   1.48  1.00
## Live_Others*                  27 614   1.92   0.28   2.00    2.00   0.00  1.00
## Class_in_Person*              28 614   1.55   0.50   2.00    1.56   0.00  1.00
## Inperson_Job*                 29 614   1.52   0.50   2.00    1.52   0.00  1.00
## Medical_Condition*            30 614   1.14   0.35   1.00    1.06   0.00  1.00
## Super_Spreader*               31 614   1.22   0.42   1.00    1.15   0.00  1.00
## No_Distancing*                32 614   1.34   0.47   1.00    1.30   0.00  1.00
## No_Mask*                      33 614   1.26   0.44   1.00    1.20   0.00  1.00
## Travel*                       34 614   1.51   0.50   2.00    1.52   0.00  1.00
## Safe_Live_Others*             35 614   2.22   0.80   2.00    2.22   1.48  1.00
## Safe_Attend_Class*            36 614   2.76   0.92   3.00    2.83   1.48  1.00
## Safe_F2F_Job*                 37 614   2.99   0.88   3.00    3.08   1.48  1.00
## Safe_Underlying_Condition*    38 614   3.53   0.78   4.00    3.71   0.00  1.00
## Safe_Gatherings..50*          39 614   3.60   0.72   4.00    3.77   0.00  1.00
## Safe_Socials_No_Distancing*   40 614   3.51   0.75   4.00    3.67   0.00  1.00
## Safe_Socials_No_Mask*         41 614   3.56   0.75   4.00    3.73   0.00  1.00
## Safe_Travel*                  42 614   2.87   0.82   3.00    2.91   1.48  1.00
## Risk_Knowledge_Score          43 614  25.05   4.93  26.00   25.73   4.45  8.00
## Concern_Others*               44 614   3.01   0.85   3.00    3.08   1.48  1.00
## Change_Others*                45 614   3.59   0.75   4.00    3.76   0.00  1.00
## Definition_Vaccine*           46 614   1.95   0.21   2.00    2.00   0.00  1.00
## Safety_Vaccine*               47 614   1.85   0.35   2.00    1.94   0.00  1.00
## Better_to_be_Sick*            48 614   1.82   0.39   2.00    1.90   0.00  1.00
## Over_Vaccination_Harmful*     49 614   1.56   0.50   2.00    1.57   0.00  1.00
## Herd_Immunity*                50 614   1.81   0.39   2.00    1.89   0.00  1.00
## Hesitancy_Score*              51 614   4.99   1.06   5.00    5.15   1.48  1.00
## Child_Vax*                    52 614   3.87   0.42   4.00    3.99   0.00  1.00
## Annual_Flu_Shot*              53 614   1.44   0.50   1.00    1.43   0.00  1.00
## Flu_Shot_Reason*              54 614   5.27   2.73   5.00    5.38   4.45  1.00
## Meningitis_Required*          55 614   3.86   0.46   4.00    4.00   0.00  1.00
## COVID_Required*               56 614   2.74   1.16   3.00    2.80   1.48  1.00
## Family_Accept*                57 614   2.42   1.05   3.00    2.40   1.48  1.00
## Friends_Accept*               58 614   2.14   1.06   2.00    2.06   1.48  1.00
## US_Accept*                    59 614   2.65   0.68   3.00    2.66   0.00  1.00
## CDC_Freq*                     60 614   2.24   0.87   2.00    2.19   1.48  1.00
## Local_News_Freq*              61 614   2.13   0.88   2.00    2.08   1.48  1.00
## National_News_Freq*           62 614   2.39   0.92   2.00    2.36   1.48  1.00
## International_News_Freq*      63 614   1.98   0.87   2.00    1.90   1.48  1.00
## Social_Media_Freq*            64 614   2.61   1.05   3.00    2.64   1.48  1.00
## Texas_State_Freq*             65 614   2.44   0.88   2.00    2.43   1.48  1.00
## College_Friends_Freq*         66 614   2.16   0.89   2.00    2.08   1.48  1.00
## Parents_Freq*                 67 614   2.39   0.93   2.00    2.36   1.48  1.00
## Professors_Freq*              68 614   1.92   0.85   2.00    1.84   1.48  1.00
## Adults_Freq*                  69 614   1.84   0.81   2.00    1.74   1.48  1.00
## Doctor_Freq*                  70 614   2.13   0.97   2.00    2.03   1.48  1.00
## Frequency_Score               71 614  24.23   6.05  24.00   23.97   5.93 11.00
## Trust_CDC*                    72 614   3.16   0.89   3.00    3.26   1.48  1.00
## Trust_Local*                  73 614   2.25   0.71   2.00    2.26   0.00  1.00
## Trust_National*               74 614   2.28   0.78   2.00    2.28   1.48  1.00
## Trust_International*          75 614   2.32   0.79   2.00    2.32   1.48  1.00
## Trust_Social_Media*           76 614   1.70   0.62   2.00    1.65   0.00  1.00
## Trust_Texas_State*            77 614   2.67   0.78   3.00    2.67   1.48  1.00
## Trust_Friends*                78 614   1.93   0.59   2.00    1.90   0.00  1.00
## Trust_Parents*                79 614   2.44   0.77   2.00    2.43   1.48  1.00
## Trust_Professors*             80 614   2.34   0.70   2.00    2.36   0.00  1.00
## Trust_Adults*                 81 614   2.01   0.60   2.00    2.00   0.00  1.00
## Trust_Doctor*                 82 614   3.20   0.79   3.00    3.28   1.48  1.00
## Trust_Score                   83 614  26.29   5.11  26.00   26.49   4.45 11.00
## Politics_5Cat*                84 614   2.00   1.12   2.00    1.88   1.48  1.00
## Politics_3Cat*                85 614   1.71   0.76   2.00    1.63   1.48  1.00
## Race*                         86 614   1.77   1.00   1.00    1.58   0.00  1.00
## White*                        87 614   1.50   0.50   1.00    1.50   0.00  1.00
## Hispanic*                     88 614   1.36   0.48   1.00    1.33   0.00  1.00
## Gender*                       89 614   1.53   0.54   2.00    1.51   1.48  1.00
## Gender_2Cat*                  90 614   1.50   0.50   1.50    1.50   0.74  1.00
## Age                           91 614  20.30   1.50  20.00   20.27   1.48 18.00
## Age_Cat*                      92 614   3.30   1.49   3.00    3.27   1.48  1.00
## Year*                         93 614   2.76   1.11   3.00    2.83   1.48  1.00
## Health_Maj_Min*               94 614   1.38   0.49   1.00    1.36   0.00  1.00
## Income*                       95 614   2.92   1.50   3.00    2.90   1.48  1.00
## Insurance*                    96 614   1.82   0.39   2.00    1.89   0.00  1.00
##                                max  range  skew kurtosis   se
## ID                          614.00 613.00  0.00    -1.21 7.16
## Willingness_4Cat*             4.00   3.00 -0.86    -0.24 0.04
## Willingness_2Cat*             2.00   1.00 -1.35    -0.18 0.02
## Behavior_Reverse              0.82   0.82  1.47     2.44 0.01
## Knowledge_Reverse             0.80   0.80  1.51     2.68 0.01
## Risk_Knowledge_Reverse        0.77   0.77  1.31     1.64 0.01
## Hesitancy                     1.00   1.00  1.07     0.94 0.01
## Risk_Score                    1.00   1.00 -0.49    -0.40 0.01
## News_Frequency                1.00   0.75  0.45     0.32 0.01
## Trust                         1.00   0.75 -0.27     0.81 0.00
## Greater_Risk_COVID*           2.00   1.00 -0.94    -1.12 0.02
## Had_COVID*                    2.00   1.00 -1.42     0.02 0.02
## Died_COVID*                   2.00   1.00  1.77     1.15 0.02
## Wash_Hands*                   4.00   3.00 -0.84    -0.19 0.04
## Wear_Mask*                    4.00   3.00 -2.91     8.27 0.02
## Social_Distance*              4.00   3.00 -1.00     0.42 0.03
## Behavior_Score               12.00   9.00 -1.47     2.44 0.07
## Air_Spread*                   2.00   1.00 -1.84     1.38 0.01
## Mask_Effective*               2.00   1.00 -3.52    10.38 0.01
## Asymptomatic_Pose_Risk*       2.00   1.00 -3.88    13.06 0.01
## College_Deaths*               2.00   1.00 -3.69    11.61 0.01
## Mostly_Mild*                  2.00   1.00 -1.33    -0.24 0.02
## Knowledge_Score*              5.00   4.00 -1.51     2.68 0.03
## Concern_Self*                 4.00   3.00 -0.04    -0.83 0.04
## Likely_to_Get*                4.00   3.00 -0.19    -0.52 0.03
## Likely_Get_Sick*              5.00   4.00 -0.24     0.02 0.04
## Live_Others*                  2.00   1.00 -3.01     7.10 0.01
## Class_in_Person*              2.00   1.00 -0.19    -1.97 0.02
## Inperson_Job*                 2.00   1.00 -0.07    -2.00 0.02
## Medical_Condition*            2.00   1.00  2.01     2.05 0.01
## Super_Spreader*               2.00   1.00  1.33    -0.24 0.02
## No_Distancing*                2.00   1.00  0.69    -1.53 0.02
## No_Mask*                      2.00   1.00  1.11    -0.77 0.02
## Travel*                       2.00   1.00 -0.05    -2.00 0.02
## Safe_Live_Others*             4.00   3.00  0.12    -0.58 0.03
## Safe_Attend_Class*            4.00   3.00 -0.38    -0.67 0.04
## Safe_F2F_Job*                 4.00   3.00 -0.61    -0.30 0.04
## Safe_Underlying_Condition*    4.00   3.00 -1.73     2.38 0.03
## Safe_Gatherings..50*          4.00   3.00 -1.90     3.08 0.03
## Safe_Socials_No_Distancing*   4.00   3.00 -1.60     2.19 0.03
## Safe_Socials_No_Mask*         4.00   3.00 -1.82     2.88 0.03
## Safe_Travel*                  4.00   3.00 -0.35    -0.39 0.03
## Risk_Knowledge_Score         32.00  24.00 -1.31     1.64 0.20
## Concern_Others*               4.00   3.00 -0.50    -0.47 0.03
## Change_Others*                4.00   3.00 -2.01     3.61 0.03
## Definition_Vaccine*           2.00   1.00 -4.35    16.91 0.01
## Safety_Vaccine*               2.00   1.00 -1.99     1.98 0.01
## Better_to_be_Sick*            2.00   1.00 -1.64     0.69 0.02
## Over_Vaccination_Harmful*     2.00   1.00 -0.23    -1.95 0.02
## Herd_Immunity*                2.00   1.00 -1.59     0.51 0.02
## Hesitancy_Score*              6.00   5.00 -1.07     0.94 0.04
## Child_Vax*                    4.00   3.00 -4.04    19.34 0.02
## Annual_Flu_Shot*              2.00   1.00  0.24    -1.95 0.02
## Flu_Shot_Reason*             10.00   9.00 -0.23    -1.28 0.11
## Meningitis_Required*          4.00   3.00 -3.99    17.17 0.02
## COVID_Required*               4.00   3.00 -0.18    -1.48 0.05
## Family_Accept*                4.00   3.00 -0.20    -1.29 0.04
## Friends_Accept*               4.00   3.00  0.21    -1.38 0.04
## US_Accept*                    4.00   3.00 -0.34     0.03 0.03
## CDC_Freq*                     4.00   3.00  0.35    -0.52 0.04
## Local_News_Freq*              4.00   3.00  0.38    -0.59 0.04
## National_News_Freq*           4.00   3.00  0.11    -0.83 0.04
## International_News_Freq*      4.00   3.00  0.58    -0.40 0.04
## Social_Media_Freq*            4.00   3.00 -0.11    -1.20 0.04
## Texas_State_Freq*             4.00   3.00  0.11    -0.70 0.04
## College_Friends_Freq*         4.00   3.00  0.47    -0.46 0.04
## Parents_Freq*                 4.00   3.00  0.24    -0.80 0.04
## Professors_Freq*              4.00   3.00  0.65    -0.23 0.03
## Adults_Freq*                  4.00   3.00  0.80     0.22 0.03
## Doctor_Freq*                  4.00   3.00  0.52    -0.71 0.04
## Frequency_Score              44.00  33.00  0.45     0.32 0.24
## Trust_CDC*                    4.00   3.00 -0.68    -0.60 0.04
## Trust_Local*                  4.00   3.00  0.19    -0.14 0.03
## Trust_National*               4.00   3.00  0.14    -0.42 0.03
## Trust_International*          4.00   3.00  0.14    -0.43 0.03
## Trust_Social_Media*           4.00   3.00  0.47     0.17 0.03
## Trust_Texas_State*            4.00   3.00 -0.19    -0.35 0.03
## Trust_Friends*                4.00   3.00  0.25     0.76 0.02
## Trust_Parents*                4.00   3.00  0.16    -0.35 0.03
## Trust_Professors*             4.00   3.00  0.18    -0.14 0.03
## Trust_Adults*                 4.00   3.00  0.31     0.74 0.02
## Trust_Doctor*                 4.00   3.00 -0.66    -0.29 0.03
## Trust_Score                  44.00  33.00 -0.27     0.81 0.21
## Politics_5Cat*                4.00   3.00  0.62    -1.09 0.05
## Politics_3Cat*                3.00   2.00  0.55    -1.09 0.03
## Race*                         5.00   4.00  1.45     1.67 0.04
## White*                        2.00   1.00  0.01    -2.00 0.02
## Hispanic*                     2.00   1.00  0.58    -1.66 0.02
## Gender*                       3.00   2.00  0.29    -1.07 0.02
## Gender_2Cat*                  2.00   1.00  0.00    -2.00 0.02
## Age                          24.00   6.00  0.11    -0.91 0.06
## Age_Cat*                      6.00   5.00  0.09    -0.96 0.06
## Year*                         4.00   3.00 -0.32    -1.25 0.04
## Health_Maj_Min*               2.00   1.00  0.47    -1.78 0.02
## Income*                       5.00   4.00  0.07    -1.43 0.06
## Insurance*                    2.00   1.00 -1.63     0.65 0.02
a=boxplot(mydata$Behavior_Reverse~mydata$Willingness_4Cat, plot=F)$stats
a1=boxplot(mydata$Knowledge_Reverse~mydata$Willingness_4Cat, plot=F)$stats
a2=boxplot(mydata$Risk_Knowledge_Reverse~mydata$Willingness_4Cat, plot=F)$stats
a3=boxplot(mydata$Risk_Score~mydata$Willingness_4Cat, plot=F)$stats
a4=boxplot(mydata$Hesitancy~mydata$Willingness_4Cat, plot=F)$stats
colnames(a)=colnames(a1)=colnames(a2)=colnames(a3)=colnames(a4)=levels(mydata$Willingness_4Cat)
rownames(a)=rownames(a1)=rownames(a2)=rownames(a3)=rownames(a4)= c("Min", "1st Quartile", "Median", "3d Quartile", "Max")
a%>%kbl(caption="Behavior")
Behavior
  1. Never
  1. If Required
  1. Eventually
  1. ASAP
Min 0.000 0.000 0.000 0.000
1st Quartile 0.091 0.091 0.091 0.000
Median 0.273 0.182 0.091 0.091
3d Quartile 0.455 0.273 0.227 0.182
Max 0.818 0.545 0.364 0.455
a1%>%kbl(caption="Knowledge")
Knowledge
  1. Never
  1. If Required
  1. Eventually
  1. ASAP
Min 0.0 0.0 0.0 0.0
1st Quartile 0.0 0.0 0.0 0.0
Median 0.2 0.0 0.0 0.0
3d Quartile 0.4 0.2 0.2 0.2
Max 0.8 0.4 0.4 0.4
a2%>%kbl(caption="Risk Knowledge")
Risk Knowledge
  1. Never
  1. If Required
  1. Eventually
  1. ASAP
Min 0.000 0.000 0.000 0.000
1st Quartile 0.194 0.161 0.097 0.097
Median 0.323 0.226 0.161 0.161
3d Quartile 0.581 0.355 0.258 0.258
Max 0.774 0.581 0.484 0.484
a3%>%kbl(caption="Risk Score")
Risk Score
  1. Never
  1. If Required
  1. Eventually
  1. ASAP
Min 0.125 0.000 0.125 0.125
1st Quartile 0.250 0.375 0.500 0.500
Median 0.500 0.625 0.625 0.625
3d Quartile 0.750 0.750 0.750 0.750
Max 0.875 0.875 0.875 1.000
a4%>%kbl(caption="Hesitancy")
Hesitancy
  1. Never
  1. If Required
  1. Eventually
  1. ASAP
Min 0.0 0.0 0.0 0.0
1st Quartile 0.2 0.2 0.0 0.0
Median 0.4 0.3 0.2 0.0
3d Quartile 0.6 0.4 0.2 0.2
Max 1.0 0.6 0.4 0.4

Acceptance, Q45-47

table(mydata$Willingness_4Cat, mydata$Family_Accept)%>%kbl(caption="Family Accept")
Family Accept
Somewhat Likely Somewhat Unlikely Very Likely Very Unlikely
  1. Never
4 14 1 46
  1. If Required
30 18 3 19
  1. Eventually
106 37 73 12
  1. ASAP
42 7 197 5
table(mydata$Willingness_4Cat, mydata$Friends_Accept)%>%kbl(caption="Friends Accept")
Friends Accept
Somewhat Likely Somewhat Unlikely Very Likely Very Unlikely
  1. Never
10 16 6 33
  1. If Required
24 24 10 12
  1. Eventually
127 32 60 9
  1. ASAP
85 22 138 6
table(mydata$Willingness_4Cat, mydata$US_Accept)%>%kbl(caption="US Accept")
US Accept
  1. 0 to 25%
  1. 25 to 50%
  1. 50 to 75%
  1. 75 to 100%
  1. Never
14 36 14 1
  1. If Required
7 42 20 1
  1. Eventually
6 73 143 6
  1. ASAP
3 47 165 36

Frequency Versus Trust, Q48-58,Q59-Q69

orderit=function(i){
  mydata%>%count(mydata[,i])%>%mutate(perc = n / nrow(mydata)) -> mydf
  mydata%>%count(mydata[,i+12])%>%mutate(perc2 = n / nrow(mydata)) -> mydf1
  mydf[,1]=mydf1[,1]=NULL
  mydf=cbind(mydf,mydf1)
  colnames(mydf)=c("n_Freq", "percent_Freq", "n_Trust", "percent_Trust")
  return(mydf)
  }

mymat=matrix(rep(0, 10*4), ncol=4)

for (i in 60:69){
  temp=orderit(i)
  temp$Frequency=c("1 Never", "2 Occasionally", 
                 "3 Often", "4 All the Time")
  temp$Trust=c("1 No Trust", "2 Some Trust", 
                   "3 Trust", "4 Complete Trust")
  print(temp)%>%kbl()%>%kable_classic()
  sub=temp[,c(1,3)]
  z=chisq.test(sub)
  mymat[i-59,1]=colnames(mydata)[i]
  mymat[i-59,2]=round(z$statistic, 3)
  mymat[i-59,3]=z$parameter
  mymat[i-59,4]=round(z$p.value,3)

 b=ggplot(temp, aes(x='',y=n_Freq, fill=Frequency, 
                    label=paste0(n_Freq,'\n', " (", scales::percent(percent_Freq), ")")))+
  geom_bar(position="stack", stat="identity")+
  coord_flip()+
  geom_text(size = 3, position = position_stack(vjust = 0.5))+
  xlab("")+
  ylab("Frequency")+
  theme(legend.position="bottom")+
  theme(legend.title = element_blank())+
  ggtitle(colnames(mydata)[i])
 
d=ggplot(temp, aes(x='',y=n_Trust, fill=Trust, 
                   label=paste0(n_Trust,'\n', " (", scales::percent(percent_Trust), ")")))+
    geom_bar(position="stack", stat="identity")+
  coord_flip()+
  geom_text(size = 3, position = position_stack(vjust = 0.5))+
  xlab("")+
  ylab("Trust")+
  theme(legend.position="bottom")+
  theme(legend.title = element_blank())+
  ggtitle(colnames(mydata)[i+12])

print(ggarrange(b,d,nrow=2))
  
}
##   n_Freq percent_Freq n_Trust percent_Trust      Frequency            Trust
## 1    119       0.1938      26        0.0423        1 Never       1 No Trust
## 2    283       0.4609     124        0.2020 2 Occasionally     2 Some Trust
## 3    155       0.2524     188        0.3062        3 Often          3 Trust
## 4     57       0.0928     276        0.4495 4 All the Time 4 Complete Trust

##   n_Freq percent_Freq n_Trust percent_Trust      Frequency            Trust
## 1    157       0.2557      76        0.1238        1 Never       1 No Trust
## 2    263       0.4283     332        0.5407 2 Occasionally     2 Some Trust
## 3    150       0.2443     185        0.3013        3 Often          3 Trust
## 4     44       0.0717      21        0.0342 4 All the Time 4 Complete Trust

##   n_Freq percent_Freq n_Trust percent_Trust      Frequency            Trust
## 1    110        0.179      93        0.1515        1 Never       1 No Trust
## 2    230        0.375     291        0.4739 2 Occasionally     2 Some Trust
## 3    198        0.322     198        0.3225        3 Often          3 Trust
## 4     76        0.124      32        0.0521 4 All the Time 4 Complete Trust

##   n_Freq percent_Freq n_Trust percent_Trust      Frequency            Trust
## 1    204       0.3322      86        0.1401        1 Never       1 No Trust
## 2    257       0.4186     284        0.4625 2 Occasionally     2 Some Trust
## 3    117       0.1906     204        0.3322        3 Often          3 Trust
## 4     36       0.0586      40        0.0651 4 All the Time 4 Complete Trust

##   n_Freq percent_Freq n_Trust percent_Trust      Frequency            Trust
## 1    111        0.181     233       0.37948        1 Never       1 No Trust
## 2    172        0.280     335       0.54560 2 Occasionally     2 Some Trust
## 3    177        0.288      42       0.06840        3 Often          3 Trust
## 4    154        0.251       4       0.00651 4 All the Time 4 Complete Trust

##   n_Freq percent_Freq n_Trust percent_Trust      Frequency            Trust
## 1     86        0.140      41        0.0668        1 Never       1 No Trust
## 2    247        0.402     198        0.3225 2 Occasionally     2 Some Trust
## 3    205        0.334     296        0.4821        3 Often          3 Trust
## 4     76        0.124      79        0.1287 4 All the Time 4 Complete Trust

##   n_Freq percent_Freq n_Trust percent_Trust      Frequency            Trust
## 1    145       0.2362     124       0.20195        1 Never       1 No Trust
## 2    283       0.4609     413       0.67264 2 Occasionally     2 Some Trust
## 3    129       0.2101      72       0.11726        3 Often          3 Trust
## 4     57       0.0928       5       0.00814 4 All the Time 4 Complete Trust

##   n_Freq percent_Freq n_Trust percent_Trust      Frequency            Trust
## 1    104        0.169      56        0.0912        1 Never       1 No Trust
## 2    257        0.419     285        0.4642 2 Occasionally     2 Some Trust
## 3    165        0.269     222        0.3616        3 Often          3 Trust
## 4     88        0.143      51        0.0831 4 All the Time 4 Complete Trust

##   n_Freq percent_Freq n_Trust percent_Trust      Frequency            Trust
## 1    215       0.3502      56        0.0912        1 Never       1 No Trust
## 2    263       0.4283     322        0.5244 2 Occasionally     2 Some Trust
## 3    104       0.1694     209        0.3404        3 Often          3 Trust
## 4     32       0.0521      27        0.0440 4 All the Time 4 Complete Trust

##   n_Freq percent_Freq n_Trust percent_Trust      Frequency            Trust
## 1    231        0.376     102        0.1661        1 Never       1 No Trust
## 2    278        0.453     411        0.6694 2 Occasionally     2 Some Trust
## 3     78        0.127      94        0.1531        3 Often          3 Trust
## 4     27        0.044       7        0.0114 4 All the Time 4 Complete Trust

colnames(mymat)=c("Freq/Trust Variable", "Chi Squared", "df", "p.value")
myprint(mymat,colnames(mymat))
Freq/Trust Variable Chi Squared df p.value
CDC_Freq 268.966 3 0
Local_News_Freq 47.956 3 0
National_News_Freq 26.492 3 0
International_News_Freq 73.151 3 0
Social_Media_Freq 321.296 3 0
Texas_State_Freq 37.927 3 0
College_Friends_Freq 85.698 3 0
Parents_Freq 34.091 3 0
Professors_Freq 134.886 3 0
Adults_Freq 88.899 3 0

Vaccine Views, Q40-Q44

#########################################################################

mydata%>%count(Child_Vax)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
Child_Vax n perc
  1. Not Sure
5 0.008
  1. None
5 0.008
  1. Partially
54 0.088
  1. Completely
550 0.896
a=mybar2(mydf1,mydf1$n,mydf1$Child_Vax,mydf1$perc,"Child Vaccinations")

mydata%>%count(Annual_Flu_Shot)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf2, colnames(mydf2))
Annual_Flu_Shot n perc
  1. No
343 0.559
  1. Yes
271 0.441
b=mybar2(mydf2,mydf2$n,mydf2$Annual_Flu_Shot,mydf2$perc,"Annual Flu Shot")


mydata%>%count(Meningitis_Required)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf3, colnames(mydf3))
Meningitis_Required n perc
  1. Illegal & Unethical
6 0.010
  1. Illegal but Ethical
12 0.020
  1. Legal but Unethical
41 0.067
  1. Legal & Ethical
555 0.904
d=mybar2(mydf3,mydf3$n,mydf3$Meningitis_Required,mydf3$perc,"Meningitis Required")

mydata%>%count(COVID_Required)%>%mutate(perc = n / nrow(mydata)) -> mydf4
myprint(mydf4, colnames(mydf4))
COVID_Required n perc
  1. Absolutely Not
110 0.179
  1. Probably Not
179 0.292
  1. Probably Yes
84 0.137
  1. Absolutely Yes
241 0.393
e=mybar2(mydf4,mydf4$n,mydf4$COVID_Required,mydf4$perc,"COVID-19 Required")

print(ggarrange(d,e,ncol=2))

print(ggarrange(a,b,ncol=2))

mybar3(mydata$Child_Vax,"Vaccination as Child", 90)

#########################################################################

Correlations

We look at hierarchically-clustered correlations.

#########################################################################


for (i in 4:9){mydata[,i]=as.numeric(mydata[,i])}
mydata$White=as.numeric(mydata$White)
for (i in 89:92){mydata[, i]=as.numeric(mydata[,i])}

mycorr=cor(mydata[, c(4:9)])
corfunction(mycorr)

#########################################################################

Kernel Density Estimates

#########################################################################

mysub=subset(mydata, select=c(Behavior_Reverse, Knowledge_Reverse, Risk_Knowledge_Reverse,  Hesitancy,  News_Frequency, Trust)) #get a subset for plotting
colnames(mysub)=c(  "Behavior %", "Knowledge %", "Risk Knowledge%", "Hesitancy %", "News Freq %", "Trust News %") #set column names
kdepairs(mysub) #plot

#########################################################################

Frequency of Choice

#########################################################################

ASAP=mydata[mydata$Willingness_4Cat=="4. ASAP",60:70]
Eventually=mydata[mydata$Willingness_4Cat=="3. Eventually",60:70]
IfRequired=mydata[mydata$Willingness_4Cat=="2. If Required",60:70]
Never=mydata[mydata$Willingness_4Cat=="1. Never",60:70]
a=describe(ASAP)$mean
b=describe(Eventually)$mean
d=describe(IfRequired)$mean
e=describe(Never)$mean
newdf=round(data.frame(cbind(a,b,d,e)),3)
colnames(newdf)=c("ASAP", "Eventually", "If_Required", "Never")
rownames(newdf)=colnames(mydata)[60:70]
newdf
##                         ASAP Eventually If_Required Never
## CDC_Freq                2.39       2.17        2.03  2.15
## Local_News_Freq         2.23       2.09        1.91  2.14
## National_News_Freq      2.59       2.28        2.11  2.32
## International_News_Freq 2.15       1.83        1.83  1.97
## Social_Media_Freq       2.62       2.64        2.61  2.45
## Texas_State_Freq        2.50       2.44        2.56  2.11
## College_Friends_Freq    2.22       2.16        2.13  1.94
## Parents_Freq            2.45       2.30        2.44  2.38
## Professors_Freq         2.04       1.86        1.96  1.68
## Adults_Freq             1.94       1.75        1.80  1.80
## Doctor_Freq             2.26       1.99        2.09  2.11
subdf=newdf$ASAP[1:11]
names(subdf)=row.names(newdf)[1:11]
subdf=sort(subdf, decreasing=TRUE)

subdf1=newdf$Eventually[1:11]
names(subdf1)=row.names(newdf)[1:11]
subdf1=sort(subdf1, decreasing=TRUE)

subdf2=newdf$If_Required[1:11]
names(subdf2)=row.names(newdf)[1:11]
subdf2=sort(subdf2, decreasing=TRUE)

subdf3=newdf$Never[1:11]
names(subdf3)=row.names(newdf)[1:11]
subdf3=sort(subdf3, decreasing=TRUE)

ASAP=names(subdf[1:5])
Eventually=names(subdf1[1:5])
If_Required=names(subdf2[1:5])
Never=names(subdf3[1:5])

top5=as.data.frame(cbind(ASAP, Eventually, If_Required, Never))
top5%>%kbl()%>%kable_classic(html_font = "Cambria")
ASAP Eventually If_Required Never
Social_Media_Freq Social_Media_Freq Social_Media_Freq Social_Media_Freq
National_News_Freq Texas_State_Freq Texas_State_Freq Parents_Freq
Texas_State_Freq Parents_Freq Parents_Freq National_News_Freq
Parents_Freq National_News_Freq College_Friends_Freq CDC_Freq
CDC_Freq CDC_Freq National_News_Freq Local_News_Freq
#########################################################################

Cross-Tab Tests

mywilcox=function(x){
  c(wilcox.test(x~Willingness_2Cat, data=mydata)$statistic,
  wilcox.test(x~Willingness_2Cat, data=mydata)$p.value, 
  -1*cor(x, as.numeric(mydata$Willingness_2Cat)))
}

mymat=matrix(rep(0,5*3), nrow=5)
mymat[1,1:3]=mywilcox(mydata$Behavior_Reverse)
mymat[2,1:3]=mywilcox(mydata$Knowledge_Reverse)
mymat[3,1:3]=mywilcox(mydata$Risk_Knowledge_Reverse)
mymat[4,1:3]=mywilcox(mydata$Risk_Score)
mymat[5,1:3]=mywilcox(mydata$Hesitancy)

colnames(mymat)=c("Wilcoxon's W", "p", "Point Bi-Serial r")
rownames(mymat)=c("Misbehavior", "Lack of Knowledge", "Lack of Risk Knowledge", "Risk Score", "Hesitancy")
mymat%>%kbl(caption="Unwillingness")%>%kable_classic()
Unwillingness
Wilcoxon’s W p Point Bi-Serial r
Misbehavior 39869 0.000 0.230
Lack of Knowledge 37379 0.002 0.184
Lack of Risk Knowledge 43337 0.000 0.297
Risk Score 26725 0.002 -0.159
Hesitancy 50161 0.000 0.452

Models

Multinomial Y1

options(knitr.kable.NA = '')
#################################################################
mydata=read.csv("D:/Emily/data2.csv", stringsAsFactors = TRUE)
mybase=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~1, data=mydata)
## # weights:  8 (3 variable)
## initial  value 851.184738 
## final  value 748.367556 
## converged
mymultiA=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~ 
                      relevel(White, ref="0. White")+
                      relevel(Hispanic, ref="0. Not Hispanic")+
                      relevel(Gender_2Cat, ref="Male")+
                      relevel(Year, ref="4. Senior")+ #colinear with Age
                      relevel(Health_Maj_Min, ref="0. No"), data=mydata)
## # weights:  36 (24 variable)
## initial  value 851.184738 
## iter  10 value 730.791310
## iter  20 value 722.101742
## final  value 722.059439 
## converged
mymultiB=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~ 
                      relevel(White, ref="0. White")+
                      relevel(Hispanic, ref="0. Not Hispanic")+
                      relevel(Gender_2Cat, ref="Male")+
                      relevel(Year, ref="4. Senior")+ #colinear with Age
                      relevel(Health_Maj_Min, ref="0. No")+
                      relevel(Income, ref="5.  >$100K")+
                      relevel(Insurance, ref="0. No"), data=mydata)
## # weights:  56 (39 variable)
## initial  value 851.184738 
## iter  10 value 727.495372
## iter  20 value 717.854053
## iter  30 value 717.288983
## final  value 717.287622 
## converged
mymultiC=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~ 
                      relevel(White, ref="0. White")+
                      relevel(Hispanic, ref="0. Not Hispanic")+
                      relevel(Gender_2Cat, ref="Male")+
                      relevel(Year, ref="4. Senior")+ #colinear with Age
                      relevel(Health_Maj_Min, ref="0. No")+
                      relevel(Income, ref="5.  >$100K")+
                      relevel(Insurance, ref="0. No")+
                      relevel(Politics_3Cat, ref="1. Democrat"), 
                      data=mydata)
## # weights:  64 (45 variable)
## initial  value 851.184738 
## iter  10 value 698.399002
## iter  20 value 682.852284
## iter  30 value 681.626896
## iter  40 value 681.565820
## final  value 681.565737 
## converged
mymultiD=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~ 
                      relevel(White, ref="0. White")+
                      relevel(Hispanic, ref="0. Not Hispanic")+
                      relevel(Gender_2Cat, ref="Male")+
                      relevel(Year, ref="4. Senior")+ #colinear with Age
                      relevel(Health_Maj_Min, ref="0. No")+
                      relevel(Income, ref="5.  >$100K")+
                      relevel(Insurance, ref="0. No")+
                      relevel(Politics_3Cat, ref="1. Democrat")+
                      relevel(Annual_Flu_Shot, ref="1. Yes")+
                      Behavior_Reverse+
                      Knowledge_Reverse+
                      Risk_Knowledge_Reverse+
                      Hesitancy, data=mydata)
## # weights:  84 (60 variable)
## initial  value 851.184738 
## iter  10 value 636.155028
## iter  20 value 609.678109
## iter  30 value 603.717047
## iter  40 value 603.500287
## final  value 603.497270 
## converged
myresults=function(x){
  mys=summary(x)$coefficients/summary(x)$standard.errors
  p=round((1 - pnorm(abs(mys), 0, 1)) * 2,3)
  coef=t(round(exp(coef(x)),3))
  coef=pfunction(t(round((1 - pnorm(abs(mys), 0, 1)) * 2,3)), coef)
  return(coef)}

mymat=matrix(rep(NA, 20*3*4), nrow=20)
mymat[1:8, 1:3]=myresults(mymultiA)
mymat[1:13,4:6]=myresults(mymultiB)
mymat[1:15,7:9]=myresults(mymultiC)
mymat[1:20,10:12]=myresults(mymultiD)

mymat=as.data.frame(mymat)

rownames(mymat)=c("Intercept", "Non-White","Hispanic",
                   "Female","Freshman","Sophomore", "Junior",
                   "Health Major or Minor, Yes",
                "<25K", "25 to 50K", "50 to 75K", "75 to 100K",  
                "Insured, Yes", 
                "Other Party or Independent", "Republican","Flu Shot",
                "Misbehavior Score", "Ignorance Score","Risk Knowledge",
                "Hesitancy Score")

colnames(mymat)=rep(c("Eventually", "If Required", "Never"),4)

mymat%>%kbl(caption="Odds Ratios and p-values")%>%kable_classic(html_font = "Cambria")
Odds Ratios and p-values
Eventually If Required Never Eventually If Required Never Eventually If Required Never Eventually If Required Never
Intercept 0.317*** 0.108*** 1.005 0.388+ 0.119*** 0.981 0.046*** 0.058*** 0.749 0.001*** 0.005*** 0.349*
Non-White 1.086 1.114 0.664 1.051 1.132 0.688 1.912 1.428 0.731 1.812 1.26 0.666
Hispanic 0.317* 1.018 1.477 0.313* 1.004 1.472 0.217** 0.939 1.47 0.296* 1.125 1.652
Female 1.368 2.616** 1.523* 1.334 2.613** 1.542* 2.036* 3.251*** 1.691** 3.777** 4.297*** 1.879**
Freshman 0.488+ 1.202 0.604+ 0.511 1.226 0.597+ 0.36* 1.05 0.583+ 0.192** 0.655 0.513*
Sophomore 0.379* 1.137 0.678 0.388* 1.121 0.701 0.303** 1.062 0.685 0.227** 0.931 0.683
Junior 0.824 1.026 0.8 0.814 1.016 0.796 0.866 1.081 0.818 0.636 0.918 0.795
Health Major or Minor, Yes 1.906* 1.87* 0.966 1.947* 1.909* 0.966 2.009* 1.932* 0.975 3.338** 2.647** 1.1
<25K 1.347 0.943 0.948 1.992 1.044 0.949 2.225 0.949 0.932
25 to 50K 0.644 0.899 0.727 0.943 1.078 0.757 1.153 0.968 0.726
50 to 75K 0.576 0.862 0.87 0.761 0.917 0.865 0.809 1.105 0.968
75 to 100K 0.948 1.173 0.663 1.241 1.352 0.679 1.35 1.46 0.688
Insured, Yes 0.87 0.903 1.22 0.891 0.873 1.211 1.367 1.112 1.328
Other Party or Independent 5.211*** 1.642 1.493+ 4.945** 1.536 1.36
Republican 25.119*** 4.694*** 1.511 6.265** 2.022 0.964
Flu Shot 2.561* 3.39*** 1.312
Misbehavior Score 5.935 4.841 8.431*
Ignorance Score 0.955 0.576 1.141
Risk Knowledge 56.605** 3.297 0.587
Hesitancy Score 1050.988*** 179.365*** 11.942***
mytemp=anova(mybase,mymultiA,mymultiB, mymultiC, mymultiD)[,3]
mytemp=1-mytemp/mytemp[1]
names(mytemp)=c("NullModel", "Dem", "DemEcon","DemEconPol", "DemEconPolScores")
mytemp
##        NullModel              Dem          DemEcon       DemEconPol 
##           0.0000           0.0352           0.0415           0.0893 
## DemEconPolScores 
##           0.1936
#########################################################################

Multinomial Best Subsets

model.grid <- function(n){
     n.list <- rep(list(0:1), n)
     expand.grid(n.list)
}

best.subset <- function(y, x.vars, data){
 length(x.vars) %>%
      model.grid %>%
      apply(1, function(x) which(x > 0, arr.ind = TRUE)) %>%
      map(function(x) x.vars[x]) %>%
      .[2:dim(model.grid(length(x.vars)))[1]] %>%
      map(function(x) multinom(paste0(y, " ~ ", paste(x, collapse = "+")), trace=FALSE, data = mydata)) %>%
      map(function(x) AICc(x)) %>%
      do.call(rbind, .) %>%
      cbind(model.grid(length(x.vars))[-1, ], .) 
     }

y=c('Willingness_4Cat')
xvars=c("White","Hispanic", "Gender_2Cat", "Year", "Health_Maj_Min", 
        "Income", "Insurance", "Politics_3Cat", "Annual_Flu_Shot", "Behavior_Reverse", "Knowledge_Reverse", 
        "Risk_Knowledge_Reverse", "Hesitancy")

mysubsets=best.subset(y,xvars,mydata)
colnames(mysubsets)=c(xvars, "AIC")
mysubsets=mysubsets[order(mysubsets$AIC),]
mysubsets[1,]
##      White Hispanic Gender_2Cat Year Health_Maj_Min Income Insurance
## 7061     0        0           1    0              1      0         0
##      Politics_3Cat Annual_Flu_Shot Behavior_Reverse Knowledge_Reverse
## 7061             1               1                1                 0
##      Risk_Knowledge_Reverse Hesitancy  AIC
## 7061                      1         1 1305

Multinomial Final

myresults=function(x){
  mys=summary(x)$coefficients/summary(x)$standard.errors
  p=round((1 - pnorm(abs(mys), 0, 1)) * 2,3)
  coef=round(exp(coef(x)),3)
  coef=pfunction(round((1 - pnorm(abs(mys), 0, 1)) * 2,3), coef)
  return(coef)}

mymultifin=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~ 
                      relevel(Gender_2Cat, ref="Male")+
                      relevel(Health_Maj_Min, ref="0. No")+
                      relevel(Politics_3Cat, ref="1. Democrat")+
                      relevel(Annual_Flu_Shot, ref="1. Yes")+
                      Behavior_Reverse+
                      Risk_Knowledge_Reverse+
                      Hesitancy, data=mydata)
## # weights:  40 (27 variable)
## initial  value 851.184738 
## iter  10 value 655.525864
## iter  20 value 624.454870
## iter  30 value 623.963405
## final  value 623.962270 
## converged
mymat=t(myresults(mymultifin))

rownames(mymat)=c("Intercept","Gender","Health Major or Minor, Yes",
                "Other Party or Independent", "Republican","Flu Shot",
                "Behavorial Score", "Risk Knowledge", "Hesitancy Score")

colnames(mymat)=c("Eventually", "If Required", "Never")


mymat%>%kbl(caption="Odds Ratios and p-values")%>%kable_classic(html_font = "Cambria")
Odds Ratios and p-values
Eventually If Required Never
Intercept 0.001*** 0.008*** 0.308***
Gender 3.092** 3.884*** 1.815**
Health Major or Minor, Yes 3.265** 2.628** 1.058
Other Party or Independent 4.26** 1.392 1.371
Republican 4.683** 1.8 1.012
Flu Shot 2.817** 3.259*** 1.334
Behavorial Score 12.544* 4.547 10.272**
Risk Knowledge 13.886* 2.991 0.57
Hesitancy Score 843.46*** 145.711*** 8.213***
temp=1-summary(mymultifin)$deviance/summary(mybase)$deviance
names(temp)=c("McFadden Psuedo-R2")
temp
## McFadden Psuedo-R2 
##              0.166
#mypred=as.numeric(predict(mymultifin, mydata))
#table(as.numeric(mydata$Willingness_4Cat),mypred)

Logistic Regression, FULL

mylmbase=glm(relevel(Willingness_2Cat, ref="Willing") ~ 
                      1, data=mydata, family=binomial)
mylmA=glm(relevel(Willingness_2Cat, ref="Willing") ~ 
                      relevel(White, ref="0. White")+
                      relevel(Hispanic, ref="0. Not Hispanic")+
                      relevel(Gender_2Cat, ref="Male")+
                      relevel(Year, ref="4. Senior")+ #colinear with Age
                      relevel(Health_Maj_Min, ref="0. No")
                      , data=mydata, family=binomial)

mylmB=glm(relevel(Willingness_2Cat, ref="Willing") ~ 
                      relevel(White, ref="0. White")+
                      relevel(Hispanic, ref="0. Not Hispanic")+
                      relevel(Gender_2Cat, ref="Male")+
                      relevel(Year, ref="4. Senior")+ #colinear with Age
                      relevel(Health_Maj_Min, ref="0. No")+
                      relevel(Income, ref="5.  >$100K")+
                      relevel(Insurance, ref="0. No"), data=mydata, family=binomial)

mylmC=glm(relevel(Willingness_2Cat, ref="Willing") ~ 
                      relevel(White, ref="0. White")+
                      relevel(Hispanic, ref="0. Not Hispanic")+
                      relevel(Gender_2Cat, ref="Male")+
                      relevel(Year, ref="4. Senior")+ #colinear with Age
                      relevel(Health_Maj_Min, ref="0. No")+
                      relevel(Income, ref="5.  >$100K")+
                      relevel(Insurance, ref="0. No")+
                      relevel(Politics_3Cat, ref="1. Democrat"), 
                      data=mydata, family=binomial)

mylmD=glm(relevel(Willingness_2Cat, ref="Willing") ~ 
                      relevel(White, ref="0. White")+
                      relevel(Hispanic, ref="0. Not Hispanic")+
                      relevel(Gender_2Cat, ref="Male")+
                      relevel(Year, ref="4. Senior")+ #colinear with Age
                      relevel(Health_Maj_Min, ref="0. No")+
                      relevel(Income, ref="5.  >$100K")+
                      relevel(Insurance, ref="0. No")+
                      relevel(Politics_3Cat, ref="1. Democrat")+
                      relevel(Annual_Flu_Shot, ref="1. Yes")+
                      Behavior_Reverse+
                      Knowledge_Reverse+
                      Risk_Knowledge_Reverse+
                      Hesitancy, data=mydata, family=binomial)
#Risk Score makes the model unstable..Better performance / stability without it.



myresults=function(x){
result=noquote(pfunction(round(
  summary(x)$coefficients[,4],3),round(exp(summary(x)$coefficients[,1]),3)))
  return(result)}

mymat=cbind(c(myresults(mylmA), rep("",12)), 
            c(myresults(mylmB), rep("", 7)),
            c(myresults(mylmC), rep("",5)),
      myresults(mylmD))


rownames(mymat)=c("Intercept", "Non-White","Hispanic",
                   "Female","Freshman","Sophomore", "Junior",
                   "Health Major or Minor, Yes",
                "<25K", "25 to 50K", "50 to 75K", "75 to 100K",  
                "Insured, Yes", 
                "Other Party or Independent", "Republican","Flu Shot",
                "Behavorial Score", "Knowledge Score", "Risk Knowledge", "Hesitancy Score")

colnames(mymat)=rep(c("Odds"),4)



mymat%>%kbl(caption="Odds Ratios and p-values")%>%kable_classic(html_font = "Cambria")
Odds Ratios and p-values
Odds Odds Odds Odds
Intercept 0.19*** 0.22*** 0.072*** 0.005***
Non-White 1.32 1.294 1.835* 1.748
Hispanic 0.524* 0.518* 0.458* 0.558
Female 1.555* 1.529* 1.997** 2.808***
Freshman 1.008 1.052 0.866 0.612
Sophomore 0.83 0.822 0.769 0.729
Junior 1.016 1.013 1.077 0.938
Health Major or Minor, Yes 1.9** 1.932** 1.955** 2.665***
<25K 1.17 1.412 1.337
25 to 50K 0.91 1.18 1.234
50 to 75K 0.783 0.906 1.031
75 to 100K 1.293 1.588 1.746
Insured, Yes 0.808 0.797 1.003
Other Party or Independent 2.021** 1.787*
Republican 7.401*** 2.832**
Flu Shot 2.658***
Behavorial Score 1.755
Knowledge Score 0.671
Risk Knowledge 13.294**
Hesitancy Score 80.386***
#########################################################################


nagelkerke=function(mod){ 1 - mod$deviance / mod$null.deviance}
A=nagelkerke(mylmA)
B=nagelkerke(mylmB)
C=nagelkerke(mylmC)
D=nagelkerke(mylmD)
temp=round(rbind(A,B,C,D), 3)
rownames(temp)=c("Demographics Only", "Plus Income/Insurance", "Plus Politics", "Full")
myprint(temp,"Nagelkerke Psuedo R^2")
Nagelkerke Psuedo R^2
Demographics Only 0.034
Plus Income/Insurance 0.040
Plus Politics 0.123
Full 0.300
mytemp=anova(mybase,mymultiA,mymultiB, mymultiC, mymultiD)[,3]
mytemp=1-mytemp/mytemp[1]
names(mytemp)=c("NullModel", "Dem", "DemEcon","DemEconPol", "DemEconPolScores")
mytemp
##        NullModel              Dem          DemEcon       DemEconPol 
##           0.0000           0.0352           0.0415           0.0893 
## DemEconPolScores 
##           0.1936

LR Best Subsets

best.subset2 <- function(y, x.vars, data){
 length(x.vars) %>%
      model.grid %>%
      apply(1, function(x) which(x > 0, arr.ind = TRUE)) %>%
      map(function(x) x.vars[x]) %>%
      .[2:dim(model.grid(length(x.vars)))[1]] %>%
      map(function(x) 
        glm(paste0(y, " ~ ", paste(x, collapse = "+")), trace=FALSE, 
            data=mydata, family=binomial)) %>%
      map(function(x) AICc(x)) %>%
      do.call(rbind, .) %>%
      cbind(model.grid(length(x.vars))[-1, ], .) 
     }

y2=c('Willingness_2Cat')
xvars2=c("White","Hispanic", "Gender_2Cat", "Year", "Health_Maj_Min", 
        "Income", "Insurance", "Politics_3Cat", "Annual_Flu_Shot", "Behavior_Reverse", "Knowledge_Reverse", 
        "Risk_Score", "Risk_Knowledge_Reverse", "Hesitancy") 
#Risk Score makes model unstable...Much lower AIC without it.

mysubsets2=best.subset2(y2,xvars2,mydata)
colnames(mysubsets2)=c(xvars, "AIC")
mysubsets2=mysubsets2[order(mysubsets2$AIC),]
mysubsets2[1,]
##   White Hispanic Gender_2Cat Year Health_Maj_Min Income Insurance Politics_3Cat
## 2     1        0           0    0              0      0         0             0
##   Annual_Flu_Shot Behavior_Reverse Knowledge_Reverse Risk_Knowledge_Reverse
## 2               0                0                 0                      0
##   Hesitancy AIC  NA
## 2         0   0 651

LR, Final Model

#########################################################################
mylm1=glm(relevel(Willingness_2Cat, ref="Willing") ~
                      relevel(Gender_2Cat, ref="Male")+
                      relevel(Health_Maj_Min, ref="0. No")+
                      relevel(Politics_3Cat, ref="1. Democrat")+
                      relevel(Annual_Flu_Shot, ref="1. Yes")+
                      Risk_Knowledge_Reverse+Hesitancy, data=mydata, family=binomial)

mys=summary(mylm1)

z =mys$coefficients[,3]
p <- round((1 - pnorm(abs(z), 0, 1)) * 2,3)
Odds_Ratio=round(exp(coef(mylm1)),3)
result=rbind(Odds_Ratio,p)
result=t(result)
rownames(result)=c("Intercept", "Female","Health Major or Minor, Yes","Other Party or Independent", "Republican", "Annual Flu Shot",
                   "Risk Knowledge Score","Hesitancy Score")
result%>%kbl(caption="Odds and p-values, Referent category:  Willingness=1, Model for Unwillingness")%>%kable_classic(html_font = "Cambria")
Odds and p-values, Referent category: Willingness=1, Model for Unwillingness
Odds_Ratio p
Intercept 0.007 0.000
Female 2.480 0.000
Health Major or Minor, Yes 2.639 0.000
Other Party or Independent 1.691 0.063
Republican 2.523 0.005
Annual Flu Shot 2.636 0.000
Risk Knowledge Score 11.558 0.001
Hesitancy Score 87.385 0.000
#summary(mylm1)
a=1-mylm1$deviance/mylm1$null.deviance
names(a)=("McFadden R^2")

myprint(a, c(""))
McFadden R^2 0.286
#########################################################################

Classification Accuracy

#########################################################################
prob=as.factor(round(1-predict(mylm1, type="response"),0))

levels(prob)=c("0"="Predict Unwilling", "1"="Predict Willing")
myt=table(prob, mydata$Willingness_2Cat)

myprint(myt,c("Unwilling", "Willing"))
Unwilling Willing
Predict Unwilling 58 26
Predict Willing 77 453
fourfoldplot(myt, color = c("#CC6666", "#99CC99"),
             conf.level = 0, margin = 1, main = "Confusion Matrix")

#########################################################################

Assumptions of LR

a=vif(mylmD)
a%>%kbl()%>%kable_classic()
GVIF Df GVIF^(1/(2*Df))
relevel(White, ref = “0. White”) 2.20 1 1.48
relevel(Hispanic, ref = “0. Not Hispanic”) 2.04 1 1.43
relevel(Gender_2Cat, ref = “Male”) 1.21 1 1.10
relevel(Year, ref = “4. Senior”) 1.19 3 1.03
relevel(Health_Maj_Min, ref = “0. No”) 1.11 1 1.05
relevel(Income, ref = “5. >$100K”) 1.28 4 1.03
relevel(Insurance, ref = “0. No”) 1.14 1 1.07
relevel(Politics_3Cat, ref = “1. Democrat”) 1.40 2 1.09
relevel(Annual_Flu_Shot, ref = “1. Yes”) 1.12 1 1.06
Behavior_Reverse 1.64 1 1.28
Knowledge_Reverse 1.26 1 1.12
Risk_Knowledge_Reverse 1.83 1 1.35
Hesitancy 1.14 1 1.07
#No issue with colinearity
#########################################################################

Accuracy Metrics

acc=sum(diag(myt))/sum(myt)
ppv=myt[2,2]/sum(myt[2,])
npv=myt[1,1]/sum(myt[1,])
sens=myt[2,2]/sum(myt[,2])
spec=myt[1,1]/sum(myt[,1])
mynames=c("Accuracy", "PPV","NPV", "Sensitivity", "Specificity")
values=c(acc, ppv, npv, sens, spec)
names(values)=mynames
myprint(values, "Statistics")
Statistics
Accuracy 0.832
PPV 0.855
NPV 0.690
Sensitivity 0.946
Specificity 0.430

Citations

cite('Amelia')
## 
## To cite Amelia in publications use:
## 
##   James Honaker, Gary King, Matthew Blackwell (2011). Amelia II: A
##   Program for Missing Data. Journal of Statistical Software, 45(7),
##   1-47. URL https://www.jstatsoft.org/v45/i07/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {{Amelia II}: A Program for Missing Data},
##     author = {James Honaker and Gary King and Matthew Blackwell},
##     journal = {Journal of Statistical Software},
##     year = {2011},
##     volume = {45},
##     number = {7},
##     pages = {1--47},
##     url = {https://www.jstatsoft.org/v45/i07/},
##   }
cite('bestglm')
## 
## To cite package 'bestglm' in publications use:
## 
##   A.I. McLeod, Changjiang Xu and Yuanhao Lai (2020). bestglm: Best
##   Subset GLM and Regression Utilities. R package version 0.37.3.
##   https://CRAN.R-project.org/package=bestglm
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {bestglm: Best Subset GLM and Regression Utilities},
##     author = {A.I. McLeod and Changjiang Xu and Yuanhao Lai},
##     year = {2020},
##     note = {R package version 0.37.3},
##     url = {https://CRAN.R-project.org/package=bestglm},
##   }
## 
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
cite('car')
## 
## To cite the car package in publications use:
## 
##   John Fox and Sanford Weisberg (2019). An {R} Companion to Applied
##   Regression, Third Edition. Thousand Oaks CA: Sage. URL:
##   https://socialsciences.mcmaster.ca/jfox/Books/Companion/
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     title = {An {R} Companion to Applied Regression},
##     edition = {Third},
##     author = {John Fox and Sanford Weisberg},
##     year = {2019},
##     publisher = {Sage},
##     address = {Thousand Oaks {CA}},
##     url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/},
##   }
cite('carData')
## 
## To cite package 'carData' in publications use:
## 
##   John Fox, Sanford Weisberg and Brad Price (2020). carData: Companion
##   to Applied Regression Data Sets. R package version 3.0-4.
##   https://CRAN.R-project.org/package=carData
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {carData: Companion to Applied Regression Data Sets},
##     author = {John Fox and Sanford Weisberg and Brad Price},
##     year = {2020},
##     note = {R package version 3.0-4},
##     url = {https://CRAN.R-project.org/package=carData},
##   }
cite('corrplot')
## 
## To cite corrplot in publications use:
## 
##   Taiyun Wei and Viliam Simko (2021). R package 'corrplot':
##   Visualization of a Correlation Matrix (Version 0.90). Available from
##   https://github.com/taiyun/corrplot
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{corrplot2021,
##     title = {R package 'corrplot': Visualization of a Correlation Matrix},
##     author = {Taiyun Wei and Viliam Simko},
##     year = {2021},
##     note = {(Version 0.90)},
##     url = {https://github.com/taiyun/corrplot},
##   }
cite('dplyr')
## 
## To cite package 'dplyr' in publications use:
## 
##   Hadley Wickham, Romain François, Lionel Henry and Kirill Müller
##   (2021). dplyr: A Grammar of Data Manipulation. R package version
##   1.0.7. https://CRAN.R-project.org/package=dplyr
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {dplyr: A Grammar of Data Manipulation},
##     author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller},
##     year = {2021},
##     note = {R package version 1.0.7},
##     url = {https://CRAN.R-project.org/package=dplyr},
##   }
cite('ggcorrplot')
## 
## To cite package 'ggcorrplot' in publications use:
## 
##   Alboukadel Kassambara (2019). ggcorrplot: Visualization of a
##   Correlation Matrix using 'ggplot2'. R package version 0.1.3.
##   https://CRAN.R-project.org/package=ggcorrplot
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {ggcorrplot: Visualization of a Correlation Matrix using 'ggplot2'},
##     author = {Alboukadel Kassambara},
##     year = {2019},
##     note = {R package version 0.1.3},
##     url = {https://CRAN.R-project.org/package=ggcorrplot},
##   }
cite('ggpubr')
## 
## To cite package 'ggpubr' in publications use:
## 
##   Alboukadel Kassambara (2020). ggpubr: 'ggplot2' Based Publication
##   Ready Plots. R package version 0.4.0.
##   https://CRAN.R-project.org/package=ggpubr
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {ggpubr: 'ggplot2' Based Publication Ready Plots},
##     author = {Alboukadel Kassambara},
##     year = {2020},
##     note = {R package version 0.4.0},
##     url = {https://CRAN.R-project.org/package=ggpubr},
##   }
cite('gridExtra')
## 
## To cite package 'gridExtra' in publications use:
## 
##   Baptiste Auguie (2017). gridExtra: Miscellaneous Functions for "Grid"
##   Graphics. R package version 2.3.
##   https://CRAN.R-project.org/package=gridExtra
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {gridExtra: Miscellaneous Functions for "Grid" Graphics},
##     author = {Baptiste Auguie},
##     year = {2017},
##     note = {R package version 2.3},
##     url = {https://CRAN.R-project.org/package=gridExtra},
##   }
cite('gtable')
## 
## To cite package 'gtable' in publications use:
## 
##   Hadley Wickham and Thomas Lin Pedersen (2019). gtable: Arrange
##   'Grobs' in Tables. R package version 0.3.0.
##   https://CRAN.R-project.org/package=gtable
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {gtable: Arrange 'Grobs' in Tables},
##     author = {Hadley Wickham and Thomas Lin Pedersen},
##     year = {2019},
##     note = {R package version 0.3.0},
##     url = {https://CRAN.R-project.org/package=gtable},
##   }
cite('heplots')
## 
## To cite package 'heplots' in publications use:
## 
##   John Fox and Michael Friendly and and Georges Monette (2021).
##   heplots: Visualizing Tests in Multivariate Linear Models. R package
##   version 1.3-8. URL https://CRAN.R-project.org/package=heplots
## 
## To refer to the theory on which this package is based, also cite:
## 
##   Friendly, M. (2007).  HE plots for Multivariate General Linear
##   Models.  Journal of Computational and Graphical Statistics, 2007, 16,
##   421-444
## 
## For use with repeated measures designs also cite:
## 
##   Michael Friendly (2010). HE Plots for Repeated Measures Designs.
##   Journal of Statistical Software, 37(4), 1-40. URL
##   https://www.jstatsoft.org/v37/i04/.
## 
## BibTeX entries for LaTeX users: use 'toBibtex(citation("heplots"))'
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
cite('kableExtra')
## 
## To cite package 'kableExtra' in publications use:
## 
##   Hao Zhu (2021). kableExtra: Construct Complex Table with 'kable' and
##   Pipe Syntax. R package version 1.3.4.
##   https://CRAN.R-project.org/package=kableExtra
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {kableExtra: Construct Complex Table with 'kable' and Pipe Syntax},
##     author = {Hao Zhu},
##     year = {2021},
##     note = {R package version 1.3.4},
##     url = {https://CRAN.R-project.org/package=kableExtra},
##   }
cite('leaps')
## 
## To cite package 'leaps' in publications use:
## 
##   Thomas Lumley based on Fortran code by Alan Miller (2020). leaps:
##   Regression Subset Selection. R package version 3.1.
##   https://CRAN.R-project.org/package=leaps
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {leaps: Regression Subset Selection},
##     author = {Thomas Lumley based on Fortran code by Alan Miller},
##     year = {2020},
##     note = {R package version 3.1},
##     url = {https://CRAN.R-project.org/package=leaps},
##   }
## 
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
cite('magrittr')
## 
## To cite package 'magrittr' in publications use:
## 
##   Stefan Milton Bache and Hadley Wickham (2020). magrittr: A
##   Forward-Pipe Operator for R. R package version 2.0.1.
##   https://CRAN.R-project.org/package=magrittr
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {magrittr: A Forward-Pipe Operator for R},
##     author = {Stefan Milton Bache and Hadley Wickham},
##     year = {2020},
##     note = {R package version 2.0.1},
##     url = {https://CRAN.R-project.org/package=magrittr},
##   }
cite('MANOVA.RM')
## 
## To cite package 'MANOVA.RM' in publications use:
## 
##   Sarah Friedrich, Frank Konietschke and Markus Pauly (2021).
##   MANOVA.RM: Resampling-Based Analysis of Multivariate Data and
##   Repeated Measures Designs. R package version 0.5.1.
##   https://CRAN.R-project.org/package=MANOVA.RM
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {MANOVA.RM: Resampling-Based Analysis of Multivariate Data and Repeated
## Measures Designs},
##     author = {Sarah Friedrich and Frank Konietschke and Markus Pauly},
##     year = {2021},
##     note = {R package version 0.5.1},
##     url = {https://CRAN.R-project.org/package=MANOVA.RM},
##   }
## 
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
cite('MASS')
## 
## To cite the MASS package in publications use:
## 
##   Venables, W. N. & Ripley, B. D. (2002) Modern Applied Statistics with
##   S. Fourth Edition. Springer, New York. ISBN 0-387-95457-0
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     title = {Modern Applied Statistics with S},
##     author = {W. N. Venables and B. D. Ripley},
##     publisher = {Springer},
##     edition = {Fourth},
##     address = {New York},
##     year = {2002},
##     note = {ISBN 0-387-95457-0},
##     url = {http://www.stats.ox.ac.uk/pub/MASS4/},
##   }
cite('MVN')
## 
## To cite MVN in publications use:
## 
##   Korkmaz S, Goksuluk D, Zararsiz G. MVN: An R Package for Assessing
##   Multivariate Normality. The R Journal. 2014 6(2):151-162.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {MVN: An R Package for Assessing Multivariate Normality.},
##     author = {Selcuk Korkmaz and Dincer Goksuluk and Gokmen Zararsiz},
##     journal = {The R Journal},
##     year = {2014},
##     volume = {6},
##     number = {2},
##     pages = {151--162},
##     url = {https://journal.r-project.org/archive/2014-2/korkmaz-goksuluk-zararsiz.pdf},
##   }
cite('mvtnorm')
## 
## To cite package mvtnorm in publications use
## 
##   Alan Genz, Frank Bretz, Tetsuhisa Miwa, Xuefei Mi, Friedrich Leisch,
##   Fabian Scheipl, Torsten Hothorn (2021). mvtnorm: Multivariate Normal
##   and t Distributions. R package version 1.1-2. URL
##   http://CRAN.R-project.org/package=mvtnorm
## 
##   Alan Genz, Frank Bretz (2009), Computation of Multivariate Normal and
##   t Probabilities. Lecture Notes in Statistics, Vol. 195.,
##   Springer-Verlag, Heidelberg. ISBN 978-3-642-01688-2
## 
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
cite('nnet')
## 
## To cite the nnet package in publications use:
## 
##   Venables, W. N. & Ripley, B. D. (2002) Modern Applied Statistics with
##   S. Fourth Edition. Springer, New York. ISBN 0-387-95457-0
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     title = {Modern Applied Statistics with S},
##     author = {W. N. Venables and B. D. Ripley},
##     publisher = {Springer},
##     edition = {Fourth},
##     address = {New York},
##     year = {2002},
##     note = {ISBN 0-387-95457-0},
##     url = {https://www.stats.ox.ac.uk/pub/MASS4/},
##   }
cite('psych') #to describe
## 
## To cite the psych package in publications use:
## 
##   Revelle, W. (2021) psych: Procedures for Personality and
##   Psychological Research, Northwestern University, Evanston, Illinois,
##   USA, https://CRAN.R-project.org/package=psych Version = 2.1.6,.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {psych: Procedures for Psychological, Psychometric, and Personality Research},
##     author = {William Revelle},
##     organization = { Northwestern University},
##     address = { Evanston, Illinois},
##     year = {2021},
##     note = {R package version 2.1.6},
##     url = {https://CRAN.R-project.org/package=psych},
##   }
cite('purrr')
## 
## To cite package 'purrr' in publications use:
## 
##   Lionel Henry and Hadley Wickham (2020). purrr: Functional Programming
##   Tools. R package version 0.3.4.
##   https://CRAN.R-project.org/package=purrr
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {purrr: Functional Programming Tools},
##     author = {Lionel Henry and Hadley Wickham},
##     year = {2020},
##     note = {R package version 0.3.4},
##     url = {https://CRAN.R-project.org/package=purrr},
##   }
cite('ggplot2')
## 
## To cite ggplot2 in publications, please use:
## 
##   H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
##   Springer-Verlag New York, 2016.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Book{,
##     author = {Hadley Wickham},
##     title = {ggplot2: Elegant Graphics for Data Analysis},
##     publisher = {Springer-Verlag New York},
##     year = {2016},
##     isbn = {978-3-319-24277-4},
##     url = {https://ggplot2.tidyverse.org},
##   }
cite('ggcorrplot')
## 
## To cite package 'ggcorrplot' in publications use:
## 
##   Alboukadel Kassambara (2019). ggcorrplot: Visualization of a
##   Correlation Matrix using 'ggplot2'. R package version 0.1.3.
##   https://CRAN.R-project.org/package=ggcorrplot
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {ggcorrplot: Visualization of a Correlation Matrix using 'ggplot2'},
##     author = {Alboukadel Kassambara},
##     year = {2019},
##     note = {R package version 0.1.3},
##     url = {https://CRAN.R-project.org/package=ggcorrplot},
##   }
cite('qcc')
## 
## To cite qcc in publications use:
## 
##   Scrucca, L. (2004). qcc: an R package for quality control charting
##   and statistical process control. R News 4/1, 11-17.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {qcc: an R package for quality control charting and statistical process control},
##     author = {Luca Scrucca},
##     journal = {R News},
##     year = {2004},
##     pages = {11--17},
##     volume = {4/1},
##     url = {https://cran.r-project.org/doc/Rnews/},
##   }
cite('rcompanion')
## 
## To cite package 'rcompanion' in publications use:
## 
##   Salvatore Mangiafico (2021). rcompanion: Functions to Support
##   Extension Education Program Evaluation. R package version 2.4.1.
##   https://CRAN.R-project.org/package=rcompanion
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {rcompanion: Functions to Support Extension Education Program Evaluation},
##     author = {Salvatore Mangiafico},
##     year = {2021},
##     note = {R package version 2.4.1},
##     url = {https://CRAN.R-project.org/package=rcompanion},
##   }
cite('reticulate') #to use Python in R as well
## 
## To cite package 'reticulate' in publications use:
## 
##   Kevin Ushey, JJ Allaire and Yuan Tang (2021). reticulate: Interface
##   to 'Python'. R package version 1.21.
##   https://CRAN.R-project.org/package=reticulate
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {reticulate: Interface to 'Python'},
##     author = {Kevin Ushey and JJ Allaire and Yuan Tang},
##     year = {2021},
##     note = {R package version 1.21},
##     url = {https://CRAN.R-project.org/package=reticulate},
##   }
cite('reshape2')
## 
## To cite reshape2 in publications use:
## 
##   Hadley Wickham (2007). Reshaping Data with the reshape Package.
##   Journal of Statistical Software, 21(12), 1-20. URL
##   http://www.jstatsoft.org/v21/i12/.
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Reshaping Data with the {reshape} Package},
##     author = {Hadley Wickham},
##     journal = {Journal of Statistical Software},
##     year = {2007},
##     volume = {21},
##     number = {12},
##     pages = {1--20},
##     url = {http://www.jstatsoft.org/v21/i12/},
##   }
cite('ResourceSelection')
## 
## To cite package 'ResourceSelection' in publications use:
## 
##   Subhash R. Lele, Jonah L. Keim and Peter Solymos (2019).
##   ResourceSelection: Resource Selection (Probability) Functions for
##   Use-Availability Data. R package version 0.3-5.
##   https://CRAN.R-project.org/package=ResourceSelection
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {ResourceSelection: Resource Selection (Probability) Functions for Use-Availability
## Data},
##     author = {Subhash R. Lele and Jonah L. Keim and Peter Solymos},
##     year = {2019},
##     note = {R package version 0.3-5},
##     url = {https://CRAN.R-project.org/package=ResourceSelection},
##   }
cite('rstatix')
## 
## To cite package 'rstatix' in publications use:
## 
##   Alboukadel Kassambara (2021). rstatix: Pipe-Friendly Framework for
##   Basic Statistical Tests. R package version 0.7.0.
##   https://CRAN.R-project.org/package=rstatix
## 
## A BibTeX entry for LaTeX users is
## 
##   @Manual{,
##     title = {rstatix: Pipe-Friendly Framework for Basic Statistical Tests},
##     author = {Alboukadel Kassambara},
##     year = {2021},
##     note = {R package version 0.7.0},
##     url = {https://CRAN.R-project.org/package=rstatix},
##   }
cite('tidyverse')
## 
##   Wickham et al., (2019). Welcome to the tidyverse. Journal of Open
##   Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686
## 
## A BibTeX entry for LaTeX users is
## 
##   @Article{,
##     title = {Welcome to the {tidyverse}},
##     author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
##     year = {2019},
##     journal = {Journal of Open Source Software},
##     volume = {4},
##     number = {43},
##     pages = {1686},
##     doi = {10.21105/joss.01686},
##   }