Exploratory Data Analysis
Load Libraries/Data
#####################Read and Pre-Clean the Data#######################
require(AICcmodavg)
## Loading required package: AICcmodavg
## Warning: package 'AICcmodavg' was built under R version 4.0.5
require(Amelia)
## Loading required package: Amelia
## Warning: package 'Amelia' was built under R version 4.0.5
## Loading required package: Rcpp
## Warning: package 'Rcpp' was built under R version 4.0.5
## ##
## ## Amelia II: Multiple Imputation
## ## (Version 1.8.0, built: 2021-05-26)
## ## Copyright (C) 2005-2021 James Honaker, Gary King and Matthew Blackwell
## ## Refer to http://gking.harvard.edu/amelia/ for more information
## ##
require(bestglm)
## Loading required package: bestglm
## Warning: package 'bestglm' was built under R version 4.0.5
## Loading required package: leaps
## Warning: package 'leaps' was built under R version 4.0.5
require(car)
## Loading required package: car
## Warning: package 'car' was built under R version 4.0.5
## Loading required package: carData
require(carData)
require(corrplot)
## Loading required package: corrplot
## corrplot 0.90 loaded
require(dplyr)
## Loading required package: dplyr
## Warning: package 'dplyr' was built under R version 4.0.5
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:car':
##
## recode
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
require(ggcorrplot)
## Loading required package: ggcorrplot
## Warning: package 'ggcorrplot' was built under R version 4.0.5
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.5
require(ggpubr)
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.0.5
require(gridExtra)
## Loading required package: gridExtra
## Warning: package 'gridExtra' was built under R version 4.0.5
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
require(gtable)
## Loading required package: gtable
## Warning: package 'gtable' was built under R version 4.0.5
require(heplots)
## Loading required package: heplots
## Warning: package 'heplots' was built under R version 4.0.5
require(kableExtra)
## Loading required package: kableExtra
## Warning: package 'kableExtra' was built under R version 4.0.5
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
require(leaps)
require(magrittr)
## Loading required package: magrittr
## Warning: package 'magrittr' was built under R version 4.0.5
require(MANOVA.RM)
## Loading required package: MANOVA.RM
## Warning: package 'MANOVA.RM' was built under R version 4.0.5
require(MASS)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
require(MVN)
## Loading required package: MVN
## Warning: package 'MVN' was built under R version 4.0.5
require(mvtnorm)
## Loading required package: mvtnorm
## Warning: package 'mvtnorm' was built under R version 4.0.5
require(nnet)
## Loading required package: nnet
## Warning: package 'nnet' was built under R version 4.0.5
require(psych) #to describe
## Loading required package: psych
## Warning: package 'psych' was built under R version 4.0.5
##
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
## The following object is masked from 'package:car':
##
## logit
require(purrr)
## Loading required package: purrr
## Warning: package 'purrr' was built under R version 4.0.5
##
## Attaching package: 'purrr'
## The following object is masked from 'package:magrittr':
##
## set_names
## The following object is masked from 'package:car':
##
## some
require(ggplot2)
require(ggcorrplot)
require(qcc)
## Loading required package: qcc
## Warning: package 'qcc' was built under R version 4.0.5
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
require(rcompanion)
## Loading required package: rcompanion
## Warning: package 'rcompanion' was built under R version 4.0.5
##
## Attaching package: 'rcompanion'
## The following object is masked from 'package:psych':
##
## phi
require(reticulate) #to use Python in R as well
## Loading required package: reticulate
require(reshape2)
## Loading required package: reshape2
## Warning: package 'reshape2' was built under R version 4.0.5
require(ResourceSelection)
## Loading required package: ResourceSelection
## Warning: package 'ResourceSelection' was built under R version 4.0.5
## ResourceSelection 0.3-5 2019-07-22
require(rstatix)
## Loading required package: rstatix
## Warning: package 'rstatix' was built under R version 4.0.5
##
## Attaching package: 'rstatix'
## The following object is masked from 'package:MASS':
##
## select
## The following object is masked from 'package:ggcorrplot':
##
## cor_pmat
## The following object is masked from 'package:stats':
##
## filter
require(tidyverse)
## Loading required package: tidyverse
## Warning: package 'tidyverse' was built under R version 4.0.5
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v tibble 3.1.2 v stringr 1.4.0
## v tidyr 1.1.3 v forcats 0.5.1
## v readr 2.0.1
## Warning: package 'tibble' was built under R version 4.0.5
## Warning: package 'tidyr' was built under R version 4.0.5
## Warning: package 'readr' was built under R version 4.0.5
## Warning: package 'stringr' was built under R version 4.0.5
## Warning: package 'forcats' was built under R version 4.0.5
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x psych::%+%() masks ggplot2::%+%()
## x psych::alpha() masks ggplot2::alpha()
## x gridExtra::combine() masks dplyr::combine()
## x tidyr::extract() masks magrittr::extract()
## x rstatix::filter() masks dplyr::filter(), stats::filter()
## x kableExtra::group_rows() masks dplyr::group_rows()
## x dplyr::lag() masks stats::lag()
## x dplyr::recode() masks car::recode()
## x rstatix::select() masks MASS::select(), dplyr::select()
## x purrr::set_names() masks magrittr::set_names()
## x purrr::some() masks car::some()
Base Functions
#########################################################################
#Correlation Plot Function: lower-triangle correlogram with significance marks
corfunction=function(d){
  mycorr=cor(d)                       #correlation matrix
  p.mat=ggcorrplot::cor_pmat(d)       #matching matrix of p-values
  myplot=ggcorrplot(mycorr, hc.order=TRUE, type="lower",
                    colors=c("red", "white", "green"), tl.cex = 8,
                    tl.col = "black", lab=TRUE, lab_size=2, p.mat=p.mat,
                    insig="pch", pch=4)  #X out non-significant cells
  print(myplot)}
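#Usage sketch (editorial illustration, not part of the original run; runnable
#once mydata is read in below, and the column selection here is arbitrary):
corfunction(mydata[, c("Behavior_Reverse","Knowledge_Reverse",
                       "Risk_Knowledge_Reverse","Hesitancy","Risk_Score")])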
#Barplot Function, Horizontal; df must contain a count column named `n`
mybar=function(df,y,fill,perc,title){
  a=ggplot(df, aes(x='',y, fill=fill, label=paste0(n," (", scales::percent(perc), ")")))+
    geom_bar(position="stack", stat="identity", width=.5)+
    geom_text(size = 3, position = position_stack(vjust = 0.5))+
    xlab("")+
    ylab("")+
    ggtitle(title)+
    theme(legend.title = element_blank(),
          legend.text = element_text(size = 8),
          legend.position="right")+
    coord_flip()
  return(a)
}
#Barplot Function, Vertical (same as mybar but without coord_flip)
mybar2=function(df,y,fill,perc,title){
  a=ggplot(df, aes(x='',y, fill=fill, label=paste0(n," (", scales::percent(perc), ")")))+
    geom_bar(position="stack", stat="identity", width=.5)+
    geom_text(size = 3, position = position_stack(vjust = 0.5))+
    xlab("")+
    ylab("")+
    ggtitle(title)+
    theme(legend.title = element_blank(),
          legend.text = element_text(size = 8),
          legend.position="right")
  return(a)
}
#Barplot Function, 2 Component: Willingness breakdown within each category of y
mybar3=function(y,varname,rotate=90){
  myt=table(mydata$Willingness_4Cat, y)
  myt=apply(myt,2,function(z) z/sum(z))   #column proportions
  myt=as.data.frame(myt)
  myt$Willingness=rownames(myt)
  myt=melt(myt, id.vars="Willingness")
  colnames(myt)=c("Willingness", varname,"Percent")
  a=ggplot(myt, aes(myt[,2], Percent, fill=Willingness, label=scales::percent(Percent)))+
    geom_bar(stat = "identity", position = "stack") +
    geom_text(size = 3, angle=rotate, position = position_stack(vjust = 0.5))+
    coord_flip() +
    xlab("")+
    ylab("")+
    theme_minimal() + theme(legend.title=element_blank(), legend.position = "bottom")+
    ggtitle(varname)
  return(a)
}
#Boxplot Function, Y Variable (notched, flipped horizontal)
box=function(df,x,y,title){
  g=ggplot(df, aes(x, y, fill=x))+
    geom_boxplot(notch=TRUE)+
    ggtitle(title)+
    ylab("")+
    xlab("")+
    coord_flip()+
    theme(legend.position="none")
  return(g)
}
#Boxplot without Y variable
box2=function(df,x,title){
  g=ggplot(df, aes(x, fill="red"))+
    geom_boxplot()+
    ggtitle(title)+
    ylab("")+
    xlab("")+
    theme(legend.position="none")
  return(g)
}
#Pretty Print Function
myprint=function(x,y, z=""){
x%>%kbl(col.names=y, caption=z)%>%kable_classic(html_font = "Cambria",full_width=F)
}
#Citation Function
cite=function(x){citation(x)}
#P Function: append significance marks (*** p<.001, ** p<.01, * p<.05, + p<.1)
pfunction=function(p,x){
  for (i in 1:length(x)){
    if(p[i]>.1){x[i]=paste0(x[i],"")} else{
      if(p[i]<.001){x[i]=paste0(x[i],"***")} else{
        if(p[i]<.01){x[i]=paste0(x[i], "**")} else{
          if(p[i]<.05){x[i]=paste0(x[i], "*")} else
          {x[i]=paste0(x[i], "+")}
        }
      }
    }
  }
  return(noquote(x))
}
feedback=c("Boxplots", "Bargraphs", "Correlations", "Printing", "Citations", "P-Values")
myprint(feedback, "Functions")
| Functions |
|---|
| Boxplots |
| Bargraphs |
| Correlations |
| Printing |
| Citations |
| P-Values |
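A quick check of pfunction with made-up p-values and estimates (illustrative values, not study data):
pfunction(c(0.0005, 0.004, 0.03, 0.07, 0.5),       #hypothetical p-values
          c("0.41", "0.22", "0.15", "0.09", "0.02"))
## [1] 0.41*** 0.22**  0.15*   0.09+   0.02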
#########################################################################
Load / Impute
#########################################################################
mydata=read.csv("D:/Emily/data2.csv", stringsAsFactors = TRUE)
myprint(colnames(mydata),"Columns")
Columns: ID, Willingness_4Cat, Willingness_2Cat, Behavior_Reverse, Knowledge_Reverse,
Risk_Knowledge_Reverse, Hesitancy, Risk_Score, News_Frequency, Trust, Greater_Risk_COVID,
Had_COVID, Died_COVID, Wash_Hands, Wear_Mask, Social_Distance, Behavior_Score, Air_Spread,
Mask_Effective, Asymptomatic_Pose_Risk, College_Deaths, Mostly_Mild, Knowledge_Score,
Concern_Self, Likely_to_Get, Likely_Get_Sick, Live_Others, Class_in_Person, Inperson_Job,
Medical_Condition, Super_Spreader, No_Distancing, No_Mask, Travel, Safe_Live_Others,
Safe_Attend_Class, Safe_F2F_Job, Safe_Underlying_Condition, Safe_Gatherings..50,
Safe_Socials_No_Distancing, Safe_Socials_No_Mask, Safe_Travel, Risk_Knowledge_Score,
Concern_Others, Change_Others, Definition_Vaccine, Safety_Vaccine, Better_to_be_Sick,
Over_Vaccination_Harmful, Herd_Immunity, Hesitancy_Score, Child_Vax, Annual_Flu_Shot,
Flu_Shot_Reason, Meningitis_Required, COVID_Required, Family_Accept, Friends_Accept,
US_Accept, CDC_Freq, Local_News_Freq, National_News_Freq, International_News_Freq,
Social_Media_Freq, Texas_State_Freq, College_Friends_Freq, Parents_Freq, Professors_Freq,
Adults_Freq, Doctor_Freq, Frequency_Score, Trust_CDC, Trust_Local, Trust_National,
Trust_International, Trust_Social_Media, Trust_Texas_State, Trust_Friends, Trust_Parents,
Trust_Professors, Trust_Adults, Trust_Doctor, Trust_Score, Politics_5Cat, Politics_3Cat,
Race, White, Hispanic, Gender, Gender_2Cat, Age, Age_Cat, Year, Health_Maj_Min, Income,
Insurance
#########################################################################
Check Missing
#########################################################################
missmap(mydata)

#########################################################################
Descriptives
Gender Imputation of Other
tempdata=mydata[mydata$Gender!='Other',]  #Set up temporary data
tempdata$Gender=factor(tempdata$Gender)   #Reset gender on 2 levels
preddata=mydata[mydata$Gender=='Other',]  #Keep a prediction group
###Variables Significant to the Prediction###
mylr=glm(Gender~Race+Age+Year+Willingness_4Cat,
         data=tempdata, family='binomial')
###Generate Predictions###
mypred=mylr%>%predict(preddata, type='response')
mypred=round(mypred,0)        #glm codes the first level (Female) as 0, Male as 1
mypred[mypred==0]='Female'
mypred[mypred==1]='Male'
###Impute###
mydata$Gender_2Cat=mydata$Gender
myseq=c(2,54,62,94,111,177,186,250,309,316,383,492,522,553) #row indices of 'Other'
j=1
for (i in myseq){
  mydata$Gender_2Cat[i]=mypred[j]
  j=j+1}
mydata$Gender_2Cat=factor(mydata$Gender_2Cat) ##reset to 2 levels
table(mydata$Gender_2Cat)%>%kbl()%>%kable_classic(html_font='Cambria')
| Var1 | Freq |
|---|---|
| Female | 307 |
| Male | 307 |
write.csv(mydata, "D:/Emily/data2.csv", row.names=FALSE) #save the imputed data back to the source file
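A more compact, equivalent recode is sketched below. This is an editorial illustration, not part of the original run; it assumes the Female=0/Male=1 coding noted above and thresholds the predicted probability at 0.5 rather than rounding.
#Sketch: ifelse-based version of the imputation above (illustrative only)
pred_lab = ifelse(predict(mylr, preddata, type = "response") >= 0.5,
                  "Male", "Female")
mydata$Gender_2Cat = as.character(mydata$Gender)
mydata$Gender_2Cat[mydata$Gender == "Other"] = pred_lab  #same rows as myseq
mydata$Gender_2Cat = factor(mydata$Gender_2Cat)          #two levels remain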
Overall Descriptives
round(describe(mydata),3)%>%kbl(caption="Descriptives")%>%kable_classic()
Descriptives

| | vars | n | mean | sd | median | trimmed | mad | min | max | range | skew | kurtosis | se |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| ID | 1 | 614 | 307.500 | 177.391 | 307.500 | 307.500 | 227.579 | 1.00 | 614.000 | 613.000 | 0.000 | -1.206 | 7.159 |
| Willingness_4Cat* | 2 | 614 | 3.083 | 0.970 | 3.000 | 3.228 | 1.483 | 1.00 | 4.000 | 3.000 | -0.862 | -0.245 | 0.039 |
| Willingness_2Cat* | 3 | 614 | 1.780 | 0.414 | 2.000 | 1.850 | 0.000 | 1.00 | 2.000 | 1.000 | -1.349 | -0.179 | 0.017 |
| Behavior_Reverse | 4 | 614 | 0.157 | 0.160 | 0.091 | 0.132 | 0.135 | 0.00 | 0.818 | 0.818 | 1.467 | 2.436 | 0.006 |
| Knowledge_Reverse | 5 | 614 | 0.113 | 0.153 | 0.000 | 0.084 | 0.000 | 0.00 | 0.800 | 0.800 | 1.507 | 2.675 | 0.006 |
| Risk_Knowledge_Reverse | 6 | 614 | 0.224 | 0.159 | 0.194 | 0.202 | 0.143 | 0.00 | 0.774 | 0.774 | 1.309 | 1.636 | 0.006 |
| Hesitancy | 7 | 614 | 0.201 | 0.212 | 0.200 | 0.170 | 0.297 | 0.00 | 1.000 | 1.000 | 1.071 | 0.941 | 0.009 |
| Risk_Score | 8 | 614 | 0.568 | 0.201 | 0.625 | 0.578 | 0.185 | 0.00 | 1.000 | 1.000 | -0.491 | -0.395 | 0.008 |
| News_Frequency | 9 | 614 | 0.551 | 0.137 | 0.545 | 0.545 | 0.135 | 0.25 | 1.000 | 0.750 | 0.448 | 0.319 | 0.006 |
| Trust | 10 | 614 | 0.598 | 0.116 | 0.591 | 0.602 | 0.101 | 0.25 | 1.000 | 0.750 | -0.274 | 0.808 | 0.005 |
| Greater_Risk_COVID* | 11 | 614 | 1.713 | 0.453 | 2.000 | 1.766 | 0.000 | 1.00 | 2.000 | 1.000 | -0.941 | -1.116 | 0.018 |
| Had_COVID* | 12 | 614 | 1.790 | 0.408 | 2.000 | 1.862 | 0.000 | 1.00 | 2.000 | 1.000 | -1.420 | 0.016 | 0.016 |
| Died_COVID* | 13 | 614 | 1.168 | 0.374 | 1.000 | 1.085 | 0.000 | 1.00 | 2.000 | 1.000 | 1.774 | 1.149 | 0.015 |
| Wash_Hands* | 14 | 614 | 3.205 | 0.878 | 3.000 | 3.317 | 1.483 | 1.00 | 4.000 | 3.000 | -0.843 | -0.186 | 0.035 |
| Wear_Mask* | 15 | 614 | 3.774 | 0.604 | 4.000 | 3.937 | 0.000 | 1.00 | 4.000 | 3.000 | -2.915 | 8.266 | 0.024 |
| Social_Distance* | 16 | 614 | 3.296 | 0.804 | 3.000 | 3.415 | 1.483 | 1.00 | 4.000 | 3.000 | -0.998 | 0.425 | 0.032 |
| Behavior_Score | 17 | 614 | 10.275 | 1.763 | 11.000 | 10.551 | 1.483 | 3.00 | 12.000 | 9.000 | -1.467 | 2.436 | 0.071 |
| Air_Spread* | 18 | 614 | 1.839 | 0.368 | 2.000 | 1.923 | 0.000 | 1.00 | 2.000 | 1.000 | -1.838 | 1.380 | 0.015 |
| Mask_Effective* | 19 | 614 | 1.935 | 0.247 | 2.000 | 2.000 | 0.000 | 1.00 | 2.000 | 1.000 | -3.516 | 10.376 | 0.010 |
| Asymptomatic_Pose_Risk* | 20 | 614 | 1.945 | 0.229 | 2.000 | 2.000 | 0.000 | 1.00 | 2.000 | 1.000 | -3.879 | 13.065 | 0.009 |
| College_Deaths* | 21 | 614 | 1.940 | 0.238 | 2.000 | 2.000 | 0.000 | 1.00 | 2.000 | 1.000 | -3.687 | 11.611 | 0.010 |
| Mostly_Mild* | 22 | 614 | 1.777 | 0.417 | 2.000 | 1.846 | 0.000 | 1.00 | 2.000 | 1.000 | -1.327 | -0.240 | 0.017 |
| Knowledge_Score* | 23 | 614 | 4.435 | 0.763 | 5.000 | 4.581 | 0.000 | 1.00 | 5.000 | 4.000 | -1.507 | 2.675 | 0.031 |
| Concern_Self* | 24 | 614 | 2.446 | 0.912 | 2.000 | 2.433 | 1.483 | 1.00 | 4.000 | 3.000 | -0.041 | -0.832 | 0.037 |
| Likely_to_Get* | 25 | 614 | 2.549 | 0.828 | 3.000 | 2.561 | 1.483 | 1.00 | 4.000 | 3.000 | -0.190 | -0.519 | 0.033 |
| Likely_Get_Sick* | 26 | 614 | 3.112 | 0.995 | 3.000 | 3.142 | 1.483 | 1.00 | 5.000 | 4.000 | -0.236 | 0.022 | 0.040 |
| Live_Others* | 27 | 614 | 1.917 | 0.276 | 2.000 | 2.000 | 0.000 | 1.00 | 2.000 | 1.000 | -3.014 | 7.097 | 0.011 |
| Class_in_Person* | 28 | 614 | 1.547 | 0.498 | 2.000 | 1.559 | 0.000 | 1.00 | 2.000 | 1.000 | -0.189 | -1.967 | 0.020 |
| Inperson_Job* | 29 | 614 | 1.518 | 0.500 | 2.000 | 1.522 | 0.000 | 1.00 | 2.000 | 1.000 | -0.072 | -1.998 | 0.020 |
| Medical_Condition* | 30 | 614 | 1.145 | 0.352 | 1.000 | 1.057 | 0.000 | 1.00 | 2.000 | 1.000 | 2.012 | 2.052 | 0.014 |
| Super_Spreader* | 31 | 614 | 1.223 | 0.417 | 1.000 | 1.154 | 0.000 | 1.00 | 2.000 | 1.000 | 1.327 | -0.240 | 0.017 |
| No_Distancing* | 32 | 614 | 1.337 | 0.473 | 1.000 | 1.297 | 0.000 | 1.00 | 2.000 | 1.000 | 0.687 | -1.530 | 0.019 |
| No_Mask* | 33 | 614 | 1.257 | 0.438 | 1.000 | 1.197 | 0.000 | 1.00 | 2.000 | 1.000 | 1.107 | -0.775 | 0.018 |
| Travel* | 34 | 614 | 1.513 | 0.500 | 2.000 | 1.516 | 0.000 | 1.00 | 2.000 | 1.000 | -0.052 | -2.001 | 0.020 |
| Safe_Live_Others* | 35 | 614 | 2.218 | 0.800 | 2.000 | 2.215 | 1.483 | 1.00 | 4.000 | 3.000 | 0.122 | -0.577 | 0.032 |
| Safe_Attend_Class* | 36 | 614 | 2.762 | 0.924 | 3.000 | 2.827 | 1.483 | 1.00 | 4.000 | 3.000 | -0.381 | -0.669 | 0.037 |
| Safe_F2F_Job* | 37 | 614 | 2.992 | 0.877 | 3.000 | 3.077 | 1.483 | 1.00 | 4.000 | 3.000 | -0.608 | -0.300 | 0.035 |
| Safe_Underlying_Condition* | 38 | 614 | 3.526 | 0.784 | 4.000 | 3.707 | 0.000 | 1.00 | 4.000 | 3.000 | -1.727 | 2.381 | 0.032 |
| Safe_Gatherings..50* | 39 | 614 | 3.603 | 0.721 | 4.000 | 3.770 | 0.000 | 1.00 | 4.000 | 3.000 | -1.896 | 3.082 | 0.029 |
| Safe_Socials_No_Distancing* | 40 | 614 | 3.515 | 0.746 | 4.000 | 3.671 | 0.000 | 1.00 | 4.000 | 3.000 | -1.602 | 2.188 | 0.030 |
| Safe_Socials_No_Mask* | 41 | 614 | 3.562 | 0.748 | 4.000 | 3.730 | 0.000 | 1.00 | 4.000 | 3.000 | -1.821 | 2.875 | 0.030 |
| Safe_Travel* | 42 | 614 | 2.871 | 0.817 | 3.000 | 2.907 | 1.483 | 1.00 | 4.000 | 3.000 | -0.352 | -0.387 | 0.033 |
| Risk_Knowledge_Score | 43 | 614 | 25.049 | 4.934 | 26.000 | 25.732 | 4.448 | 8.00 | 32.000 | 24.000 | -1.309 | 1.636 | 0.199 |
| Concern_Others* | 44 | 614 | 3.015 | 0.846 | 3.000 | 3.077 | 1.483 | 1.00 | 4.000 | 3.000 | -0.496 | -0.467 | 0.034 |
| Change_Others* | 45 | 614 | 3.590 | 0.754 | 4.000 | 3.764 | 0.000 | 1.00 | 4.000 | 3.000 | -2.014 | 3.606 | 0.030 |
| Definition_Vaccine* | 46 | 614 | 1.954 | 0.209 | 2.000 | 2.000 | 0.000 | 1.00 | 2.000 | 1.000 | -4.346 | 16.911 | 0.008 |
| Safety_Vaccine* | 47 | 614 | 1.853 | 0.354 | 2.000 | 1.941 | 0.000 | 1.00 | 2.000 | 1.000 | -1.994 | 1.978 | 0.014 |
| Better_to_be_Sick* | 48 | 614 | 1.818 | 0.386 | 2.000 | 1.896 | 0.000 | 1.00 | 2.000 | 1.000 | -1.641 | 0.693 | 0.016 |
| Over_Vaccination_Harmful* | 49 | 614 | 1.557 | 0.497 | 2.000 | 1.571 | 0.000 | 1.00 | 2.000 | 1.000 | -0.229 | -1.951 | 0.020 |
| Herd_Immunity* | 50 | 614 | 1.811 | 0.392 | 2.000 | 1.888 | 0.000 | 1.00 | 2.000 | 1.000 | -1.585 | 0.515 | 0.016 |
| Hesitancy_Score* | 51 | 614 | 4.993 | 1.062 | 5.000 | 5.148 | 1.483 | 1.00 | 6.000 | 5.000 | -1.071 | 0.941 | 0.043 |
| Child_Vax* | 52 | 614 | 3.871 | 0.421 | 4.000 | 3.994 | 0.000 | 1.00 | 4.000 | 3.000 | -4.042 | 19.344 | 0.017 |
| Annual_Flu_Shot* | 53 | 614 | 1.441 | 0.497 | 1.000 | 1.427 | 0.000 | 1.00 | 2.000 | 1.000 | 0.236 | -1.948 | 0.020 |
| Flu_Shot_Reason* | 54 | 614 | 5.265 | 2.733 | 5.000 | 5.376 | 4.448 | 1.00 | 10.000 | 9.000 | -0.228 | -1.283 | 0.110 |
| Meningitis_Required* | 55 | 614 | 3.865 | 0.464 | 4.000 | 4.000 | 0.000 | 1.00 | 4.000 | 3.000 | -3.988 | 17.172 | 0.019 |
| COVID_Required* | 56 | 614 | 2.743 | 1.156 | 3.000 | 2.803 | 1.483 | 1.00 | 4.000 | 3.000 | -0.184 | -1.476 | 0.047 |
| Family_Accept* | 57 | 614 | 2.417 | 1.051 | 3.000 | 2.396 | 1.483 | 1.00 | 4.000 | 3.000 | -0.201 | -1.289 | 0.042 |
| Friends_Accept* | 58 | 614 | 2.143 | 1.059 | 2.000 | 2.057 | 1.483 | 1.00 | 4.000 | 3.000 | 0.207 | -1.383 | 0.043 |
| US_Accept* | 59 | 614 | 2.651 | 0.685 | 3.000 | 2.661 | 0.000 | 1.00 | 4.000 | 3.000 | -0.342 | 0.033 | 0.028 |
| CDC_Freq* | 60 | 614 | 2.244 | 0.871 | 2.000 | 2.189 | 1.483 | 1.00 | 4.000 | 3.000 | 0.350 | -0.518 | 0.035 |
| Local_News_Freq* | 61 | 614 | 2.132 | 0.878 | 2.000 | 2.075 | 1.483 | 1.00 | 4.000 | 3.000 | 0.377 | -0.589 | 0.035 |
| National_News_Freq* | 62 | 614 | 2.391 | 0.919 | 2.000 | 2.364 | 1.483 | 1.00 | 4.000 | 3.000 | 0.108 | -0.826 | 0.037 |
| International_News_Freq* | 63 | 614 | 1.976 | 0.871 | 2.000 | 1.896 | 1.483 | 1.00 | 4.000 | 3.000 | 0.580 | -0.397 | 0.035 |
| Social_Media_Freq* | 64 | 614 | 2.609 | 1.050 | 3.000 | 2.636 | 1.483 | 1.00 | 4.000 | 3.000 | -0.107 | -1.195 | 0.042 |
| Texas_State_Freq* | 65 | 614 | 2.441 | 0.881 | 2.000 | 2.427 | 1.483 | 1.00 | 4.000 | 3.000 | 0.107 | -0.701 | 0.036 |
| College_Friends_Freq* | 66 | 614 | 2.160 | 0.891 | 2.000 | 2.083 | 1.483 | 1.00 | 4.000 | 3.000 | 0.472 | -0.463 | 0.036 |
| Parents_Freq* | 67 | 614 | 2.386 | 0.929 | 2.000 | 2.358 | 1.483 | 1.00 | 4.000 | 3.000 | 0.236 | -0.795 | 0.038 |
| Professors_Freq* | 68 | 614 | 1.923 | 0.850 | 2.000 | 1.839 | 1.483 | 1.00 | 4.000 | 3.000 | 0.654 | -0.226 | 0.034 |
| Adults_Freq* | 69 | 614 | 1.839 | 0.809 | 2.000 | 1.744 | 1.483 | 1.00 | 4.000 | 3.000 | 0.799 | 0.217 | 0.033 |
| Doctor_Freq* | 70 | 614 | 2.125 | 0.974 | 2.000 | 2.033 | 1.483 | 1.00 | 4.000 | 3.000 | 0.520 | -0.711 | 0.039 |
| Frequency_Score | 71 | 614 | 24.226 | 6.049 | 24.000 | 23.974 | 5.930 | 11.00 | 44.000 | 33.000 | 0.448 | 0.319 | 0.244 |
| Trust_CDC* | 72 | 614 | 3.163 | 0.892 | 3.000 | 3.256 | 1.483 | 1.00 | 4.000 | 3.000 | -0.681 | -0.604 | 0.036 |
| Trust_Local* | 73 | 614 | 2.246 | 0.709 | 2.000 | 2.264 | 0.000 | 1.00 | 4.000 | 3.000 | 0.186 | -0.140 | 0.029 |
| Trust_National* | 74 | 614 | 2.275 | 0.780 | 2.000 | 2.278 | 1.483 | 1.00 | 4.000 | 3.000 | 0.140 | -0.418 | 0.031 |
| Trust_International* | 75 | 614 | 2.322 | 0.794 | 2.000 | 2.321 | 1.483 | 1.00 | 4.000 | 3.000 | 0.143 | -0.431 | 0.032 |
| Trust_Social_Media* | 76 | 614 | 1.702 | 0.621 | 2.000 | 1.650 | 0.000 | 1.00 | 4.000 | 3.000 | 0.467 | 0.174 | 0.025 |
| Trust_Texas_State* | 77 | 614 | 2.673 | 0.782 | 3.000 | 2.675 | 1.483 | 1.00 | 4.000 | 3.000 | -0.194 | -0.348 | 0.032 |
| Trust_Friends* | 78 | 614 | 1.932 | 0.590 | 2.000 | 1.904 | 0.000 | 1.00 | 4.000 | 3.000 | 0.254 | 0.756 | 0.024 |
| Trust_Parents* | 79 | 614 | 2.436 | 0.772 | 2.000 | 2.431 | 1.483 | 1.00 | 4.000 | 3.000 | 0.159 | -0.355 | 0.031 |
| Trust_Professors* | 80 | 614 | 2.337 | 0.703 | 2.000 | 2.356 | 0.000 | 1.00 | 4.000 | 3.000 | 0.182 | -0.138 | 0.028 |
| Trust_Adults* | 81 | 614 | 2.010 | 0.604 | 2.000 | 1.998 | 0.000 | 1.00 | 4.000 | 3.000 | 0.306 | 0.737 | 0.024 |
| Trust_Doctor* | 82 | 614 | 3.199 | 0.793 | 3.000 | 3.278 | 1.483 | 1.00 | 4.000 | 3.000 | -0.662 | -0.291 | 0.032 |
| Trust_Score | 83 | 614 | 26.295 | 5.110 | 26.000 | 26.488 | 4.448 | 11.00 | 44.000 | 33.000 | -0.274 | 0.807 | 0.206 |
| Politics_5Cat* | 84 | 614 | 2.003 | 1.122 | 2.000 | 1.880 | 1.483 | 1.00 | 4.000 | 3.000 | 0.623 | -1.093 | 0.045 |
| Politics_3Cat* | 85 | 614 | 1.707 | 0.761 | 2.000 | 1.634 | 1.483 | 1.00 | 3.000 | 2.000 | 0.546 | -1.091 | 0.031 |
| Race* | 86 | 614 | 1.769 | 0.997 | 1.000 | 1.575 | 0.000 | 1.00 | 5.000 | 4.000 | 1.450 | 1.672 | 0.040 |
| White* | 87 | 614 | 1.497 | 0.500 | 1.000 | 1.496 | 0.000 | 1.00 | 2.000 | 1.000 | 0.013 | -2.003 | 0.020 |
| Hispanic* | 88 | 614 | 1.360 | 0.480 | 1.000 | 1.325 | 0.000 | 1.00 | 2.000 | 1.000 | 0.582 | -1.664 | 0.019 |
| Gender* | 89 | 614 | 1.534 | 0.543 | 2.000 | 1.514 | 1.483 | 1.00 | 3.000 | 2.000 | 0.292 | -1.068 | 0.022 |
| Gender_2Cat* | 90 | 614 | 1.500 | 0.500 | 1.500 | 1.500 | 0.741 | 1.00 | 2.000 | 1.000 | 0.000 | -2.003 | 0.020 |
| Age | 91 | 614 | 20.300 | 1.499 | 20.000 | 20.270 | 1.483 | 18.00 | 24.000 | 6.000 | 0.112 | -0.912 | 0.060 |
| Age_Cat* | 92 | 614 | 3.296 | 1.492 | 3.000 | 3.270 | 1.483 | 1.00 | 6.000 | 5.000 | 0.090 | -0.960 | 0.060 |
| Year* | 93 | 614 | 2.764 | 1.105 | 3.000 | 2.829 | 1.483 | 1.00 | 4.000 | 3.000 | -0.321 | -1.255 | 0.045 |
| Health_Maj_Min* | 94 | 614 | 1.384 | 0.487 | 1.000 | 1.356 | 0.000 | 1.00 | 2.000 | 1.000 | 0.474 | -1.778 | 0.020 |
| Income* | 95 | 614 | 2.919 | 1.497 | 3.000 | 2.898 | 1.483 | 1.00 | 5.000 | 4.000 | 0.069 | -1.428 | 0.060 |
| Insurance* | 96 | 614 | 1.816 | 0.388 | 2.000 | 1.894 | 0.000 | 1.00 | 2.000 | 1.000 | -1.627 | 0.647 | 0.016 |
par(mfrow=c(2,2))
boxplot(mydata$Behavior_Reverse, horizontal=TRUE, notch=FALSE, col='red', main="Misbehavior", ylim=c(0,1))
boxplot(mydata$Knowledge_Reverse, horizontal=TRUE, notch=FALSE, col='red', main="(Lack of) Knowledge",ylim=c(0,1))
boxplot(mydata$Risk_Knowledge_Reverse, horizontal=TRUE, notch=FALSE, col='red', main="(Lack of) Risk Knowledge",ylim=c(0,1))
boxplot(mydata$Hesitancy, horizontal=TRUE, notch=FALSE, col='red', main="Hesitancy",ylim=c(0,1))

Gender, Q72
#########################################################################
options(digits=3)
mydata%>%count(Gender)%>%mutate(perc = n / nrow(mydata)) -> mydf1
mydata%>%count(Gender_2Cat)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf1,colnames(mydf1), "Gender")
Gender

| Gender | n | perc |
|---|---|---|
| Female | 300 | 0.489 |
| Male | 300 | 0.489 |
| Other | 14 | 0.023 |
myprint(mydf2,colnames(mydf2), "Gender")
Gender

| Gender_2Cat | n | perc |
|---|---|---|
| Female | 307 | 0.5 |
| Male | 307 | 0.5 |
colnames(mydf1)=c("Gender", "n", "perc")
colnames(mydf2)=c("Gender", "n", "perc")
a=mybar(mydf1,mydf1$n,mydf1$Gender,mydf1$perc, "Gender")
b=mybar(mydf2,mydf2$n,mydf2$Gender,mydf2$perc,"Gender")
print(ggarrange(a,b,nrow=2))

mybar3(mydata$Gender_2Cat,"Gender")

#########################################################################
Ethnicity / Race, Q71
#########################################################################
mydata%>%count(Race)%>%mutate(perc = n / nrow(mydata)) -> mydf1
mydata%>%count(White)%>%mutate(perc = n / nrow(mydata)) -> mydf2
mydata%>%count(Hispanic)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf1,colnames(mydf1))
| Race | n | perc |
|---|---|---|
| White | 309 | 0.503 |
| Hisp. White | 205 | 0.334 |
| Other | 49 | 0.080 |
| Black | 35 | 0.057 |
| Hisp. Black | 16 | 0.026 |
myprint(mydf2,colnames(mydf2))
| White | n | perc |
|---|---|---|
| White | 309 | 0.503 |
| Non-White | 305 | 0.497 |
myprint(mydf3,colnames(mydf3))
| Hispanic | n | perc |
|---|---|---|
| Not Hispanic | 393 | 0.64 |
| Hispanic | 221 | 0.36 |
a=mybar2(mydf1,mydf1$n,mydf1$Race, mydf1$perc,"Race")
b=mybar(mydf2,mydf2$n,mydf2$White,mydf2$perc,"Race")
d=mybar(mydf3, mydf3$n, mydf3$Hispanic, mydf3$perc, "Hispanic")
print(ggarrange(b,d,nrow=2))

print(a)

mybar3(mydata$Race,"Race", 45)

#########################################################################
Year, Q74
#########################################################################
mydata%>%count(Year)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Year | n | perc |
|---|---|---|
| Freshman | 110 | 0.179 |
| Sophomore | 134 | 0.218 |
| Junior | 161 | 0.262 |
| Senior | 209 | 0.340 |
mybar(mydf1,mydf1$n,mydf1$Year,mydf1$perc,"Year")

mybar3(mydata$Year,"Year")

#########################################################################
Income, Q76
#########################################################################
mydata%>%count(Income)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Income | n | perc |
|---|---|---|
| <$25K | 157 | 0.256 |
| <$50K | 110 | 0.179 |
| <$75K | 106 | 0.173 |
| <$100K | 108 | 0.176 |
| >$100K | 133 | 0.217 |
mybar(mydf1,mydf1$n,mydf1$Income,mydf1$perc,"Household Income")

mybar3(mydata$Income,"Income")

#########################################################################
Politics, Q70
#########################################################################
mydata%>%count(Politics_5Cat)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Politics_5Cat | n | perc |
|---|---|---|
| Democrat | 294 | 0.479 |
| Independent | 115 | 0.187 |
| Republican | 114 | 0.186 |
| Other | 91 | 0.148 |
mybar(mydf1,mydf1$n,mydf1$Politics_5Cat,mydf1$perc,"Politics")

mydata%>%count(Politics_3Cat)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Politics_3Cat | n | perc |
|---|---|---|
| Democrat | 294 | 0.479 |
| Other | 206 | 0.336 |
| Republican | 114 | 0.186 |
mybar(mydf1,mydf1$n,mydf1$Politics_3Cat,mydf1$perc,"Politics")

mybar3(mydata$Politics_3Cat,"Politics")

#########################################################################
Health Science Field, Q75
#########################################################################
mydata%>%count(Health_Maj_Min)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Health_Maj_Min | n | perc |
|---|---|---|
| No | 378 | 0.616 |
| Yes | 236 | 0.384 |
mybar(mydf1,mydf1$n,mydf1$Health_Maj_Min,mydf1$perc,"Health Science Major / Minor")

mybar3(mydata$Health_Maj_Min,"Health Major or Minor")

#########################################################################
Take Vaccine, Q1
#########################################################################
mydata%>%count(Willingness_4Cat)%>%mutate(perc = n / nrow(mydata)) -> mydf1
mydata%>%count(Willingness_2Cat)%>%mutate(perc = n / nrow(mydata)) -> mydf2
colnames(mydf1)=c("Willingness", "n", "perc")
colnames(mydf2)=c("Willingness", "n", "perc")
myprint(mydf1,colnames(mydf1))
| Willingness | n | perc |
|---|---|---|
| Never | 65 | 0.106 |
| If Required | 70 | 0.114 |
| Eventually | 228 | 0.371 |
| ASAP | 251 | 0.409 |
myprint(mydf2,colnames(mydf2))
| Willingness | n | perc |
|---|---|---|
| Unwilling | 135 | 0.22 |
| Willing | 479 | 0.78 |
a=mybar(mydf1,mydf1$n,mydf1$Willingness,mydf1$perc,"Willingness")
b=mybar(mydf2,mydf2$n,mydf2$Willingness,mydf2$perc,"Willingness")
grid.arrange(a,b,nrow=2)

#########################################################################
Age, Q73
#########################################################################
mydata%>%count(Age)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Age | n | perc |
|---|---|---|
| 18 | 87 | 0.142 |
| 19 | 114 | 0.186 |
| 20 | 137 | 0.223 |
| 21 | 131 | 0.213 |
| 22 | 96 | 0.156 |
| 23 | 47 | 0.077 |
| 24 | 2 | 0.003 |
boxplot(mydata$Age, col="red", main="Age", horizontal=TRUE)

mybar3(mydata$Age_Cat,"Age")

#########################################################################
Insurance, Q77
#########################################################################
mydata%>%count(Insurance)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Insurance | n | perc |
|---|---|---|
| No | 113 | 0.184 |
| Yes | 501 | 0.816 |
mybar(mydf1,mydf1$n,mydf1$Insurance,mydf1$perc,"Insurance")

mybar3(mydata$Insurance,"Insurance")

#########################################################################
Vaccine Views, Q40-Q44
#########################################################################
mydata%>%count(Child_Vax)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Child_Vax | n | perc |
|---|---|---|
| Not Sure | 5 | 0.008 |
| None | 5 | 0.008 |
| Partially | 54 | 0.088 |
| Completely | 550 | 0.896 |
a=mybar2(mydf1,mydf1$n,mydf1$Child_Vax,mydf1$perc,"Child Vaccinations")
mydata%>%count(Annual_Flu_Shot)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf2, colnames(mydf2))
| Annual_Flu_Shot | n | perc |
|---|---|---|
| No | 343 | 0.559 |
| Yes | 271 | 0.441 |
b=mybar2(mydf2,mydf2$n,mydf2$Annual_Flu_Shot,mydf2$perc,"Annual Flu Shot")
mydata%>%count(Meningitis_Required)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf3, colnames(mydf3))
| Meningitis_Required | n | perc |
|---|---|---|
| Illegal & Unethical | 6 | 0.010 |
| Illegal but Ethical | 12 | 0.020 |
| Legal but Unethical | 41 | 0.067 |
| Legal & Ethical | 555 | 0.904 |
d=mybar2(mydf3,mydf3$n,mydf3$Meningitis_Required,mydf3$perc,"Meningitis Required")
mydata%>%count(COVID_Required)%>%mutate(perc = n / nrow(mydata)) -> mydf4
myprint(mydf4, colnames(mydf4))
| COVID_Required | n | perc |
|---|---|---|
| Absolutely Not | 110 | 0.179 |
| Probably Not | 179 | 0.292 |
| Probably Yes | 84 | 0.137 |
| Absolutely Yes | 241 | 0.393 |
e=mybar2(mydf4,mydf4$n,mydf4$COVID_Required,mydf4$perc,"COVID-19 Required")
print(ggarrange(d,e,ncol=2))

print(ggarrange(a,b,ncol=2))

mybar3(mydata$Child_Vax,"Vaccination as Child", 90)

#########################################################################
Personal Familiarity, Q3-5
#########################################################################
mydata%>%count(Greater_Risk_COVID)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Greater_Risk_COVID | n | perc |
|---|---|---|
| No | 176 | 0.287 |
| Yes | 438 | 0.713 |
a=mybar(mydf1,mydf1$n,mydf1$Greater_Risk_COVID,mydf1$perc,"Greater Risk COVID")
mydata%>%count(Had_COVID)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf2, colnames(mydf2))
| Had_COVID | n | perc |
|---|---|---|
| No | 129 | 0.21 |
| Yes | 485 | 0.79 |
b=mybar(mydf2,mydf2$n,mydf2$Had_COVID,mydf2$perc,"Had COVID")
mydata%>%count(Died_COVID)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf3, colnames(mydf3))
| Died_COVID | n | perc |
|---|---|---|
| No | 511 | 0.832 |
| Yes | 103 | 0.168 |
d=mybar(mydf3,mydf3$n,mydf3$Died_COVID,mydf3$perc,"Died COVID")
print(ggarrange(a,b,d, nrow=3))

mybar3(mydata$Greater_Risk_COVID,"Greater Risk COVID")

mybar3(mydata$Had_COVID,"Had COVID")

mybar3(mydata$Died_COVID,"Knew Somebody who Died of COVID")

#########################################################################
Behavior, Q6-8
#########################################################################
mydata%>%count(Wash_Hands)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Wash_Hands | n | perc |
|---|---|---|
| Almost Never | 30 | 0.049 |
| Sometimes | 96 | 0.156 |
| Often | 206 | 0.336 |
| Always | 282 | 0.459 |
a=mybar2(mydf1,mydf1$n,mydf1$Wash_Hands,mydf1$perc,"Wash Hands")
mydata%>%count(Wear_Mask)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf2, colnames(mydf2))
| Wear_Mask | n | perc |
|---|---|---|
| Almost Never | 11 | 0.018 |
| Sometimes | 25 | 0.041 |
| Often | 56 | 0.091 |
| Always | 522 | 0.850 |
b=mybar2(mydf2,mydf2$n,mydf2$Wear_Mask,mydf2$perc,"Wear Mask")
mydata%>%count(Social_Distance)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf3, colnames(mydf3))
| Social_Distance | n | perc |
|---|---|---|
| Almost Never | 22 | 0.036 |
| Sometimes | 68 | 0.111 |
| Often | 230 | 0.375 |
| Always | 294 | 0.479 |
d=mybar2(mydf3,mydf3$n,mydf3$Social_Distance,mydf3$perc,"Social Distance")
print(ggarrange(a,b,d, ncol=2))
## $`1`

##
## $`2`

##
## attr(,"class")
## [1] "list" "ggarrange"
mybar3(mydata$Wash_Hands,"Wash Hands", 90)

mybar3(mydata$Wear_Mask, "Wear Mask", 90)

mybar3(mydata$Social_Distance, "Social Distance",90)

#######################################################################
Knowledge, Q9-13
#########################################################################
mydata%>%count(Air_Spread)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Air_Spread | n | perc |
|---|---|---|
| Incorrect | 99 | 0.161 |
| Correct | 515 | 0.839 |
a=mybar2(mydf1,mydf1$n,mydf1$Air_Spread,mydf1$perc,"Air Spread")
mydata%>%count(Mask_Effective)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf2, colnames(mydf2))
| Mask_Effective | n | perc |
|---|---|---|
| Incorrect | 40 | 0.065 |
| Correct | 574 | 0.935 |
b=mybar2(mydf2,mydf2$n,mydf2$Mask_Effective,mydf2$perc,"Mask Effective")
mydata%>%count(Asymptomatic_Pose_Risk)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf3, colnames(mydf3))
| Asymptomatic_Pose_Risk | n | perc |
|---|---|---|
| Incorrect | 34 | 0.055 |
| Correct | 580 | 0.945 |
d=mybar2(mydf3,mydf3$n,mydf3$Asymptomatic_Pose_Risk,mydf3$perc,"Asymptomatic Risk")
mydata%>%count(College_Deaths)%>%mutate(perc = n / nrow(mydata)) -> mydf4
myprint(mydf4, colnames(mydf4))
| College_Deaths | n | perc |
|---|---|---|
| Incorrect | 37 | 0.06 |
| Correct | 577 | 0.94 |
e=mybar2(mydf4,mydf4$n,mydf4$College_Deaths,mydf4$perc,"College Deaths")
mydata%>%count(Mostly_Mild)%>%mutate(perc = n / nrow(mydata)) -> mydf5
myprint(mydf5, colnames(mydf5))
| Mostly_Mild | n | perc |
|---|---|---|
| Incorrect | 137 | 0.223 |
| Correct | 477 | 0.777 |
f=mybar2(mydf5,mydf5$n,mydf5$Mostly_Mild,mydf5$perc,"Mostly Mild")
print(ggarrange(a,b,d,e,f))

p1=table(mydata$Knowledge_Score)
p1=t(p1)
#NB: the denominator 416 appears to be a holdover; with n=614 the row below sums past 100%
myprint(round(100*p1/416,2), c("1 Correct", "2 Correct", "3 Correct", "4 Correct", "5 Correct"))
| 1 Correct | 2 Correct | 3 Correct | 4 Correct | 5 Correct |
|---|---|---|---|---|
| 0.96 | 2.64 | 11.1 | 49.5 | 83.4 |
mybar3(mydata$Knowledge_Score,"Knowledge Score", 90)

#########################################################################
Likelihood Assessment, Q15-16
#########################################################################
p2=table(mydata$Likely_to_Get)
p3=table(mydata$Likely_Get_Sick)
p1=t(round(100*p1/614,2))
myt=cbind(p2,p3)/614
## Warning in cbind(p2, p3): number of rows of result is not a multiple of vector
## length (arg 1)
myt=round(100*myt,2)
colnames(myt)=c("Likely to Get COVID", "Likely to Get Sick")
myprint(myt, colnames(myt))
| | Likely to Get COVID | Likely to Get Sick |
|---|---|---|
| Already Had | 11.2 | 8.14 |
| Extremely Unlikely | 33.2 | 12.05 |
| Unlikely | 45.0 | 48.21 |
| Likely | 10.6 | 23.62 |
| Extremely Likely | 11.2 | 7.98 |
mybar3(mydata$Likely_Get_Sick,"Likely to Get Sick")

mybar3(mydata$Likely_to_Get, "Likely to Get COVID")

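The cbind() warning above matters: table(mydata$Likely_to_Get) has four observed levels while table(mydata$Likely_Get_Sick) has five, so the shorter vector is recycled and the "Likely to Get COVID" column repeats its first entry (11.2) in the "Extremely Likely" row. A sketch that aligns the two tables on their level names instead (editorial illustration, not part of the original run):
lvls = union(names(p2), names(p3))   #all response labels from both questions
myt2 = cbind(p2[lvls], p3[lvls])     #NA where a question lacks that label
rownames(myt2) = lvls
colnames(myt2) = c("Likely to Get COVID", "Likely to Get Sick")
round(100 * myt2 / 614, 2)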
#########################################################################
Risk Factors, Q17-24
#########################################################################
myd=t(round(describe(mydata$Risk_Score),3))
myprint(myd, "Risk Score")
| | Risk Score |
|---|---|
| vars | 1.000 |
| n | 614.000 |
| mean | 0.568 |
| sd | 0.201 |
| median | 0.625 |
| trimmed | 0.578 |
| mad | 0.185 |
| min | 0.000 |
| max | 1.000 |
| range | 1.000 |
| skew | -0.491 |
| kurtosis | -0.395 |
| se | 0.008 |
myt=matrix(rep(0, 16), nrow=2)
for (i in 27:34){
t=table(mydata[,i])
myt[,i-26]=t
print(mybar3(mydata[,i], colnames(mydata)[i]))
}








colnames(myt)=colnames(mydata[, 27:34])
rownames(myt)=c("No", "Yes")
myt=round(myt*100/614, 2)
myt=t(myt)
myprint(myt, colnames(myt))
| | No | Yes |
|---|---|---|
| Live_Others | 8.31 | 91.7 |
| Class_in_Person | 45.28 | 54.7 |
| Inperson_Job | 48.21 | 51.8 |
| Medical_Condition | 85.50 | 14.5 |
| Super_Spreader | 77.69 | 22.3 |
| No_Distancing | 66.29 | 33.7 |
| No_Mask | 74.27 | 25.7 |
| Travel | 48.70 | 51.3 |
#########################################################################
Risk Assessment, Q25-32
#########################################################################
myd=t(round(describe(mydata$Risk_Knowledge_Score),3))
myprint(myd, "Risk Knowledge Score")
| | Risk Knowledge Score |
|---|---|
| vars | 1.000 |
| n | 614.000 |
| mean | 25.049 |
| sd | 4.934 |
| median | 26.000 |
| trimmed | 25.732 |
| mad | 4.448 |
| min | 8.000 |
| max | 32.000 |
| range | 24.000 |
| skew | -1.309 |
| kurtosis | 1.636 |
| se | 0.199 |
myt=matrix(rep(0, 4*8), nrow=4)
for (i in 35:42){
t=table(mydata[,i])
myt[,i-34]=t
print(mybar3(mydata[,i], colnames(mydata)[i]), 90)
}








colnames(myt)=colnames(mydata[, 35:42])
rownames(myt)=c("Extremely Safe", "Somewhat Safe", "Somewhat Risky", "Extremely Risky")
myt=round(100*myt/614, 2)
myt=t(myt)
myprint(myt, colnames(myt))
| | Extremely Safe | Somewhat Safe | Somewhat Risky | Extremely Risky |
|---|---|---|---|---|
| Safe_Live_Others | 18.89 | 44.95 | 31.6 | 4.56 |
| Safe_Attend_Class | 11.40 | 23.13 | 43.3 | 22.15 |
| Safe_F2F_Job | 7.00 | 17.75 | 44.3 | 30.94 |
| Safe_Underlying_Condition | 4.07 | 6.03 | 23.1 | 66.78 |
| Safe_Gatherings..50 | 2.61 | 6.19 | 19.5 | 71.66 |
| Safe_Socials_No_Distancing | 3.09 | 6.03 | 27.2 | 63.68 |
| Safe_Socials_No_Mask | 3.42 | 5.37 | 22.8 | 68.40 |
| Safe_Travel | 5.37 | 24.43 | 47.9 | 22.31 |
#########################################################################
Concern Self & Others, Q14, Q33-34
#########################################################################
mydata%>%count(Concern_Self)%>%mutate(perc = n / nrow(mydata)) -> mydf1
mydata%>%count(Concern_Others)%>%mutate(perc = n / nrow(mydata)) -> mydf2
mydata%>%count(Change_Others)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf1,colnames(mydf1))
| Concern_Self | n | perc |
|---|---|---|
| Not at all | 105 | 0.171 |
| Slightly | 204 | 0.332 |
| Moderately | 231 | 0.376 |
| Extremely | 74 | 0.121 |
myprint(mydf2,colnames(mydf2))
| Concern_Others | n | perc |
|---|---|---|
| Not at All | 29 | 0.047 |
| Slightly | 128 | 0.208 |
| Moderately | 262 | 0.427 |
| Extremely | 195 | 0.318 |
myprint(mydf3,colnames(mydf3))
| Change_Others | n | perc |
|---|---|---|
| Very Unwilling | 25 | 0.041 |
| Somewhat Unwilling | 25 | 0.041 |
| Somewhat Willing | 127 | 0.207 |
| Willing | 437 | 0.712 |
a=mybar2(mydf1,mydf1$n,mydf1$Concern_Self,mydf1$perc,"Concern for Self")
b=mybar2(mydf2,mydf2$n,mydf2$Concern_Others,mydf2$perc,"Concern for Others")
d=mybar2(mydf3,mydf3$n,mydf3$Change_Others,mydf3$perc,"Change for Others")
a

b

d

mybar3(mydata$Concern_Self,"Concern for Self")

mybar3(mydata$Concern_Others, "Concern for Others")

#########################################################################
Hesitancy, Q35-39
#########################################################################
myt=matrix(rep(0, 2*5), nrow=2)
for (i in 46:50){
t=table(mydata[,i])
myt[,i-45]=t
print(mybar3(mydata[,i], colnames(mydata)[i]))
}





colnames(myt)=colnames(mydata[, 46:50])
rownames(myt)=c("Incorrect", "Correct")
myt=round(100*myt/614, 2)
myt=t(myt)
myprint(myt, colnames(myt))
| | Incorrect | Correct |
|---|---|---|
| Definition_Vaccine | 4.56 | 95.4 |
| Safety_Vaccine | 14.66 | 85.3 |
| Better_to_be_Sick | 18.24 | 81.8 |
| Over_Vaccination_Harmful | 44.30 | 55.7 |
| Herd_Immunity | 18.89 | 81.1 |
p1=table(mydata$Hesitancy_Score)
p1=t(p1)
#NB: same denominator issue as above -- 416 rather than the full n=614
myprint(round(100*p1/416,2), c("0 Correct","1 Correct", "2 Correct", "3 Correct", "4 Correct", "5 Correct"))
| 0 Correct | 1 Correct | 2 Correct | 3 Correct | 4 Correct | 5 Correct |
|---|---|---|---|---|---|
| 0.96 | 3.37 | 8.89 | 27.2 | 49.3 | 57.9 |
#########################################################################
Scores ~ Vaccine Willingness
describe(mydata)
## vars n mean sd median trimmed mad min
## ID 1 614 307.50 177.39 307.50 307.50 227.58 1.00
## Willingness_4Cat* 2 614 3.08 0.97 3.00 3.23 1.48 1.00
## Willingness_2Cat* 3 614 1.78 0.41 2.00 1.85 0.00 1.00
## Behavior_Reverse 4 614 0.16 0.16 0.09 0.13 0.13 0.00
## Knowledge_Reverse 5 614 0.11 0.15 0.00 0.08 0.00 0.00
## Risk_Knowledge_Reverse 6 614 0.22 0.16 0.19 0.20 0.14 0.00
## Hesitancy 7 614 0.20 0.21 0.20 0.17 0.30 0.00
## Risk_Score 8 614 0.57 0.20 0.62 0.58 0.19 0.00
## News_Frequency 9 614 0.55 0.14 0.55 0.54 0.13 0.25
## Trust 10 614 0.60 0.12 0.59 0.60 0.10 0.25
## Greater_Risk_COVID* 11 614 1.71 0.45 2.00 1.77 0.00 1.00
## Had_COVID* 12 614 1.79 0.41 2.00 1.86 0.00 1.00
## Died_COVID* 13 614 1.17 0.37 1.00 1.09 0.00 1.00
## Wash_Hands* 14 614 3.21 0.88 3.00 3.32 1.48 1.00
## Wear_Mask* 15 614 3.77 0.60 4.00 3.94 0.00 1.00
## Social_Distance* 16 614 3.30 0.80 3.00 3.41 1.48 1.00
## Behavior_Score 17 614 10.28 1.76 11.00 10.55 1.48 3.00
## Air_Spread* 18 614 1.84 0.37 2.00 1.92 0.00 1.00
## Mask_Effective* 19 614 1.93 0.25 2.00 2.00 0.00 1.00
## Asymptomatic_Pose_Risk* 20 614 1.94 0.23 2.00 2.00 0.00 1.00
## College_Deaths* 21 614 1.94 0.24 2.00 2.00 0.00 1.00
## Mostly_Mild* 22 614 1.78 0.42 2.00 1.85 0.00 1.00
## Knowledge_Score* 23 614 4.43 0.76 5.00 4.58 0.00 1.00
## Concern_Self* 24 614 2.45 0.91 2.00 2.43 1.48 1.00
## Likely_to_Get* 25 614 2.55 0.83 3.00 2.56 1.48 1.00
## Likely_Get_Sick* 26 614 3.11 1.00 3.00 3.14 1.48 1.00
## Live_Others* 27 614 1.92 0.28 2.00 2.00 0.00 1.00
## Class_in_Person* 28 614 1.55 0.50 2.00 1.56 0.00 1.00
## Inperson_Job* 29 614 1.52 0.50 2.00 1.52 0.00 1.00
## Medical_Condition* 30 614 1.14 0.35 1.00 1.06 0.00 1.00
## Super_Spreader* 31 614 1.22 0.42 1.00 1.15 0.00 1.00
## No_Distancing* 32 614 1.34 0.47 1.00 1.30 0.00 1.00
## No_Mask* 33 614 1.26 0.44 1.00 1.20 0.00 1.00
## Travel* 34 614 1.51 0.50 2.00 1.52 0.00 1.00
## Safe_Live_Others* 35 614 2.22 0.80 2.00 2.22 1.48 1.00
## Safe_Attend_Class* 36 614 2.76 0.92 3.00 2.83 1.48 1.00
## Safe_F2F_Job* 37 614 2.99 0.88 3.00 3.08 1.48 1.00
## Safe_Underlying_Condition* 38 614 3.53 0.78 4.00 3.71 0.00 1.00
## Safe_Gatherings..50* 39 614 3.60 0.72 4.00 3.77 0.00 1.00
## Safe_Socials_No_Distancing* 40 614 3.51 0.75 4.00 3.67 0.00 1.00
## Safe_Socials_No_Mask* 41 614 3.56 0.75 4.00 3.73 0.00 1.00
## Safe_Travel* 42 614 2.87 0.82 3.00 2.91 1.48 1.00
## Risk_Knowledge_Score 43 614 25.05 4.93 26.00 25.73 4.45 8.00
## Concern_Others* 44 614 3.01 0.85 3.00 3.08 1.48 1.00
## Change_Others* 45 614 3.59 0.75 4.00 3.76 0.00 1.00
## Definition_Vaccine* 46 614 1.95 0.21 2.00 2.00 0.00 1.00
## Safety_Vaccine* 47 614 1.85 0.35 2.00 1.94 0.00 1.00
## Better_to_be_Sick* 48 614 1.82 0.39 2.00 1.90 0.00 1.00
## Over_Vaccination_Harmful* 49 614 1.56 0.50 2.00 1.57 0.00 1.00
## Herd_Immunity* 50 614 1.81 0.39 2.00 1.89 0.00 1.00
## Hesitancy_Score* 51 614 4.99 1.06 5.00 5.15 1.48 1.00
## Child_Vax* 52 614 3.87 0.42 4.00 3.99 0.00 1.00
## Annual_Flu_Shot* 53 614 1.44 0.50 1.00 1.43 0.00 1.00
## Flu_Shot_Reason* 54 614 5.27 2.73 5.00 5.38 4.45 1.00
## Meningitis_Required* 55 614 3.86 0.46 4.00 4.00 0.00 1.00
## COVID_Required* 56 614 2.74 1.16 3.00 2.80 1.48 1.00
## Family_Accept* 57 614 2.42 1.05 3.00 2.40 1.48 1.00
## Friends_Accept* 58 614 2.14 1.06 2.00 2.06 1.48 1.00
## US_Accept* 59 614 2.65 0.68 3.00 2.66 0.00 1.00
## CDC_Freq* 60 614 2.24 0.87 2.00 2.19 1.48 1.00
## Local_News_Freq* 61 614 2.13 0.88 2.00 2.08 1.48 1.00
## National_News_Freq* 62 614 2.39 0.92 2.00 2.36 1.48 1.00
## International_News_Freq* 63 614 1.98 0.87 2.00 1.90 1.48 1.00
## Social_Media_Freq* 64 614 2.61 1.05 3.00 2.64 1.48 1.00
## Texas_State_Freq* 65 614 2.44 0.88 2.00 2.43 1.48 1.00
## College_Friends_Freq* 66 614 2.16 0.89 2.00 2.08 1.48 1.00
## Parents_Freq* 67 614 2.39 0.93 2.00 2.36 1.48 1.00
## Professors_Freq* 68 614 1.92 0.85 2.00 1.84 1.48 1.00
## Adults_Freq* 69 614 1.84 0.81 2.00 1.74 1.48 1.00
## Doctor_Freq* 70 614 2.13 0.97 2.00 2.03 1.48 1.00
## Frequency_Score 71 614 24.23 6.05 24.00 23.97 5.93 11.00
## Trust_CDC* 72 614 3.16 0.89 3.00 3.26 1.48 1.00
## Trust_Local* 73 614 2.25 0.71 2.00 2.26 0.00 1.00
## Trust_National* 74 614 2.28 0.78 2.00 2.28 1.48 1.00
## Trust_International* 75 614 2.32 0.79 2.00 2.32 1.48 1.00
## Trust_Social_Media* 76 614 1.70 0.62 2.00 1.65 0.00 1.00
## Trust_Texas_State* 77 614 2.67 0.78 3.00 2.67 1.48 1.00
## Trust_Friends* 78 614 1.93 0.59 2.00 1.90 0.00 1.00
## Trust_Parents* 79 614 2.44 0.77 2.00 2.43 1.48 1.00
## Trust_Professors* 80 614 2.34 0.70 2.00 2.36 0.00 1.00
## Trust_Adults* 81 614 2.01 0.60 2.00 2.00 0.00 1.00
## Trust_Doctor* 82 614 3.20 0.79 3.00 3.28 1.48 1.00
## Trust_Score 83 614 26.29 5.11 26.00 26.49 4.45 11.00
## Politics_5Cat* 84 614 2.00 1.12 2.00 1.88 1.48 1.00
## Politics_3Cat* 85 614 1.71 0.76 2.00 1.63 1.48 1.00
## Race* 86 614 1.77 1.00 1.00 1.58 0.00 1.00
## White* 87 614 1.50 0.50 1.00 1.50 0.00 1.00
## Hispanic* 88 614 1.36 0.48 1.00 1.33 0.00 1.00
## Gender* 89 614 1.53 0.54 2.00 1.51 1.48 1.00
## Gender_2Cat* 90 614 1.50 0.50 1.50 1.50 0.74 1.00
## Age 91 614 20.30 1.50 20.00 20.27 1.48 18.00
## Age_Cat* 92 614 3.30 1.49 3.00 3.27 1.48 1.00
## Year* 93 614 2.76 1.11 3.00 2.83 1.48 1.00
## Health_Maj_Min* 94 614 1.38 0.49 1.00 1.36 0.00 1.00
## Income* 95 614 2.92 1.50 3.00 2.90 1.48 1.00
## Insurance* 96 614 1.82 0.39 2.00 1.89 0.00 1.00
## max range skew kurtosis se
## ID 614.00 613.00 0.00 -1.21 7.16
## Willingness_4Cat* 4.00 3.00 -0.86 -0.24 0.04
## Willingness_2Cat* 2.00 1.00 -1.35 -0.18 0.02
## Behavior_Reverse 0.82 0.82 1.47 2.44 0.01
## Knowledge_Reverse 0.80 0.80 1.51 2.68 0.01
## Risk_Knowledge_Reverse 0.77 0.77 1.31 1.64 0.01
## Hesitancy 1.00 1.00 1.07 0.94 0.01
## Risk_Score 1.00 1.00 -0.49 -0.40 0.01
## News_Frequency 1.00 0.75 0.45 0.32 0.01
## Trust 1.00 0.75 -0.27 0.81 0.00
## Greater_Risk_COVID* 2.00 1.00 -0.94 -1.12 0.02
## Had_COVID* 2.00 1.00 -1.42 0.02 0.02
## Died_COVID* 2.00 1.00 1.77 1.15 0.02
## Wash_Hands* 4.00 3.00 -0.84 -0.19 0.04
## Wear_Mask* 4.00 3.00 -2.91 8.27 0.02
## Social_Distance* 4.00 3.00 -1.00 0.42 0.03
## Behavior_Score 12.00 9.00 -1.47 2.44 0.07
## Air_Spread* 2.00 1.00 -1.84 1.38 0.01
## Mask_Effective* 2.00 1.00 -3.52 10.38 0.01
## Asymptomatic_Pose_Risk* 2.00 1.00 -3.88 13.06 0.01
## College_Deaths* 2.00 1.00 -3.69 11.61 0.01
## Mostly_Mild* 2.00 1.00 -1.33 -0.24 0.02
## Knowledge_Score* 5.00 4.00 -1.51 2.68 0.03
## Concern_Self* 4.00 3.00 -0.04 -0.83 0.04
## Likely_to_Get* 4.00 3.00 -0.19 -0.52 0.03
## Likely_Get_Sick* 5.00 4.00 -0.24 0.02 0.04
## Live_Others* 2.00 1.00 -3.01 7.10 0.01
## Class_in_Person* 2.00 1.00 -0.19 -1.97 0.02
## Inperson_Job* 2.00 1.00 -0.07 -2.00 0.02
## Medical_Condition* 2.00 1.00 2.01 2.05 0.01
## Super_Spreader* 2.00 1.00 1.33 -0.24 0.02
## No_Distancing* 2.00 1.00 0.69 -1.53 0.02
## No_Mask* 2.00 1.00 1.11 -0.77 0.02
## Travel* 2.00 1.00 -0.05 -2.00 0.02
## Safe_Live_Others* 4.00 3.00 0.12 -0.58 0.03
## Safe_Attend_Class* 4.00 3.00 -0.38 -0.67 0.04
## Safe_F2F_Job* 4.00 3.00 -0.61 -0.30 0.04
## Safe_Underlying_Condition* 4.00 3.00 -1.73 2.38 0.03
## Safe_Gatherings..50* 4.00 3.00 -1.90 3.08 0.03
## Safe_Socials_No_Distancing* 4.00 3.00 -1.60 2.19 0.03
## Safe_Socials_No_Mask* 4.00 3.00 -1.82 2.88 0.03
## Safe_Travel* 4.00 3.00 -0.35 -0.39 0.03
## Risk_Knowledge_Score 32.00 24.00 -1.31 1.64 0.20
## Concern_Others* 4.00 3.00 -0.50 -0.47 0.03
## Change_Others* 4.00 3.00 -2.01 3.61 0.03
## Definition_Vaccine* 2.00 1.00 -4.35 16.91 0.01
## Safety_Vaccine* 2.00 1.00 -1.99 1.98 0.01
## Better_to_be_Sick* 2.00 1.00 -1.64 0.69 0.02
## Over_Vaccination_Harmful* 2.00 1.00 -0.23 -1.95 0.02
## Herd_Immunity* 2.00 1.00 -1.59 0.51 0.02
## Hesitancy_Score* 6.00 5.00 -1.07 0.94 0.04
## Child_Vax* 4.00 3.00 -4.04 19.34 0.02
## Annual_Flu_Shot* 2.00 1.00 0.24 -1.95 0.02
## Flu_Shot_Reason* 10.00 9.00 -0.23 -1.28 0.11
## Meningitis_Required* 4.00 3.00 -3.99 17.17 0.02
## COVID_Required* 4.00 3.00 -0.18 -1.48 0.05
## Family_Accept* 4.00 3.00 -0.20 -1.29 0.04
## Friends_Accept* 4.00 3.00 0.21 -1.38 0.04
## US_Accept* 4.00 3.00 -0.34 0.03 0.03
## CDC_Freq* 4.00 3.00 0.35 -0.52 0.04
## Local_News_Freq* 4.00 3.00 0.38 -0.59 0.04
## National_News_Freq* 4.00 3.00 0.11 -0.83 0.04
## International_News_Freq* 4.00 3.00 0.58 -0.40 0.04
## Social_Media_Freq* 4.00 3.00 -0.11 -1.20 0.04
## Texas_State_Freq* 4.00 3.00 0.11 -0.70 0.04
## College_Friends_Freq* 4.00 3.00 0.47 -0.46 0.04
## Parents_Freq* 4.00 3.00 0.24 -0.80 0.04
## Professors_Freq* 4.00 3.00 0.65 -0.23 0.03
## Adults_Freq* 4.00 3.00 0.80 0.22 0.03
## Doctor_Freq* 4.00 3.00 0.52 -0.71 0.04
## Frequency_Score 44.00 33.00 0.45 0.32 0.24
## Trust_CDC* 4.00 3.00 -0.68 -0.60 0.04
## Trust_Local* 4.00 3.00 0.19 -0.14 0.03
## Trust_National* 4.00 3.00 0.14 -0.42 0.03
## Trust_International* 4.00 3.00 0.14 -0.43 0.03
## Trust_Social_Media* 4.00 3.00 0.47 0.17 0.03
## Trust_Texas_State* 4.00 3.00 -0.19 -0.35 0.03
## Trust_Friends* 4.00 3.00 0.25 0.76 0.02
## Trust_Parents* 4.00 3.00 0.16 -0.35 0.03
## Trust_Professors* 4.00 3.00 0.18 -0.14 0.03
## Trust_Adults* 4.00 3.00 0.31 0.74 0.02
## Trust_Doctor* 4.00 3.00 -0.66 -0.29 0.03
## Trust_Score 44.00 33.00 -0.27 0.81 0.21
## Politics_5Cat* 4.00 3.00 0.62 -1.09 0.05
## Politics_3Cat* 3.00 2.00 0.55 -1.09 0.03
## Race* 5.00 4.00 1.45 1.67 0.04
## White* 2.00 1.00 0.01 -2.00 0.02
## Hispanic* 2.00 1.00 0.58 -1.66 0.02
## Gender* 3.00 2.00 0.29 -1.07 0.02
## Gender_2Cat* 2.00 1.00 0.00 -2.00 0.02
## Age 24.00 6.00 0.11 -0.91 0.06
## Age_Cat* 6.00 5.00 0.09 -0.96 0.06
## Year* 4.00 3.00 -0.32 -1.25 0.04
## Health_Maj_Min* 2.00 1.00 0.47 -1.78 0.02
## Income* 5.00 4.00 0.07 -1.43 0.06
## Insurance* 2.00 1.00 -1.63 0.65 0.02
a=boxplot(mydata$Behavior_Reverse~mydata$Willingness_4Cat, plot=F)$stats
a1=boxplot(mydata$Knowledge_Reverse~mydata$Willingness_4Cat, plot=F)$stats
a2=boxplot(mydata$Risk_Knowledge_Reverse~mydata$Willingness_4Cat, plot=F)$stats
a3=boxplot(mydata$Risk_Score~mydata$Willingness_4Cat, plot=F)$stats
a4=boxplot(mydata$Hesitancy~mydata$Willingness_4Cat, plot=F)$stats
colnames(a)=colnames(a1)=colnames(a2)=colnames(a3)=colnames(a4)=levels(mydata$Willingness_4Cat)
rownames(a)=rownames(a1)=rownames(a2)=rownames(a3)=rownames(a4)= c("Min", "1st Quartile", "Median", "3d Quartile", "Max")
a%>%kbl(caption="Behavior")
Behavior

| | Never | If Required | Eventually | ASAP |
|---|---|---|---|---|
| Min | 0.000 | 0.000 | 0.000 | 0.000 |
| 1st Quartile | 0.091 | 0.091 | 0.091 | 0.000 |
| Median | 0.273 | 0.182 | 0.091 | 0.091 |
| 3d Quartile | 0.455 | 0.273 | 0.227 | 0.182 |
| Max | 0.818 | 0.545 | 0.364 | 0.455 |
a1%>%kbl(caption="Knowledge")
Knowledge

| | Never | If Required | Eventually | ASAP |
|---|---|---|---|---|
| Min | 0.0 | 0.0 | 0.0 | 0.0 |
| 1st Quartile | 0.0 | 0.0 | 0.0 | 0.0 |
| Median | 0.2 | 0.0 | 0.0 | 0.0 |
| 3d Quartile | 0.4 | 0.2 | 0.2 | 0.2 |
| Max | 0.8 | 0.4 | 0.4 | 0.4 |
a2%>%kbl(caption="Risk Knowledge")
Risk Knowledge

| | Never | If Required | Eventually | ASAP |
|---|---|---|---|---|
| Min | 0.000 | 0.000 | 0.000 | 0.000 |
| 1st Quartile | 0.194 | 0.161 | 0.097 | 0.097 |
| Median | 0.323 | 0.226 | 0.161 | 0.161 |
| 3d Quartile | 0.581 | 0.355 | 0.258 | 0.258 |
| Max | 0.774 | 0.581 | 0.484 | 0.484 |
a3%>%kbl(caption="Risk Score")
Risk Score

| | Never | If Required | Eventually | ASAP |
|---|---|---|---|---|
| Min | 0.125 | 0.000 | 0.125 | 0.125 |
| 1st Quartile | 0.250 | 0.375 | 0.500 | 0.500 |
| Median | 0.500 | 0.625 | 0.625 | 0.625 |
| 3d Quartile | 0.750 | 0.750 | 0.750 | 0.750 |
| Max | 0.875 | 0.875 | 0.875 | 1.000 |
a4%>%kbl(caption="Hesitancy")
Hesitancy

| | Never | If Required | Eventually | ASAP |
|---|---|---|---|---|
| Min | 0.0 | 0.0 | 0.0 | 0.0 |
| 1st Quartile | 0.2 | 0.2 | 0.0 | 0.0 |
| Median | 0.4 | 0.3 | 0.2 | 0.0 |
| 3d Quartile | 0.6 | 0.4 | 0.2 | 0.2 |
| Max | 1.0 | 0.6 | 0.4 | 0.4 |
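One caveat on the five tables above: boxplot(..., plot=FALSE)$stats returns the whisker extremes rather than the raw minimum and maximum, so the Min/Max rows are whisker ends whenever outliers are present. The object is a 5 x 4 matrix here, one column per Willingness level:
str(boxplot(mydata$Hesitancy ~ mydata$Willingness_4Cat, plot = FALSE)$stats)
#num [1:5, 1:4]: lower whisker, Q1, median, Q3, upper whisker for each group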
Acceptance, Q45-47
table(mydata$Willingness_4Cat, mydata$Family_Accept)%>%kbl(caption="Family Accept")
Family Accept

| | Somewhat Likely | Somewhat Unlikely | Very Likely | Very Unlikely |
|---|---|---|---|---|
| Never | 4 | 14 | 1 | 46 |
| If Required | 30 | 18 | 3 | 19 |
| Eventually | 106 | 37 | 73 | 12 |
| ASAP | 42 | 7 | 197 | 5 |
table(mydata$Willingness_4Cat, mydata$Friends_Accept)%>%kbl(caption="Friends Accept")
Friends Accept

| | Somewhat Likely | Somewhat Unlikely | Very Likely | Very Unlikely |
|---|---|---|---|---|
| Never | 10 | 16 | 6 | 33 |
| If Required | 24 | 24 | 10 | 12 |
| Eventually | 127 | 32 | 60 | 9 |
| ASAP | 85 | 22 | 138 | 6 |
table(mydata$Willingness_4Cat, mydata$US_Accept)%>%kbl(caption="US Accept")
US Accept

| | 0 to 25% | 25 to 50% | 50 to 75% | 75 to 100% |
|---|---|---|---|---|
| Never | 14 | 36 | 14 | 1 |
| If Required | 7 | 42 | 20 | 1 |
| Eventually | 6 | 73 | 143 | 6 |
| ASAP | 3 | 47 | 165 | 36 |
Frequency Versus Trust, Q48-58,Q59-Q69
orderit=function(i){
  #Pair a frequency-of-use column (i) with its trust counterpart (i+12 in mydata)
  mydata%>%count(mydata[,i])%>%mutate(perc = n / nrow(mydata)) -> mydf
  mydata%>%count(mydata[,i+12])%>%mutate(perc2 = n / nrow(mydata)) -> mydf1
  mydf[,1]=mydf1[,1]=NULL
  mydf=cbind(mydf,mydf1)
  colnames(mydf)=c("n_Freq", "percent_Freq", "n_Trust", "percent_Trust")
  return(mydf)
}
mymat=matrix(rep(0, 10*4), ncol=4)
for (i in 60:69){
temp=orderit(i)
temp$Frequency=c("1 Never", "2 Occasionally",
"3 Often", "4 All the Time")
temp$Trust=c("1 No Trust", "2 Some Trust",
"3 Trust", "4 Complete Trust")
print(temp)%>%kbl()%>%kable_classic() #inside a loop the kbl() table is not auto-printed; only print(temp) renders below
sub=temp[,c(1,3)]
z=chisq.test(sub)
mymat[i-59,1]=colnames(mydata)[i]
mymat[i-59,2]=round(z$statistic, 3)
mymat[i-59,3]=z$parameter
mymat[i-59,4]=round(z$p.value,3)
b=ggplot(temp, aes(x='',y=n_Freq, fill=Frequency,
label=paste0(n_Freq,'\n', " (", scales::percent(percent_Freq), ")")))+
geom_bar(position="stack", stat="identity")+
coord_flip()+
geom_text(size = 3, position = position_stack(vjust = 0.5))+
xlab("")+
ylab("Frequency")+
theme(legend.position="bottom")+
theme(legend.title = element_blank())+
ggtitle(colnames(mydata)[i])
d=ggplot(temp, aes(x='',y=n_Trust, fill=Trust,
label=paste0(n_Trust,'\n', " (", scales::percent(percent_Trust), ")")))+
geom_bar(position="stack", stat="identity")+
coord_flip()+
geom_text(size = 3, position = position_stack(vjust = 0.5))+
xlab("")+
ylab("Trust")+
theme(legend.position="bottom")+
theme(legend.title = element_blank())+
ggtitle(colnames(mydata)[i+12])
print(ggarrange(b,d,nrow=2))
}
## n_Freq percent_Freq n_Trust percent_Trust Frequency Trust
## 1 119 0.1938 26 0.0423 1 Never 1 No Trust
## 2 283 0.4609 124 0.2020 2 Occasionally 2 Some Trust
## 3 155 0.2524 188 0.3062 3 Often 3 Trust
## 4 57 0.0928 276 0.4495 4 All the Time 4 Complete Trust

## n_Freq percent_Freq n_Trust percent_Trust Frequency Trust
## 1 157 0.2557 76 0.1238 1 Never 1 No Trust
## 2 263 0.4283 332 0.5407 2 Occasionally 2 Some Trust
## 3 150 0.2443 185 0.3013 3 Often 3 Trust
## 4 44 0.0717 21 0.0342 4 All the Time 4 Complete Trust

## n_Freq percent_Freq n_Trust percent_Trust Frequency Trust
## 1 110 0.179 93 0.1515 1 Never 1 No Trust
## 2 230 0.375 291 0.4739 2 Occasionally 2 Some Trust
## 3 198 0.322 198 0.3225 3 Often 3 Trust
## 4 76 0.124 32 0.0521 4 All the Time 4 Complete Trust

## n_Freq percent_Freq n_Trust percent_Trust Frequency Trust
## 1 204 0.3322 86 0.1401 1 Never 1 No Trust
## 2 257 0.4186 284 0.4625 2 Occasionally 2 Some Trust
## 3 117 0.1906 204 0.3322 3 Often 3 Trust
## 4 36 0.0586 40 0.0651 4 All the Time 4 Complete Trust

## n_Freq percent_Freq n_Trust percent_Trust Frequency Trust
## 1 111 0.181 233 0.37948 1 Never 1 No Trust
## 2 172 0.280 335 0.54560 2 Occasionally 2 Some Trust
## 3 177 0.288 42 0.06840 3 Often 3 Trust
## 4 154 0.251 4 0.00651 4 All the Time 4 Complete Trust

## n_Freq percent_Freq n_Trust percent_Trust Frequency Trust
## 1 86 0.140 41 0.0668 1 Never 1 No Trust
## 2 247 0.402 198 0.3225 2 Occasionally 2 Some Trust
## 3 205 0.334 296 0.4821 3 Often 3 Trust
## 4 76 0.124 79 0.1287 4 All the Time 4 Complete Trust

## n_Freq percent_Freq n_Trust percent_Trust Frequency Trust
## 1 145 0.2362 124 0.20195 1 Never 1 No Trust
## 2 283 0.4609 413 0.67264 2 Occasionally 2 Some Trust
## 3 129 0.2101 72 0.11726 3 Often 3 Trust
## 4 57 0.0928 5 0.00814 4 All the Time 4 Complete Trust

## n_Freq percent_Freq n_Trust percent_Trust Frequency Trust
## 1 104 0.169 56 0.0912 1 Never 1 No Trust
## 2 257 0.419 285 0.4642 2 Occasionally 2 Some Trust
## 3 165 0.269 222 0.3616 3 Often 3 Trust
## 4 88 0.143 51 0.0831 4 All the Time 4 Complete Trust

## n_Freq percent_Freq n_Trust percent_Trust Frequency Trust
## 1 215 0.3502 56 0.0912 1 Never 1 No Trust
## 2 263 0.4283 322 0.5244 2 Occasionally 2 Some Trust
## 3 104 0.1694 209 0.3404 3 Often 3 Trust
## 4 32 0.0521 27 0.0440 4 All the Time 4 Complete Trust

## n_Freq percent_Freq n_Trust percent_Trust Frequency Trust
## 1 231 0.376 102 0.1661 1 Never 1 No Trust
## 2 278 0.453 411 0.6694 2 Occasionally 2 Some Trust
## 3 78 0.127 94 0.1531 3 Often 3 Trust
## 4 27 0.044 7 0.0114 4 All the Time 4 Complete Trust
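As a sanity check, the first entry of the chi-square table below can be reproduced from the printed CDC counts alone:
cdc=cbind(Freq=c(119,283,155,57), Trust=c(26,124,188,276))
chisq.test(cdc) #X-squared = 268.966 on df = 3, matching CDC_Freq below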

colnames(mymat)=c("Freq/Trust Variable", "Chi Squared", "df", "p.value")
myprint(mymat,colnames(mymat))
| Freq/Trust Variable     | Chi Squared | df | p.value |
|-------------------------|-------------|----|---------|
| CDC_Freq                | 268.966     | 3  | 0       |
| Local_News_Freq         | 47.956      | 3  | 0       |
| National_News_Freq      | 26.492      | 3  | 0       |
| International_News_Freq | 73.151      | 3  | 0       |
| Social_Media_Freq       | 321.296     | 3  | 0       |
| Texas_State_Freq        | 37.927      | 3  | 0       |
| College_Friends_Freq    | 85.698      | 3  | 0       |
| Parents_Freq            | 34.091      | 3  | 0       |
| Professors_Freq         | 134.886     | 3  | 0       |
| Adults_Freq             | 88.899      | 3  | 0       |
Vaccine Views, Q40-Q44
#########################################################################
mydata%>%count(Child_Vax)%>%mutate(perc = n / nrow(mydata)) -> mydf1
myprint(mydf1, colnames(mydf1))
| Child_Vax  | n   | perc  |
|------------|-----|-------|
| Not Sure   | 5   | 0.008 |
| None       | 5   | 0.008 |
| Partially  | 54  | 0.088 |
| Completely | 550 | 0.896 |
a=mybar2(mydf1,mydf1$n,mydf1$Child_Vax,mydf1$perc,"Child Vaccinations")
mydata%>%count(Annual_Flu_Shot)%>%mutate(perc = n / nrow(mydata)) -> mydf2
myprint(mydf2, colnames(mydf2))
| Annual_Flu_Shot | n   | perc  |
|-----------------|-----|-------|
| No              | 343 | 0.559 |
| Yes             | 271 | 0.441 |
b=mybar2(mydf2,mydf2$n,mydf2$Annual_Flu_Shot,mydf2$perc,"Annual Flu Shot")
mydata%>%count(Meningitis_Required)%>%mutate(perc = n / nrow(mydata)) -> mydf3
myprint(mydf3, colnames(mydf3))
| Meningitis_Required  | n   | perc  |
|----------------------|-----|-------|
| Illegal & Unethical  | 6   | 0.010 |
| Illegal but Ethical  | 12  | 0.020 |
| Legal but Unethical  | 41  | 0.067 |
| Legal & Ethical      | 555 | 0.904 |
d=mybar2(mydf3,mydf3$n,mydf3$Meningitis_Required,mydf3$perc,"Meningitis Required")
mydata%>%count(COVID_Required)%>%mutate(perc = n / nrow(mydata)) -> mydf4
myprint(mydf4, colnames(mydf4))
| COVID_Required | n   | perc  |
|----------------|-----|-------|
| Absolutely Not | 110 | 0.179 |
| Probably Not   | 179 | 0.292 |
| Probably Yes   | 84  | 0.137 |
| Absolutely Yes | 241 | 0.393 |
e=mybar2(mydf4,mydf4$n,mydf4$COVID_Required,mydf4$perc,"COVID-19 Required")
print(ggarrange(d,e,ncol=2))

print(ggarrange(a,b,ncol=2))

mybar3(mydata$Child_Vax,"Vaccination as Child", 90)

#########################################################################
Correlations
We look at hierarchically-clustered correlations.
#########################################################################
for (i in 4:9){mydata[,i]=as.numeric(mydata[,i])} #coerce the score columns to numeric for cor()
mydata$White=as.numeric(mydata$White)
for (i in 89:92){mydata[, i]=as.numeric(mydata[,i])}
mycorr=cor(mydata[, c(4:9)])
corfunction(mycorr)
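corfunction is a plotting helper defined earlier in the document; a minimal sketch of an equivalent call, assuming only the corrplot package, might be:
corrplot(mycorr, method="color", #color tiles for each pairwise correlation
order="hclust", #reorder variables by hierarchical clustering
addrect=2, #outline 2 clusters (an arbitrary illustrative choice)
tl.cex=0.8) #shrink variable labels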

#########################################################################
Kernel Density Estimates
#########################################################################
mysub=subset(mydata, select=c(Behavior_Reverse, Knowledge_Reverse, Risk_Knowledge_Reverse, Hesitancy, News_Frequency, Trust)) #get a subset for plotting
colnames(mysub)=c( "Behavior %", "Knowledge %", "Risk Knowledge%", "Hesitancy %", "News Freq %", "Trust News %") #set column names
kdepairs(mysub) #scatterplot matrix with kernel density estimates (ResourceSelection::kdepairs)

#########################################################################
Frequency of Choice
#########################################################################
ASAP=mydata[mydata$Willingness_4Cat=="4. ASAP",60:70]
Eventually=mydata[mydata$Willingness_4Cat=="3. Eventually",60:70]
IfRequired=mydata[mydata$Willingness_4Cat=="2. If Required",60:70]
Never=mydata[mydata$Willingness_4Cat=="1. Never",60:70]
a=describe(ASAP)$mean
b=describe(Eventually)$mean
d=describe(IfRequired)$mean
e=describe(Never)$mean
newdf=round(data.frame(cbind(a,b,d,e)),3)
colnames(newdf)=c("ASAP", "Eventually", "If_Required", "Never")
rownames(newdf)=colnames(mydata)[60:70]
newdf
## ASAP Eventually If_Required Never
## CDC_Freq 2.39 2.17 2.03 2.15
## Local_News_Freq 2.23 2.09 1.91 2.14
## National_News_Freq 2.59 2.28 2.11 2.32
## International_News_Freq 2.15 1.83 1.83 1.97
## Social_Media_Freq 2.62 2.64 2.61 2.45
## Texas_State_Freq 2.50 2.44 2.56 2.11
## College_Friends_Freq 2.22 2.16 2.13 1.94
## Parents_Freq 2.45 2.30 2.44 2.38
## Professors_Freq 2.04 1.86 1.96 1.68
## Adults_Freq 1.94 1.75 1.80 1.80
## Doctor_Freq 2.26 1.99 2.09 2.11
subdf=newdf$ASAP[1:11]
names(subdf)=row.names(newdf)[1:11]
subdf=sort(subdf, decreasing=TRUE)
subdf1=newdf$Eventually[1:11]
names(subdf1)=row.names(newdf)[1:11]
subdf1=sort(subdf1, decreasing=TRUE)
subdf2=newdf$If_Required[1:11]
names(subdf2)=row.names(newdf)[1:11]
subdf2=sort(subdf2, decreasing=TRUE)
subdf3=newdf$Never[1:11]
names(subdf3)=row.names(newdf)[1:11]
subdf3=sort(subdf3, decreasing=TRUE)
ASAP=names(subdf[1:5])
Eventually=names(subdf1[1:5])
If_Required=names(subdf2[1:5])
Never=names(subdf3[1:5])
top5=as.data.frame(cbind(ASAP, Eventually, If_Required, Never))
top5%>%kbl()%>%kable_classic(html_font = "Cambria")
| ASAP               | Eventually         | If_Required          | Never              |
|--------------------|--------------------|----------------------|--------------------|
| Social_Media_Freq  | Social_Media_Freq  | Social_Media_Freq    | Social_Media_Freq  |
| National_News_Freq | Texas_State_Freq   | Texas_State_Freq     | Parents_Freq       |
| Texas_State_Freq   | Parents_Freq       | Parents_Freq         | National_News_Freq |
| Parents_Freq       | National_News_Freq | College_Friends_Freq | CDC_Freq           |
| CDC_Freq           | CDC_Freq           | National_News_Freq   | Local_News_Freq    |
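The four sort-and-subset blocks above could be collapsed into a single apply call; a behavior-equivalent sketch:
top5=as.data.frame(sapply(newdf, function(col){
names(col)=rownames(newdf) #re-attach the source names before sorting
names(sort(col, decreasing=TRUE))[1:5] #top five sources for this willingness group
}))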
#########################################################################
Models
Multinomial Y1
options(knitr.kable.NA = '')
#################################################################
mydata=read.csv("D:/Emily/data2.csv", stringsAsFactors = TRUE)
mybase=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~1, data=mydata)
## # weights: 8 (3 variable)
## initial value 851.184738
## final value 748.367556
## converged
mymultiA=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~
relevel(White, ref="0. White")+
relevel(Hispanic, ref="0. Not Hispanic")+
relevel(Gender_2Cat, ref="Male")+
relevel(Year, ref="4. Senior")+ #collinear with Age
relevel(Health_Maj_Min, ref="0. No"), data=mydata)
## # weights: 36 (24 variable)
## initial value 851.184738
## iter 10 value 730.791310
## iter 20 value 722.101742
## final value 722.059439
## converged
mymultiB=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~
relevel(White, ref="0. White")+
relevel(Hispanic, ref="0. Not Hispanic")+
relevel(Gender_2Cat, ref="Male")+
relevel(Year, ref="4. Senior")+ #collinear with Age
relevel(Health_Maj_Min, ref="0. No")+
relevel(Income, ref="5. >$100K")+
relevel(Insurance, ref="0. No"), data=mydata)
## # weights: 56 (39 variable)
## initial value 851.184738
## iter 10 value 727.495372
## iter 20 value 717.854053
## iter 30 value 717.288983
## final value 717.287622
## converged
mymultiC=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~
relevel(White, ref="0. White")+
relevel(Hispanic, ref="0. Not Hispanic")+
relevel(Gender_2Cat, ref="Male")+
relevel(Year, ref="4. Senior")+ #collinear with Age
relevel(Health_Maj_Min, ref="0. No")+
relevel(Income, ref="5. >$100K")+
relevel(Insurance, ref="0. No")+
relevel(Politics_3Cat, ref="1. Democrat"),
data=mydata)
## # weights: 64 (45 variable)
## initial value 851.184738
## iter 10 value 698.399002
## iter 20 value 682.852284
## iter 30 value 681.626896
## iter 40 value 681.565820
## final value 681.565737
## converged
mymultiD=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~
relevel(White, ref="0. White")+
relevel(Hispanic, ref="0. Not Hispanic")+
relevel(Gender_2Cat, ref="Male")+
relevel(Year, ref="4. Senior")+ #collinear with Age
relevel(Health_Maj_Min, ref="0. No")+
relevel(Income, ref="5. >$100K")+
relevel(Insurance, ref="0. No")+
relevel(Politics_3Cat, ref="1. Democrat")+
relevel(Annual_Flu_Shot, ref="1. Yes")+
Behavior_Reverse+
Knowledge_Reverse+
Risk_Knowledge_Reverse+
Hesitancy, data=mydata)
## # weights: 84 (60 variable)
## initial value 851.184738
## iter 10 value 636.155028
## iter 20 value 609.678109
## iter 30 value 603.717047
## iter 40 value 603.500287
## final value 603.497270
## converged
myresults=function(x){
mys=summary(x)$coefficients/summary(x)$standard.errors #Wald z statistics
p=round((1 - pnorm(abs(mys), 0, 1)) * 2,3) #two-sided p-values from the standard normal
coef=t(round(exp(coef(x)),3)) #odds ratios
coef=pfunction(t(p), coef) #attach significance markers
return(coef)}
mymat=matrix(rep(NA, 20*3*4), nrow=20)
mymat[1:8, 1:3]=myresults(mymultiA)
mymat[1:13,4:6]=myresults(mymultiB)
mymat[1:15,7:9]=myresults(mymultiC)
mymat[1:20,10:12]=myresults(mymultiD)
mymat=as.data.frame(mymat)
rownames(mymat)=c("Intercept", "Non-White","Hispanic",
"Female","Freshman","Sophomore", "Junior",
"Health Major or Minor, Yes",
"<25K", "25 to 50K", "50 to 75K", "75 to 100K",
"Insured, Yes",
"Other Party or Independent", "Republican","Flu Shot",
"Misbehavior Score", "Ignorance Score","Risk Knowledge",
"Hesitancy Score")
colnames(mymat)=rep(c("Eventually", "If Required", "Never"),4)
mymat%>%kbl(caption="Odds Ratios and p-values")%>%kable_classic(html_font = "Cambria")
Odds Ratios and p-values

Columns come in four blocks of three (Eventually / If Required / Never, each versus the ASAP reference), one block per model: A (demographics), B (+ income/insurance), C (+ politics), D (+ flu shot and scores).

|  | Eventually | If Required | Never | Eventually | If Required | Never | Eventually | If Required | Never | Eventually | If Required | Never |
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Intercept | 0.317*** | 0.108*** | 1.005 | 0.388+ | 0.119*** | 0.981 | 0.046*** | 0.058*** | 0.749 | 0.001*** | 0.005*** | 0.349* |
| Non-White | 1.086 | 1.114 | 0.664 | 1.051 | 1.132 | 0.688 | 1.912 | 1.428 | 0.731 | 1.812 | 1.26 | 0.666 |
| Hispanic | 0.317* | 1.018 | 1.477 | 0.313* | 1.004 | 1.472 | 0.217** | 0.939 | 1.47 | 0.296* | 1.125 | 1.652 |
| Female | 1.368 | 2.616** | 1.523* | 1.334 | 2.613** | 1.542* | 2.036* | 3.251*** | 1.691** | 3.777** | 4.297*** | 1.879** |
| Freshman | 0.488+ | 1.202 | 0.604+ | 0.511 | 1.226 | 0.597+ | 0.36* | 1.05 | 0.583+ | 0.192** | 0.655 | 0.513* |
| Sophomore | 0.379* | 1.137 | 0.678 | 0.388* | 1.121 | 0.701 | 0.303** | 1.062 | 0.685 | 0.227** | 0.931 | 0.683 |
| Junior | 0.824 | 1.026 | 0.8 | 0.814 | 1.016 | 0.796 | 0.866 | 1.081 | 0.818 | 0.636 | 0.918 | 0.795 |
| Health Major or Minor, Yes | 1.906* | 1.87* | 0.966 | 1.947* | 1.909* | 0.966 | 2.009* | 1.932* | 0.975 | 3.338** | 2.647** | 1.1 |
| <25K |  |  |  | 1.347 | 0.943 | 0.948 | 1.992 | 1.044 | 0.949 | 2.225 | 0.949 | 0.932 |
| 25 to 50K |  |  |  | 0.644 | 0.899 | 0.727 | 0.943 | 1.078 | 0.757 | 1.153 | 0.968 | 0.726 |
| 50 to 75K |  |  |  | 0.576 | 0.862 | 0.87 | 0.761 | 0.917 | 0.865 | 0.809 | 1.105 | 0.968 |
| 75 to 100K |  |  |  | 0.948 | 1.173 | 0.663 | 1.241 | 1.352 | 0.679 | 1.35 | 1.46 | 0.688 |
| Insured, Yes |  |  |  | 0.87 | 0.903 | 1.22 | 0.891 | 0.873 | 1.211 | 1.367 | 1.112 | 1.328 |
| Other Party or Independent |  |  |  |  |  |  | 5.211*** | 1.642 | 1.493+ | 4.945** | 1.536 | 1.36 |
| Republican |  |  |  |  |  |  | 25.119*** | 4.694*** | 1.511 | 6.265** | 2.022 | 0.964 |
| Flu Shot |  |  |  |  |  |  |  |  |  | 2.561* | 3.39*** | 1.312 |
| Misbehavior Score |  |  |  |  |  |  |  |  |  | 5.935 | 4.841 | 8.431* |
| Ignorance Score |  |  |  |  |  |  |  |  |  | 0.955 | 0.576 | 1.141 |
| Risk Knowledge |  |  |  |  |  |  |  |  |  | 56.605** | 3.297 | 0.587 |
| Hesitancy Score |  |  |  |  |  |  |  |  |  | 1050.988*** | 179.365*** | 11.942*** |
mytemp=anova(mybase,mymultiA,mymultiB, mymultiC, mymultiD)[,3]
mytemp=1-mytemp/mytemp[1]
names(mytemp)=c("NullModel", "Dem", "DemEcon","DemEconPol", "DemEconPolScores")
mytemp
## NullModel Dem DemEcon DemEconPol
## 0.0000 0.0352 0.0415 0.0893
## DemEconPolScores
## 0.1936
#########################################################################
Multinomial Best Subsets
model.grid <- function(n){
n.list <- rep(list(0:1), n)
expand.grid(n.list) #every 0/1 inclusion pattern over n predictors
}
best.subset <- function(y, x.vars, data){ #requires purrr for map()
length(x.vars) %>%
model.grid %>%
apply(1, function(x) which(x > 0, arr.ind = TRUE)) %>%
map(function(x) x.vars[x]) %>%
.[2:dim(model.grid(length(x.vars)))[1]] %>% #drop the empty subset
map(function(x) multinom(paste0(y, " ~ ", paste(x, collapse = "+")), trace=FALSE, data = data)) %>% #fit on the data argument, not the global mydata
map(function(x) AICc(x)) %>%
do.call(rbind, .) %>%
cbind(model.grid(length(x.vars))[-1, ], .)
}
y=c('Willingness_4Cat')
xvars=c("White","Hispanic", "Gender_2Cat", "Year", "Health_Maj_Min",
"Income", "Insurance", "Politics_3Cat", "Annual_Flu_Shot", "Behavior_Reverse", "Knowledge_Reverse",
"Risk_Knowledge_Reverse", "Hesitancy")
mysubsets=best.subset(y,xvars,mydata)
colnames(mysubsets)=c(xvars, "AIC")
mysubsets=mysubsets[order(mysubsets$AIC),]
mysubsets[1,]
## White Hispanic Gender_2Cat Year Health_Maj_Min Income Insurance
## 7061 0 0 1 0 1 0 0
## Politics_3Cat Annual_Flu_Shot Behavior_Reverse Knowledge_Reverse
## 7061 1 1 1 0
## Risk_Knowledge_Reverse Hesitancy AIC
## 7061 1 1 1305
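Note that the grid enumerates every non-empty subset of the 13 candidate predictors, so the search above fits 2^13 - 1 = 8191 multinomial models:
nrow(model.grid(length(xvars))) - 1 #8191 candidate models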
Multinomial Final
myresults=function(x){
mys=summary(x)$coefficients/summary(x)$standard.errors #Wald z statistics
p=round((1 - pnorm(abs(mys), 0, 1)) * 2,3) #two-sided p-values
coef=round(exp(coef(x)),3) #odds ratios
coef=pfunction(p, coef) #attach significance markers
return(coef)}
mymultifin=multinom(relevel(Willingness_4Cat, ref="4. ASAP") ~
relevel(Gender_2Cat, ref="Male")+
relevel(Health_Maj_Min, ref="0. No")+
relevel(Politics_3Cat, ref="1. Democrat")+
relevel(Annual_Flu_Shot, ref="1. Yes")+
Behavior_Reverse+
Risk_Knowledge_Reverse+
Hesitancy, data=mydata)
## # weights: 40 (27 variable)
## initial value 851.184738
## iter 10 value 655.525864
## iter 20 value 624.454870
## iter 30 value 623.963405
## final value 623.962270
## converged
mymat=t(myresults(mymultifin))
rownames(mymat)=c("Intercept","Gender","Health Major or Minor, Yes",
"Other Party or Independent", "Republican","Flu Shot",
"Behavorial Score", "Risk Knowledge", "Hesitancy Score")
colnames(mymat)=c("Eventually", "If Required", "Never")
mymat%>%kbl(caption="Odds Ratios and p-values")%>%kable_classic(html_font = "Cambria")
Odds Ratios and p-values

|  | Eventually | If Required | Never |
|---|---|---|---|
| Intercept | 0.001*** | 0.008*** | 0.308*** |
| Gender | 3.092** | 3.884*** | 1.815** |
| Health Major or Minor, Yes | 3.265** | 2.628** | 1.058 |
| Other Party or Independent | 4.26** | 1.392 | 1.371 |
| Republican | 4.683** | 1.8 | 1.012 |
| Flu Shot | 2.817** | 3.259*** | 1.334 |
| Behavioral Score | 12.544* | 4.547 | 10.272** |
| Risk Knowledge | 13.886* | 2.991 | 0.57 |
| Hesitancy Score | 843.46*** | 145.711*** | 8.213*** |
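The stars summarize Wald p-values; 95% confidence intervals on the odds-ratio scale can be recovered from the same pieces (a sketch using only objects already computed above):
est=coef(mymultifin) #log-odds, one row per non-reference outcome
se=summary(mymultifin)$standard.errors
round(exp(est-1.96*se), 3) #lower 95% Wald bounds
round(exp(est+1.96*se), 3) #upper 95% Wald bounds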
temp=1-summary(mymultifin)$deviance/summary(mybase)$deviance
names(temp)=c("McFadden Pseudo-R2")
temp
## McFadden Pseudo-R2
## 0.166
#mypred=as.numeric(predict(mymultifin, mydata))
#table(as.numeric(mydata$Willingness_4Cat),mypred)
Logistic Regression, FULL
mylmbase=glm(relevel(Willingness_2Cat, ref="Willing") ~
1, data=mydata, family=binomial)
mylmA=glm(relevel(Willingness_2Cat, ref="Willing") ~
relevel(White, ref="0. White")+
relevel(Hispanic, ref="0. Not Hispanic")+
relevel(Gender_2Cat, ref="Male")+
relevel(Year, ref="4. Senior")+ #collinear with Age
relevel(Health_Maj_Min, ref="0. No")
, data=mydata, family=binomial)
mylmB=glm(relevel(Willingness_2Cat, ref="Willing") ~
relevel(White, ref="0. White")+
relevel(Hispanic, ref="0. Not Hispanic")+
relevel(Gender_2Cat, ref="Male")+
relevel(Year, ref="4. Senior")+ #collinear with Age
relevel(Health_Maj_Min, ref="0. No")+
relevel(Income, ref="5. >$100K")+
relevel(Insurance, ref="0. No"), data=mydata, family=binomial)
mylmC=glm(relevel(Willingness_2Cat, ref="Willing") ~
relevel(White, ref="0. White")+
relevel(Hispanic, ref="0. Not Hispanic")+
relevel(Gender_2Cat, ref="Male")+
relevel(Year, ref="4. Senior")+ #collinear with Age
relevel(Health_Maj_Min, ref="0. No")+
relevel(Income, ref="5. >$100K")+
relevel(Insurance, ref="0. No")+
relevel(Politics_3Cat, ref="1. Democrat"),
data=mydata, family=binomial)
mylmD=glm(relevel(Willingness_2Cat, ref="Willing") ~
relevel(White, ref="0. White")+
relevel(Hispanic, ref="0. Not Hispanic")+
relevel(Gender_2Cat, ref="Male")+
relevel(Year, ref="4. Senior")+ #collinear with Age
relevel(Health_Maj_Min, ref="0. No")+
relevel(Income, ref="5. >$100K")+
relevel(Insurance, ref="0. No")+
relevel(Politics_3Cat, ref="1. Democrat")+
relevel(Annual_Flu_Shot, ref="1. Yes")+
Behavior_Reverse+
Knowledge_Reverse+
Risk_Knowledge_Reverse+
Hesitancy, data=mydata, family=binomial)
#Risk_Score makes the model unstable; performance and stability are better without it.
myresults=function(x){
result=noquote(pfunction(round(
summary(x)$coefficients[,4],3),round(exp(summary(x)$coefficients[,1]),3)))
return(result)}
mymat=cbind(c(myresults(mylmA), rep("",12)),
c(myresults(mylmB), rep("", 7)),
c(myresults(mylmC), rep("",5)),
myresults(mylmD))
rownames(mymat)=c("Intercept", "Non-White","Hispanic",
"Female","Freshman","Sophomore", "Junior",
"Health Major or Minor, Yes",
"<25K", "25 to 50K", "50 to 75K", "75 to 100K",
"Insured, Yes",
"Other Party or Independent", "Republican","Flu Shot",
"Behavorial Score", "Knowledge Score", "Risk Knowledge", "Hesitancy Score")
colnames(mymat)=rep(c("Odds"),4)
mymat%>%kbl(caption="Odds Ratios and p-values")%>%kable_classic(html_font = "Cambria")
Odds Ratios and p-values

One column per model (A: demographics; B: + income/insurance; C: + politics; D: full); entries are odds ratios for being unwilling.

|  | Odds (A) | Odds (B) | Odds (C) | Odds (D) |
|---|---|---|---|---|
| Intercept | 0.19*** | 0.22*** | 0.072*** | 0.005*** |
| Non-White | 1.32 | 1.294 | 1.835* | 1.748 |
| Hispanic | 0.524* | 0.518* | 0.458* | 0.558 |
| Female | 1.555* | 1.529* | 1.997** | 2.808*** |
| Freshman | 1.008 | 1.052 | 0.866 | 0.612 |
| Sophomore | 0.83 | 0.822 | 0.769 | 0.729 |
| Junior | 1.016 | 1.013 | 1.077 | 0.938 |
| Health Major or Minor, Yes | 1.9** | 1.932** | 1.955** | 2.665*** |
| <25K |  | 1.17 | 1.412 | 1.337 |
| 25 to 50K |  | 0.91 | 1.18 | 1.234 |
| 50 to 75K |  | 0.783 | 0.906 | 1.031 |
| 75 to 100K |  | 1.293 | 1.588 | 1.746 |
| Insured, Yes |  | 0.808 | 0.797 | 1.003 |
| Other Party or Independent |  |  | 2.021** | 1.787* |
| Republican |  |  | 7.401*** | 2.832** |
| Flu Shot |  |  |  | 2.658*** |
| Behavioral Score |  |  |  | 1.755 |
| Knowledge Score |  |  |  | 0.671 |
| Risk Knowledge |  |  |  | 13.294** |
| Hesitancy Score |  |  |  | 80.386*** |
#########################################################################
mcfadden=function(mod){ 1 - mod$deviance / mod$null.deviance} #1 - D/D_null is McFadden's pseudo-R^2, not Nagelkerke's
A=mcfadden(mylmA)
B=mcfadden(mylmB)
C=mcfadden(mylmC)
D=mcfadden(mylmD)
temp=round(rbind(A,B,C,D), 3)
rownames(temp)=c("Demographics Only", "Plus Income/Insurance", "Plus Politics", "Full")
myprint(temp,"McFadden Pseudo R^2")
|  | McFadden Pseudo R^2 |
|---|---|
| Demographics Only | 0.034 |
| Plus Income/Insurance | 0.040 |
| Plus Politics | 0.123 |
| Full | 0.300 |
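For comparison, Nagelkerke's statistic rescales the Cox & Snell R^2 so that a value of 1 is attainable; a minimal sketch (assuming mod is a glm fit with the default y=TRUE, so mod$y holds the response):
nagelkerke=function(mod){
n=length(mod$y) #number of observations
r2_cs=1-exp((mod$deviance-mod$null.deviance)/n) #Cox & Snell R^2
r2_cs/(1-exp(-mod$null.deviance/n)) #divide by its maximum so the scale tops out at 1
}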
LR Best Subsets
best.subset2 <- function(y, x.vars, data){ #logistic analogue of best.subset above
length(x.vars) %>%
model.grid %>%
apply(1, function(x) which(x > 0, arr.ind = TRUE)) %>%
map(function(x) x.vars[x]) %>%
.[2:dim(model.grid(length(x.vars)))[1]] %>%
map(function(x)
glm(paste0(y, " ~ ", paste(x, collapse = "+")),
data=data, family=binomial)) %>% #fit on the data argument, not the global mydata
map(function(x) AICc(x)) %>%
do.call(rbind, .) %>%
cbind(model.grid(length(x.vars))[-1, ], .)
}
y2=c('Willingness_2Cat')
xvars2=c("White","Hispanic", "Gender_2Cat", "Year", "Health_Maj_Min",
"Income", "Insurance", "Politics_3Cat", "Annual_Flu_Shot", "Behavior_Reverse", "Knowledge_Reverse",
"Risk_Knowledge_Reverse", "Hesitancy")
#Risk_Score makes the model unstable, so it is dropped here; the AIC is much lower without it.
mysubsets2=best.subset2(y2,xvars2,mydata)
colnames(mysubsets2)=c(xvars2, "AIC") #name the columns from xvars2, not xvars
mysubsets2=mysubsets2[order(mysubsets2$AIC),]
mysubsets2[1,]
## White Hispanic Gender_2Cat Year Health_Maj_Min Income Insurance Politics_3Cat
## 2 1 0 0 0 0 0 0 0
## Annual_Flu_Shot Behavior_Reverse Knowledge_Reverse Risk_Knowledge_Reverse
## 2 0 0 0 0
## Hesitancy AIC
## 2 0 651
LR, Final Model
#########################################################################
mylm1=glm(relevel(Willingness_2Cat, ref="Willing") ~
relevel(Gender_2Cat, ref="Male")+
relevel(Health_Maj_Min, ref="0. No")+
relevel(Politics_3Cat, ref="1. Democrat")+
relevel(Annual_Flu_Shot, ref="1. Yes")+
Risk_Knowledge_Reverse+Hesitancy, data=mydata, family=binomial)
mys=summary(mylm1)
z =mys$coefficients[,3]
p <- round((1 - pnorm(abs(z), 0, 1)) * 2,3)
Odds_Ratio=round(exp(coef(mylm1)),3)
result=rbind(Odds_Ratio,p)
result=t(result)
rownames(result)=c("Intercept", "Female","Health Major or Minor, Yes","Other Party or Independent", "Republican", "Annual Flu Shot",
"Risk Knowledge Score","Hesitancy Score")
result%>%kbl(caption="Odds and p-values, Referent category: Willingness=1, Model for Unwillingness")%>%kable_classic(html_font = "Cambria")
Odds and p-values, Referent category: Willingness=1, Model for Unwillingness

|  | Odds_Ratio | p |
|---|---|---|
| Intercept | 0.007 | 0.000 |
| Female | 2.480 | 0.000 |
| Health Major or Minor, Yes | 2.639 | 0.000 |
| Other Party or Independent | 1.691 | 0.063 |
| Republican | 2.523 | 0.005 |
| Annual Flu Shot | 2.636 | 0.000 |
| Risk Knowledge Score | 11.558 | 0.001 |
| Hesitancy Score | 87.385 | 0.000 |
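Profile-likelihood confidence intervals for these odds ratios are available directly from confint() (MASS, loaded earlier, provides the glm method):
round(exp(confint(mylm1)), 3) #95% CIs on the odds-ratio scale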
#summary(mylm1)
a=1-mylm1$deviance/mylm1$null.deviance
names(a)=("McFadden R^2")
myprint(a, c(""))
#########################################################################
Classification Accuracy
#########################################################################
prob=as.factor(round(1-predict(mylm1, type="response"),0))
levels(prob)=c("0"="Predict Unwilling", "1"="Predict Willing")
myt=table(prob, mydata$Willingness_2Cat)
myprint(myt,c("Unwilling", "Willing"))
|  | Unwilling | Willing |
|---|---|---|
| Predict Unwilling | 58 | 26 |
| Predict Willing | 77 | 453 |
fourfoldplot(myt, color = c("#CC6666", "#99CC99"),
conf.level = 0, margin = 1, main = "Confusion Matrix")
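The confusion matrix above comes from round(), i.e., a fixed 0.5 cutoff on the predicted probability; the low specificity seen below could be traded against sensitivity by moving that threshold (a sketch with an arbitrary illustrative cutoff of 0.35):
p_unwilling=predict(mylm1, type="response") #P(Unwilling), given the releveling above
pred=ifelse(p_unwilling>0.35, "Predict Unwilling", "Predict Willing")
table(pred, mydata$Willingness_2Cat)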

#########################################################################
Assumptions of LR
a=vif(mylmD)
a%>%kbl()%>%kable_classic()
|  | GVIF | Df | GVIF^(1/(2*Df)) |
|---|---|---|---|
| relevel(White, ref = "0. White") | 2.20 | 1 | 1.48 |
| relevel(Hispanic, ref = "0. Not Hispanic") | 2.04 | 1 | 1.43 |
| relevel(Gender_2Cat, ref = "Male") | 1.21 | 1 | 1.10 |
| relevel(Year, ref = "4. Senior") | 1.19 | 3 | 1.03 |
| relevel(Health_Maj_Min, ref = "0. No") | 1.11 | 1 | 1.05 |
| relevel(Income, ref = "5. >$100K") | 1.28 | 4 | 1.03 |
| relevel(Insurance, ref = "0. No") | 1.14 | 1 | 1.07 |
| relevel(Politics_3Cat, ref = "1. Democrat") | 1.40 | 2 | 1.09 |
| relevel(Annual_Flu_Shot, ref = "1. Yes") | 1.12 | 1 | 1.06 |
| Behavior_Reverse | 1.64 | 1 | 1.28 |
| Knowledge_Reverse | 1.26 | 1 | 1.12 |
| Risk_Knowledge_Reverse | 1.83 | 1 | 1.35 |
| Hesitancy | 1.14 | 1 | 1.07 |
#No issue with collinearity: every GVIF^(1/(2*Df)) is well below 2
#########################################################################
Accuracy Metrics
acc=sum(diag(myt))/sum(myt)
ppv=myt[2,2]/sum(myt[2,])
npv=myt[1,1]/sum(myt[1,])
sens=myt[2,2]/sum(myt[,2])
spec=myt[1,1]/sum(myt[,1])
mynames=c("Accuracy", "PPV","NPV", "Sensitivity", "Specificity")
values=c(acc, ppv, npv, sens, spec)
names(values)=mynames
myprint(values, "Statistics")
|  | Statistics |
|---|---|
| Accuracy | 0.832 |
| PPV | 0.855 |
| NPV | 0.690 |
| Sensitivity | 0.946 |
| Specificity | 0.430 |
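As an arithmetic check against the confusion matrix above: sensitivity is 453/(26+453) = 0.946 and specificity is 58/(58+77) = 0.430, matching the table.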
Citations
cite('Amelia')
##
## To cite Amelia in publications use:
##
## James Honaker, Gary King, Matthew Blackwell (2011). Amelia II: A
## Program for Missing Data. Journal of Statistical Software, 45(7),
## 1-47. URL https://www.jstatsoft.org/v45/i07/.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {{Amelia II}: A Program for Missing Data},
## author = {James Honaker and Gary King and Matthew Blackwell},
## journal = {Journal of Statistical Software},
## year = {2011},
## volume = {45},
## number = {7},
## pages = {1--47},
## url = {https://www.jstatsoft.org/v45/i07/},
## }
cite('bestglm')
##
## To cite package 'bestglm' in publications use:
##
## A.I. McLeod, Changjiang Xu and Yuanhao Lai (2020). bestglm: Best
## Subset GLM and Regression Utilities. R package version 0.37.3.
## https://CRAN.R-project.org/package=bestglm
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {bestglm: Best Subset GLM and Regression Utilities},
## author = {A.I. McLeod and Changjiang Xu and Yuanhao Lai},
## year = {2020},
## note = {R package version 0.37.3},
## url = {https://CRAN.R-project.org/package=bestglm},
## }
##
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
cite('car')
##
## To cite the car package in publications use:
##
## John Fox and Sanford Weisberg (2019). An {R} Companion to Applied
## Regression, Third Edition. Thousand Oaks CA: Sage. URL:
## https://socialsciences.mcmaster.ca/jfox/Books/Companion/
##
## A BibTeX entry for LaTeX users is
##
## @Book{,
## title = {An {R} Companion to Applied Regression},
## edition = {Third},
## author = {John Fox and Sanford Weisberg},
## year = {2019},
## publisher = {Sage},
## address = {Thousand Oaks {CA}},
## url = {https://socialsciences.mcmaster.ca/jfox/Books/Companion/},
## }
cite('carData')
##
## To cite package 'carData' in publications use:
##
## John Fox, Sanford Weisberg and Brad Price (2020). carData: Companion
## to Applied Regression Data Sets. R package version 3.0-4.
## https://CRAN.R-project.org/package=carData
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {carData: Companion to Applied Regression Data Sets},
## author = {John Fox and Sanford Weisberg and Brad Price},
## year = {2020},
## note = {R package version 3.0-4},
## url = {https://CRAN.R-project.org/package=carData},
## }
cite('corrplot')
##
## To cite corrplot in publications use:
##
## Taiyun Wei and Viliam Simko (2021). R package 'corrplot':
## Visualization of a Correlation Matrix (Version 0.90). Available from
## https://github.com/taiyun/corrplot
##
## A BibTeX entry for LaTeX users is
##
## @Manual{corrplot2021,
## title = {R package 'corrplot': Visualization of a Correlation Matrix},
## author = {Taiyun Wei and Viliam Simko},
## year = {2021},
## note = {(Version 0.90)},
## url = {https://github.com/taiyun/corrplot},
## }
cite('dplyr')
##
## To cite package 'dplyr' in publications use:
##
## Hadley Wickham, Romain François, Lionel Henry and Kirill Müller
## (2021). dplyr: A Grammar of Data Manipulation. R package version
## 1.0.7. https://CRAN.R-project.org/package=dplyr
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {dplyr: A Grammar of Data Manipulation},
## author = {Hadley Wickham and Romain François and Lionel Henry and Kirill Müller},
## year = {2021},
## note = {R package version 1.0.7},
## url = {https://CRAN.R-project.org/package=dplyr},
## }
cite('ggcorrplot')
##
## To cite package 'ggcorrplot' in publications use:
##
## Alboukadel Kassambara (2019). ggcorrplot: Visualization of a
## Correlation Matrix using 'ggplot2'. R package version 0.1.3.
## https://CRAN.R-project.org/package=ggcorrplot
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {ggcorrplot: Visualization of a Correlation Matrix using 'ggplot2'},
## author = {Alboukadel Kassambara},
## year = {2019},
## note = {R package version 0.1.3},
## url = {https://CRAN.R-project.org/package=ggcorrplot},
## }
cite('ggpubr')
##
## To cite package 'ggpubr' in publications use:
##
## Alboukadel Kassambara (2020). ggpubr: 'ggplot2' Based Publication
## Ready Plots. R package version 0.4.0.
## https://CRAN.R-project.org/package=ggpubr
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {ggpubr: 'ggplot2' Based Publication Ready Plots},
## author = {Alboukadel Kassambara},
## year = {2020},
## note = {R package version 0.4.0},
## url = {https://CRAN.R-project.org/package=ggpubr},
## }
cite('gridExtra')
##
## To cite package 'gridExtra' in publications use:
##
## Baptiste Auguie (2017). gridExtra: Miscellaneous Functions for "Grid"
## Graphics. R package version 2.3.
## https://CRAN.R-project.org/package=gridExtra
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {gridExtra: Miscellaneous Functions for "Grid" Graphics},
## author = {Baptiste Auguie},
## year = {2017},
## note = {R package version 2.3},
## url = {https://CRAN.R-project.org/package=gridExtra},
## }
cite('gtable')
##
## To cite package 'gtable' in publications use:
##
## Hadley Wickham and Thomas Lin Pedersen (2019). gtable: Arrange
## 'Grobs' in Tables. R package version 0.3.0.
## https://CRAN.R-project.org/package=gtable
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {gtable: Arrange 'Grobs' in Tables},
## author = {Hadley Wickham and Thomas Lin Pedersen},
## year = {2019},
## note = {R package version 0.3.0},
## url = {https://CRAN.R-project.org/package=gtable},
## }
cite('heplots')
##
## To cite package 'heplots' in publications use:
##
## John Fox and Michael Friendly and Georges Monette (2021).
## heplots: Visualizing Tests in Multivariate Linear Models. R package
## version 1.3-8. URL https://CRAN.R-project.org/package=heplots
##
## To refer to the theory on which this package is based, also cite:
##
## Friendly, M. (2007). HE plots for Multivariate General Linear
## Models. Journal of Computational and Graphical Statistics, 2007, 16,
## 421-444
##
## For use with repeated measures designs also cite:
##
## Michael Friendly (2010). HE Plots for Repeated Measures Designs.
## Journal of Statistical Software, 37(4), 1-40. URL
## https://www.jstatsoft.org/v37/i04/.
##
## BibTeX entries for LaTeX users: use 'toBibtex(citation("heplots"))'
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
cite('kableExtra')
##
## To cite package 'kableExtra' in publications use:
##
## Hao Zhu (2021). kableExtra: Construct Complex Table with 'kable' and
## Pipe Syntax. R package version 1.3.4.
## https://CRAN.R-project.org/package=kableExtra
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {kableExtra: Construct Complex Table with 'kable' and Pipe Syntax},
## author = {Hao Zhu},
## year = {2021},
## note = {R package version 1.3.4},
## url = {https://CRAN.R-project.org/package=kableExtra},
## }
cite('leaps')
##
## To cite package 'leaps' in publications use:
##
## Thomas Lumley based on Fortran code by Alan Miller (2020). leaps:
## Regression Subset Selection. R package version 3.1.
## https://CRAN.R-project.org/package=leaps
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {leaps: Regression Subset Selection},
## author = {Thomas Lumley based on Fortran code by Alan Miller},
## year = {2020},
## note = {R package version 3.1},
## url = {https://CRAN.R-project.org/package=leaps},
## }
##
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
cite('magrittr')
##
## To cite package 'magrittr' in publications use:
##
## Stefan Milton Bache and Hadley Wickham (2020). magrittr: A
## Forward-Pipe Operator for R. R package version 2.0.1.
## https://CRAN.R-project.org/package=magrittr
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {magrittr: A Forward-Pipe Operator for R},
## author = {Stefan Milton Bache and Hadley Wickham},
## year = {2020},
## note = {R package version 2.0.1},
## url = {https://CRAN.R-project.org/package=magrittr},
## }
cite('MANOVA.RM')
##
## To cite package 'MANOVA.RM' in publications use:
##
## Sarah Friedrich, Frank Konietschke and Markus Pauly (2021).
## MANOVA.RM: Resampling-Based Analysis of Multivariate Data and
## Repeated Measures Designs. R package version 0.5.1.
## https://CRAN.R-project.org/package=MANOVA.RM
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {MANOVA.RM: Resampling-Based Analysis of Multivariate Data and Repeated
## Measures Designs},
## author = {Sarah Friedrich and Frank Konietschke and Markus Pauly},
## year = {2021},
## note = {R package version 0.5.1},
## url = {https://CRAN.R-project.org/package=MANOVA.RM},
## }
##
## ATTENTION: This citation information has been auto-generated from the
## package DESCRIPTION file and may need manual editing, see
## 'help("citation")'.
cite('MASS')
##
## To cite the MASS package in publications use:
##
## Venables, W. N. & Ripley, B. D. (2002) Modern Applied Statistics with
## S. Fourth Edition. Springer, New York. ISBN 0-387-95457-0
##
## A BibTeX entry for LaTeX users is
##
## @Book{,
## title = {Modern Applied Statistics with S},
## author = {W. N. Venables and B. D. Ripley},
## publisher = {Springer},
## edition = {Fourth},
## address = {New York},
## year = {2002},
## note = {ISBN 0-387-95457-0},
## url = {http://www.stats.ox.ac.uk/pub/MASS4/},
## }
cite('MVN')
##
## To cite MVN in publications use:
##
## Korkmaz S, Goksuluk D, Zararsiz G. MVN: An R Package for Assessing
## Multivariate Normality. The R Journal. 2014 6(2):151-162.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {MVN: An R Package for Assessing Multivariate Normality.},
## author = {Selcuk Korkmaz and Dincer Goksuluk and Gokmen Zararsiz},
## journal = {The R Journal},
## year = {2014},
## volume = {6},
## number = {2},
## pages = {151--162},
## url = {https://journal.r-project.org/archive/2014-2/korkmaz-goksuluk-zararsiz.pdf},
## }
cite('mvtnorm')
##
## To cite package mvtnorm in publications use
##
## Alan Genz, Frank Bretz, Tetsuhisa Miwa, Xuefei Mi, Friedrich Leisch,
## Fabian Scheipl, Torsten Hothorn (2021). mvtnorm: Multivariate Normal
## and t Distributions. R package version 1.1-2. URL
## http://CRAN.R-project.org/package=mvtnorm
##
## Alan Genz, Frank Bretz (2009), Computation of Multivariate Normal and
## t Probabilities. Lecture Notes in Statistics, Vol. 195.,
## Springer-Verlag, Heidelberg. ISBN 978-3-642-01688-2
##
## To see these entries in BibTeX format, use 'print(<citation>,
## bibtex=TRUE)', 'toBibtex(.)', or set
## 'options(citation.bibtex.max=999)'.
cite('nnet')
##
## To cite the nnet package in publications use:
##
## Venables, W. N. & Ripley, B. D. (2002) Modern Applied Statistics with
## S. Fourth Edition. Springer, New York. ISBN 0-387-95457-0
##
## A BibTeX entry for LaTeX users is
##
## @Book{,
## title = {Modern Applied Statistics with S},
## author = {W. N. Venables and B. D. Ripley},
## publisher = {Springer},
## edition = {Fourth},
## address = {New York},
## year = {2002},
## note = {ISBN 0-387-95457-0},
## url = {https://www.stats.ox.ac.uk/pub/MASS4/},
## }
cite('psych') #to describe
##
## To cite the psych package in publications use:
##
## Revelle, W. (2021) psych: Procedures for Personality and
## Psychological Research, Northwestern University, Evanston, Illinois,
## USA, https://CRAN.R-project.org/package=psych Version = 2.1.6,.
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {psych: Procedures for Psychological, Psychometric, and Personality Research},
## author = {William Revelle},
## organization = { Northwestern University},
## address = { Evanston, Illinois},
## year = {2021},
## note = {R package version 2.1.6},
## url = {https://CRAN.R-project.org/package=psych},
## }
cite('purrr')
##
## To cite package 'purrr' in publications use:
##
## Lionel Henry and Hadley Wickham (2020). purrr: Functional Programming
## Tools. R package version 0.3.4.
## https://CRAN.R-project.org/package=purrr
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {purrr: Functional Programming Tools},
## author = {Lionel Henry and Hadley Wickham},
## year = {2020},
## note = {R package version 0.3.4},
## url = {https://CRAN.R-project.org/package=purrr},
## }
cite('ggplot2')
##
## To cite ggplot2 in publications, please use:
##
## H. Wickham. ggplot2: Elegant Graphics for Data Analysis.
## Springer-Verlag New York, 2016.
##
## A BibTeX entry for LaTeX users is
##
## @Book{,
## author = {Hadley Wickham},
## title = {ggplot2: Elegant Graphics for Data Analysis},
## publisher = {Springer-Verlag New York},
## year = {2016},
## isbn = {978-3-319-24277-4},
## url = {https://ggplot2.tidyverse.org},
## }
cite('ggcorrplot')
##
## To cite package 'ggcorrplot' in publications use:
##
## Alboukadel Kassambara (2019). ggcorrplot: Visualization of a
## Correlation Matrix using 'ggplot2'. R package version 0.1.3.
## https://CRAN.R-project.org/package=ggcorrplot
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {ggcorrplot: Visualization of a Correlation Matrix using 'ggplot2'},
## author = {Alboukadel Kassambara},
## year = {2019},
## note = {R package version 0.1.3},
## url = {https://CRAN.R-project.org/package=ggcorrplot},
## }
cite('qcc')
##
## To cite qcc in publications use:
##
## Scrucca, L. (2004). qcc: an R package for quality control charting
## and statistical process control. R News 4/1, 11-17.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {qcc: an R package for quality control charting and statistical process control},
## author = {Luca Scrucca},
## journal = {R News},
## year = {2004},
## pages = {11--17},
## volume = {4/1},
## url = {https://cran.r-project.org/doc/Rnews/},
## }
cite('rcompanion')
##
## To cite package 'rcompanion' in publications use:
##
## Salvatore Mangiafico (2021). rcompanion: Functions to Support
## Extension Education Program Evaluation. R package version 2.4.1.
## https://CRAN.R-project.org/package=rcompanion
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {rcompanion: Functions to Support Extension Education Program Evaluation},
## author = {Salvatore Mangiafico},
## year = {2021},
## note = {R package version 2.4.1},
## url = {https://CRAN.R-project.org/package=rcompanion},
## }
cite('reticulate') #to use Python in R as well
##
## To cite package 'reticulate' in publications use:
##
## Kevin Ushey, JJ Allaire and Yuan Tang (2021). reticulate: Interface
## to 'Python'. R package version 1.21.
## https://CRAN.R-project.org/package=reticulate
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {reticulate: Interface to 'Python'},
## author = {Kevin Ushey and JJ Allaire and Yuan Tang},
## year = {2021},
## note = {R package version 1.21},
## url = {https://CRAN.R-project.org/package=reticulate},
## }
cite('reshape2')
##
## To cite reshape2 in publications use:
##
## Hadley Wickham (2007). Reshaping Data with the reshape Package.
## Journal of Statistical Software, 21(12), 1-20. URL
## http://www.jstatsoft.org/v21/i12/.
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {Reshaping Data with the {reshape} Package},
## author = {Hadley Wickham},
## journal = {Journal of Statistical Software},
## year = {2007},
## volume = {21},
## number = {12},
## pages = {1--20},
## url = {http://www.jstatsoft.org/v21/i12/},
## }
cite('ResourceSelection')
##
## To cite package 'ResourceSelection' in publications use:
##
## Subhash R. Lele, Jonah L. Keim and Peter Solymos (2019).
## ResourceSelection: Resource Selection (Probability) Functions for
## Use-Availability Data. R package version 0.3-5.
## https://CRAN.R-project.org/package=ResourceSelection
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {ResourceSelection: Resource Selection (Probability) Functions for Use-Availability
## Data},
## author = {Subhash R. Lele and Jonah L. Keim and Peter Solymos},
## year = {2019},
## note = {R package version 0.3-5},
## url = {https://CRAN.R-project.org/package=ResourceSelection},
## }
cite('rstatix')
##
## To cite package 'rstatix' in publications use:
##
## Alboukadel Kassambara (2021). rstatix: Pipe-Friendly Framework for
## Basic Statistical Tests. R package version 0.7.0.
## https://CRAN.R-project.org/package=rstatix
##
## A BibTeX entry for LaTeX users is
##
## @Manual{,
## title = {rstatix: Pipe-Friendly Framework for Basic Statistical Tests},
## author = {Alboukadel Kassambara},
## year = {2021},
## note = {R package version 0.7.0},
## url = {https://CRAN.R-project.org/package=rstatix},
## }
cite('tidyverse')
##
## Wickham et al., (2019). Welcome to the tidyverse. Journal of Open
## Source Software, 4(43), 1686, https://doi.org/10.21105/joss.01686
##
## A BibTeX entry for LaTeX users is
##
## @Article{,
## title = {Welcome to the {tidyverse}},
## author = {Hadley Wickham and Mara Averick and Jennifer Bryan and Winston Chang and Lucy D'Agostino McGowan and Romain François and Garrett Grolemund and Alex Hayes and Lionel Henry and Jim Hester and Max Kuhn and Thomas Lin Pedersen and Evan Miller and Stephan Milton Bache and Kirill Müller and Jeroen Ooms and David Robinson and Dana Paige Seidel and Vitalie Spinu and Kohske Takahashi and Davis Vaughan and Claus Wilke and Kara Woo and Hiroaki Yutani},
## year = {2019},
## journal = {Journal of Open Source Software},
## volume = {4},
## number = {43},
## pages = {1686},
## doi = {10.21105/joss.01686},
## }