### DESCRIPTIVE STATISTICS ###

# How many motorcyclists vs car drivers?
# Reading tabyl output: values are proportions (0-1), not percentages.
# `percent` is out of all rows (NA included); `valid_percent` is out of the
# non-missing rows only (e.g. 194 / 387 vs 194 / 376 below).
janitor::tabyl(mydata$vehicle.f)
##  mydata$vehicle.f   n    percent valid_percent
##     motorcyclists 194 0.50129199     0.5159574
##       car drivers 182 0.47028424     0.4840426
##              <NA>  11 0.02842377            NA
# Descriptives for mhc and ss; missing = TRUE adds the "Missing" row.
# NOTE(review): descriptives()/vars() are not namespaced here -- presumably
# jmv::descriptives, attached earlier in the file; confirm.
# ss has no missing values but a minimum of 0 -- NOTE(review): check whether 0
# is a genuine score or an artefact of summing unanswered items.
descriptives(mydata, vars = vars(mhc, ss), missing = TRUE)
## 
##  DESCRIPTIVES
## 
##  Descriptives                                    
##  ─────────────────────────────────────────────── 
##                          mhc          ss         
##  ─────────────────────────────────────────────── 
##    N                           319         387   
##    Missing                      68           0   
##    Mean                   4.033260    15.50904   
##    Median                 4.071429          18   
##    Standard deviation    0.9326433    9.841058   
##    Minimum                1.142857           0   
##    Maximum                6.000000          35   
##  ───────────────────────────────────────────────
# Descriptives for the five personality trait scores (58-59 missing each).
descriptives(mydata, vars = vars(openness, extraversion, agreeableness, conscientiousness, neuroticism), missing = TRUE)
## 
##  DESCRIPTIVES
## 
##  Descriptives                                                                                             
##  ──────────────────────────────────────────────────────────────────────────────────────────────────────── 
##                          openness     extraversion    agreeableness    conscientiousness    neuroticism   
##  ──────────────────────────────────────────────────────────────────────────────────────────────────────── 
##    N                           329             329              329                  329            328   
##    Missing                      58              58               58                   58             59   
##    Mean                   4.451476        4.420810         4.614573             4.601860       3.768427   
##    Median                 4.375000        4.375000         4.555556             4.444444       3.888889   
##    Standard deviation    0.8442381       0.7682969        0.9370719            0.6621849      0.8382753   
##    Minimum                2.000000        2.125000         2.222222             2.888889       1.125000   
##    Maximum                6.888889        6.500000         7.000000             6.625000       5.875000   
##  ────────────────────────────────────────────────────────────────────────────────────────────────────────
# Descriptives for the riding-history variables. Only 157-162 of 387 rows have
# values -- presumably these items were shown to motorcyclists only; confirm.
descriptives(mydata, vars = vars(hours.ride, years.riding, near.crashes), missing = TRUE)
## 
##  DESCRIPTIVES
## 
##  Descriptives                                                         
##  ──────────────────────────────────────────────────────────────────── 
##                          hours.ride    years.riding    near.crashes   
##  ──────────────────────────────────────────────────────────────────── 
##    N                            157             162             157   
##    Missing                      230             225             230   
##    Mean                    16.12739        14.64198        8.159236   
##    Median                        12        9.000000               3   
##    Standard deviation      12.35673        14.22561        15.98738   
##    Minimum                        1               0               0   
##    Maximum                       70              56             100   
##  ────────────────────────────────────────────────────────────────────
# Frequency tables for the categorical (factor) variables.
# As above, tabyl's `percent` includes NA rows in the denominator and
# `valid_percent` excludes them; both are proportions on a 0-1 scale.

# Demographics
janitor::tabyl(mydata$ethnicity.f)
##  mydata$ethnicity.f   n    percent valid_percent
##               white 343 0.88630491    0.93715847
##               asian  18 0.04651163    0.04918033
##               black   5 0.01291990    0.01366120
##          indigenous   0 0.00000000    0.00000000
##                <NA>  21 0.05426357            NA
# NOTE(review): the two highest income levels are empty and the two levels
# below them have identical counts (57 / 57); worth confirming the factor
# recode where income.f is created (not visible in this section).
janitor::tabyl(mydata$income.f)
##  mydata$income.f   n   percent valid_percent
##  under $20k/year  41 0.1059432     0.2113402
##    $20-$40k/year  39 0.1007752     0.2010309
##    $40-$60k/year  57 0.1472868     0.2938144
##    $60-$80k/year  57 0.1472868     0.2938144
##   $80-$100k/year   0 0.0000000     0.0000000
##  over $100k/year   0 0.0000000     0.0000000
##             <NA> 193 0.4987080            NA
# NOTE(review): level labels mix singular and plural ("woman" vs "men");
# fix where gender.f is defined.
janitor::tabyl(mydata$gender.f)
##  mydata$gender.f   n     percent valid_percent
##            woman 193 0.498708010    0.51193634
##              men 182 0.470284238    0.48275862
##       non-binary   2 0.005167959    0.00530504
##             <NA>  10 0.025839793            NA

# Riding-related items: roughly 225 rows are NA for each of the variables
# below -- presumably the non-riders; confirm against the survey routing.
# NOTE(review): "wears partial goal" looks like a typo for "wears partial gear"
# in the factor labels; fix where gear.f is defined.
janitor::tabyl(mydata$gear.f)
##       mydata$gear.f   n   percent valid_percent
##     wears full gear 133 0.3436693     0.8209877
##  wears partial goal  29 0.0749354     0.1790123
##                <NA> 225 0.5813953            NA
janitor::tabyl(mydata$checks.f)
##  mydata$checks.f   n     percent valid_percent
##           always 120 0.310077519    0.74534161
##          usually  34 0.087855297    0.21118012
##        sometimes   4 0.010335917    0.02484472
##           rarely   1 0.002583979    0.00621118
##            never   2 0.005167959    0.01242236
##             <NA> 226 0.583979328            NA
janitor::tabyl(mydata$passenger.f)
##  mydata$passenger.f   n     percent valid_percent
##              always   1 0.002583979    0.00621118
##             usually  11 0.028423773    0.06832298
##           sometimes  32 0.082687339    0.19875776
##              rarely  50 0.129198966    0.31055901
##               never  67 0.173126615    0.41614907
##                <NA> 226 0.583979328            NA
janitor::tabyl(mydata$speed.f)
##  mydata$speed.f   n    percent valid_percent
##         highway  22 0.05684755     0.1358025
##       backroads 140 0.36175711     0.8641975
##            <NA> 225 0.58139535            NA
janitor::tabyl(mydata$style.f)
##  mydata$style.f   n   percent valid_percent
##           sport  93 0.2403101     0.5776398
##         cruiser  68 0.1757106     0.4223602
##            <NA> 226 0.5839793            NA
janitor::tabyl(mydata$accident.f)
##  mydata$accident.f   n   percent valid_percent
##   been in accident  50 0.1291990      0.308642
##       no accidents 112 0.2894057      0.691358
##               <NA> 225 0.5813953            NA
janitor::tabyl(mydata$safety.f)
##  mydata$safety.f   n    percent valid_percent
##    safety course 136 0.35142119     0.8395062
##        no course  26 0.06718346     0.1604938
##             <NA> 225 0.58139535            NA
janitor::tabyl(mydata$limit.f)
##  mydata$limit.f   n     percent valid_percent
##          always  18 0.046511628    0.11042945
##         usually  44 0.113695090    0.26993865
##       sometimes  68 0.175710594    0.41717791
##          rarely  30 0.077519380    0.18404908
##           never   3 0.007751938    0.01840491
##            <NA> 224 0.578811370            NA
# Key variables summarised separately for each vehicle.f group.
# Each call prints summary() (min, quartiles, mean, max, and the NA count when
# any are present) for motorcyclists and for car drivers.

# -- personality traits --
with(mydata, tapply(conscientiousness, vehicle.f, summary))
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.889   4.667   5.111   5.046   5.444   6.625      30 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   3.000   4.000   4.111   4.161   4.333   5.000      17
with(mydata, tapply(openness, vehicle.f, summary))
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   3.000   4.667   5.000   5.049   5.444   6.889      30 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.000   3.500   3.875   3.858   4.125   5.500      17
with(mydata, tapply(agreeableness, vehicle.f, summary))
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   3.000   4.778   5.222   5.237   5.778   7.000      30 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.222   3.667   4.000   3.996   4.333   6.667      17
with(mydata, tapply(extraversion, vehicle.f, summary))
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.125   3.844   4.375   4.425   5.031   6.500      30 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.125   4.000   4.375   4.417   4.750   5.750      17
with(mydata, tapply(neuroticism, vehicle.f, summary))
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.125   2.625   3.625   3.474   4.125   5.875      30 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.125   3.778   4.000   4.063   4.333   5.222      18

# -- ss subscales --
with(mydata, tapply(ss.disinhib, vehicle.f, summary))
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    4.00    3.49    6.00    9.00 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   2.000   4.000   4.077   6.000  10.000
with(mydata, tapply(ss.boredom, vehicle.f, summary))
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   2.000   1.964   3.000   9.000 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    1.00    1.72    2.75    9.00
with(mydata, tapply(ss.experience, vehicle.f, summary))
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   5.000   4.351   7.000  10.000 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   3.000   5.000   4.764   7.000  10.000
with(mydata, tapply(ss.thrill, vehicle.f, summary))
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   6.000   5.108   8.000  10.000 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.250   5.000   4.621   7.000  10.000

# -- mhc --
with(mydata, tapply(mhc, vehicle.f, summary))
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.143   3.500   4.286   4.120   4.786   6.000      36 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.786   3.286   4.000   3.948   4.643   5.786      21

#########################################################################################################
             # Wilcoxon rank-sum tests to see if there are differences by vehicle driver group
#########################################################################################################

# Each comparison below first prints the per-group summary, then runs a
# two-sided Wilcoxon rank-sum test of the variable across the two vehicle.f
# groups. exact = FALSE requests the normal approximation (with continuity
# correction) instead of an exact p-value.

tapply(mydata$ss, mydata$vehicle.f, summary)
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00   19.00   15.76   24.00   35.00 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00   11.00   18.00   16.18   23.00   32.00
#motorcyclists have lower mean ss (15.76) than car (16.18), but a higher median (19 vs 18)

# Group on the labelled factor vehicle.f, the same grouping variable used by
# the summary above and by every other test in this section (the original call
# grouped on the raw `vehicle` column instead).
# NOTE(review): the transcript below was recorded with `vehicle` as the grouping
# variable. If vehicle.f is a plain relabelling of vehicle the p-value is
# unchanged; W may be reported from the other group's side if the level order
# differs. Re-run to refresh the recorded output.
w1 <- wilcox.test(ss ~ vehicle.f, data = mydata, exact = FALSE)
w1
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  ss by vehicle
## W = 18284, p-value = 0.5476
## alternative hypothesis: true location shift is not equal to 0
#not significantly different

#########################################################################################################

# ss.disinhib by vehicle group: per-group summary, then rank-sum test.
tapply(mydata$ss.disinhib, mydata$vehicle.f, summary)  
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    0.00    4.00    3.49    6.00    9.00 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   2.000   4.000   4.077   6.000  10.000
#motorcyclists have lower mean ss.disinhib (3.49) than car (4.08); medians are equal (4)

# Two-sided Wilcoxon rank-sum test; exact = FALSE uses the normal approximation.
# NOTE(review): p = 0.0429 is below .05 only when taken alone. Ten vehicle.f
# comparisons are run in this section with no multiplicity adjustment, and this
# result would not survive a Bonferroni or Holm correction across them
# (see stats::p.adjust).
w2 <- wilcox.test(ss.disinhib ~ vehicle.f, data = mydata, exact = FALSE)
w2  # significantly different at .05, unadjusted
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  ss.disinhib by vehicle.f
## W = 15543, p-value = 0.0429
## alternative hypothesis: true location shift is not equal to 0
#########################################################################################################

# ss.thrill by vehicle group: per-group summary, then rank-sum test.
tapply(mydata$ss.thrill, mydata$vehicle.f, summary) 
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   6.000   5.108   8.000  10.000 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   1.250   5.000   4.621   7.000  10.000
#motorcyclists have higher ss.thrill (5.11) than car (4.62)

# Two-sided Wilcoxon rank-sum test; exact = FALSE uses the normal approximation.
w3 <- wilcox.test(ss.thrill ~ vehicle.f, data = mydata, exact = FALSE)
w3  #not significantly different
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  ss.thrill by vehicle.f
## W = 19140, p-value = 0.1545
## alternative hypothesis: true location shift is not equal to 0
#########################################################################################################

# ss.experience by vehicle group: per-group summary, then rank-sum test.
tapply(mydata$ss.experience, mydata$vehicle.f, summary) 
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   0.000   5.000   4.351   7.000  10.000 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.000   3.000   5.000   4.764   7.000  10.000
#motorcyclists have lower mean ss.experience (4.35) than car (4.76); medians are equal (5)

# Two-sided Wilcoxon rank-sum test; exact = FALSE uses the normal approximation.
w4 <- wilcox.test(ss.experience ~ vehicle.f, data = mydata, exact = FALSE)
w4  #not significantly different
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  ss.experience by vehicle.f
## W = 16681, p-value = 0.351
## alternative hypothesis: true location shift is not equal to 0
#########################################################################################################

# mhc (well-being) by vehicle group: per-group summary, then rank-sum test.
# Rows with NA in mhc (36 motorcyclists, 21 car drivers) do not contribute.
tapply(mydata$mhc, mydata$vehicle.f, summary) 
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.143   3.500   4.286   4.120   4.786   6.000      36 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.786   3.286   4.000   3.948   4.643   5.786      21
#motorcyclists have higher well-being (4.12) than car (3.95)

# Two-sided Wilcoxon rank-sum test; exact = FALSE uses the normal approximation.
w5 <- wilcox.test(mhc ~ vehicle.f, data = mydata, exact = FALSE)
w5  #not significantly different at .05 (p = 0.064)
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  mhc by vehicle.f
## W = 14247, p-value = 0.06355
## alternative hypothesis: true location shift is not equal to 0
#########################################################################################################

# extraversion by vehicle group: per-group summary, then rank-sum test.
tapply(mydata$extraversion, mydata$vehicle.f, summary) 
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.125   3.844   4.375   4.425   5.031   6.500      30 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.125   4.000   4.375   4.417   4.750   5.750      17
#mean extraversion is nearly identical: motorcyclists 4.425 vs car 4.417 (medians both 4.375)

# Two-sided Wilcoxon rank-sum test; exact = FALSE uses the normal approximation.
w6 <- wilcox.test(extraversion ~ vehicle.f, data = mydata, exact = FALSE)
w6  #not significantly different
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  extraversion by vehicle.f
## W = 13297, p-value = 0.7872
## alternative hypothesis: true location shift is not equal to 0
#########################################################################################################

# openness by vehicle group: per-group summary, then rank-sum test.
tapply(mydata$openness, mydata$vehicle.f, summary) 
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   3.000   4.667   5.000   5.049   5.444   6.889      30 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.000   3.500   3.875   3.858   4.125   5.500      17
#motorcyclists have higher openness (5.05) than car (3.86)

# Two-sided Wilcoxon rank-sum test; exact = FALSE uses the normal approximation.
w7 <- wilcox.test(openness ~ vehicle.f, data = mydata, exact = FALSE)
w7  #significantly different
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  openness by vehicle.f
## W = 24968, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
########################################################################################################

# neuroticism by vehicle group: per-group summary, then rank-sum test.
tapply(mydata$neuroticism, mydata$vehicle.f, summary) 
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   1.125   2.625   3.625   3.474   4.125   5.875      30 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.125   3.778   4.000   4.063   4.333   5.222      18
#motorcyclists have lower neuroticism (3.47) than car (4.06)

# Two-sided Wilcoxon rank-sum test; exact = FALSE uses the normal approximation.
w8 <- wilcox.test(neuroticism ~ vehicle.f, data = mydata, exact = FALSE)
w8  #significantly different
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  neuroticism by vehicle.f
## W = 8177.5, p-value = 8.14e-10
## alternative hypothesis: true location shift is not equal to 0
########################################################################################################

# agreeableness by vehicle group: per-group summary, then rank-sum test.
tapply(mydata$agreeableness, mydata$vehicle.f, summary) 
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   3.000   4.778   5.222   5.237   5.778   7.000      30 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.222   3.667   4.000   3.996   4.333   6.667      17
#motorcyclists have higher agreeableness (5.24) than car (4.00)

# Two-sided Wilcoxon rank-sum test; exact = FALSE uses the normal approximation.
w9 <- wilcox.test(agreeableness ~ vehicle.f, data = mydata, exact = FALSE)
w9  #significantly different
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  agreeableness by vehicle.f
## W = 24110, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
########################################################################################################

# conscientiousness by vehicle group: per-group summary, then rank-sum test.
tapply(mydata$conscientiousness, mydata$vehicle.f, summary) 
## $motorcyclists
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   2.889   4.667   5.111   5.046   5.444   6.625      30 
## 
## $`car drivers`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   3.000   4.000   4.111   4.161   4.333   5.000      17
#motorcyclists have higher conscientiousness (5.05) than car (4.16)

# Two-sided Wilcoxon rank-sum test; exact = FALSE uses the normal approximation.
w10 <- wilcox.test(conscientiousness ~ vehicle.f, data = mydata, exact = FALSE)
w10  #significantly different
## 
##  Wilcoxon rank sum test with continuity correction
## 
## data:  conscientiousness by vehicle.f
## W = 24394, p-value < 2.2e-16
## alternative hypothesis: true location shift is not equal to 0
#########################################################################################################