#Loading packages

library(tidyverse) # For general data wrangling using dplyr and ggplot for figures
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(tidyr) # For the drop_na function which allows me to drop NA values
library(knitr) #For knitting the file
library(conflicted) # Resolves conflicts by letting me choose which package I want a function from
library(haven) #reading in SPSS file into r
library(Hmisc) #For correlation matrix
library(jtools)
library(extrafont) #For Times New Roman font in plots and figures
## Registering fonts with R
library(lmtest) #For stepwise regression
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

#Reading the data

HOCUS <- read_sav("HOCUS_2024Analyses_renamed.sav")
View(HOCUS) #reading SPSS into Rstudio

rstudioapi::writeRStudioPreference("data_viewer_max_columns", 500L)
## NULL
HOCUS_AUS <- dplyr::filter(HOCUS, Countries == 9|PostcodeZipCode == "Australia", Age >= 50, CannabisUseYearsTotal !=  0)

#Descriptives - Demographics

#AGE, GENDER, COUNTRY OF BIRTH
HOCUS_AUS %>% 
  dplyr::summarise(MeanAge = mean(Age),SDAge = sd(Age), MinAge = min(Age), MaxAge = max(Age))
## # A tibble: 1 × 4
##   MeanAge SDAge MinAge MaxAge
##     <dbl> <dbl>  <dbl>  <dbl>
## 1    56.4  4.73     50     69
HOCUS_AUS %>% 
  group_by(GenderMale, GenderFemale) %>% 
  dplyr::summarise(
    MeanAge = mean(Age), .groups = "drop",
    SDAge = sd(Age),
    Total = n())
## # A tibble: 2 × 5
##   GenderMale GenderFemale MeanAge SDAge Total
##   <dbl+lbl>  <dbl+lbl>      <dbl> <dbl> <int>
## 1  1 [Male]  NA              55.3  4.99    43
## 2 NA          1 [Female]     57.6  4.15    37
HOCUS_AUS %>% 
 count(CountryOfBirth)
## # A tibble: 7 × 2
##   CountryOfBirth                                                 n
##   <dbl+lbl>                                                  <int>
## 1   9 [Australia]                                               68
## 2  10 [Austria]                                                  1
## 3  48 [Denmark]                                                  1
## 4  61 [France]                                                   1
## 5 122 [Netherlands]                                              1
## 6 123 [New Zealand]                                              4
## 7 185 [United Kingdom of Great Britain and Northern Ireland]     4
#MARTIAL STATUS
HOCUS_AUS %>% 
  group_by(GenderMale, GenderFemale) %>% 
  count(MartialStatusSingle, MaritalStatusMarried, MartialStatusDefacto, MartialStatusWidowed)
## # A tibble: 8 × 7
## # Groups:   GenderMale, GenderFemale [2]
##   GenderMale GenderFemale MartialStatusSingle MaritalStatusMarried
##   <dbl+lbl>  <dbl+lbl>    <dbl+lbl>           <dbl+lbl>           
## 1  1 [Male]  NA            1 [Single]         NA                  
## 2  1 [Male]  NA           NA                   1 [Married]        
## 3  1 [Male]  NA           NA                  NA                  
## 4  1 [Male]  NA           NA                  NA                  
## 5 NA          1 [Female]   1 [Single]         NA                  
## 6 NA          1 [Female]  NA                   1 [Married]        
## 7 NA          1 [Female]  NA                  NA                  
## 8 NA          1 [Female]  NA                  NA                  
## # ℹ 3 more variables: MartialStatusDefacto <dbl+lbl>,
## #   MartialStatusWidowed <dbl+lbl>, n <int>
#EDUCATION
education <- HOCUS_AUS %>% 
  count(LevelOfEducation) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  rename(count = n)
education$LevelOfEducation[education$LevelOfEducation == "1"] <- "Grade 12" 
education$LevelOfEducation[education$LevelOfEducation == "2"] <- "Grade 11"
education$LevelOfEducation[education$LevelOfEducation == "3"] <- "Grade 10" 
education$LevelOfEducation[education$LevelOfEducation == "4"] <- "Grade 9" 
education$LevelOfEducation[education$LevelOfEducation == "5"] <- "Grade 8"
education$LevelOfEducation[education$LevelOfEducation == "6"] <- "Grade 7" 

education$LevelOfEducation <- factor(education$LevelOfEducation, levels = c("Grade 7", "Grade 8", "Grade 9", "Grade 10", "Grade 11", "Grade 12"))

ggplot(education, aes(y=LevelOfEducation, x=count)) +
  geom_bar(stat="identity") +
  scale_x_continuous(limits = c(0,40))

education_further <- HOCUS_AUS %>% 
  group_by(FurtherEducationOrTraining) %>% 
  count(SpecifyTraining)

#EMPLOYMENT
employment <- HOCUS_AUS %>% 
  count(EmploymentCurrent) %>% 
  na.omit(employment) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  rename(count = n)
employment$EmploymentCurrent[employment$EmploymentCurrent == "1"] <- "Fulltime" 
employment$EmploymentCurrent[employment$EmploymentCurrent == "2"] <- "Parttime/Casual"
employment$EmploymentCurrent[employment$EmploymentCurrent == "3"] <- "Temporarily Unemployed" 
employment$EmploymentCurrent[employment$EmploymentCurrent == "4"] <- "Permanently Unemployed" 
employment$EmploymentCurrent[employment$EmploymentCurrent == "5"] <- "Fulltime Student"
employment$EmploymentCurrent[employment$EmploymentCurrent == "6"] <- "Home Duties" 
employment$EmploymentCurrent[employment$EmploymentCurrent == "7"] <- "Other" 

employment$EmploymentCurrent <- factor(employment$EmploymentCurrent, levels = c("Fulltime", "Parttime/Casual", "Temporarily Unemployed", "Permanently Unemployed", "Fulltime Student", "Home Duties", "Other"))

ggplot(employment, aes(y=EmploymentCurrent, x=count)) +
  geom_bar(stat="identity") +
  scale_x_continuous(limits = c(0,40))

#ACCOMODATION
accomodation <- HOCUS_AUS %>% 
  count(AccommodationCurrent) %>% 
  na.omit(accomodation) %>% 
  mutate(across(everything(), as.numeric))

accomodation$AccommodationCurrent[accomodation$AccommodationCurrent == "1"] <- "Privately Owned" 
accomodation$AccommodationCurrent[accomodation$AccommodationCurrent == "2"] <- "Rented (Private)"
accomodation$AccommodationCurrent[accomodation$AccommodationCurrent == "3"] <- "Rented (Public)" 
accomodation$AccommodationCurrent[accomodation$AccommodationCurrent == "4"] <- "Boarding House" 
accomodation$AccommodationCurrent[accomodation$AccommodationCurrent == "10"] <- "Caravan"
accomodation$AccommodationCurrent[accomodation$AccommodationCurrent == "11"] <- "No usual residence/Homeless" 
accomodation$AccommodationCurrent[accomodation$AccommodationCurrent  == "12"] <- "Other" 

ggplot(accomodation, aes(y=AccommodationCurrent, x=n)) +
  geom_bar(stat="identity") +
  scale_x_continuous(limits = c(0,40))

#Descrptives - Scales ##DFAQ-CU

DFAQCU <- HOCUS_AUS %>% 
  select(CannabisUseEver:Q78,DFAQCUCurrentFreq:DFAQCUFormOtherSpecfiy:PhysicianReccomendation) %>% 
  mutate(across(everything(), as.numeric))
## Warning in x:y: numerical expression has 30 elements: only the first used
## Warning: There were 7 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(everything(), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 6 remaining warnings.
DFAQCU[DFAQCU == 0] <- NA

#CANNABIS LAST USE
last_use <- DFAQCU %>% 
  count(CannabisLastUse) %>% 
  na.omit(last_use)

last_use$CannabisLastUse[last_use$CannabisLastUse == "6"] <- "One month ago" 
last_use$CannabisLastUse[last_use$CannabisLastUse == "7"] <- "Last week"
last_use$CannabisLastUse[last_use$CannabisLastUse == "8"] <- "This week" 
last_use$CannabisLastUse[last_use$CannabisLastUse == "9"] <- "Yesterday" 
last_use$CannabisLastUse[last_use$CannabisLastUse == "10"] <- "Today"
last_use$CannabisLastUse[last_use$CannabisLastUse == "11"] <- "Currently High" 

last_use$CannabisLastUse <- factor(last_use$CannabisLastUse, levels = c("One month ago", "Last week", "This week", "Yesterday", "Today", "Currently High"))

ggplot(last_use, aes(y=CannabisLastUse, x=n)) +
  geom_bar(stat="identity") +
  scale_x_continuous(limits = c(0,45))

#AVERAGE CURRENT FREQUENCY
current_freq <- DFAQCU %>% 
  select(DFAQCUCurrentFreq, DFAQCUCurrentFreqLength)
current_freq %>% 
  count(DFAQCUCurrentFreq) %>% 
  mutate(across(everything(), as.numeric))
## # A tibble: 7 × 2
##   DFAQCUCurrentFreq     n
##               <dbl> <dbl>
## 1                 7     1
## 2                 8     5
## 3                 9     4
## 4                10    16
## 5                11     8
## 6                12    18
## 7                13    28
current_freq$DFAQCUCurrentFreq[current_freq$DFAQCUCurrentFreq == "7"] <- "2-3 times a month" 
current_freq$DFAQCUCurrentFreq[current_freq$DFAQCUCurrentFreq == "8"] <- "Once a week"
current_freq$DFAQCUCurrentFreq[current_freq$DFAQCUCurrentFreq == "9"] <- "Twice a week" 
current_freq$DFAQCUCurrentFreq[current_freq$DFAQCUCurrentFreq == "10"] <- "3-4 times a week" 
current_freq$DFAQCUCurrentFreq[current_freq$DFAQCUCurrentFreq == "11"] <- "5-6 times a week"
current_freq$DFAQCUCurrentFreq[current_freq$DFAQCUCurrentFreq== "12"] <- "Once a day" 
current_freq$DFAQCUCurrentFreq[current_freq$DFAQCUCurrentFreq== "13"] <- "More than once a day"

current_freq %>% 
  dplyr::filter(DFAQCUCurrentFreq == 12|DFAQCUCurrentFreq == 13) %>% 
  count(DFAQCUCurrentFreqLength)
## # A tibble: 0 × 2
## # ℹ 2 variables: DFAQCUCurrentFreqLength <dbl>, n <int>
DFAQCU %>%
  count(DFAQCUCurrentFreqLength) %>%
   arrange(desc(n)) 
## # A tibble: 11 × 2
##    DFAQCUCurrentFreqLength     n
##                      <dbl> <int>
##  1                      12    51
##  2                       7     6
##  3                       8     5
##  4                      11     4
##  5                       2     3
##  6                       6     3
##  7                       9     3
##  8                      10     2
##  9                       3     1
## 10                       4     1
## 11                       5     1
#PAST WEEK/MONTH/LIFETIME
colMeans(DFAQCU[sapply(DFAQCU, is.numeric)], na.rm = TRUE)
##          CannabisUseEver          CannabisLastUse                      Q77 
##                2.0000000                8.6455696                2.3333333 
##                      Q78        DFAQCUCurrentFreq  DFAQCUCurrentFreqLength 
##                1.1428571               11.3875000               10.2625000 
##       DFAQCUPreviousFreq        DFAQCUUsePastWeek       DFAQCUUsePastMonth 
##                7.9113924                6.0875000               22.9610390 
##        DFAQCUUseLifetime      DFAQCUPatternWeekly     DFAQCUUseAfterWaking 
##                9.0253165                3.8375000                4.2875000 
##         DFAQCUUseWeekday         DFAQCUUseWeekend      DFAQCUPrimaryMethod 
##                3.7123288                4.1410256                4.5750000 
##        DFAQCUOtherMethod         DFAQCUMethodNone       DFAQCUMethodJoints 
##                      NaN                1.0000000                1.0000000 
##       DFAQCUMethodBlunts     DFAQCUMethodHandpipe         DFAQCUMethodBong 
##                1.0000000                1.0000000                1.0000000 
##       DFAQCUMethodHookah     DFAQCUMethodVaporise       DFAQCUMethodEdible 
##                      NaN                1.0000000                1.0000000 
##        DFAQCUMethodOther DFAQCUMethodOtherSpecify               DFAQCUForm 
##                1.0000000                      NaN                2.2625000 
##        DFAQCUFormSpecify           DFAQCUFormNone      DFAQCUFormMarijuana 
##                      NaN                1.0000000                1.0000000 
##    DFAQCUFormConcentrate        DFAQCUFormEdibles          DFAQCUFormOther 
##                1.0000000                1.0000000                1.0000000 
##   DFAQCUFormOtherSpecfiy  CannabisGramsPerSession      CannabisGramsPerDay 
##                      NaN                0.7264286                1.7096154 
##     CannabisGramsPerWeek   CannabisSessionsPerDay        AverageTHCContent 
##                6.7226562                3.0390625                6.5000000 
##        EdilbleTHCContent               CurrentAge    CannabisUseYearsTotal 
##                      NaN               56.3417722               37.2911392 
##      CannabisAgeFirstUse       CannabisRegularUse  CannabisFirstRegularUse 
##               18.1772152                1.9125000               21.1549296 
##         CannabisDailyUse    CannabisFirstDailyUse     CannabisFreqBefore16 
##                1.9722222               23.8615385                9.5822785 
##  PhysicianReccomendation 
##                1.3000000
sapply(DFAQCU, sd, na.rm = TRUE)
##          CannabisUseEver          CannabisLastUse                      Q77 
##                0.0000000                1.0132200                0.5773503 
##                      Q78        DFAQCUCurrentFreq  DFAQCUCurrentFreqLength 
##                0.3779645                1.6342362                2.8227994 
##       DFAQCUPreviousFreq        DFAQCUUsePastWeek       DFAQCUUsePastMonth 
##                4.7856352                2.2512655                9.3632000 
##        DFAQCUUseLifetime      DFAQCUPatternWeekly     DFAQCUUseAfterWaking 
##                1.7612254                0.5612768                1.8155011 
##         DFAQCUUseWeekday         DFAQCUUseWeekend      DFAQCUPrimaryMethod 
##                4.5452339                5.9105329                2.1034932 
##        DFAQCUOtherMethod         DFAQCUMethodNone       DFAQCUMethodJoints 
##                       NA                0.0000000                0.0000000 
##       DFAQCUMethodBlunts     DFAQCUMethodHandpipe         DFAQCUMethodBong 
##                       NA                0.0000000                0.0000000 
##       DFAQCUMethodHookah     DFAQCUMethodVaporise       DFAQCUMethodEdible 
##                       NA                0.0000000                0.0000000 
##        DFAQCUMethodOther DFAQCUMethodOtherSpecify               DFAQCUForm 
##                0.0000000                       NA                0.7419398 
##        DFAQCUFormSpecify           DFAQCUFormNone      DFAQCUFormMarijuana 
##                       NA                       NA                0.0000000 
##    DFAQCUFormConcentrate        DFAQCUFormEdibles          DFAQCUFormOther 
##                0.0000000                0.0000000                0.0000000 
##   DFAQCUFormOtherSpecfiy  CannabisGramsPerSession      CannabisGramsPerDay 
##                       NA                1.3609723                2.9741257 
##     CannabisGramsPerWeek   CannabisSessionsPerDay        AverageTHCContent 
##                6.6940090                2.2577990                1.9115036 
##        EdilbleTHCContent               CurrentAge    CannabisUseYearsTotal 
##                       NA                4.7200773               39.2301401 
##      CannabisAgeFirstUse       CannabisRegularUse  CannabisFirstRegularUse 
##                7.4691837                0.2843491                8.3420585 
##         CannabisDailyUse    CannabisFirstDailyUse     CannabisFreqBefore16 
##                0.1654888               10.0977673                4.2895448 
##  PhysicianReccomendation 
##                0.6443523
DFAQCU %>% 
  count(DFAQCUUseLifetime)
## # A tibble: 9 × 2
##   DFAQCUUseLifetime     n
##               <dbl> <int>
## 1                 2     2
## 2                 3     1
## 3                 5     1
## 4                 6     3
## 5                 7     2
## 6                 8     9
## 7                 9    13
## 8                10    48
## 9                NA     1
DFAQCU$DFAQCUUsePastMonth <-as.numeric(as.character(DFAQCU$DFAQCUUsePastMonth))
DFAQCU$DFAQCUUseWeekday <-as.numeric(as.character(DFAQCU$DFAQCUUseWeekday))
DFAQCU$DFAQCUUseWeekend <-as.numeric(as.character(DFAQCU$DFAQCUUseWeekend))

DFAQCU %>% 
  summarise(mean_month = mean(DFAQCUUsePastMonth, na.rm = TRUE),
            mean_weekday = mean(DFAQCUUseWeekday, na.rm = TRUE),
            sd_weekday=sd(DFAQCUUseWeekday, na.rm=TRUE),
            mean_weekend = mean(DFAQCUUseWeekend, na.rm = TRUE),
            sd_weekend=sd(DFAQCUUseWeekend, na.rm=TRUE))
## # A tibble: 1 × 5
##   mean_month mean_weekday sd_weekday mean_weekend sd_weekend
##        <dbl>        <dbl>      <dbl>        <dbl>      <dbl>
## 1       23.0         3.71       4.55         4.14       5.91
DFAQCU %>%
  count(DFAQCUPreviousFreq) %>%
   arrange(desc(n)) #More than once a day (20/80) followed by never used cannabis (18/80)
## # A tibble: 12 × 2
##    DFAQCUPreviousFreq     n
##                 <dbl> <int>
##  1                 13    20
##  2                  1    18
##  3                 10    11
##  4                  4     7
##  5                 12     7
##  6                  8     5
##  7                 11     4
##  8                  9     3
##  9                  2     2
## 10                  3     1
## 11                  7     1
## 12                 NA     1
DFAQCU %>%
  count(DFAQCUUsePastWeek) %>%
   arrange(desc(n)) #7 days a week (38/80)
## # A tibble: 8 × 2
##   DFAQCUUsePastWeek     n
##               <dbl> <int>
## 1                 8    38
## 2                 3     9
## 3                 6     9
## 4                 4     7
## 5                 7     6
## 6                 2     5
## 7                 5     4
## 8                 1     2
DFAQCU %>%
  count(DFAQCUPatternWeekly) %>%
   arrange(desc(n)) #Cannabis use both weekends and weekdays (73/80)
## # A tibble: 4 × 2
##   DFAQCUPatternWeekly     n
##                 <dbl> <int>
## 1                   4    73
## 2                   2     4
## 3                   3     2
## 4                   1     1
DFAQCU %>%
  count(DFAQCUUseAfterWaking) %>%
   arrange(desc(n)) #12-18 hours after waking, 1-3 hours after waking (both 18/80)
## # A tibble: 8 × 2
##   DFAQCUUseAfterWaking     n
##                  <dbl> <int>
## 1                    2    18
## 2                    6    18
## 3                    4    15
## 4                    3    13
## 5                    5     8
## 6                    7     5
## 7                    8     2
## 8                    9     1
#INGESTION METHOD
DFAQCU %>% 
  count(DFAQCUPrimaryMethod) %>% 
  arrange(desc(n)) 
## # A tibble: 7 × 2
##   DFAQCUPrimaryMethod     n
##                 <dbl> <int>
## 1                   5    28
## 2                   2    23
## 3                   4    12
## 4                   7     7
## 5                   9     5
## 6                   8     4
## 7                   6     1
#AMOUNT OF CANNABIS
DFAQCU$CannabisGramsPerSession <-as.numeric(as.character(DFAQCU$CannabisGramsPerSession))
DFAQCU$CannabisGramsPerDay <-as.numeric(as.character(DFAQCU$CannabisGramsPerDay))
DFAQCU$CannabisGramsPerWeek <-as.numeric(as.character(DFAQCU$CannabisGramsPerWeek))

DFAQCU["CannabisSessionsPerDay"][DFAQCU["CannabisSessionsPerDay"] == ''] <- NA
DFAQCU$CannabisSessionsPerDay <-as.numeric(as.character(DFAQCU$CannabisSessionsPerDay))

DFAQCU %>%  
  summarise(meanpersession=mean(CannabisGramsPerSession, na.rm = TRUE),
            sdpersession=sd(CannabisGramsPerSession, na.rm=TRUE),
            meanperday=mean(CannabisGramsPerDay, na.rm = TRUE),
            sdperday=sd(CannabisGramsPerDay, na.rm=TRUE),
            meanperweek=mean(CannabisGramsPerWeek, na.rm=TRUE),
            sdperweek=sd(CannabisGramsPerWeek, na.rm=TRUE),
            meansessions=mean(CannabisSessionsPerDay, na.rm = TRUE))
## # A tibble: 1 × 7
##   meanpersession sdpersession meanperday sdperday meanperweek sdperweek
##            <dbl>        <dbl>      <dbl>    <dbl>       <dbl>     <dbl>
## 1          0.726         1.36       1.71     2.97        6.72      6.69
## # ℹ 1 more variable: meansessions <dbl>
#YEARS USED
use_years <- DFAQCU %>% 
  select(CannabisUseYearsTotal, CannabisAgeFirstUse, CannabisRegularUse, CannabisFirstRegularUse, CannabisDailyUse, CannabisFirstDailyUse, CannabisFreqBefore16) %>% 
  mutate(CannabisUseYearsTotal = str_replace(CannabisUseYearsTotal, "366","36")) %>% 
  mutate(across(everything(), as.numeric))
  
use_years %>% 
  summarise(mean_years=mean(CannabisUseYearsTotal, na.rm = TRUE),
            sd_years=sd(CannabisUseYearsTotal, na.rm = TRUE),
            mean_ageonset=mean(CannabisAgeFirstUse, na.rm = TRUE),
            sd_ageonset=sd(CannabisAgeFirstUse, na.rm = TRUE),
            mean_regularuse=mean(CannabisFirstRegularUse, na.rm = TRUE),
            sd_regularuse=sd(CannabisFirstRegularUse, na.rm = TRUE),
            mean_dailyuse=mean(CannabisFirstDailyUse, na.rm=TRUE),
            sd_dailyuse=sd(CannabisFirstDailyUse, na.rm=TRUE))
## # A tibble: 1 × 8
##   mean_years sd_years mean_ageonset sd_ageonset mean_regularuse sd_regularuse
##        <dbl>    <dbl>         <dbl>       <dbl>           <dbl>         <dbl>
## 1       33.1     11.7          18.2        7.47            21.2          8.34
## # ℹ 2 more variables: mean_dailyuse <dbl>, sd_dailyuse <dbl>
use_years1 <- use_years %>% 
  select(CannabisUseYearsTotal) %>% 
  mutate(years = case_when(CannabisUseYearsTotal>=0 & CannabisUseYearsTotal <= 9 ~ '0-9',
                                     CannabisUseYearsTotal>=10 & CannabisUseYearsTotal <= 19 ~ '10-19',
                                     CannabisUseYearsTotal>=20 & CannabisUseYearsTotal <= 29 ~ '20-29',
                                     CannabisUseYearsTotal>=30 & CannabisUseYearsTotal <= 39 ~ '30-39',
                                     CannabisUseYearsTotal>=40 & CannabisUseYearsTotal <= 49 ~ '40-49',
                                     CannabisUseYearsTotal>=50 & CannabisUseYearsTotal <= 59 ~ '40-59')) %>% 
  count(years)

ggplot(use_years1, aes(x=years, y=n))+
  geom_col()

use_years2 <- use_years %>% 
  select(CannabisAgeFirstUse) %>% 
  mutate(age = case_when(CannabisAgeFirstUse>=0 & CannabisAgeFirstUse <= 9 ~ '0-9',
                                     CannabisAgeFirstUse>=10 & CannabisAgeFirstUse <= 19 ~ '10-19',
                                     CannabisAgeFirstUse>=20 & CannabisAgeFirstUse <= 29 ~ '20-29',
                                     CannabisAgeFirstUse>=30 & CannabisAgeFirstUse <= 39 ~ '30-39',
                                     CannabisAgeFirstUse>=40 & CannabisAgeFirstUse <= 49 ~ '40-49',
                                     CannabisAgeFirstUse>=50 & CannabisAgeFirstUse <= 59 ~ '40-59')) %>% 
  count(age)

ggplot(use_years2, aes(x=age, y=n))+
  geom_col()

use_years3 <- use_years %>% 
  select(CannabisFirstRegularUse) %>% 
  mutate(age = case_when(CannabisFirstRegularUse>=0 & CannabisFirstRegularUse <= 9 ~ '0-9',
                                     CannabisFirstRegularUse>=10 & CannabisFirstRegularUse <= 19 ~ '10-19',
                                     CannabisFirstRegularUse>=20 & CannabisFirstRegularUse <= 29 ~ '20-29',
                                     CannabisFirstRegularUse>=30 & CannabisFirstRegularUse <= 39 ~ '30-39',
                                     CannabisFirstRegularUse>=40 & CannabisFirstRegularUse <= 49 ~ '40-49',
                                     CannabisFirstRegularUse>=50 & CannabisFirstRegularUse <= 59 ~ '40-59')) %>% 
  count(age)

ggplot(use_years3, aes(x=age, y=n))+
  geom_col()

use_years4 <- use_years %>% 
  select(CannabisFirstDailyUse) %>% 
  mutate(age = case_when(CannabisFirstDailyUse>=0 & CannabisFirstDailyUse <= 9 ~ '0-9',
                                     CannabisFirstDailyUse>=10 & CannabisFirstDailyUse <= 19 ~ '10-19',
                                     CannabisFirstDailyUse>=20 & CannabisFirstDailyUse <= 29 ~ '20-29',
                                     CannabisFirstDailyUse>=30 & CannabisFirstDailyUse <= 39 ~ '30-39',
                                     CannabisFirstDailyUse>=40 & CannabisFirstDailyUse <= 49 ~ '40-49',
                                     CannabisFirstDailyUse>=50 & CannabisFirstDailyUse <= 59 ~ '40-59')) %>% 
  count(age)
ggplot(use_years4, aes(x=age, y=n))+
  geom_col()

#DOCTORS RECOMMENDATION
DFAQCU %>% 
  count(PhysicianReccomendation)
## # A tibble: 3 × 2
##   PhysicianReccomendation     n
##                     <dbl> <int>
## 1                       1    64
## 2                       2     8
## 3                       3     8

##CMMQ

CMMQ <- HOCUS_AUS %>% 
  select(CMMQ1:CMMQ36) %>% 
  mutate(across(everything(), as.numeric))

CMMQ_always_response <- CMMQ %>% 
  select(CMMQ1:CMMQ3, CMMQ28:CMMQ31) %>%
  summarise(across(everything(), ~sum(.==5, na.rm = TRUE))) %>% 
  pivot_longer(cols=everything(), names_to="column", values_to="count")
print(CMMQ_always_response)
## # A tibble: 7 × 2
##   column count
##   <chr>  <int>
## 1 CMMQ1     40
## 2 CMMQ2     19
## 3 CMMQ3     32
## 4 CMMQ28    29
## 5 CMMQ29    39
## 6 CMMQ30    29
## 7 CMMQ31    33
CMMQ_always_response$column[CMMQ_always_response$column == "CMMQ1"] <- "Enjoy the effects" 
CMMQ_always_response$column[CMMQ_always_response$column == "CMMQ2"] <- "It is fun" 
CMMQ_always_response$column[CMMQ_always_response$column == "CMMQ3"] <- "To feel good" 
CMMQ_always_response$column[CMMQ_always_response$column == "CMMQ28"] <- "Safer to drink than alcohol"
CMMQ_always_response$column[CMMQ_always_response$column == "CMMQ29"] <- "Not a dangerous drug" 
CMMQ_always_response$column[CMMQ_always_response$column == "CMMQ30"] <- "Low health risks" 
CMMQ_always_response$column[CMMQ_always_response$column == "CMMQ31"] <- "To help you sleep"

CMMQ_always_response$column <- factor(CMMQ_always_response$column, levels = CMMQ_always_response$column)

ggplot(CMMQ_always_response, aes(x = column, y = count)) +
  geom_bar(stat="identity", width = 0.8) +
  coord_flip() +
  scale_y_continuous(expand = c(0,0), limits = c(0,40), breaks=seq(0,40, by=10)) +
  scale_x_discrete(limits=c("Enjoy the effects", "Not a dangerous drug", "To help you sleep", "To feel good", "Low health risks", "Safer to drink than alcohol", "It is fun")) +
  labs(x="Motivation",
       y="Number of Participants") +
  theme_bw() +
  theme(
    text=element_text(family = "Times New Roman", size=12, face="bold"),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks.x = element_line(colour = "black", linewidth = 0.5),
    axis.ticks.y= element_blank())

CMMQ_never_response <- CMMQ %>% 
  select(CMMQ5,CMMQ6, CMMQ4,CMMQ18,CMMQ11,CMMQ16, CMMQ17) %>% 
  summarise(across(everything(), ~sum(.==1, na.rm = TRUE))) %>% 
  pivot_longer(cols=everything(), names_to="column", values_to="count")
print(CMMQ_never_response)
## # A tibble: 7 × 2
##   column count
##   <chr>  <int>
## 1 CMMQ5     65
## 2 CMMQ6     62
## 3 CMMQ4     61
## 4 CMMQ18    53
## 5 CMMQ11    51
## 6 CMMQ16    50
## 7 CMMQ17    49
CMMQ_never_response$column[CMMQ_never_response$column == "CMMQ5"] <- "Didn't want to be the only one not doing it" 
CMMQ_never_response$column[CMMQ_never_response$column == "CMMQ6"] <- "To be cool" 
CMMQ_never_response$column[CMMQ_never_response$column == "CMMQ4"] <- "Felt pressure from others" 
CMMQ_never_response$column[CMMQ_never_response$column == "CMMQ18"] <- "You had gotten drunk and weren't thinking"
CMMQ_never_response$column[CMMQ_never_response$column == "CMMQ11"] <- "Curious about marijuana" 
CMMQ_never_response$column[CMMQ_never_response$column == "CMMQ16"] <- "You were drunk" 
CMMQ_never_response$column[CMMQ_never_response$column == "CMMQ17"] <- "Under the influence of alcohol"

ggplot(CMMQ_never_response, aes(x = column, y = count)) +
  geom_bar(stat="identity", width = 0.8) +
  coord_flip() +
  scale_y_continuous(expand = c(0,0), limits = c(0,70), breaks=seq(0,70, by=10)) +
  scale_x_discrete(limits=c("Didn't want to be the only one not doing it", "To be cool", "Felt pressure from others", "You had gotten drunk and weren't thinking", "Curious about marijuana", "You were drunk","Under the influence of alcohol")) +
  labs(x="Motivation",
       y="Number of Participants") +
  theme_bw() +
  theme(
    text=element_text(family = "Times New Roman", size=12, face="bold"),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks.x = element_line(colour = "black", linewidth = 0.5),
    axis.ticks.y = element_blank())

table(CMMQ$CMMQ1) #To enjoy the effects of it = almost always or always, 40/80
## 
##  1  3  4  5 
##  7 16  9 40
table(CMMQ$CMMQ2) #Because it's fun = almost always or always 19/80, almost never or never 17/80
## 
##  1  2  3  4  5 
## 17  5 17  9 19
table(CMMQ$CMMQ3) #To feel good = almost always or always 32/80
## 
##  1  2  3  4  5 
##  4  1 18 13 32
table(CMMQ$CMMQ4) #Because you felt pressure from others who do it = almost never or never 61/80
## 
##  1  2  3 
## 61  5  1
table(CMMQ$CMMQ5) #Because you didn't want to be the only one not doing it = almost never/never 61
## 
##  1  2  5 
## 65  3  1
table(CMMQ$CMMQ6) #To be cool = almost never/never 65
## 
##  1  2  3 
## 62  4  2
table(CMMQ$CMMQ7) #To forget your problems = almost never/never 62
## 
##  1  2  3  4  5 
## 35  6 22  3  1
table(CMMQ$CMMQ8) #Because you were depressed = almost never/never 35, sometimes 22
## 
##  1  2  3  4  5 
## 31 11 20  4  1
table(CMMQ$CMMQ9) #To escape from your life = almost never/never 31, sometimes 20
## 
##  1  2  3  4  5 
## 37  5 20  5  3
table(CMMQ$CMMQ10) #Because you were experimenting = almost never/never 37, sometimes 20
## 
##  1  2  3  4 
## 45 10 11  2
table(CMMQ$CMMQ11) #Because you were curious about marijuana = almost never/never 45
## 
##  1  2  3  4  5 
## 51  4  8  4  2
table(CMMQ$CMMQ12) #To see what it felt like = almost never/never 51
## 
##  1  2  3  4  5 
## 43  5 11  6  3
table(CMMQ$CMMQ13) #Because you had nothing better to do = almost never/never 43
## 
##  1  2  3  4  5 
## 48  3 13  3  2
table(CMMQ$CMMQ14)#To relieve boredom = almost never/never 48
## 
##  1  2  3  4  5 
## 37  5 19  5  1
table(CMMQ$CMMQ15)#Because you wanted something to do = almost never/never 37
## 
##  1  2  3  4  5 
## 46  5 12  6  1
table(CMMQ$CMMQ16)#Because you were drunk = almost never/never 46
## 
##  1  2  3  4 
## 50  5 12  1
table(CMMQ$CMMQ17)#Because you were under the influence of alcohol = almost never/never 50
## 
##  1  2  3  4  5 
## 49  7 12  1  1
table(CMMQ$CMMQ18)#Because you had gotten drunk and weren't thinking about what you were doing = almost never/never 49
## 
##  1  2  3 
## 53  5 10
table(CMMQ$CMMQ19)#To celebrate = almost never/never 53
## 
##  1  2  3  4  5 
## 18  4 27 14  7
table(CMMQ$CMMQ20)#Because it was a special day = sometimes 27
## 
##  1  2  3  4  5 
## 25  3 29  7  4
table(CMMQ$CMMQ21)#Because it was a special occasion = almost never/never 25, sometimes 29
## 
##  1  2  3  4  5 
## 26  2 27  8  6
table(CMMQ$CMMQ22)#Because you want to alter your perspective = almost never/never 26, sometimes 27
## 
##  1  2  3  4  5 
## 26  8 16 11  7
table(CMMQ$CMMQ23)#To allow you to think differently = almost never/never 26
## 
##  1  2  3  4  5 
## 18  6 26 12  7
table(CMMQ$CMMQ24)# So you can look at the world differently = sometimes 26
## 
##  1  2  3  4  5 
## 20  4 26 12  7
table(CMMQ$CMMQ25)#Because it makes you more comfortable in an unfamiliar situation = sometimes 26
## 
##  1  2  3  4  5 
## 26  7 28  4  5
table(CMMQ$CMMQ26)#To make you feel more confident = almost never/never 26, sometimes 28
## 
##  1  2  3  4  5 
## 37  8 19  2  3
table(CMMQ$CMMQ27)#Because it relaxes you when you are in an insecure situation = almost never/never 37
## 
##  1  2  3  4  5 
## 21  3 26 13  7
table(CMMQ$CMMQ28)#Because it is safer than drinking alcohol = almost never/never 21, sometimes 26
## 
##  1  2  3  4  5 
## 15  2 15  8 29
table(CMMQ$CMMQ29)#Because it is not a dangerous drug = almost always or always 29, sometimes 15
## 
##  1  2  3  4  5 
## 10  1 13  6 39
table(CMMQ$CMMQ30)#Because there are low health risks = almost always or always 39, sometimes 13
## 
##  1  2  3  4  5 
## 13  6 11 10 29
table(CMMQ$CMMQ31)#To help you sleep = almost always or always 29
## 
##  1  2  3  4  5 
##  7  5 17  9 33
table(CMMQ$CMMQ32)#Because it helps make napping easier and enjoyable = almost always or always 33
## 
##  1  2  3  4  5 
## 19  4 18 10 18
table(CMMQ$CMMQ33)#Because you are having problems sleeping = almost never or never 19, sometimes 18, almost always or always 18
## 
##  1  2  3  4  5 
## 15  3 23  8 22
table(CMMQ$CMMQ34)#Because it is readily available = almost always or always 22
## 
##  1  2  3  4  5 
## 26  7 18  8 10
table(CMMQ$CMMQ35)#Because you can get it for free = almost never or never 26, sometimes 18
## 
##  1  2  3  4  5 
## 40  5 17  2  6
table(CMMQ$CMMQ36)#Because it is there = almost never or never 34 (must be some purpose behind use)
## 
##  1  2  3  4  5 
## 34  6 15  7  6

##MMPQ

MMPQ <- HOCUS_AUS %>% 
  select(MedicinalIntention:CannabisUnhelpfulFor) %>% 
  mutate(across(everything(), as.numeric))
## Warning: There were 7 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(everything(), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 6 remaining warnings.
#YEARS/MONTHS USED
MMPQ_medicinal <- MMPQ %>% 
  dplyr::filter(MedicinalIntention == 1) %>% 
  summarise(
    average_years = mean(MedicinalCannabisYears, na.rm = TRUE),
    sd_years = sd(MedicinalCannabisYears, na.rm = TRUE),
    average_months = mean(MedicinalCannabisMonths, na.rm = TRUE),
    sd_months = sd(MedicinalCannabisMonths, na.rm = TRUE))

#CONDITION FOR USE
condition <- MMPQ %>% 
  select(ConditionChronicPain:ConditionHIVAIDS) %>% 
  rename(ChronicPain = ConditionChronicPain,
         Anxiety = ConditionAnxiety,
         PTSD = ConditionPTSD,
         Depression = ConditionDepression,
         Nightmares = ConditionNightmares,
         Appetite = ConditionAppetite,
         Nausea = ConditionNausea,
         Insomnia = ConditionInsomnia,
         Epilepsy = ConditionEpilepsy,
         Headaches = ConditionHeadaches,
         Seizures = ConditionSeizures,
         Stress = ConditionStress,
         Cancer = ConditionCancer,
         Glaucoma = ConditionGlaucoma,
         MuscleSpasms = ConditionMuscleSpasms,
         MultipleSclerosis = ConditionMultipleSclerosis,
         HIVAIDS = ConditionHIVAIDS) %>% 
  summarise(across(everything(), ~sum(.==1, na.rm = TRUE))) %>% 
  pivot_longer(cols=everything(), names_to="column", values_to="count") %>% 
  arrange(desc(count)) %>% 
  dplyr::filter(count > 0)

condition$column <- factor(condition$column, levels = condition$column)

ggplot(condition, aes(column, count)) +
  geom_bar(stat="identity") +
  coord_flip() +
  scale_y_continuous(expand = c(0,0), limits = c(0,50), breaks=seq(0,50, by=10)) +
  labs(x="Condition",
       y="Number of Participants") +
  theme_bw() +
  theme(
    text=element_text(family = "Times New Roman", size=12, face="bold"),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks.x = element_line(colour = "black", linewidth = 0.5),
    axis.ticks.y= element_blank())

MMPQ %>% 
  count(ConditionOtherSpecify)#3
## # A tibble: 1 × 2
##   ConditionOtherSpecify     n
##                   <dbl> <int>
## 1                    NA    80
MMPQ %>% 
  count(ConditionChronicPain & ConditionInsomnia)#23
## # A tibble: 2 × 2
##   `ConditionChronicPain & ConditionInsomnia`     n
##   <lgl>                                      <int>
## 1 TRUE                                          23
## 2 NA                                            57
MMPQ %>% 
  count(ConditionChronicPain & ConditionStress)#17
## # A tibble: 2 × 2
##   `ConditionChronicPain & ConditionStress`     n
##   <lgl>                                    <int>
## 1 TRUE                                        17
## 2 NA                                          63
MMPQ %>% 
  count(ConditionInsomnia & ConditionStress)#16
## # A tibble: 2 × 2
##   `ConditionInsomnia & ConditionStress`     n
##   <lgl>                                 <int>
## 1 TRUE                                     16
## 2 NA                                       64
MMPQ %>% 
  count(ConditionChronicPain & ConditionDepression)#15
## # A tibble: 2 × 2
##   `ConditionChronicPain & ConditionDepression`     n
##   <lgl>                                        <int>
## 1 TRUE                                            15
## 2 NA                                              65
#HELPFUL CONDITION
MMPQ %>% 
  dplyr::filter(MedicinalIntention == 1) %>%  #53 use for medicinal purposes
  count(CannabisHelpCondition) #35 rate cannabis as extremely helpful for symptoms
## # A tibble: 4 × 2
##   CannabisHelpCondition     n
##                   <dbl> <int>
## 1                     3     2
## 2                     4    14
## 3                     5    35
## 4                    NA     2

##PDQ

PDQ <- HOCUS_AUS %>% 
  select(PDQ1:PDQ20) %>% 
  mutate(across(everything(), as.numeric))

PDQ_recoded <- PDQ %>% 
  mutate(across(1:20,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3,'5' = 4))) %>% 
  dplyr::filter(!if_all(c(PDQ1:PDQ20), is.na)) %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:20), na.rm = TRUE))

mean_pdq <- mean(PDQ_recoded$row_sum, na.rm = TRUE) #mean pdq score of 17.89

PDQ_mean <- PDQ_recoded %>% 
  summarise_all(mean, na.rm=TRUE) %>% 
  mutate(row_sum=sum(across(1:20), na.rm=TRUE))
    #Averages do not indicate that cannabis users in this sample suffer from perceived cognitive impairment

attention_concerntration <- PDQ_recoded %>% 
  select(PDQ1, PDQ5, PDQ9, PDQ13, PDQ17) %>% 
  mutate(row_sum=sum(across(1:5), na.rm=TRUE)) 
mean(attention_concerntration$row_sum, na.rm = TRUE) #mean=5.56
## [1] 5.561644
sd(attention_concerntration$row_sum, na.rm=TRUE) #sd=3.18
## [1] 3.184102
reterospective_memory <- PDQ_recoded %>% 
  select(PDQ2, PDQ6, PDQ10, PDQ14, PDQ18) %>% 
  mutate(row_sum=sum(across(1:5), na.rm=TRUE))
mean(reterospective_memory$row_sum, na.rm = TRUE) #mean=4.16
## [1] 4.164384
sd(reterospective_memory$row_sum, na.rm=TRUE) #sd=3.30
## [1] 3.308296
prospective_memory <- PDQ_recoded %>% 
  select(PDQ3, PDQ7, PDQ11, PDQ15, PDQ19) %>% 
  mutate(row_sum=sum(across(1:5), na.rm=TRUE))
mean(prospective_memory$row_sum, na.rm = TRUE) #mean=3.93
## [1] 3.931507
sd(prospective_memory$row_sum, na.rm=TRUE) #sd=2.66
## [1] 2.663168
planning_organisation <- PDQ_recoded %>% 
  select(PDQ4, PDQ8, PDQ12, PDQ16, PDQ20) %>% 
  mutate(row_sum=sum(across(1:5), na.rm=TRUE))
mean(planning_organisation$row_sum, na.rm = TRUE) #mean=4.23
## [1] 4.232877
sd(planning_organisation$row_sum, na.rm=TRUE) #sd=3.02
## [1] 3.020856

##AQoL-6D

AQoL <- HOCUS_AUS %>% 
  select(AQoL6D1:AQoL6D20) %>% 
  dplyr::filter(!if_all(c(AQoL6D1:AQoL6D20), is.na))
#utilizing 'psychometric' measure - simple psychometric score for health related quality of life (HRQoL) and to provide profile scores on the different dimensions or items of the descriptive systems. The score is derived by adding the unweighted response order of each question.
  
AQoL_mean <- AQoL %>% 
  mutate(across(everything(), as.numeric)) %>% 
  summarise_all(mean, na.rm=TRUE) %>% 
  mutate(sum=rowSums(., na.rm=TRUE)) #total score=38.14

AQoL_mean2 <- AQoL %>% 
  mutate(across(everything(), as.numeric)) %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:20), na.rm = TRUE))
sd(AQoL_mean2$row_sum) #sd=10.30
## [1] 10.29585
independent_living <- AQoL %>% 
  select(AQoL6D1:AQoL6D4) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  summarise_all(mean, na.rm=TRUE) %>% 
  mutate(sum=rowSums(., na.rm=TRUE))
relationships <- AQoL %>% 
  select(AQoL6D5:AQoL6D7) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  summarise_all(mean, na.rm=TRUE) %>% 
  mutate(sum=rowSums(., na.rm=TRUE))
mental_health <- AQoL %>% 
  select(AQoL6D8:AQoL6D11) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  summarise_all(mean, na.rm=TRUE) %>% 
  mutate(sum=rowSums(., na.rm=TRUE))
coping <- AQoL %>% 
  select(AQoL6D12:AQoL6D14) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  summarise_all(mean, na.rm=TRUE) %>% 
  mutate(sum=rowSums(., na.rm=TRUE))
pain <- AQoL %>% 
  select(AQoL6D15:AQoL6D17) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  summarise_all(mean, na.rm=TRUE) %>% 
  mutate(sum=rowSums(., na.rm=TRUE))
senses <- AQoL %>% 
  select(AQoL6D18:AQoL6D20) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  summarise_all(mean, na.rm=TRUE) %>% 
  mutate(sum=rowSums(., na.rm=TRUE))

##ICD-10

ICD10 <- HOCUS_AUS %>% 
  select(Participant, Q53_1:Q53_7) %>% 
  mutate(across(everything(), as.numeric))
ICD10 %>% 
  rowwise() %>% 
  mutate(num_yes=sum(c_across(Q53_1:Q53_7) == 1, na.rm=TRUE))%>% 
  dplyr::filter(num_yes>3)
## # A tibble: 10 × 9
## # Rowwise: 
##    Participant Q53_1 Q53_2 Q53_3 Q53_4 Q53_5 Q53_6 Q53_7 num_yes
##          <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <int>
##  1           4     1     1     1     1     2     1     1       6
##  2           6     1     2     1     2     1     2     1       4
##  3          11     1     1     1     2     2     1     1       5
##  4          22     1     2     1     2     2     1     1       4
##  5          36     1     1     1     1     1     1     1       7
##  6          37     1     1     1     1     1     1     1       7
##  7          42     1     1     1     1     1     1     1       7
##  8          54     1     2     2     1     2     1     1       4
##  9          65     1     2     1     1     2     1     1       5
## 10          75     1     1     1     1     2     2     1       5

##CAST

CAST <- HOCUS_AUS %>% 
  select(Q127:Q132) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(1:6,.fns = ~recode(.,'2' = 0))) %>% 
  rename(CASTq1 = Q127,
         CASTq2 = Q128,
         CASTq3 = Q129,
         CASTq4 = Q130,
         CASTq5 = Q131,
         CASTq6 = Q132)

CAST_means <- CAST %>% #14 participants considered having problematic use
  mutate(sum=rowSums(across(1:6), na.rm=TRUE)) %>% 
  mutate(problematic_use = case_when(
                            sum>=4 ~ 'High risk',
                            sum<=3 ~'Low/moderate risk')) %>% 
  count(problematic_use)

##AUDIT-C

AUDITC <- HOCUS_AUS %>% 
  select(GenderMale, GenderFemale, AUDITC1:AUDITC3) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(3:5,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3,'5' = 4, '6'=5))) 

AUDITC_male <- AUDITC %>% 
  select(GenderMale, AUDITC1:AUDITC3) %>% 
  dplyr::filter(GenderMale == 1)
AUDITC_score_male <- AUDITC_male %>% 
  mutate(hazardous_drinking = rowSums(across(c(AUDITC1:AUDITC3)))) %>% 
  dplyr::filter(hazardous_drinking>=4) 

AUDITC_female <- AUDITC %>% 
  select(GenderFemale, AUDITC1:AUDITC3) %>% 
  dplyr::filter(GenderFemale == 1)
AUDITC_score_female <- AUDITC_female %>% 
  mutate(hazardous_drinking = rowSums(across(c(AUDITC1:AUDITC3)))) %>% 
  dplyr::filter(hazardous_drinking>=3) 

##ATOP

ATOP_substances <- HOCUS_AUS %>% 
  select(ATOPAlcohol:ATOPTobacco) %>% 
  summarise(across(everything(), ~sum(.==1, na.rm = TRUE))) %>% 
  pivot_longer(cols=everything(), names_to="column", values_to="count") %>% 
  arrange(desc(count))
print(ATOP_substances)
## # A tibble: 9 × 2
##   column              count
##   <chr>               <int>
## 1 ATOPCannabis           68
## 2 ATOPAlcohol            44
## 3 ATOPTobacco            32
## 4 ATOPBenzodiazepine      5
## 5 ATOPCocaine             2
## 6 ATOPOtherOpiods         1
## 7 ATOPOtherSubstances     1
## 8 ATOPAmphethamine        0
## 9 ATOPHeroin              0

##Mental Health Information

mental_health_info <- HOCUS_AUS %>% 
  select(MentalHealthInfo1:MentalConditionTreatmentYear)

#MENTAL HEALTH CONDITION
mental_health_info %>%
  group_by(MentalHealthInfo1) %>% 
  count(MentalHealthInfo2,MentalHealthInfo3, MentalHealthInfo4,MentalHealthInfo5)
## # A tibble: 9 × 6
## # Groups:   MentalHealthInfo1 [3]
##   MentalHealthInfo1 MentalHealthInfo2 MentalHealthInfo3 MentalHealthInfo4
##   <dbl+lbl>         <dbl+lbl>         <dbl+lbl>         <dbl+lbl>        
## 1  1 [Yes]           1 [Depression]    1 [Anxiety]      NA               
## 2  1 [Yes]           1 [Depression]    1 [Anxiety]      NA               
## 3  1 [Yes]           1 [Depression]   NA                NA               
## 4  1 [Yes]           1 [Depression]   NA                NA               
## 5  1 [Yes]          NA                 1 [Anxiety]      NA               
## 6  1 [Yes]          NA                 1 [Anxiety]      NA               
## 7  1 [Yes]          NA                NA                NA               
## 8  2 [No]           NA                NA                NA               
## 9 NA                NA                NA                NA               
## # ℹ 2 more variables: MentalHealthInfo5 <dbl+lbl>, n <int>

##DASS-21

DASS_21 <- HOCUS_AUS %>% 
  select(DASS21_1:DASS21_21) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  dplyr::filter(!if_all(c(DASS21_1:DASS21_18), is.na))

DASS_21_recoded <- DASS_21 %>% 
 mutate(across(1:21,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3))) %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:21), na.rm = TRUE))

mean <- mean(DASS_21_recoded$row_sum, na.rm = TRUE) #score overall m=9.15
sd <- sd(DASS_21_recoded$row_sum, na.rm=TRUE) #sd=8.12

DASS_stress <- DASS_21_recoded %>% 
  select(DASS21_1, DASS21_6,DASS21_8, DASS21_11, DASS21_12, DASS21_14, DASS21_18) 

DASS_depression <- DASS_21_recoded %>% 
  select(DASS21_3, DASS21_5, DASS21_10, DASS21_13, DASS21_16, DASS21_17, DASS21_21)

DASS_anxiety <- DASS_21_recoded %>% 
  select(DASS21_2, DASS21_4, DASS21_7, DASS21_9, DASS21_15, DASS21_18, DASS21_19)

#STRESS
stress_severity <- DASS_stress %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:7), na.rm = TRUE)) %>% 
  mutate(severity = case_when(
                    row_sum>=0 & row_sum<=7 ~ 'Normal',
                    row_sum>=8 & row_sum<=9 ~ 'Mild',
                    row_sum>=10 & row_sum<=12 ~ 'Moderate',
                    row_sum>=13 & row_sum<=16 ~ 'Severe',
                    row_sum>=17 ~ 'Extremely Severe')) %>% 
  count(severity) %>% 
  mutate(severity = factor(severity, levels=c('Normal', 'Mild', 'Moderate', 'Severe', 'Extremely Severe')))

ggplot(stress_severity, aes(x=severity, y=n))+
  geom_bar(stat = "identity") +
  labs(x='Stress Score', y='Number') +
  scale_y_continuous(expand = c(0,0), limits = c(0,75), breaks=seq(0,75, by=5)) +
  labs(x="Stress Severity",
       y="Number of Participants") +
  theme_bw() +
  theme(
    text=element_text(family = "Times New Roman", size=12, face="bold"),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks.y = element_line(colour = "black", linewidth = 0.5),
    axis.ticks.x= element_blank())

#ANXIETY
anxiety_severity <- DASS_anxiety %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:7), na.rm = TRUE)) %>% 
  mutate(severity = case_when(
                    row_sum>=0 & row_sum<=3 ~ 'Normal',
                    row_sum>=4 & row_sum<=5 ~ 'Mild',
                    row_sum>=6 & row_sum<=7 ~ 'Moderate',
                    row_sum>=8 & row_sum<=9 ~ 'Severe',
                    row_sum>=10 ~ 'Extremely Severe')) %>% 
  count(severity) %>% 
  mutate(severity = factor(severity, levels=c('Normal', 'Mild', 'Moderate', 'Severe', 'Extremely Severe')))

ggplot(anxiety_severity, aes(x=severity, y=n))+
  geom_bar(stat = "identity") +
  labs(x='Anxiety Score', y='Number')

#DEPRESSION
depression_severity <- DASS_depression %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:7), na.rm = TRUE)) %>% 
  mutate(severity = case_when(
                    row_sum>=0 & row_sum<=4 ~ 'Normal',
                    row_sum>=5 & row_sum<=6 ~ 'Mild',
                    row_sum>=7 & row_sum<=10 ~ 'Moderate',
                    row_sum>=11 & row_sum<=13 ~ 'Severe',
                    row_sum>=14 ~ 'Extremely Severe')) %>% 
  count(severity) %>% 
  mutate(severity = factor(severity, levels=c('Normal', 'Mild', 'Moderate', 'Severe', 'Extremely Severe')))

ggplot(depression_severity, aes(x=severity, y=n))+
  geom_bar(stat = "identity") +
  labs(x='Depression Score', y='Number')

##DASS-21 BY GENDER
DASS_21_gender <- HOCUS_AUS %>% 
  select(GenderMale, GenderFemale,DASS21_1:DASS21_21) %>% 
  mutate(across(everything(), as.numeric))

DASS_21_recoded_gender <- DASS_21_gender %>% 
 mutate(across(3:23,.fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3)))

DASS_stress_gender <- DASS_21_recoded_gender %>% 
  select(GenderMale, GenderFemale,DASS21_1, DASS21_6,DASS21_8, DASS21_11, DASS21_12, DASS21_14, DASS21_18) %>% 
  mutate(gender=case_when(GenderMale==1 ~ 'Male',
                          GenderFemale==1 ~ 'Female')) %>% 
  select(-GenderMale, -GenderFemale) %>% 
  dplyr::filter(!if_all(c(DASS21_1:DASS21_18), is.na))

DASS_depression_gender <- DASS_21_recoded_gender %>% 
  select(GenderMale, GenderFemale, DASS21_3, DASS21_5, DASS21_10, DASS21_13, DASS21_16, DASS21_17, DASS21_21) %>% 
  mutate(gender=case_when(GenderMale==1 ~ 'Male',
                          GenderFemale==1 ~ 'Female')) %>% 
  select(-GenderMale, -GenderFemale) %>% 
  dplyr::filter(!if_all(c(DASS21_3:DASS21_21), is.na))

DASS_anxiety_gender <- DASS_21_recoded_gender %>% 
  select(GenderMale, GenderFemale, DASS21_2, DASS21_4, DASS21_7, DASS21_9, DASS21_15, DASS21_18, DASS21_19) %>% 
  mutate(gender=case_when(GenderMale==1 ~ 'Male',
                          GenderFemale==1 ~ 'Female')) %>% 
  select(-GenderMale, -GenderFemale) %>% 
  dplyr::filter(!if_all(c(DASS21_2:DASS21_19), is.na))

#STRESS (gender)
stress_severity_gender <- DASS_stress_gender %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:7), na.rm = TRUE)) 

mean_stress <- mean(stress_severity_gender$row_sum, na.rm = TRUE) #stress score overall m=3.58

mean_stress_gender <- stress_severity_gender %>% 
  group_by(gender) %>% 
  summarise(mean=mean(row_sum),
            sd=sd(row_sum)) #male = 3.37, female = 3.84

stress_severity_gender <- DASS_stress_gender %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:7), na.rm = TRUE)) %>%  
  mutate(severity = case_when(
                    row_sum>=0 & row_sum<=7 ~ 'Normal',
                    row_sum>=8 & row_sum<=9 ~ 'Mild',
                    row_sum>=10 & row_sum<=12 ~ 'Moderate',
                    row_sum>=13 & row_sum<=16 ~ 'Severe',
                    row_sum>=17 ~ 'Extremely Severe')) %>% 
  count(severity, gender) %>% 
  complete(severity = c('Normal', 'Mild', 'Moderate', 'Severe', 'Extremely Severe'), fill = list(n = 0)) %>% 
  mutate(severity = factor(severity, levels=c('Normal', 'Mild', 'Moderate', 'Severe', 'Extremely Severe'))) %>%   replace_na(list(gender = "Female"))


ggplot(stress_severity_gender, aes(x=severity, y=n, fill = gender))+
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +
  scale_y_continuous(expand = c(0,0), limits = c(0,40), breaks=seq(0,40, by=10)) +
  labs(x="Stress Severity",
       y="Number of Participants") +
  theme_bw() +
  theme(
    text=element_text(family = "Times New Roman", size=12, face="bold"),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks.y = element_line(colour = "black", linewidth = 0.5),
    axis.ticks.x= element_blank()) +
  scale_fill_discrete(name = "Gender") +
  scale_fill_manual("Gender", values = c("Female"="grey69", "Male"="grey28"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#ANXIETY (gender)
anxiety_severity_gender <- DASS_anxiety_gender %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:7), na.rm = TRUE))

mean_anxiety <- mean(anxiety_severity_gender$row_sum, na.rm = TRUE) #stress score overall m=2.70
mean_anxiety_gender <- anxiety_severity_gender %>% 
  group_by(gender) %>% 
  summarise(mean=mean(row_sum),
            sd=sd(row_sum)) #male = 2.45, female = 3.03

anxiety_severity_gender <- DASS_anxiety_gender %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:7), na.rm = TRUE)) %>% 
  mutate(severity = case_when(
                    row_sum>=0 & row_sum<=3 ~ 'Normal',
                    row_sum>=4 & row_sum<=5 ~ 'Mild',
                    row_sum>=6 & row_sum<=7 ~ 'Moderate',
                    row_sum>=8 & row_sum<=9 ~ 'Severe',
                    row_sum>=10 ~ 'Extremely Severe')) %>% 
  count(severity, gender) %>% 
  complete(severity = c('Normal', 'Mild', 'Moderate', 'Severe', 'Extremely Severe'), fill = list(n = 0)) %>% 
  mutate(severity = factor(severity, levels=c('Normal', 'Mild', 'Moderate', 'Severe', 'Extremely Severe'))) %>%
  replace_na(list(gender = "Female"))

ggplot(anxiety_severity_gender, aes(x=severity, y=n, fill = gender))+
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(expand = c(0,0), limits = c(0,40), breaks=seq(0,40, by=10)) +
  labs(x="Anxiety Severity",
       y="Number of Participants")+
  theme_bw() +
  theme(
    text=element_text(family = "Times New Roman", size=12, face="bold"),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks.y = element_line(colour = "black", linewidth = 0.5),
    axis.ticks.x= element_blank(),
    legend.position = c(.9, .55)) +
  scale_fill_discrete(name = "Gender") +
  scale_fill_manual("Gender", values = c("Female"="grey69", "Male"="grey28")) 
## Warning: A numeric `legend.position` argument in `theme()` was deprecated in ggplot2
## 3.5.0.
## ℹ Please use the `legend.position.inside` argument of `theme()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

#DEPRESSION (gender)
depression_severity_gender <- DASS_depression_gender %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:7), na.rm = TRUE)) 

mean_depression <- mean(depression_severity_gender$row_sum, na.rm = TRUE) #stress score overall m=3.08
mean_depression_gender <- depression_severity_gender %>% 
  group_by(gender) %>% 
  summarise(mean=mean(row_sum),
            sd=sd(row_sum)) #male = 2.90, female = 3.31

depression_severity_gender <- DASS_depression_gender %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(1:7), na.rm = TRUE)) %>% 
  mutate(severity = case_when(
                    row_sum>=0 & row_sum<=4 ~ 'Normal',
                    row_sum>=5 & row_sum<=6 ~ 'Mild',
                    row_sum>=7 & row_sum<=10 ~ 'Moderate',
                    row_sum>=11 & row_sum<=13 ~ 'Severe',
                    row_sum>=14 ~ 'Extremely Severe')) %>% 
  count(severity, gender) %>% 
  complete(severity = c('Normal', 'Mild', 'Moderate', 'Severe', 'Extremely Severe'), fill = list(n = 0)) %>% 
  mutate(severity = factor(severity, levels=c('Normal', 'Mild', 'Moderate', 'Severe', 'Extremely Severe')))

ggplot(depression_severity_gender, aes(x=severity, y=n, fill=gender))+
  geom_bar(stat = "identity", position = "dodge") +
  scale_y_continuous(expand = c(0,0), limits = c(0,40), breaks=seq(0,40, by=10)) +
  labs(x="Depression Severity",
       y="Number of Participants") +
  theme_bw() +
  theme(
    text=element_text(family = "Times New Roman", size=12, face="bold"),
    axis.line = element_line(colour = "black"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.border = element_blank(),
    panel.background = element_blank(),
    axis.ticks.y = element_line(colour = "black", linewidth = 0.5),
    axis.ticks.x= element_blank(),
    legend.position = c(.9, .55)) +
  scale_fill_discrete(name = "Gender") +
  scale_fill_manual("Gender", values = c("Female"="grey69", "Male"="grey28"))
## Scale for fill is already present.
## Adding another scale for fill, which will replace the existing scale.

##ISI

ISI <- HOCUS_AUS %>% 
  select(Participant, ISI_1:ISI_4,ISI_5:ISI_7) %>% 
  mutate(across(everything(), as.numeric))

ISI_recoded <- ISI %>% 
  mutate(
  across(2:8,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3,'5' = 4))) %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(2:8), na.rm = TRUE))

mean(ISI_recoded$row_sum)
## [1] 6.5
sd(ISI_recoded$row_sum)
## [1] 5.807023
ISI_scoring <- ISI_recoded %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(2:8), na.rm = TRUE)) %>% 
  mutate(severity = case_when(
                    row_sum>=0 & row_sum<=7 ~ 'No Insomnia',
                    row_sum>=8 & row_sum<=14 ~ 'Subthreshold Insomnia',
                    row_sum>=15 & row_sum<=21 ~ 'Moderate Insomnia',
                    row_sum>=22 & row_sum<=28 ~ 'Severe Insomnia')) %>% 
  count(severity) %>% 
  mutate(severity = factor(severity, levels=c('No Insomnia', 'Subthreshold Insomnia', 'Moderate Insomnia', 'Severe Insomnia')))

ISI_scoring <- ISI_recoded %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(2:8), na.rm = TRUE)) %>%  
  rowwise() %>% 
  mutate(row_sum = sum(c_across(2:8), na.rm = TRUE)) %>% 
  mutate(severity = case_when(
                    row_sum>=0 & row_sum<=7 ~ 'No insomnia',
                    row_sum>=8 & row_sum<=14 ~ 'Subthreshold insomnia',
                    row_sum>=15 & row_sum<=21 ~ 'Moderate Insomnia',
                    row_sum>=22 & row_sum<=28 ~ 'Severe Insomnia')) %>% 
  count(severity) %>% 
  mutate(severity = factor(severity, levels=c('No insomnia', 'Subthreshold Insomnia', 'Moderate Insomnia', 'Severe Insomnia')))

ISI_scoring <- ISI_recoded %>% 
  rowwise() %>% 
  mutate(row_sum = sum(c_across(2:8), na.rm = TRUE))
ISI_scoring %>% 
  dplyr::filter(row_sum >= 15)
## # A tibble: 9 × 9
## # Rowwise: 
##   Participant ISI_1 ISI_2 ISI_3 ISI_4 ISI_5 ISI_6 ISI_7 row_sum
##         <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>   <dbl>
## 1           2     3     3     2     4     2     2     2      18
## 2          29     0     4     4     3     2     3     3      19
## 3          41     3     2     0     4     4     3     3      19
## 4          42     0     3     3     3     3     3     3      18
## 5          44     4     3     4     3     2     1     2      19
## 6          48     4     4     3     2     2     1     2      18
## 7          52     3     2     2     3     2     1     2      15
## 8          58     2     3     4     4     2     2     3      20
## 9          64     4     2     3     2     3     1     2      17
sleep <- HOCUS_AUS %>% 
  dplyr::filter(Participant == 2|Participant ==29|Participant ==41|Participant ==42|Participant ==44|Participant ==48|Participant ==58|Participant ==64|Participant ==52) %>% 
  select(Participant, ConditionInsomnia, CannabisMostHelpfulFor)

##BPI

BPI <- HOCUS_AUS %>% 
  select(BPI_Q1:BPI_Q9g)

BPI_pain <- BPI %>% 
  select(BPI_Q3:BPI_Q6) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(1:4,
        .fns = ~recode(.,'1' = 0,'2' = 1,
                       '3' = 2,'4' = 3, 
                       '5' = 4,'6' = 5, 
                       '7' = 6, '8' = 7, 
                       '9' = 8, '10' = 9, 
                       '11' = 10))) %>%
  rowwise() %>% 
  mutate(row_mean = mean(c_across(1:4), na.rm=TRUE)) %>% 
  mutate(pain_severity = case_when(
                    row_mean>=1 & row_mean<=4 ~ 'Mild Pain',
                    row_mean>=5 & row_mean<=6 ~ 'Moderate Pain',
                    row_mean>=7 & row_mean<=10 ~ 'Severe Pain')) %>% 
  count(pain_severity) 

BPI_pain <- BPI %>% 
  select(BPI_Q3:BPI_Q6) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(1:4,
        .fns = ~recode(.,'1' = 0,'2' = 1,
                       '3' = 2,'4' = 3, 
                       '5' = 4,'6' = 5, 
                       '7' = 6, '8' = 7, 
                       '9' = 8, '10' = 9, 
                       '11' = 10))) %>%
  rowwise() %>% 
  mutate(row_mean = mean(c_across(1:4), na.rm=TRUE))
mean(BPI_pain$row_mean, na.rm=TRUE) #mean = 3.28
## [1] 3.285714
sd(BPI_pain$row_mean, na.rm=TRUE) #sd=2.33
## [1] 2.329041
BPI_interference <- BPI %>% 
  select(BPI_Q9a:BPI_Q9g) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(1:7,
        .fns = ~recode(.,'1' = 0,'2' = 1,
                       '3' = 2,'4' = 3, 
                       '5' = 4,'6' = 5, 
                       '7' = 6, '8' = 7, 
                       '9' = 8, '10' = 9, 
                       '11' = 10))) %>% 
  rowwise() %>% 
  mutate(row_mean = mean(c_across(1:7), na.rm=TRUE)) %>% 
  mutate(pain_interference = case_when(
                    row_mean>=1 & row_mean<=4 ~ 'Mild Interference',
                    row_mean>=5 & row_mean<=6 ~ 'Moderate Interference',
                    row_mean>=7 & row_mean<=10 ~ 'Severe Interference')) %>% 
  count(pain_interference) 

BPI_interference <- BPI %>% 
  select(BPI_Q9a:BPI_Q9g) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(1:7,
        .fns = ~recode(.,'1' = 0,'2' = 1,
                       '3' = 2,'4' = 3, 
                       '5' = 4,'6' = 5, 
                       '7' = 6, '8' = 7, 
                       '9' = 8, '10' = 9, 
                       '11' = 10))) %>% 
  rowwise() %>% 
  mutate(row_mean = mean(c_across(1:7), na.rm=TRUE))
mean(BPI_interference$row_mean, na.rm=TRUE) 
## [1] 2.935034
sd(BPI_interference$row_mean, na.rm=TRUE)
## [1] 2.74598

#Correlations - Usage Patterns

usage_pattern <- HOCUS_AUS %>% 
  select(CannabisUseYearsTotal, CannabisAgeFirstUse, CannabisFirstRegularUse, CannabisFirstDailyUse, CannabisGramsPerDay, CannabisGramsPerWeek) %>% 
  mutate(across(everything(), as.numeric))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(everything(), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
cor.test(usage_pattern$CannabisAgeFirstUse,usage_pattern$CannabisFirstRegularUse, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  usage_pattern$CannabisAgeFirstUse and usage_pattern$CannabisFirstRegularUse
## t = 4.9146, df = 69, p-value = 5.767e-06
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3130944 0.6636640
## sample estimates:
##       cor 
## 0.5092028
cor.test(usage_pattern$CannabisAgeFirstUse,usage_pattern$CannabisGramsPerDay, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  usage_pattern$CannabisAgeFirstUse and usage_pattern$CannabisGramsPerDay
## t = -1.4184, df = 63, p-value = 0.161
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.40254559  0.07102845
## sample estimates:
##        cor 
## -0.1759182
cor.test(usage_pattern$CannabisAgeFirstUse,usage_pattern$CannabisGramsPerWeek, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  usage_pattern$CannabisAgeFirstUse and usage_pattern$CannabisGramsPerWeek
## t = -1.9837, df = 62, p-value = 0.05172
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.462347070  0.001602962
## sample estimates:
##        cor 
## -0.2443026
cor.test(usage_pattern$CannabisFirstRegularUse,usage_pattern$CannabisGramsPerDay, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  usage_pattern$CannabisFirstRegularUse and usage_pattern$CannabisGramsPerDay
## t = -1.7193, df = 59, p-value = 0.0908
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.44573312  0.03533428
## sample estimates:
##      cor 
## -0.21843
cor.test(usage_pattern$CannabisFirstRegularUse,usage_pattern$CannabisGramsPerWeek, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  usage_pattern$CannabisFirstRegularUse and usage_pattern$CannabisGramsPerWeek
## t = -1.7938, df = 58, p-value = 0.07806
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.45660765  0.02618423
## sample estimates:
##        cor 
## -0.2292648
cor.test(usage_pattern$CannabisGramsPerDay,usage_pattern$CannabisGramsPerWeek, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  usage_pattern$CannabisGramsPerDay and usage_pattern$CannabisGramsPerWeek
## t = 4.3841, df = 62, p-value = 4.59e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2733367 0.6540602
## sample estimates:
##       cor 
## 0.4864613
ggplot(usage_pattern, aes(CannabisFirstRegularUse, CannabisGramsPerWeek)) +
  geom_point() +
  stat_smooth(method = lm)
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 20 rows containing non-finite outside the scale range
## (`stat_smooth()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_point()`).

#Correlations - Scales

scales <- HOCUS_AUS %>% 
  select(DASS21_1:DASS21_21,PDQ1:PDQ20,AQoL6D1:AQoL6D20,ISI_1:ISI_4,ISI_5:ISI_7,BPI_Q3:BPI_Q6,BPI_Q3:BPI_Q6) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(1:21,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3))) %>% 
  rowwise() %>% 
  mutate(DASS = sum(c_across(1:21), na.rm = TRUE)) %>% 
  mutate(across(22:41,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3,'5' = 4))) %>%
  rowwise() %>% 
  mutate(PDQ = sum(c_across(22:41), na.rm = TRUE)) %>% 
  rowwise() %>% 
  mutate(AQOL = sum(c_across(42:61), na.rm = TRUE)) %>% 
  rowwise() %>% 
  mutate(across(62:68,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3,'5' = 4))) %>%
  rowwise() %>% 
  mutate(ISI = sum(c_across(62:68), na.rm = TRUE)) %>% 
  mutate(across(69:72,
        .fns = ~recode(.,'1' = 0,'2' = 1,
                       '3' = 2,'4' = 3, 
                       '5' = 4,'6' = 5, 
                       '7' = 6, '8' = 7, 
                       '9' = 8, '10' = 9, 
                       '11' = 10))) %>%
  rowwise() %>% 
  mutate(BPI = mean(c_across(69:72), na.rm=TRUE)) %>% 
  dplyr::filter(!if_all(c(DASS21_1:DASS21_21), is.na)) %>% 
  dplyr::filter(!if_all(c(PDQ1:PDQ20), is.na)) %>% 
  dplyr::filter(!if_all(c(AQoL6D1:AQoL6D20), is.na)) %>% 
  dplyr::filter(!if_all(c(ISI_1:ISI_4,ISI_5:ISI_7), is.na))

scales <- scales[,-c(1:72)]
scales
## # A tibble: 72 × 5
## # Rowwise: 
##     DASS   PDQ  AQOL   ISI   BPI
##    <dbl> <dbl> <dbl> <dbl> <dbl>
##  1    20    28    54    18  4   
##  2     8    12    30     1  3   
##  3    14    24    41     8  5.25
##  4     5    15    26     1  0   
##  5    18    42    43     7  2.25
##  6    14    23    37     5  2.5 
##  7     3    14    39     2  5.25
##  8     9    18    49     6  4   
##  9    25    46    46    13  0   
## 10     3     5    25     8  0   
## # ℹ 62 more rows
matrix <- rcorr(as.matrix(scales))
matrix
##      DASS   PDQ AQOL  ISI   BPI
## DASS 1.00  0.65 0.56 0.44  0.07
## PDQ  0.65  1.00 0.37 0.19 -0.03
## AQOL 0.56  0.37 1.00 0.48  0.49
## ISI  0.44  0.19 0.48 1.00  0.38
## BPI  0.07 -0.03 0.49 0.38  1.00
## 
## n
##      DASS PDQ AQOL ISI BPI
## DASS   72  72   72  72  70
## PDQ    72  72   72  72  70
## AQOL   72  72   72  72  70
## ISI    72  72   72  72  70
## BPI    70  70   70  70  70
## 
## P
##      DASS   PDQ    AQOL   ISI    BPI   
## DASS        0.0000 0.0000 0.0000 0.5374
## PDQ  0.0000        0.0013 0.1060 0.7751
## AQOL 0.0000 0.0013        0.0000 0.0000
## ISI  0.0000 0.1060 0.0000        0.0013
## BPI  0.5374 0.7751 0.0000 0.0013
ggplot(scales, aes(DASS, PDQ)) +
  geom_point() +
  stat_smooth(method = lm)
## `geom_smooth()` using formula = 'y ~ x'

cor.test(scales$DASS,scales$PDQ, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  scales$DASS and scales$PDQ
## t = 7.216, df = 70, p-value = 5.036e-10
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.4965845 0.7685052
## sample estimates:
##       cor 
## 0.6531166
cor.test(scales$DASS,scales$AQOL, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  scales$DASS and scales$AQOL
## t = 5.6755, df = 70, p-value = 2.899e-07
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.3789991 0.7017774
## sample estimates:
##       cor 
## 0.5613775
cor.test(scales$DASS,scales$ISI, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  scales$DASS and scales$ISI
## t = 4.1557, df = 70, p-value = 9.037e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2376763 0.6133123
## sample estimates:
##       cor 
## 0.4448504
cor.test(scales$DASS,scales$BPI, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  scales$DASS and scales$BPI
## t = 0.61993, df = 68, p-value = 0.5374
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1628777  0.3045744
## sample estimates:
##        cor 
## 0.07496542
cor.test(scales$PDQ,scales$AQOL, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  scales$PDQ and scales$AQOL
## t = 3.3484, df = 70, p-value = 0.001312
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1530614 0.5554146
## sample estimates:
##       cor 
## 0.3715548
cor.test(scales$AQOL,scales$ISI, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  scales$AQOL and scales$ISI
## t = 4.5284, df = 70, p-value = 2.378e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2746096 0.6373787
## sample estimates:
##      cor 
## 0.475996
cor.test(scales$AQOL,scales$BPI, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  scales$AQOL and scales$BPI
## t = 4.6043, df = 68, p-value = 1.867e-05
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.2852035 0.6482258
## sample estimates:
##      cor 
## 0.487507
cor.test(scales$ISI,scales$BPI, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  scales$ISI and scales$BPI
## t = 3.3536, df = 68, p-value = 0.001307
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1555096 0.5619490
## sample estimates:
##       cor 
## 0.3767181

#Correlations - Usage Patterns & Scales

frequency_and_distress <- HOCUS_AUS %>% 
  select(DFAQCUCurrentFreq, CannabisUseYearsTotal, CannabisAgeFirstUse, CannabisFirstRegularUse, CannabisFirstDailyUse, CannabisFreqBefore16, DASS21_1:DASS21_21) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(7:27,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3))) %>% 
  rowwise() %>% 
  mutate(total = sum(c_across(7:27), na.rm = TRUE)) %>% 
  dplyr::filter(!if_all(c(DASS21_1:DASS21_21), is.na))

cor.test(frequency_and_distress$CannabisAgeFirstUse,frequency_and_distress$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_distress$CannabisAgeFirstUse and frequency_and_distress$total
## t = -0.18781, df = 69, p-value = 0.8516
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.254565  0.211817
## sample estimates:
##         cor 
## -0.02260371
cor.test(frequency_and_distress$CannabisFirstRegularUse,frequency_and_distress$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_distress$CannabisFirstRegularUse and frequency_and_distress$total
## t = -1.6095, df = 61, p-value = 0.1127
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.42818290  0.04835327
## sample estimates:
##        cor 
## -0.2018297
frequency_and_PDQ <- HOCUS_AUS %>% 
  select(DFAQCUCurrentFreq, CannabisUseYearsTotal, CannabisAgeFirstUse, CannabisFirstRegularUse, CannabisFirstDailyUse, CannabisFreqBefore16, PDQ1:PDQ20) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(7:26,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3,'5' = 4))) %>% 
  dplyr::filter(!if_all(c(PDQ1:PDQ20), is.na)) %>% 
  rowwise() %>% 
  mutate(total = sum(c_across(7:26), na.rm = TRUE)) %>% 
  dplyr::filter(!if_all(c(PDQ1:PDQ20), is.na))

cor.test(frequency_and_PDQ$DFAQCUCurrentFreq,frequency_and_PDQ$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_PDQ$DFAQCUCurrentFreq and frequency_and_PDQ$total
## t = -0.27374, df = 71, p-value = 0.7851
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2605899  0.1990852
## sample estimates:
##         cor 
## -0.03246928
cor.test(frequency_and_PDQ$CannabisUseYearsTotal,frequency_and_PDQ$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_PDQ$CannabisUseYearsTotal and frequency_and_PDQ$total
## t = 0.36694, df = 70, p-value = 0.7148
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1897789  0.2727163
## sample estimates:
##        cor 
## 0.04381602
cor.test(frequency_and_PDQ$CannabisAgeFirstUse,frequency_and_PDQ$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_PDQ$CannabisAgeFirstUse and frequency_and_PDQ$total
## t = -0.96467, df = 70, p-value = 0.338
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3372601  0.1203204
## sample estimates:
##        cor 
## -0.1145409
cor.test(frequency_and_PDQ$CannabisFirstRegularUse,frequency_and_PDQ$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_PDQ$CannabisFirstRegularUse and frequency_and_PDQ$total
## t = -0.83577, df = 62, p-value = 0.4065
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3424741  0.1439949
## sample estimates:
##        cor 
## -0.1055504
cor.test(frequency_and_PDQ$CannabisFirstDailyUse,frequency_and_PDQ$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_PDQ$CannabisFirstDailyUse and frequency_and_PDQ$total
## t = 0.44144, df = 56, p-value = 0.6606
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2024885  0.3124308
## sample estimates:
##        cor 
## 0.05888717
cor.test(frequency_and_PDQ$CannabisFreqBefore16,frequency_and_PDQ$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_PDQ$CannabisFreqBefore16 and frequency_and_PDQ$total
## t = -0.39379, df = 70, p-value = 0.6949
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2756811  0.1866867
## sample estimates:
##         cor 
## -0.04701526
frequency_and_AQoL <- HOCUS_AUS %>% 
  select(DFAQCUCurrentFreq, CannabisUseYearsTotal, CannabisAgeFirstUse, CannabisFirstRegularUse, CannabisFirstDailyUse, CannabisFreqBefore16, AQoL6D1:AQoL6D20) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  dplyr::filter(!if_all(c(AQoL6D1:AQoL6D20), is.na)) %>% 
  rowwise() %>% 
  mutate(total = sum(c_across(7:26), na.rm = TRUE))

cor.test(frequency_and_AQoL$DFAQCUCurrentFreq,frequency_and_AQoL$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_AQoL$DFAQCUCurrentFreq and frequency_and_AQoL$total
## t = -0.2128, df = 70, p-value = 0.8321
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2555893  0.2074644
## sample estimates:
##         cor 
## -0.02542625
cor.test(frequency_and_AQoL$CannabisUseYearsTotal,frequency_and_AQoL$total, use= "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_AQoL$CannabisUseYearsTotal and frequency_and_AQoL$total
## t = -0.9461, df = 69, p-value = 0.3474
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3375572  0.1233960
## sample estimates:
##        cor 
## -0.1131656
cor.test(frequency_and_AQoL$CannabisAgeFirstUse,frequency_and_AQoL$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_AQoL$CannabisAgeFirstUse and frequency_and_AQoL$total
## t = 0.45887, df = 69, p-value = 0.6478
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1804689  0.2847961
## sample estimates:
##        cor 
## 0.05515722
cor.test(frequency_and_AQoL$CannabisFirstRegularUse,frequency_and_AQoL$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_AQoL$CannabisFirstRegularUse and frequency_and_AQoL$total
## t = 0.60953, df = 61, p-value = 0.5444
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1733000  0.3194134
## sample estimates:
##        cor 
## 0.07780587
cor.test(frequency_and_AQoL$CannabisFirstDailyUse,frequency_and_AQoL$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_AQoL$CannabisFirstDailyUse and frequency_and_AQoL$total
## t = 1.3555, df = 55, p-value = 0.1808
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08474626  0.42065256
## sample estimates:
##       cor 
## 0.1797916
cor.test(frequency_and_AQoL$CannabisFreqBefore16,frequency_and_AQoL$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_and_AQoL$CannabisFreqBefore16 and frequency_and_AQoL$total
## t = 0.84418, df = 69, p-value = 0.4015
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1353913  0.3267034
## sample estimates:
##       cor 
## 0.1011061
frequency_ISI <- HOCUS_AUS %>% 
  select(DFAQCUCurrentFreq, CannabisUseYearsTotal, CannabisAgeFirstUse, CannabisRegularUse, CannabisFirstRegularUse, CannabisDailyUse, CannabisFirstDailyUse, CannabisFreqBefore16, ISI_1:ISI_4,ISI_5:ISI_7) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  dplyr::filter(!if_all(c(ISI_1:ISI_4,ISI_5:ISI_7), is.na)) %>% 
  rowwise() %>% 
  mutate(across(9:15,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3,'5' = 4))) %>% 
  rowwise() %>% 
  mutate(total = sum(c_across(9:15), na.rm = TRUE))

cor.test(frequency_ISI$DFAQCUCurrentFreq,frequency_ISI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_ISI$DFAQCUCurrentFreq and frequency_ISI$total
## t = -1.3283, df = 70, p-value = 0.1884
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.37485333  0.07768899
## sample estimates:
##        cor 
## -0.1568017
cor.test(frequency_ISI$CannabisUseYearsTotal,frequency_ISI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_ISI$CannabisUseYearsTotal and frequency_ISI$total
## t = 1.2867, df = 69, p-value = 0.2025
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.08319471  0.37306043
## sample estimates:
##       cor 
## 0.1530803
cor.test(frequency_ISI$CannabisAgeFirstUse,frequency_ISI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_ISI$CannabisAgeFirstUse and frequency_ISI$total
## t = 1.4893, df = 69, p-value = 0.141
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.05926354  0.39357899
## sample estimates:
##       cor 
## 0.1764803
cor.test(frequency_ISI$CannabisFirstRegularUse,frequency_ISI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_ISI$CannabisFirstRegularUse and frequency_ISI$total
## t = 0.55017, df = 61, p-value = 0.5842
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1806424  0.3125904
## sample estimates:
##        cor 
## 0.07026756
cor.test(frequency_ISI$CannabisFirstDailyUse,frequency_ISI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_ISI$CannabisFirstDailyUse and frequency_ISI$total
## t = 0.28736, df = 55, p-value = 0.7749
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2241099  0.2962972
## sample estimates:
##        cor 
## 0.03871881
cor.test(frequency_ISI$CannabisFreqBefore16,frequency_ISI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_ISI$CannabisFreqBefore16 and frequency_ISI$total
## t = 0.53234, df = 69, p-value = 0.5962
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1719136  0.2928888
## sample estimates:
##        cor 
## 0.06395522
frequency_pain <- HOCUS_AUS %>%  
  select(DFAQCUCurrentFreq, CannabisUseYearsTotal, CannabisAgeFirstUse, CannabisRegularUse, CannabisFirstRegularUse, CannabisDailyUse, CannabisFirstDailyUse, CannabisFreqBefore16, BPI_Q3:BPI_Q6) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(9:12,
        .fns = ~recode(.,'1' = 0,'2' = 1,
                       '3' = 2,'4' = 3, 
                       '5' = 4,'6' = 5, 
                       '7' = 6, '8' = 7, 
                       '9' = 8, '10' = 9, 
                       '11' = 10))) %>%
  rowwise() %>% 
  mutate(total = mean(c_across(9:12), na.rm=TRUE)) %>% 
  dplyr::filter(!if_all(c(BPI_Q3:BPI_Q6), is.na))

cor.test(frequency_pain$DFAQCUCurrentFreq,frequency_pain$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_pain$DFAQCUCurrentFreq and frequency_pain$total
## t = 0.3146, df = 68, p-value = 0.754
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1986306  0.2706721
## sample estimates:
##        cor 
## 0.03812276
cor.test(frequency_pain$CannabisUseYearsTotal,frequency_pain$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_pain$CannabisUseYearsTotal and frequency_pain$total
## t = -0.7918, df = 67, p-value = 0.4313
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3255465  0.1436702
## sample estimates:
##         cor 
## -0.09628454
cor.test(frequency_pain$CannabisAgeFirstUse,frequency_pain$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_pain$CannabisAgeFirstUse and frequency_pain$total
## t = -0.15984, df = 67, p-value = 0.8735
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2550262  0.2181649
## sample estimates:
##         cor 
## -0.01952392
cor.test(frequency_pain$CannabisFirstRegularUse,frequency_pain$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_pain$CannabisFirstRegularUse and frequency_pain$total
## t = 0.62861, df = 59, p-value = 0.532
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1738258  0.3266762
## sample estimates:
##        cor 
## 0.08156532
cor.test(frequency_pain$CannabisFirstDailyUse,frequency_pain$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_pain$CannabisFirstDailyUse and frequency_pain$total
## t = 0.79086, df = 53, p-value = 0.4326
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1619397  0.3628970
## sample estimates:
##       cor 
## 0.1079973
cor.test(frequency_pain$CannabisFreqBefore16,frequency_pain$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_pain$CannabisFreqBefore16 and frequency_pain$total
## t = -0.4504, df = 67, p-value = 0.6539
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2878787  0.1841336
## sample estimates:
##         cor 
## -0.05494146
frequency_interference <- HOCUS_AUS %>% 
  select(DFAQCUCurrentFreq, CannabisUseYearsTotal, CannabisAgeFirstUse, CannabisRegularUse, CannabisFirstRegularUse, CannabisDailyUse, CannabisFirstDailyUse, CannabisFreqBefore16, BPI_Q9a:BPI_Q9g) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(9:15,
        .fns = ~recode(.,'1' = 0,'2' = 1,
                       '3' = 2,'4' = 3, 
                       '5' = 4,'6' = 5, 
                       '7' = 6, '8' = 7, 
                       '9' = 8, '10' = 9, 
                       '11' = 10))) %>% 
  rowwise() %>% 
  mutate(total = mean(c_across(9:15), na.rm=TRUE)) %>% 
  dplyr::filter(!if_all(c(BPI_Q9a:BPI_Q9g), is.na))

cor.test(frequency_interference$DFAQCUCurrentFreq,frequency_interference$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_interference$DFAQCUCurrentFreq and frequency_interference$total
## t = -0.1059, df = 68, p-value = 0.916
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2470696  0.2228053
## sample estimates:
##         cor 
## -0.01284103
cor.test(frequency_interference$CannabisUseYearsTotal,frequency_interference$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_interference$CannabisUseYearsTotal and frequency_interference$total
## t = -1.2091, df = 67, p-value = 0.2309
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.37001621  0.09379141
## sample estimates:
##        cor 
## -0.1461332
cor.test(frequency_interference$CannabisAgeFirstUse,frequency_interference$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_interference$CannabisAgeFirstUse and frequency_interference$total
## t = -0.0064389, df = 67, p-value = 0.9949
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2374231  0.2359380
## sample estimates:
##           cor 
## -0.0007866409
cor.test(frequency_interference$CannabisFirstRegularUse,frequency_interference$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_interference$CannabisFirstRegularUse and frequency_interference$total
## t = 1.2565, df = 59, p-value = 0.2139
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.09421267  0.39711478
## sample estimates:
##       cor 
## 0.1614381
cor.test(frequency_interference$CannabisFirstDailyUse,frequency_interference$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_interference$CannabisFirstDailyUse and frequency_interference$total
## t = 1.1241, df = 53, p-value = 0.266
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1174523  0.4016370
## sample estimates:
##       cor 
## 0.1525998
cor.test(frequency_interference$CannabisFreqBefore16,frequency_interference$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  frequency_interference$CannabisFreqBefore16 and frequency_interference$total
## t = 0.25471, df = 67, p-value = 0.7997
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2071027  0.2658264
## sample estimates:
##        cor 
## 0.03110261
total_BPI <- HOCUS_AUS %>%  
  select(DFAQCUCurrentFreq, CannabisUseYearsTotal, CannabisAgeFirstUse, CannabisRegularUse, CannabisFirstRegularUse, CannabisDailyUse, CannabisFirstDailyUse, CannabisFreqBefore16, BPI_Q3:BPI_Q6,BPI_Q9a:BPI_Q9g) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(9:19,
        .fns = ~recode(.,'1' = 0,'2' = 1,
                       '3' = 2,'4' = 3, 
                       '5' = 4,'6' = 5, 
                       '7' = 6, '8' = 7, 
                       '9' = 8, '10' = 9, 
                       '11' = 10))) %>%
  rowwise() %>% 
  mutate(total = mean(c_across(9:19), na.rm=TRUE)) %>% 
  dplyr::filter(!if_all(c(BPI_Q3:BPI_Q6,BPI_Q9a:BPI_Q9g), is.na))

cor.test(total_BPI$DFAQCUCurrentFreq,total_BPI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  total_BPI$DFAQCUCurrentFreq and total_BPI$total
## t = 0.037915, df = 68, p-value = 0.9699
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2306254  0.2393133
## sample estimates:
##         cor 
## 0.004597812
cor.test(total_BPI$CannabisUseYearsTotal,total_BPI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  total_BPI$CannabisUseYearsTotal and total_BPI$total
## t = -1.1205, df = 67, p-value = 0.2665
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3607306  0.1044024
## sample estimates:
##        cor 
## -0.1356297
cor.test(total_BPI$CannabisAgeFirstUse,total_BPI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  total_BPI$CannabisAgeFirstUse and total_BPI$total
## t = -0.056006, df = 67, p-value = 0.9555
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2431290  0.2302115
## sample estimates:
##          cor 
## -0.006842017
cor.test(total_BPI$CannabisFirstRegularUse,total_BPI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  total_BPI$CannabisFirstRegularUse and total_BPI$total
## t = 1.0952, df = 59, p-value = 0.2779
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1147447  0.3794864
## sample estimates:
##       cor 
## 0.1411547
cor.test(total_BPI$CannabisFirstDailyUse,total_BPI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  total_BPI$CannabisFirstDailyUse and total_BPI$total
## t = 1.0572, df = 53, p-value = 0.2952
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1264029  0.3939892
## sample estimates:
##       cor 
## 0.1437136
cor.test(total_BPI$CannabisFreqBefore16,total_BPI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  total_BPI$CannabisFreqBefore16 and total_BPI$total
## t = 0.033263, df = 67, p-value = 0.9736
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.232841  0.240513
## sample estimates:
##         cor 
## 0.004063671
amount_DASS <- HOCUS_AUS %>% 
  select(CannabisGramsPerDay, CannabisGramsPerWeek,DASS21_1:DASS21_21) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(3:23,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3))) %>% 
  rowwise() %>% 
  mutate(total = sum(c_across(3:23), na.rm = TRUE)) %>% 
  dplyr::filter(!if_all(c(DASS21_1:DASS21_21), is.na)) %>% 
  dplyr::filter(!if_all(c(CannabisGramsPerDay:CannabisGramsPerWeek), is.na))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(everything(), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
cor.test(amount_DASS$CannabisGramsPerDay,amount_DASS$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  amount_DASS$CannabisGramsPerDay and amount_DASS$total
## t = 0.40622, df = 57, p-value = 0.6861
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2051784  0.3056045
## sample estimates:
##        cor 
## 0.05372687
cor.test(amount_DASS$CannabisGramsPerWeek,amount_DASS$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  amount_DASS$CannabisGramsPerWeek and amount_DASS$total
## t = -0.28154, df = 56, p-value = 0.7793
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2930461  0.2228640
## sample estimates:
##         cor 
## -0.03759603
amount_PDQ <- HOCUS_AUS %>% 
  select(Age, LevelOfEducation, CannabisGramsPerDay, CannabisGramsPerWeek, GenderMale, GenderFemale, PDQ1:PDQ20, DASS21_1:DASS21_21) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(Gender = case_when(GenderMale == 1 ~ 1,
                            GenderFemale == 1 ~ 2)) %>% 
  mutate(across(5:24,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3,'5' = 4))) %>% 
  dplyr::filter(!if_all(c(PDQ1:PDQ20), is.na)) %>% 
  rowwise() %>% 
  mutate(PDQ = sum(c_across(5:24), na.rm = TRUE)) %>% 
  dplyr::filter(!if_all(c(PDQ1:PDQ20), is.na)) %>% 
   mutate(across(25:45,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3))) %>% 
  rowwise() %>% 
  mutate(DASS = sum(c_across(25:45), na.rm = TRUE)) %>% 
  dplyr::filter(!if_all(c(DASS21_1:DASS21_21), is.na)) %>%
  dplyr::filter(!if_all(c(CannabisGramsPerDay:CannabisGramsPerWeek), is.na))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(everything(), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
amount_PDQ <- amount_PDQ[,-c(5:6)]

subdomains <- amount_PDQ %>% 
  rowwise() %>% 
  mutate(attention = sum(PDQ1, PDQ5, PDQ9, PDQ13, PDQ17)) %>% 
  mutate(r_memory = sum(PDQ2, PDQ6, PDQ10, PDQ14, PDQ18)) %>% 
  mutate(p_memory = sum(PDQ3, PDQ7, PDQ11, PDQ15, PDQ19)) %>% 
  mutate(planning = sum(PDQ4, PDQ8, PDQ12, PDQ16, PDQ20))

cor.test(subdomains$CannabisGramsPerDay,subdomains$attention, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  subdomains$CannabisGramsPerDay and subdomains$attention
## t = 3.374, df = 56, p-value = 0.00135
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.1708718 0.6050830
## sample estimates:
##       cor 
## 0.4110267
cor.test(subdomains$CannabisGramsPerDay,subdomains$r_memory, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  subdomains$CannabisGramsPerDay and subdomains$r_memory
## t = 0.9701, df = 56, p-value = 0.3362
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1341921  0.3744223
## sample estimates:
##       cor 
## 0.1285597
cor.test(subdomains$CannabisGramsPerDay,subdomains$p_memory, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  subdomains$CannabisGramsPerDay and subdomains$p_memory
## t = 2.1399, df = 56, p-value = 0.03674
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  0.01790831 0.49787229
## sample estimates:
##       cor 
## 0.2749324
cor.test(subdomains$CannabisGramsPerDay,subdomains$planning, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  subdomains$CannabisGramsPerDay and subdomains$planning
## t = 0.41501, df = 55, p-value = 0.6798
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2077196  0.3118991
## sample estimates:
##        cor 
## 0.05587218
cor.test(amount_PDQ$Age,amount_PDQ$PDQ, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  amount_PDQ$Age and amount_PDQ$PDQ
## t = -0.23373, df = 57, p-value = 0.816
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2847693  0.2269370
## sample estimates:
##         cor 
## -0.03094351
cor.test(amount_PDQ$LevelOfEducation,amount_PDQ$PDQ, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  amount_PDQ$LevelOfEducation and amount_PDQ$PDQ
## t = -1.6192, df = 57, p-value = 0.1109
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.44203973  0.04902003
## sample estimates:
##        cor 
## -0.2096946
with(amount_PDQ, shapiro.test(PDQ[Gender == "1"])) #p=.3
## 
##  Shapiro-Wilk normality test
## 
## data:  PDQ[Gender == "1"]
## W = 0.96554, p-value = 0.3328
with(amount_PDQ, shapiro.test(PDQ[Gender == "2"])) #p=.1
## 
##  Shapiro-Wilk normality test
## 
## data:  PDQ[Gender == "2"]
## W = 0.93236, p-value = 0.1101
res.ftest <- var.test(PDQ ~ Gender, data = amount_PDQ)
res.ftest #p=.17
## 
##  F test to compare two variances
## 
## data:  PDQ by Gender
## F = 1.7484, num df = 34, denom df = 23, p-value = 0.164
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.7912144 3.6506008
## sample estimates:
## ratio of variances 
##            1.74844
t_test <- t.test(PDQ ~ Gender, data = amount_PDQ)
t_test
## 
##  Welch Two Sample t-test
## 
## data:  PDQ by Gender
## t = 1.308, df = 56.385, p-value = 0.1962
## alternative hypothesis: true difference in means between group 1 and group 2 is not equal to 0
## 95 percent confidence interval:
##  -1.727373  8.229754
## sample estimates:
## mean in group 1 mean in group 2 
##        18.54286        15.29167
amount_AQoL <- HOCUS_AUS %>% 
  select(CannabisGramsPerDay, CannabisGramsPerWeek, AQoL6D1:AQoL6D20) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  dplyr::filter(!if_all(c(AQoL6D1:AQoL6D20), is.na)) %>% 
  rowwise() %>% 
  mutate(total = sum(c_across(3:22), na.rm = TRUE))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(everything(), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
cor.test(amount_AQoL$CannabisGramsPerDay,amount_AQoL$total, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  amount_AQoL$CannabisGramsPerDay and amount_AQoL$total
## t = 0.53387, df = 57, p-value = 0.5955
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1889586  0.3208240
## sample estimates:
##        cor 
## 0.07053686
cor.test(amount_AQoL$CannabisGramsPerWeek,amount_AQoL$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  amount_AQoL$CannabisGramsPerWeek and amount_AQoL$total
## t = -0.24813, df = 56, p-value = 0.8049
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2889615  0.2271005
## sample estimates:
##         cor 
## -0.03313919
amount_ISI <- HOCUS_AUS %>% 
  select(CannabisGramsPerDay, CannabisGramsPerWeek, ISI_1:ISI_4,ISI_5:ISI_7) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  rowwise() %>% 
  mutate(across(3:9,
        .fns = ~recode(.,'1' = 0,'2' = 1,'3' = 2,'4' = 3,'5' = 4))) %>% 
  rowwise() %>% 
  mutate(total = sum(c_across(3:9), na.rm = TRUE)) %>% 
  dplyr::filter(!if_all(c(ISI_1:ISI_4,ISI_5:ISI_7), is.na)) 
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(everything(), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
cor.test(amount_ISI$CannabisGramsPerDay,amount_ISI$total, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  amount_ISI$CannabisGramsPerDay and amount_ISI$total
## t = -0.50062, df = 57, p-value = 0.6186
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3168768  0.1931921
## sample estimates:
##        cor 
## -0.0661635
cor.test(amount_ISI$CannabisGramsPerWeek,amount_ISI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  amount_ISI$CannabisGramsPerWeek and amount_ISI$total
## t = -0.73768, df = 56, p-value = 0.4638
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3475894  0.1643593
## sample estimates:
##         cor 
## -0.09810119
amount_pain <- HOCUS_AUS %>%  
  select(CannabisGramsPerDay, CannabisGramsPerWeek, BPI_Q3:BPI_Q6) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(3:6,
        .fns = ~recode(.,'1' = 0,'2' = 1,
                       '3' = 2,'4' = 3, 
                       '5' = 4,'6' = 5, 
                       '7' = 6, '8' = 7, 
                       '9' = 8, '10' = 9, 
                       '11' = 10))) %>%
  rowwise() %>% 
  mutate(total = mean(c_across(3:6), na.rm=TRUE)) %>% 
  dplyr::filter(!if_all(c(BPI_Q3:BPI_Q6), is.na))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(everything(), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
cor.test(amount_pain$CannabisGramsPerDay,amount_pain$total, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  amount_pain$CannabisGramsPerDay and amount_pain$total
## t = -0.091659, df = 56, p-value = 0.9273
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2696901  0.2468291
## sample estimates:
##         cor 
## -0.01224753
cor.test(amount_pain$CannabisGramsPerWeek,amount_pain$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  amount_pain$CannabisGramsPerWeek and amount_pain$total
## t = 0.54307, df = 55, p-value = 0.5893
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1911737  0.3273699
## sample estimates:
##        cor 
## 0.07303199
amount_interference <- HOCUS_AUS %>% 
  select(CannabisGramsPerDay, CannabisGramsPerWeek, BPI_Q9a:BPI_Q9g) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(3:9,
        .fns = ~recode(.,'1' = 0,'2' = 1,
                       '3' = 2,'4' = 3, 
                       '5' = 4,'6' = 5, 
                       '7' = 6, '8' = 7, 
                       '9' = 8, '10' = 9, 
                       '11' = 10))) %>% 
  rowwise() %>% 
  mutate(total = mean(c_across(3:9), na.rm=TRUE)) %>% 
  dplyr::filter(!if_all(c(BPI_Q9a:BPI_Q9g), is.na))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(everything(), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
cor.test(amount_interference$CannabisGramsPerDay,amount_interference$total, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  amount_interference$CannabisGramsPerDay and amount_interference$total
## t = -0.36599, df = 56, p-value = 0.7157
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.3033181  0.2121227
## sample estimates:
##         cor 
## -0.04884953
cor.test(amount_interference$CannabisGramsPerWeek,amount_interference$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  amount_interference$CannabisGramsPerWeek and amount_interference$total
## t = 0.44919, df = 55, p-value = 0.6551
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2033120  0.3160474
## sample estimates:
##        cor 
## 0.06045858
amount_BPI <- HOCUS_AUS %>%  
  select(CannabisGramsPerDay, CannabisGramsPerWeek, BPI_Q3:BPI_Q6,BPI_Q9a:BPI_Q9g) %>% 
  mutate(across(everything(), as.numeric)) %>% 
  mutate(across(3:13,
        .fns = ~recode(.,'1' = 0,'2' = 1,
                       '3' = 2,'4' = 3, 
                       '5' = 4,'6' = 5, 
                       '7' = 6, '8' = 7, 
                       '9' = 8, '10' = 9, 
                       '11' = 10))) %>%
  rowwise() %>% 
  mutate(total = mean(c_across(3:13), na.rm=TRUE)) %>% 
  dplyr::filter(!if_all(c(BPI_Q3:BPI_Q6,BPI_Q9a:BPI_Q9g), is.na))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(everything(), as.numeric)`.
## Caused by warning:
## ! NAs introduced by coercion
cor.test(amount_BPI$CannabisGramsPerDay,amount_BPI$total, method = "pearson") 
## 
##  Pearson's product-moment correlation
## 
## data:  amount_BPI$CannabisGramsPerDay and amount_BPI$total
## t = -0.28556, df = 56, p-value = 0.7763
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.2935366  0.2223538
## sample estimates:
##         cor 
## -0.03813201
cor.test(amount_BPI$CannabisGramsPerWeek,amount_BPI$total, method = "pearson")
## 
##  Pearson's product-moment correlation
## 
## data:  amount_BPI$CannabisGramsPerWeek and amount_BPI$total
## t = 0.5008, df = 55, p-value = 0.6185
## alternative hypothesis: true correlation is not equal to 0
## 95 percent confidence interval:
##  -0.1966451  0.3222845
## sample estimates:
##        cor 
## 0.06737482

#Regression Analyses

model_PDQ <- amount_PDQ %>% 
  select(CannabisGramsPerDay, DASS, PDQ)

#Checking assumptions
#Linearity
ggplot(amount_PDQ, aes(CannabisGramsPerDay, PDQ)) +
  geom_point() +
  stat_smooth(method = lm)
## `geom_smooth()` using formula = 'y ~ x'

ggplot(amount_PDQ, aes(DASS, PDQ)) +
  geom_point() +
  stat_smooth(method = lm)
## `geom_smooth()` using formula = 'y ~ x'

#Independence
model <- lm(PDQ ~ CannabisGramsPerDay, data = amount_PDQ)
dw_resut <- dwtest(model)
print(dw_resut) #p-value = .21
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 1.7917, p-value = 0.2086
## alternative hypothesis: true autocorrelation is greater than 0
model <- lm(PDQ ~ DASS, data = amount_PDQ)
dw_resut <- dwtest(model)
print(dw_resut) #p-value = .70
## 
##  Durbin-Watson test
## 
## data:  model
## DW = 2.1357, p-value = 0.6976
## alternative hypothesis: true autocorrelation is greater than 0
#Normality of residuals
plot(lm(PDQ ~ CannabisGramsPerDay + DASS, data = amount_PDQ))

model <- lm(PDQ ~ CannabisGramsPerDay, data = amount_PDQ)
model
## 
## Call:
## lm(formula = PDQ ~ CannabisGramsPerDay, data = amount_PDQ)
## 
## Coefficients:
##         (Intercept)  CannabisGramsPerDay  
##              15.424                1.041
summary(model)
## 
## Call:
## lm(formula = PDQ ~ CannabisGramsPerDay, data = amount_PDQ)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -16.466  -6.075  -1.466   4.815  28.055 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           15.425      1.416  10.890  1.5e-15 ***
## CannabisGramsPerDay    1.041      0.404   2.578   0.0126 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 9.472 on 57 degrees of freedom
## Multiple R-squared:  0.1044, Adjusted R-squared:  0.08869 
## F-statistic: 6.645 on 1 and 57 DF,  p-value: 0.01255
model <- lm(scale(PDQ) ~ scale(CannabisGramsPerDay), data=model_PDQ)
options(scipen=999)
summary(model)
## 
## Call:
## lm(formula = scale(PDQ) ~ scale(CannabisGramsPerDay), data = model_PDQ)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6595 -0.6123 -0.1477  0.4853  2.8275 
## 
## Coefficients:
##                                           Estimate              Std. Error
## (Intercept)                -0.00000000000000001256  0.12428148613004143253
## scale(CannabisGramsPerDay)  0.32311734140732306653  0.12534829956941972995
##                            t value Pr(>|t|)  
## (Intercept)                  0.000   1.0000  
## scale(CannabisGramsPerDay)   2.578   0.0126 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9546 on 57 degrees of freedom
## Multiple R-squared:  0.1044, Adjusted R-squared:  0.08869 
## F-statistic: 6.645 on 1 and 57 DF,  p-value: 0.01255
#Empty model
intercept_model <- lm(PDQ ~ 1, data = model_PDQ)
step(intercept_model)
## Start:  AIC=271.78
## PDQ ~ 1
## 
## Call:
## lm(formula = PDQ ~ 1, data = model_PDQ)
## 
## Coefficients:
## (Intercept)  
##       17.22
#Initialize model
forward_model <- lm(PDQ ~ ., data = model_PDQ)
# Forward step-wise regression
forward_model <- step(forward_model, direction = "forward", scope = formula(~ .))
## Start:  AIC=239.96
## PDQ ~ CannabisGramsPerDay + DASS
summary(forward_model)
## 
## Call:
## lm(formula = PDQ ~ CannabisGramsPerDay + DASS, data = model_PDQ)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -18.1601  -3.6017  -0.5123   2.7302  26.6906 
## 
## Coefficients:
##                     Estimate Std. Error t value   Pr(>|t|)    
## (Intercept)           8.1756     1.6433   4.975 0.00000655 ***
## CannabisGramsPerDay   0.9344     0.3184   2.934    0.00484 ** 
## DASS                  0.8167     0.1361   6.003 0.00000015 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.454 on 56 degrees of freedom
## Multiple R-squared:  0.4551, Adjusted R-squared:  0.4356 
## F-statistic: 23.38 on 2 and 56 DF,  p-value: 0.00000004147
#Obtaining coefficients
forward_model$coefficients
##         (Intercept) CannabisGramsPerDay                DASS 
##           8.1755984           0.9343683           0.8166997
#Obtaining standarised beta vales
forward_model <- lm(scale(PDQ) ~ scale(CannabisGramsPerDay) + scale(DASS), data=model_PDQ)
options(scipen=999)
summary(forward_model)
## 
## Call:
## lm(formula = scale(PDQ) ~ scale(CannabisGramsPerDay) + scale(DASS), 
##     data = model_PDQ)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -1.83024 -0.36300 -0.05163  0.27516  2.68998 
## 
## Coefficients:
##                                           Estimate              Std. Error
## (Intercept)                -0.00000000000000001424  0.09780642012210835623
## scale(CannabisGramsPerDay)  0.28992859812021815058  0.09880078849108286931
## scale(DASS)                 0.59309280906196282235  0.09880078849108286931
##                            t value   Pr(>|t|)    
## (Intercept)                  0.000    1.00000    
## scale(CannabisGramsPerDay)   2.934    0.00484 ** 
## scale(DASS)                  6.003 0.00000015 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7513 on 56 degrees of freedom
## Multiple R-squared:  0.4551, Adjusted R-squared:  0.4356 
## F-statistic: 23.38 on 2 and 56 DF,  p-value: 0.00000004147