0. Install packages and load dataset.

## Run libraries
## Packages this script depends on; any that cannot be loaded are installed
## before everything is attached.
packages <- c(
  "haven",
  "tidyverse",
  "psych",
  "ggplot2"
)
## requireNamespace() tests whether each package can actually be loaded,
## which avoids scanning every installed package via installed.packages().
installed <- vapply(
  packages, requireNamespace, logical(1),
  quietly = TRUE
)
if (!all(installed)) {
  install.packages(packages[!installed])
}
## invisible() stops the list returned by lapply() from being printed.
invisible(lapply(packages, library, character.only = TRUE))

## Import dataset
## NOTE(review): absolute, machine-specific path -- adjust before running on
## another machine.
url <- "/Users/jasonhoskin/Downloads/ICSUS_2025_State_18 to 25_CLN.sav"
if (!file.exists(url)) {
  stop("Dataset not found at: ", url, call. = FALSE)
}
df1 <- read_sav(url)

## Remove unnecessary variables
rm(installed, packages, url)

1. Run descriptive statistics, remove NAs, simplify dataset.

## Sample size and number of variables
dim(df1)
## [1] 5402  257
## Descriptive statistics for the columns age through veteran
## (selected by name rather than by position so the call does not depend on
## the column order of df1)
df1 |>
  select(age:veteran) |>
  describe()
##              vars    n mean   sd median trimmed  mad min max range   skew
## age             1 5402 4.39 1.78      4    4.23 1.48   2   9     7   0.71
## ethnic          2 5089 1.87 0.34      2    1.96 0.00   1   2     1  -2.18
## race            3 5354 1.67 1.52      1    1.25 0.00   1   7     6   2.45
## gender          4 5375 1.73 0.57      2    1.71 0.00   1   3     2   0.06
## gender1         5   59 1.00 0.00      1    1.00 0.00   1   1     0    NaN
## gender2         6 1804 1.00 0.00      1    1.00 0.00   1   1     0    NaN
## gender3         7 3318 1.00 0.00      1    1.00 0.00   1   1     0    NaN
## gender4         8   51 1.00 0.00      1    1.00 0.00   1   1     0    NaN
## gender5         9   22 1.00 0.00      1    1.00 0.00   1   1     0    NaN
## gender6        10  191 1.00 0.00      1    1.00 0.00   1   1     0    NaN
## gender7        11   28 1.00 0.00      1    1.00 0.00   1   1     0    NaN
## gender8        12   23 1.00 0.00      1    1.00 0.00   1   1     0    NaN
## gender_text*   13 5402 1.03 0.59      1    1.00 0.00   1  18    17  21.25
## stustat        14 5384 1.04 0.20      1    1.00 0.00   1   2     1   4.69
## schyear        15 5396 2.66 1.56      2    2.45 1.48   1   8     7   0.81
## residenc       16 5399 3.54 1.57      4    3.59 1.48   1   7     6  -0.08
## greekmem       17 5394 1.87 0.34      2    1.96 0.00   1   2     1  -2.14
## instate        18 5398 1.37 0.60      1    1.26 0.00   1   3     2   1.40
## athlete        19 5392 1.87 0.33      2    1.97 0.00   1   2     1  -2.23
## veteran        20 5375 1.99 0.08      2    2.00 0.00   1   2     1 -12.27
##              kurtosis   se
## age             -0.02 0.02
## ethnic           2.74 0.00
## race             4.96 0.02
## gender          -0.48 0.01
## gender1           NaN 0.00
## gender2           NaN 0.00
## gender3           NaN 0.00
## gender4           NaN 0.00
## gender5           NaN 0.00
## gender6           NaN 0.00
## gender7           NaN 0.00
## gender8           NaN 0.00
## gender_text*   488.28 0.01
## stustat         19.96 0.00
## schyear         -0.04 0.02
## residenc        -1.57 0.02
## greekmem         2.57 0.00
## instate          0.88 0.01
## athlete          2.97 0.00
## veteran        148.52 0.00
## Count the missing values in every column of df1
df1 |>
  is.na() |>
  colSums()
##           year         autoid            age         ethnic           race 
##              0              0              0            313             48 
##         gender        gender1        gender2        gender3        gender4 
##             27           5343           3598           2084           5351 
##        gender5        gender6        gender7        gender8    gender_text 
##           5380           5211           5374           5379              0 
##        stustat        schyear       residenc       greekmem        instate 
##             18              6              3              8              4 
##        athlete        veteran         smokmo        cigarmo         snufmo 
##             10             27              3             16             11 
##         hookmo         ecigmo          alcmo          marmo          cocmo 
##             11             16             64             12              9 
##        hallumo          hermo         methmo          inhmo         rxstmo 
##             10             10              9              9             11 
##         rxpkmo        rxsedmo          othmo        tobvape        marvape 
##              7              6             14           3499           3501 
##        cbdvape     synmarvape       alcovape      flavovape        othvape 
##           3506           3502           3499           3503           3555 
##      smokedmar       vapedmar         atemar       drankmar      dabbedmar 
##           3411           3410           3414           3414           3411 
##     instatemar    outstatemar      friendmar      boughtmar      familymar 
##           3452           3446           3432           3449           3448 
##      onlinemar         othmar          ALmar          AKmar          AZmar 
##           3453           3463           5401           5402           5400 
##          ARmar          CAmar          COmar          CTmar          DEmar 
##           5402           5384           5382           5401           5402 
##          FLmar          GAmar          HImar          IDmar          ILmar 
##           5394           5400           5400           5402           5185 
##          INmar          IAmar          KSmar          KYmar          LAmar 
##           5292           5401           5402           5392           5402 
##          MEmar          MDmar          MAmar          MImar          MNmar 
##           5401           5401           5396           5062           5399 
##          MSmar          MOmar          MTmar          NEmar          NVmar 
##           5402           5392           5402           5402           5399 
##          NHmar          NJmar          NMmar          NYmar          NCmar 
##           5402           5399           5402           5387           5397 
##          NDmar          OHmar          OKmar          ORmar          PAmar 
##           5402           5362           5402           5400           5401 
##          RImar          SCmar          SDmar          TNmar          TXmar 
##           5400           5401           5402           5392           5400 
##          UTmar          VTmar          VAmar          WAmar          DCmar 
##           5402           5402           5400           5399           5400 
##          WVmar          WImar          WYmar        smokage       cigarage 
##           5402           5397           5402           4229           4639 
##        snufage        hookage        ecigage         alcage         marage 
##           5063           5098           3627           1616           3428 
##         cocage       halluage         herage        methage         inhage 
##           5306           5131           5395           5385           5300 
##        rxstage        rxpkage       rxsedage         othage          binge 
##           5192           5286           5304           5307           1834 
##       numdrink        drkrhal       drkgreek          drkon         drkoff 
##           1851           1885           1884           1893           1881 
##         drkbar       drksport         drkcar         drkoth          rsexp 
##           1882           1887           1885           1951           1832 
##        rsrelax       rsfriend         rsbore        rsanger       rsgetthr 
##           1836           1833           1842           1838           1838 
##        rsprblm       cqfamwry         cqhang        cqguilt       cqfamprb 
##           1838           1832           1833           1835           1834 
##         cqwork         cqhurt       cqticket       cqarrdui          cqdui 
##           1831           1834           1833           1834           1834 
##        cqfight       cqunpsex       cqmisscl       cqblkout       cqregret 
##           1835           1839           1833           1837           1835 
##       cqauthor          cqmed      skipcqsex       cqunwsex       cqaggsex 
##           1833           1836           1837           2389           2390 
##        typbeer        typmalt        typwine       typliqor       typhialc 
##           1974           1966           1972           1965           1962 
##         typoth       carryout         online        srcfrds        srcsibs 
##           2041           1945           1947           3811           3814 
##       srcadlts        srconpt       srcgreek       srcoffpt       srcund21 
##           3816           3813           3816           3812           3814 
##       srcprnts         srcoth         retail         fakeid           noid 
##           3812           3835           3804           5317           5295 
##        otherid       noretail         buybar        buyrest        buygroc 
##           5377           5320           5170           5170           5170 
##        buydisc        buyconv        buydrug       buyliqor        buybrew 
##           5170           5172           5170           5170           5170 
##         buyoth         tkonhs        tkoffhs         tkonpt        tkoffpt 
##           5184           2856           2859           2858           2864 
##        tkathle          tkoth        normnum       normperc        ocqprop 
##           2856           2868            930            947            902 
##        ocqcare       ocqvomit       ocqinter       ocqdrive         ocqneg 
##            908            904            907            906            907 
##            car       appbinge          apprx         appmar        appvape 
##            904            908            910            909            919 
##         mhdays          mhsad        mhsuici        recovry        gmpools 
##           1086           1059           1054           1063           1133 
##         gmfant        gmvideo      gmonsport        gmothsp       gmonline 
##           1139           1130           1133           1137           1133 
##      gmesports        gmhorse        gmcards         gmlott       gmcasino 
##           1134           1137           1135           1134           1138 
##       gmcharit          gmoth       gmreason       gcqsleep         gcqhyg 
##           1135           1179           1129           3616           3616 
##        gcqfrnd        gcqfmly        gcqacad       gcqmoney         gcqbad 
##           3615           3617           3619           3618           3619 
##     gcqdepress         fundcc       fundbank       fundprnt      fundothcc 
##           3628           3617           3620           3617           3618 
##     fundstloan        fundsch      fundother         gmalco          gmmar 
##           3617           3620           3626           1191           1190 
##          gmcig         gmvape           gmrx          age21     bingetotal 
##           1204           1194           1213              0            301 
##    tobvape_all    marvape_all    cbdvape_all synmarvape_all   alcovape_all 
##             54             56             61             57             54 
##  flavovape_all    othvape_all 
##             58            110
## Keep three column ranges (autoid to gender, stustat to veteran,
## gmpools to gmoth), then drop every row that has an NA in any of them
df2 <- na.omit(
  select(df1, autoid:gender, stustat:veteran, gmpools:gmoth)
)

## Rows and columns remaining after listwise deletion
dim(df2)
## [1] 3807   25
## Frequency table for every retained variable except the autoid column.
## Excluding autoid by name keeps this in step with the select() that builds
## df2, instead of hard-coding column positions.
freq_list <- lapply(select(df2, -autoid), table, useNA = "ifany")

freq_list
## $age
## 
##   2   3   4   5   6   7   8   9 
## 479 893 747 740 467 200 158 123 
## 
## $ethnic
## 
##    1    2 
##  458 3349 
## 
## $race
## 
##    1    2    3    4    5    6    7 
## 3008  176  322    9    6  176  110 
## 
## $gender
## 
##    1    2    3 
## 1166 2360  281 
## 
## $stustat
## 
##    1    2 
## 3656  151 
## 
## $schyear
## 
##    1    2    3    4    5    6    7    8 
## 1098  856  733  662  103  337    5   13 
## 
## $residenc
## 
##    1    2    3    4    5    6    7 
##  226 1398  188  112 1826    7   50 
## 
## $greekmem
## 
##    1    2 
##  472 3335 
## 
## $instate
## 
##    1    2    3 
## 2677  924  206 
## 
## $athlete
## 
##    1    2 
##  461 3346 
## 
## $veteran
## 
##    1    2 
##   24 3783 
## 
## $gmpools
## 
##    1    2    3    4 
## 3471  245   68   23 
## 
## $gmfant
## 
##    1    2    3    4 
## 3589  140   48   30 
## 
## $gmvideo
## 
##    1    2    3    4 
## 3376  303   95   33 
## 
## $gmonsport
## 
##    1    2    3    4 
## 3500  152   95   60 
## 
## $gmothsp
## 
##    1    2    3    4 
## 3695   53   31   28 
## 
## $gmonline
## 
##    1    2    3    4 
## 3633  114   41   19 
## 
## $gmesports
## 
##    1    2    3    4 
## 3735   34   19   19 
## 
## $gmhorse
## 
##    1    2    3    4 
## 3733   62    5    7 
## 
## $gmcards
## 
##    1    2    3    4 
## 3463  238   82   24 
## 
## $gmlott
## 
##    1    2    3    4 
## 2922  777   90   18 
## 
## $gmcasino
## 
##    1    2    3    4 
## 3504  273   24    6 
## 
## $gmcharit
## 
##    1    2    3    4 
## 3413  357   30    7 
## 
## $gmoth
## 
##    1    2    3    4 
## 3726   55   14   12