0. Install packages and load dataset.
## Load the required packages, installing any that are not yet available
packages <- c(
  "haven",
  "tidyverse",
  "psych",
  "ggplot2"
)
## requireNamespace() tests each package directly; this avoids building the
## full installed.packages() matrix, which the R documentation warns can be
## slow. The result is a named logical vector (TRUE = available).
installed <- vapply(packages, requireNamespace, logical(1), quietly = TRUE)
if (!all(installed)) {
  install.packages(packages[!installed])
}
## invisible() keeps lapply() from printing its list of return values
invisible(lapply(packages, library, character.only = TRUE))
## Import dataset
## The path defaults to the original local file. Set the ICSUS_DATA_PATH
## environment variable to point at the .sav file on another machine.
url <- Sys.getenv(
  "ICSUS_DATA_PATH",
  unset = "/Users/jasonhoskin/Downloads/ICSUS_2025_State_18 to 25_CLN.sav"
)
## Fail early with the resolved path in the message if the file is missing
if (!file.exists(url)) {
  stop("Dataset not found at: ", url, call. = FALSE)
}
df1 <- read_sav(url)
## Remove the temporary setup objects from the workspace
rm(list = c("installed", "packages", "url"))
1. Run descriptive statistics, remove NAs, simplify dataset.
## Number of rows (sample size) followed by number of columns (variables)
c(nrow(df1), ncol(df1))
## [1] 5402 257
## Descriptive statistics (n, mean, sd, median, ...) for the columns from
## age through veteran. Columns are selected by name rather than by position
## so the call keeps working if the column order of df1 changes.
## NOTE(review): several of these look like coded categories (e.g. race,
## gender), for which mean/sd are not meaningful -- confirm before reporting.
df1 |>
  select(age:veteran) |>
  describe()
## vars n mean sd median trimmed mad min max range skew
## age 1 5402 4.39 1.78 4 4.23 1.48 2 9 7 0.71
## ethnic 2 5089 1.87 0.34 2 1.96 0.00 1 2 1 -2.18
## race 3 5354 1.67 1.52 1 1.25 0.00 1 7 6 2.45
## gender 4 5375 1.73 0.57 2 1.71 0.00 1 3 2 0.06
## gender1 5 59 1.00 0.00 1 1.00 0.00 1 1 0 NaN
## gender2 6 1804 1.00 0.00 1 1.00 0.00 1 1 0 NaN
## gender3 7 3318 1.00 0.00 1 1.00 0.00 1 1 0 NaN
## gender4 8 51 1.00 0.00 1 1.00 0.00 1 1 0 NaN
## gender5 9 22 1.00 0.00 1 1.00 0.00 1 1 0 NaN
## gender6 10 191 1.00 0.00 1 1.00 0.00 1 1 0 NaN
## gender7 11 28 1.00 0.00 1 1.00 0.00 1 1 0 NaN
## gender8 12 23 1.00 0.00 1 1.00 0.00 1 1 0 NaN
## gender_text* 13 5402 1.03 0.59 1 1.00 0.00 1 18 17 21.25
## stustat 14 5384 1.04 0.20 1 1.00 0.00 1 2 1 4.69
## schyear 15 5396 2.66 1.56 2 2.45 1.48 1 8 7 0.81
## residenc 16 5399 3.54 1.57 4 3.59 1.48 1 7 6 -0.08
## greekmem 17 5394 1.87 0.34 2 1.96 0.00 1 2 1 -2.14
## instate 18 5398 1.37 0.60 1 1.26 0.00 1 3 2 1.40
## athlete 19 5392 1.87 0.33 2 1.97 0.00 1 2 1 -2.23
## veteran 20 5375 1.99 0.08 2 2.00 0.00 1 2 1 -12.27
## kurtosis se
## age -0.02 0.02
## ethnic 2.74 0.00
## race 4.96 0.02
## gender -0.48 0.01
## gender1 NaN 0.00
## gender2 NaN 0.00
## gender3 NaN 0.00
## gender4 NaN 0.00
## gender5 NaN 0.00
## gender6 NaN 0.00
## gender7 NaN 0.00
## gender8 NaN 0.00
## gender_text* 488.28 0.01
## stustat 19.96 0.00
## schyear -0.04 0.02
## residenc -1.57 0.02
## greekmem 2.57 0.00
## instate 0.88 0.01
## athlete 2.97 0.00
## veteran 148.52 0.00
## Count the missing values (NA) in each column of df1
vapply(df1, function(column) sum(is.na(column)), numeric(1))
## year autoid age ethnic race
## 0 0 0 313 48
## gender gender1 gender2 gender3 gender4
## 27 5343 3598 2084 5351
## gender5 gender6 gender7 gender8 gender_text
## 5380 5211 5374 5379 0
## stustat schyear residenc greekmem instate
## 18 6 3 8 4
## athlete veteran smokmo cigarmo snufmo
## 10 27 3 16 11
## hookmo ecigmo alcmo marmo cocmo
## 11 16 64 12 9
## hallumo hermo methmo inhmo rxstmo
## 10 10 9 9 11
## rxpkmo rxsedmo othmo tobvape marvape
## 7 6 14 3499 3501
## cbdvape synmarvape alcovape flavovape othvape
## 3506 3502 3499 3503 3555
## smokedmar vapedmar atemar drankmar dabbedmar
## 3411 3410 3414 3414 3411
## instatemar outstatemar friendmar boughtmar familymar
## 3452 3446 3432 3449 3448
## onlinemar othmar ALmar AKmar AZmar
## 3453 3463 5401 5402 5400
## ARmar CAmar COmar CTmar DEmar
## 5402 5384 5382 5401 5402
## FLmar GAmar HImar IDmar ILmar
## 5394 5400 5400 5402 5185
## INmar IAmar KSmar KYmar LAmar
## 5292 5401 5402 5392 5402
## MEmar MDmar MAmar MImar MNmar
## 5401 5401 5396 5062 5399
## MSmar MOmar MTmar NEmar NVmar
## 5402 5392 5402 5402 5399
## NHmar NJmar NMmar NYmar NCmar
## 5402 5399 5402 5387 5397
## NDmar OHmar OKmar ORmar PAmar
## 5402 5362 5402 5400 5401
## RImar SCmar SDmar TNmar TXmar
## 5400 5401 5402 5392 5400
## UTmar VTmar VAmar WAmar DCmar
## 5402 5402 5400 5399 5400
## WVmar WImar WYmar smokage cigarage
## 5402 5397 5402 4229 4639
## snufage hookage ecigage alcage marage
## 5063 5098 3627 1616 3428
## cocage halluage herage methage inhage
## 5306 5131 5395 5385 5300
## rxstage rxpkage rxsedage othage binge
## 5192 5286 5304 5307 1834
## numdrink drkrhal drkgreek drkon drkoff
## 1851 1885 1884 1893 1881
## drkbar drksport drkcar drkoth rsexp
## 1882 1887 1885 1951 1832
## rsrelax rsfriend rsbore rsanger rsgetthr
## 1836 1833 1842 1838 1838
## rsprblm cqfamwry cqhang cqguilt cqfamprb
## 1838 1832 1833 1835 1834
## cqwork cqhurt cqticket cqarrdui cqdui
## 1831 1834 1833 1834 1834
## cqfight cqunpsex cqmisscl cqblkout cqregret
## 1835 1839 1833 1837 1835
## cqauthor cqmed skipcqsex cqunwsex cqaggsex
## 1833 1836 1837 2389 2390
## typbeer typmalt typwine typliqor typhialc
## 1974 1966 1972 1965 1962
## typoth carryout online srcfrds srcsibs
## 2041 1945 1947 3811 3814
## srcadlts srconpt srcgreek srcoffpt srcund21
## 3816 3813 3816 3812 3814
## srcprnts srcoth retail fakeid noid
## 3812 3835 3804 5317 5295
## otherid noretail buybar buyrest buygroc
## 5377 5320 5170 5170 5170
## buydisc buyconv buydrug buyliqor buybrew
## 5170 5172 5170 5170 5170
## buyoth tkonhs tkoffhs tkonpt tkoffpt
## 5184 2856 2859 2858 2864
## tkathle tkoth normnum normperc ocqprop
## 2856 2868 930 947 902
## ocqcare ocqvomit ocqinter ocqdrive ocqneg
## 908 904 907 906 907
## car appbinge apprx appmar appvape
## 904 908 910 909 919
## mhdays mhsad mhsuici recovry gmpools
## 1086 1059 1054 1063 1133
## gmfant gmvideo gmonsport gmothsp gmonline
## 1139 1130 1133 1137 1133
## gmesports gmhorse gmcards gmlott gmcasino
## 1134 1137 1135 1134 1138
## gmcharit gmoth gmreason gcqsleep gcqhyg
## 1135 1179 1129 3616 3616
## gcqfrnd gcqfmly gcqacad gcqmoney gcqbad
## 3615 3617 3619 3618 3619
## gcqdepress fundcc fundbank fundprnt fundothcc
## 3628 3617 3620 3617 3618
## fundstloan fundsch fundother gmalco gmmar
## 3617 3620 3626 1191 1190
## gmcig gmvape gmrx age21 bingetotal
## 1204 1194 1213 0 301
## tobvape_all marvape_all cbdvape_all synmarvape_all alcovape_all
## 54 56 61 57 54
## flavovape_all othvape_all
## 58 110
## Keep three name ranges of columns (autoid-gender, stustat-veteran,
## gmpools-gmoth), then drop every row that has an NA in any kept column
## (listwise deletion)
df2 <- df1 |>
  select(autoid:gender, stustat:veteran, gmpools:gmoth) |>
  na.omit()
## Rows and columns remaining after the reduction
dim(df2)
## [1] 3807 25
## Frequency table for every column of df2 except autoid. The column is
## dropped by name rather than by position so this stays correct if the set
## of selected columns changes. useNA = "ifany" adds an NA count only when
## a column actually contains missing values.
freq_list <- df2 |>
  select(!autoid) |>
  lapply(table, useNA = "ifany")
freq_list
## $age
##
## 2 3 4 5 6 7 8 9
## 479 893 747 740 467 200 158 123
##
## $ethnic
##
## 1 2
## 458 3349
##
## $race
##
## 1 2 3 4 5 6 7
## 3008 176 322 9 6 176 110
##
## $gender
##
## 1 2 3
## 1166 2360 281
##
## $stustat
##
## 1 2
## 3656 151
##
## $schyear
##
## 1 2 3 4 5 6 7 8
## 1098 856 733 662 103 337 5 13
##
## $residenc
##
## 1 2 3 4 5 6 7
## 226 1398 188 112 1826 7 50
##
## $greekmem
##
## 1 2
## 472 3335
##
## $instate
##
## 1 2 3
## 2677 924 206
##
## $athlete
##
## 1 2
## 461 3346
##
## $veteran
##
## 1 2
## 24 3783
##
## $gmpools
##
## 1 2 3 4
## 3471 245 68 23
##
## $gmfant
##
## 1 2 3 4
## 3589 140 48 30
##
## $gmvideo
##
## 1 2 3 4
## 3376 303 95 33
##
## $gmonsport
##
## 1 2 3 4
## 3500 152 95 60
##
## $gmothsp
##
## 1 2 3 4
## 3695 53 31 28
##
## $gmonline
##
## 1 2 3 4
## 3633 114 41 19
##
## $gmesports
##
## 1 2 3 4
## 3735 34 19 19
##
## $gmhorse
##
## 1 2 3 4
## 3733 62 5 7
##
## $gmcards
##
## 1 2 3 4
## 3463 238 82 24
##
## $gmlott
##
## 1 2 3 4
## 2922 777 90 18
##
## $gmcasino
##
## 1 2 3 4
## 3504 273 24 6
##
## $gmcharit
##
## 1 2 3 4
## 3413 357 30 7
##
## $gmoth
##
## 1 2 3 4
## 3726 55 14 12